commit - 81a7ff41a7bd5b30c98404719a4dada24651f7e1
commit + 75a8a1ecac7f6cd7162686de382f16ba0d1f21f2
blob - fab17364d6a92dd526452fa24f03a65476350584
blob + 1900f6e86054d439679f189eb54ca84865cbd42b
--- Makefile.am
+++ Makefile.am
pages.c \
pages.h \
parse.y \
- parser.c \
parser.h \
- parser_gemtext.c \
- parser_gophermap.c \
- parser_textpatch.c \
- parser_textplain.c \
+ parser/parser.c \
+ parser/parser_gemtext.c \
+ parser/parser_gophermap.c \
+ parser/parser_textpatch.c \
+ parser/parser_textplain.c \
sandbox.c \
session.c \
session.h \
blob - a44cdba406d715fdfc5e068f0848918ce48fb8d7 (mode 644)
blob + /dev/null
--- parser.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "parser.h"
-#include "telescope.h"
-
-void
-parser_init(struct tab *tab, parserfn fn)
-{
- erase_buffer(&tab->buffer);
- fn(&tab->buffer.page);
- tab->buffer.page.init = fn;
-}
-
-int
-parser_parse(struct tab *tab, const char *chunk, size_t len)
-{
- return tab->buffer.page.parse(&tab->buffer.page, chunk, len);
-}
-
-int
-parser_free(struct tab *tab)
-{
- int r;
- char *tilde, *slash;
-
- r = tab->buffer.page.free(&tab->buffer.page);
-
- if (*tab->buffer.page.title != '\0')
- return r;
-
- /*
- * heuristic: see if there is a "tilde user" and use that as
- * page title, using the full domain name as fallback.
- */
- if ((tilde = strstr(tab->hist_cur->h, "/~")) != NULL) {
- strlcpy(tab->buffer.page.title, tilde+1,
- sizeof(tab->buffer.page.title));
-
- if ((slash = strchr(tab->buffer.page.title, '/')) != NULL)
- *slash = '\0';
- } else
- strlcpy(tab->buffer.page.title, tab->uri.host,
- sizeof(tab->buffer.page.title));
-
- return r;
-}
-
-int
-parser_serialize(struct tab *tab, struct evbuffer *evb)
-{
- struct line *line;
- const char *text;
- int r;
-
- if (tab->buffer.page.serialize != NULL)
- return tab->buffer.page.serialize(&tab->buffer.page, evb);
-
- /* a default implementation good enough for plain text */
- TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
- if ((text = line->line) == NULL)
- text = "";
-
- r = evbuffer_add_printf(evb, "%s\n", text);
- if (r == -1)
- return 0;
- }
-
- return 1;
-}
-
-int
-parser_append(struct parser *p, const char *buf, size_t len)
-{
- size_t newlen;
- char *t;
-
- newlen = len + p->len;
- if ((t = calloc(1, newlen)) == NULL)
- return 0;
- memcpy(t, p->buf, p->len);
- memcpy(t + p->len, buf, len);
- free(p->buf);
- p->buf = t;
- p->len = newlen;
- return 1;
-}
-
-int
-parser_set_buf(struct parser *p, const char *buf, size_t len)
-{
- char *tmp;
-
- if (len == 0) {
- p->len = 0;
- free(p->buf);
- p->buf = NULL;
- return 1;
- }
-
- /*
- * p->buf and buf can (and probably almost always will)
- * overlap!
- */
-
- if ((tmp = calloc(1, len)) == NULL)
- return 0;
- memcpy(tmp, buf, len);
- free(p->buf);
- p->buf = tmp;
- p->len = len;
- return 1;
-}
-
-int
-parser_foreach_line(struct parser *p, const char *buf, size_t size,
- parsechunkfn fn)
-{
- char *b, *e;
- unsigned int ch;
- size_t i, l, len;
-
- if (!parser_append(p, buf, size))
- return 0;
- b = p->buf;
- len = p->len;
-
- if (!(p->flags & PARSER_IN_BODY) && len < 3)
- return 1;
-
- if (!(p->flags & PARSER_IN_BODY)) {
- p->flags |= PARSER_IN_BODY;
-
- /*
- * drop the BOM: only UTF-8 is supported, and there
- * it's useless; some editors may still add one
- * though.
- */
- if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
- b += 3;
- len -= 3;
- }
- }
-
- /* drop every "funny" ASCII character */
- for (i = 0; i < len; ) {
- ch = b[i];
- if ((ch >= ' ' || ch == '\n' || ch == '\t')
- && ch != 127) { /* del */
- ++i;
- continue;
- }
- memmove(&b[i], &b[i+1], len - i - 1);
- len--;
- }
-
- while (len > 0) {
- if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
- break;
- l = e - b;
-
- if (!fn(p, b, l))
- return 0;
-
- len -= l;
- b += l;
-
- if (len > 0) {
- /* skip \n */
- len--;
- b++;
- }
- }
-
- return parser_set_buf(p, b, len);
-}
blob - /dev/null
blob + a44cdba406d715fdfc5e068f0848918ce48fb8d7 (mode 644)
--- /dev/null
+++ parser/parser.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "parser.h"
+#include "telescope.h"
+
+/*
+ * Reset the tab's buffer and set up its page parser by running `fn'
+ * on it.  `fn' is recorded as the page's init function only after it
+ * has run.
+ */
+void
+parser_init(struct tab *tab, parserfn fn)
+{
+	struct parser	*page = &tab->buffer.page;
+
+	erase_buffer(&tab->buffer);
+	fn(page);
+	page->init = fn;
+}
+
+/*
+ * Hand a chunk of the response body to the page's parse callback and
+ * return whatever it returns.
+ */
+int
+parser_parse(struct tab *tab, const char *chunk, size_t len)
+{
+	struct parser	*page = &tab->buffer.page;
+
+	return page->parse(page, chunk, len);
+}
+
+/*
+ * Run the page's free callback and make sure the page ends up with a
+ * title.  Returns the value of the free callback.
+ *
+ * When the parser left the title empty, fall back on a heuristic: if
+ * the current history entry contains a "/~user" component use
+ * "~user", otherwise use the host name of the tab's URI.
+ */
+int
+parser_free(struct tab *tab)
+{
+	struct parser	*page = &tab->buffer.page;
+	char		*user, *end;
+	int		 ret;
+
+	ret = page->free(page);
+
+	if (page->title[0] == '\0') {
+		user = strstr(tab->hist_cur->h, "/~");
+		if (user == NULL) {
+			strlcpy(page->title, tab->uri.host,
+			    sizeof(page->title));
+		} else {
+			/* skip the leading slash, keep the tilde */
+			strlcpy(page->title, user + 1,
+			    sizeof(page->title));
+
+			/* cut everything after the user name */
+			end = strchr(page->title, '/');
+			if (end != NULL)
+				*end = '\0';
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Write the page back to `evb'.  Uses the page's serialize callback
+ * when there is one; otherwise every line is written followed by a
+ * newline, which is good enough for plain text.
+ *
+ * Without a callback, returns 1 on success and 0 as soon as
+ * evbuffer_add_printf reports -1.
+ */
+int
+parser_serialize(struct tab *tab, struct evbuffer *evb)
+{
+	struct parser	*page = &tab->buffer.page;
+	struct line	*l;
+
+	if (page->serialize != NULL)
+		return page->serialize(page, evb);
+
+	TAILQ_FOREACH(l, &page->head, lines) {
+		/* lines without text are written as empty lines */
+		if (evbuffer_add_printf(evb, "%s\n",
+		    l->line != NULL ? l->line : "") == -1)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Append `len' bytes from `buf' to the parser's pending buffer.
+ *
+ * Returns 1 on success and 0 on failure (allocation failure or size
+ * overflow); on failure the pending buffer is left untouched.
+ */
+int
+parser_append(struct parser *p, const char *buf, size_t len)
+{
+	size_t	 newlen;
+	char	*t;
+
+	/*
+	 * Nothing to add.  Bail out early so that we never depend on
+	 * what calloc(1, 0) returns, which may legitimately be NULL.
+	 */
+	if (len == 0)
+		return 1;
+
+	newlen = p->len + len;
+	if (newlen < len)	/* size_t wrapped around */
+		return 0;
+
+	if ((t = calloc(1, newlen)) == NULL)
+		return 0;
+
+	/* p->buf is NULL while nothing is pending: don't memcpy from it */
+	if (p->len != 0)
+		memcpy(t, p->buf, p->len);
+	memcpy(t + p->len, buf, len);
+
+	free(p->buf);
+	p->buf = t;
+	p->len = newlen;
+	return 1;
+}
+
+/*
+ * Replace the parser's pending buffer with a private copy of the
+ * `len' bytes at `buf'.  A length of zero simply drops the pending
+ * buffer.
+ *
+ * Returns 1 on success, 0 if the copy can't be allocated (the old
+ * buffer is kept in that case).
+ */
+int
+parser_set_buf(struct parser *p, const char *buf, size_t len)
+{
+	char	*copy = NULL;
+
+	if (len != 0) {
+		/*
+		 * Copy before freeing: buf can (and probably almost
+		 * always will) point inside p->buf.
+		 */
+		if ((copy = calloc(1, len)) == NULL)
+			return 0;
+		memcpy(copy, buf, len);
+	}
+
+	free(p->buf);
+	p->buf = copy;
+	p->len = len;
+	return 1;
+}
+
+/*
+ * Append `buf' to the pending data, then call `fn' once for every
+ * complete line (newline excluded) found in it.  Whatever follows the
+ * last newline is kept as pending data for the next call.
+ *
+ * Before splitting:
+ *  - on the first call that has at least three bytes available, a
+ *    leading UTF-8 BOM is dropped;
+ *  - every control character other than '\n' and '\t', and DEL, is
+ *    removed.
+ *
+ * Returns 0 if appending fails, if `fn' returns 0, or if the leftover
+ * can't be saved; 1 otherwise.
+ */
+int
+parser_foreach_line(struct parser *p, const char *buf, size_t size,
+    parsechunkfn fn)
+{
+	char		*b, *e;
+	unsigned char	 ch;
+	size_t		 i, kept, l, len;
+
+	if (!parser_append(p, buf, size))
+		return 0;
+	b = p->buf;
+	len = p->len;
+
+	if (!(p->flags & PARSER_IN_BODY)) {
+		/* wait until there's enough data to look for a BOM */
+		if (len < 3)
+			return 1;
+
+		p->flags |= PARSER_IN_BODY;
+
+		/*
+		 * drop the BOM: only UTF-8 is supported, and there
+		 * it's useless; some editors may still add one
+		 * though.  Only the first three bytes matter, so
+		 * there's no need to search the whole buffer.
+		 */
+		if (memcmp(b, "\xEF\xBB\xBF", 3) == 0) {
+			b += 3;
+			len -= 3;
+		}
+	}
+
+	/*
+	 * drop every "funny" ASCII character, compacting the buffer
+	 * in place in a single pass.
+	 */
+	for (i = kept = 0; i < len; i++) {
+		ch = (unsigned char)b[i];
+		if ((ch >= ' ' || ch == '\n' || ch == '\t')
+		    && ch != 127)	/* del */
+			b[kept++] = b[i];
+	}
+	len = kept;
+
+	while (len > 0) {
+		if ((e = memchr(b, '\n', len)) == NULL)
+			break;
+		l = e - b;
+
+		if (!fn(p, b, l))
+			return 0;
+
+		/* step over the line and its newline */
+		b += l + 1;
+		len -= l + 1;
+	}
+
+	return parser_set_buf(p, b, len);
+}
blob - /dev/null
blob + e23b368b5089ed079d433d6d5f8803e3f2480373 (mode 644)
--- /dev/null
+++ parser/parser_gemtext.c
+/*
+ * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming gemtext parser.
+ *
+ * TODO:
+ * - handle NULs
+ * - UTF8
+ */
+
+#include "compat.h"
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "defaults.h"
+#include "parser.h"
+#include "utf8.h"
+
+static int gemtext_parse(struct parser *, const char *, size_t);
+static int gemtext_foreach_line(struct parser *, const char *, size_t);
+static int gemtext_free(struct parser *);
+static int gemtext_serialize(struct parser *, struct evbuffer *);
+
+static int parse_text(struct parser*, enum line_type, const char*, size_t);
+static int parse_link(struct parser*, enum line_type, const char*, size_t);
+static int parse_title(struct parser*, enum line_type, const char*, size_t);
+static int parse_item(struct parser*, enum line_type, const char*, size_t);
+static int parse_quote(struct parser*, enum line_type, const char*, size_t);
+static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
+static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
+static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
+static void search_title(struct parser*, enum line_type);
+
+/* Signature shared by all the per-line-type handlers declared above. */
+typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
+
+/*
+ * Line handlers indexed by line type; gemtext_foreach_line and
+ * gemtext_free dispatch through this table with the type returned by
+ * detect_line_type.
+ */
+static parselinefn *parsers[] = {
+ [LINE_TEXT] = parse_text,
+ [LINE_LINK] = parse_link,
+ [LINE_TITLE_1] = parse_title,
+ [LINE_TITLE_2] = parse_title,
+ [LINE_TITLE_3] = parse_title,
+ [LINE_ITEM] = parse_item,
+ [LINE_QUOTE] = parse_quote,
+ [LINE_PRE_START] = parse_pre_start,
+ [LINE_PRE_CONTENT] = parse_pre_cnt,
+ [LINE_PRE_END] = parse_pre_end,
+};
+
+/*
+ * Turn `p' into a pristine text/gemini parser: everything is zeroed,
+ * the line list is emptied and the gemtext callbacks are installed.
+ */
+void
+gemtext_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "text/gemini";
+	p->serialize = &gemtext_serialize;
+	p->free = &gemtext_free;
+	p->parse = &gemtext_parse;
+}
+
+/*
+ * Append a line of the given type to the page.
+ *
+ * `line' and `alt' must be heap-allocated (or NULL): they are stored
+ * in the new line as-is.  If the line can't be allocated they are
+ * freed here, since every caller in this file hands them over and
+ * forgets about them.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, enum line_type type, char *line, char *alt)
+{
+	struct line *l;
+
+	if ((l = calloc(1, sizeof(*l))) == NULL) {
+		/* don't leak the strings we were given */
+		free(line);
+		free(alt);
+		return 0;
+	}
+
+	l->type = type;
+	l->line = line;
+	l->alt = alt;
+
+	switch (l->type) {
+	case LINE_PRE_START:
+	case LINE_PRE_END:
+		/* the fences follow the hide_pre_* settings */
+		if (hide_pre_context)
+			l->flags = L_HIDDEN;
+		if (l->type == LINE_PRE_END &&
+		    hide_pre_closing_line)
+			l->flags = L_HIDDEN;
+		break;
+	case LINE_PRE_CONTENT:
+		if (hide_pre_blocks)
+			l->flags = L_HIDDEN;
+		break;
+	case LINE_LINK:
+		/* l->data is reset when emojied_line reports failure */
+		if (emojify_link &&
+		    !emojied_line(line, (const char **)&l->data))
+			l->data = NULL;
+		break;
+	default:
+		break;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+
+	return 1;
+}
+
+/*
+ * Emit the line verbatim as a line of type `t'.
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char	*copy;
+
+	/* one extra zeroed byte acts as NUL terminator */
+	copy = calloc(1, len + 1);
+	if (copy == NULL)
+		return 0;
+
+	memcpy(copy, buf, len);
+	return emit_line(p, t, copy, NULL);
+}
+
+/*
+ * Parse a link line: "=>", optional blanks, the URL, then optionally
+ * blanks followed by a label.
+ *
+ * The URL is stored as the line's `alt' and the label as its text;
+ * when there is no label the URL doubles as text.  A link line
+ * without URL is emitted as an empty LINE_TEXT.
+ *
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char		*l, *u;
+	const char	*url_start;
+	size_t		 url_len;
+
+	if (len <= 2)
+		return emit_line(p, LINE_TEXT, NULL, NULL);
+
+	/* skip the "=>" */
+	buf += 2;
+	len -= 2;
+
+	/*
+	 * The <ctype.h> functions need a value representable as
+	 * unsigned char: a plain (possibly signed) char holding a
+	 * UTF-8 byte would be undefined behaviour.
+	 */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, LINE_TEXT, NULL, NULL);
+
+	url_start = buf;
+	while (len > 0 && !isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+	url_len = buf - url_start;
+
+	if ((u = calloc(1, url_len + 1)) == NULL)
+		return 0;
+	memcpy(u, url_start, url_len);
+
+	/* blanks between the URL and the label */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0) {
+		/* no label: show the URL itself */
+		l = strdup(u);
+	} else {
+		if ((l = calloc(1, len + 1)) != NULL)
+			memcpy(l, buf, len);
+	}
+
+	if (l == NULL) {
+		/* nobody else knows about the URL yet */
+		free(u);
+		return 0;
+	}
+
+	return emit_line(p, t, l, u);
+}
+
+/*
+ * Parse a heading: strip the one to three '#' implied by `t' and the
+ * blanks that follow, then emit the rest as a line of type `t'.  A
+ * heading without text is emitted with no text.
+ *
+ * The first non-empty level 1 heading also becomes the page title,
+ * truncated to fit.
+ *
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char	*l;
+	size_t	 skip, n;
+
+	switch (t) {
+	case LINE_TITLE_1:
+		skip = 1;
+		break;
+	case LINE_TITLE_2:
+		skip = 2;
+		break;
+	case LINE_TITLE_3:
+		skip = 3;
+		break;
+	default:
+		/* unreachable */
+		abort();
+	}
+
+	if (len <= skip)
+		return emit_line(p, t, NULL, NULL);
+	buf += skip;
+	len -= skip;
+
+	/* isspace needs an unsigned char value, see ctype(3) */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if (t == LINE_TITLE_1 && *p->title == '\0') {
+		/* terminate explicitly rather than rely on zeroed storage */
+		n = MIN(sizeof(p->title)-1, len);
+		memcpy(p->title, buf, n);
+		p->title[n] = '\0';
+	}
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Parse a list item: drop the leading '*' and the blanks after it and
+ * emit the rest as a line of type `t'.  An item without text is
+ * emitted with no text.
+ *
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	/* also covers len == 0, which would underflow below */
+	if (len <= 1)
+		return emit_line(p, t, NULL, NULL);
+
+	buf++;
+	len--;
+
+	/* isspace needs an unsigned char value, see ctype(3) */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Parse a quote line: drop the leading '>' and the blanks after it
+ * and emit the rest as a line of type `t'.  A quote without text is
+ * emitted with no text.
+ *
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	/* also covers len == 0, which would underflow below */
+	if (len <= 1)
+		return emit_line(p, t, NULL, NULL);
+
+	buf++;
+	len--;
+
+	/* isspace needs an unsigned char value, see ctype(3) */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Parse the opening fence of a preformatted block: drop the three
+ * backticks and the blanks after them and emit what is left as the
+ * text of a line of type `t'.  A bare fence is emitted with no text.
+ *
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char *l;
+
+	if (len <= 3)
+		return emit_line(p, t, NULL, NULL);
+
+	buf += 3;
+	len -= 3;
+
+	/* isspace needs an unsigned char value, see ctype(3) */
+	while (len > 0 && isspace((unsigned char)buf[0])) {
+		buf++;
+		len--;
+	}
+
+	if (len == 0)
+		return emit_line(p, t, NULL, NULL);
+
+	if ((l = calloc(1, len+1)) == NULL)
+		return 0;
+
+	memcpy(l, buf, len);
+	return emit_line(p, t, l, NULL);
+}
+
+/*
+ * Emit a line found inside a preformatted block untouched; an empty
+ * line is emitted with no text.
+ * Returns 0 on allocation failure, 1 otherwise.
+ */
+static int
+parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	char	*copy = NULL;
+
+	if (len != 0) {
+		copy = calloc(1, len + 1);
+		if (copy == NULL)
+			return 0;
+		memcpy(copy, buf, len);
+	}
+
+	return emit_line(p, t, copy, NULL);
+}
+
+/*
+ * Emit the closing fence of a preformatted block.  Whatever is on the
+ * line is ignored: the emitted line carries no text.
+ */
+static int
+parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
+{
+	(void)buf;
+	(void)len;
+
+	return emit_line(p, t, NULL, NULL);
+}
+
+/*
+ * Classify a gemtext line from its first bytes.
+ *
+ * `buf' holds `len' bytes and is not required to be NUL-terminated,
+ * so every access must be guarded by a length check.  `in_pre' is
+ * non-zero while inside a preformatted block, where the only thing
+ * recognized is the closing fence.
+ */
+static inline enum line_type
+detect_line_type(const char *buf, size_t len, int in_pre)
+{
+	int fence;
+
+	fence = len >= 3 &&
+	    buf[0] == '`' && buf[1] == '`' && buf[2] == '`';
+
+	if (in_pre)
+		return fence ? LINE_PRE_END : LINE_PRE_CONTENT;
+
+	if (len == 0)
+		return LINE_TEXT;
+
+	switch (*buf) {
+	case '*':
+		if (len > 1 && buf[1] == ' ')
+			return LINE_ITEM;
+		break;
+	case '>':
+		return LINE_QUOTE;
+	case '=':
+		/* len > 1: buf[1] doesn't exist on a one-byte line */
+		if (len > 1 && buf[1] == '>')
+			return LINE_LINK;
+		break;
+	case '#':
+		/* a line made only of one to three '#' is plain text */
+		if (len == 1)
+			return LINE_TEXT;
+		if (buf[1] != '#')
+			return LINE_TITLE_1;
+		if (len == 2)
+			return LINE_TEXT;
+		if (buf[2] != '#')
+			return LINE_TITLE_2;
+		if (len == 3)
+			return LINE_TEXT;
+		return LINE_TITLE_3;
+	case '`':
+		if (fence)
+			return LINE_PRE_START;
+		break;
+	}
+
+	return LINE_TEXT;
+}
+
+/*
+ * Parse callback: split the incoming chunk into lines and run
+ * gemtext_foreach_line on each of them.
+ */
+static int
+gemtext_parse(struct parser *p, const char *buf, size_t size)
+{
+	int	ok;
+
+	ok = parser_foreach_line(p, buf, size, gemtext_foreach_line);
+	return ok;
+}
+
+/*
+ * Classify one line, keep track of whether we're inside a
+ * preformatted block, and dispatch to the handler for its type.
+ */
+static int
+gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	enum line_type	type;
+
+	type = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
+
+	/* both fences flip the "inside a pre block" state */
+	if (type == LINE_PRE_START || type == LINE_PRE_END)
+		p->flags ^= PARSER_IN_PRE;
+
+	return parsers[type](p, type, line, linelen);
+}
+
+/*
+ * Free callback: flush the pending data (a last line without
+ * trailing newline), release the pending buffer and pick a page title
+ * if none was found while parsing.
+ *
+ * Returns 1 on success, 0 if a line couldn't be emitted.
+ */
+static int
+gemtext_free(struct parser *p)
+{
+	int ok = 1;
+
+	/*
+	 * flush the buffer.  Go through gemtext_foreach_line so that
+	 * PARSER_IN_PRE is updated for this line too: otherwise a
+	 * closing fence on the last line would be followed by a
+	 * second, spurious, LINE_PRE_END.
+	 */
+	if (p->len != 0) {
+		if (!gemtext_foreach_line(p, p->buf, p->len))
+			ok = 0;
+		else if ((p->flags & PARSER_IN_PRE) &&
+		    !emit_line(p, LINE_PRE_END, NULL, NULL))
+			ok = 0;	/* couldn't close the open pre block */
+	}
+
+	/* release the buffer on the failure path too */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	if (!ok)
+		return 0;
+
+	/*
+	 * use the first level 2 or 3 header as page title if none
+	 * found yet.
+	 */
+	if (*p->title == '\0')
+		search_title(p, LINE_TITLE_2);
+	if (*p->title == '\0')
+		search_title(p, LINE_TITLE_3);
+
+	return 1;
+}
+
+/*
+ * Copy the text of the first heading of the given level that has
+ * some text into the page title.  The title is left alone if there
+ * is no such heading.
+ */
+static void
+search_title(struct parser *p, enum line_type level)
+{
+	struct line	*heading;
+
+	TAILQ_FOREACH(heading, &p->head, lines) {
+		if (heading->type != level || heading->line == NULL)
+			continue;
+
+		strlcpy(p->title, heading->line, sizeof(p->title));
+		return;
+	}
+}
+
+/*
+ * Text written before each line by gemtext_serialize, indexed by line
+ * type.  There is no entry for LINE_LINK: gemtext_serialize formats
+ * links on its own.
+ */
+static const char *gemtext_prefixes[] = {
+ [LINE_TEXT] = "",
+ [LINE_TITLE_1] = "# ",
+ [LINE_TITLE_2] = "## ",
+ [LINE_TITLE_3] = "### ",
+ [LINE_ITEM] = "* ",
+ [LINE_QUOTE] = "> ",
+ [LINE_PRE_START] = "``` ",
+ [LINE_PRE_CONTENT] = "",
+ [LINE_PRE_END] = "```",
+};
+
+/*
+ * Serialize callback: write the page back to `evb' as gemtext.
+ *
+ * Links are written as "=> URL label"; every other known line type as
+ * its prefix from gemtext_prefixes followed by the text.  Missing
+ * text or URL is written as the empty string.  Aborts on a line type
+ * this parser never produces.
+ *
+ * Returns 1 on success, 0 as soon as evbuffer_add_printf reports -1.
+ */
+static int
+gemtext_serialize(struct parser *p, struct evbuffer *evb)
+{
+	struct line	*l;
+	const char	*text, *alt;
+	int		 r;
+
+	TAILQ_FOREACH(l, &p->head, lines) {
+		text = l->line != NULL ? l->line : "";
+		alt = l->alt != NULL ? l->alt : "";
+
+		if (l->type == LINE_LINK) {
+			r = evbuffer_add_printf(evb, "=> %s %s\n",
+			    alt, text);
+		} else {
+			switch (l->type) {
+			case LINE_TEXT:
+			case LINE_TITLE_1:
+			case LINE_TITLE_2:
+			case LINE_TITLE_3:
+			case LINE_ITEM:
+			case LINE_QUOTE:
+			case LINE_PRE_START:
+			case LINE_PRE_CONTENT:
+			case LINE_PRE_END:
+				break;
+			default:
+				/* not reached */
+				abort();
+			}
+
+			r = evbuffer_add_printf(evb, "%s%s\n",
+			    gemtext_prefixes[l->type], text);
+		}
+
+		if (r == -1)
+			return 0;
+	}
+
+	return 1;
+}
blob - /dev/null
blob + 27a3142a5eb4ef23844d4074b1022269612c4ffa (mode 644)
--- /dev/null
+++ parser/parser_gophermap.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "parser.h"
+#include "utils.h"
+
+#define evap evbuffer_add_printf
+
+/*
+ * One gophermap line split into its tab-separated fields by
+ * gm_parse_selector.  The strings point into the line that was
+ * parsed (or to "" for missing fields); nothing is allocated.
+ */
+struct gm_selector {
+ char type; /* first character of the line */
+ const char *ds; /* text between the type and the first tab */
+ const char *selector; /* second field */
+ const char *addr; /* third field */
+ const char *port; /* fourth field; runs to the end of the line */
+};
+
+static void gm_parse_selector(char *, struct gm_selector *);
+
+static int gm_parse(struct parser *, const char *, size_t);
+static int gm_foreach_line(struct parser *, const char *, size_t);
+static int gm_free(struct parser *);
+static int gm_serialize(struct parser *, struct evbuffer *);
+
+/*
+ * Turn `p' into a pristine gophermap parser: everything is zeroed,
+ * the line list is emptied and the gophermap callbacks are installed.
+ */
+void
+gophermap_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "gophermap";
+	p->serialize = &gm_serialize;
+	p->free = &gm_free;
+	p->parse = &gm_parse;
+}
+
+/*
+ * Split a NUL-terminated gophermap line in place.  The first
+ * character is the type; what follows is cut at the first three tabs
+ * (each replaced with a NUL) into display string, selector, address
+ * and port.  Fields that are missing are set to "".
+ */
+static void
+gm_parse_selector(char *line, struct gm_selector *s)
+{
+	const char	**fields[] = { &s->selector, &s->addr, &s->port };
+	char		 *tab;
+	size_t		  i;
+
+	s->type = *line++;
+	s->ds = line;
+	s->selector = s->addr = s->port = "";
+
+	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
+		if ((tab = strchr(line, '\t')) == NULL)
+			return;
+
+		/* terminate the previous field, start the next one */
+		*tab = '\0';
+		line = tab + 1;
+		*fields[i] = line;
+	}
+}
+
+/*
+ * Parse callback: split the incoming chunk into lines and run
+ * gm_foreach_line on each of them.
+ */
+static int
+gm_parse(struct parser *p, const char *buf, size_t size)
+{
+	int	ok;
+
+	ok = parser_foreach_line(p, buf, size, gm_foreach_line);
+	return ok;
+}
+
+/*
+ * Append a line of the given type built from the selector `s': the
+ * display string becomes the text of the line and, for links, the
+ * target URL becomes its `alt'.
+ *
+ * An 'h' entry whose selector starts with "URL:" links to whatever
+ * follows that prefix; every other link gets a
+ * gopher://addr:port/<type><selector> URL.  URLs are silently
+ * truncated to fit LINE_MAX.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
+{
+	struct line	*l;
+	char		 url[LINE_MAX], typestr[2] = {0};
+
+	if ((l = calloc(1, sizeof(*l))) == NULL)
+		return 0;
+	l->type = type;
+
+	if ((l->line = strdup(s->ds)) == NULL)
+		goto fail;
+
+	if (type == LINE_LINK) {
+		if (s->type == 'h' && has_prefix(s->selector, "URL:")) {
+			strlcpy(url, s->selector+4, sizeof(url));
+		} else {
+			typestr[0] = s->type;
+
+			strlcpy(url, "gopher://", sizeof(url));
+			strlcat(url, s->addr, sizeof(url));
+			strlcat(url, ":", sizeof(url));
+			strlcat(url, s->port, sizeof(url));
+			strlcat(url, "/", sizeof(url));
+			strlcat(url, typestr, sizeof(url));
+			/* make sure the selector is slash-separated */
+			if (*s->selector != '/')
+				strlcat(url, "/", sizeof(url));
+			strlcat(url, s->selector, sizeof(url));
+		}
+
+		if ((l->alt = strdup(url)) == NULL)
+			goto fail;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+	return 1;
+
+fail:
+	free(l->line);
+	free(l->alt);
+	free(l);
+	return 0;
+}
+
+/*
+ * Handle one gophermap line: split it into fields and emit a link, a
+ * text line ('i') or a quote ('3', error) depending on its type.
+ * Lines of any other type are dropped.  Only the first LINE_MAX-1
+ * bytes of the line are considered.
+ *
+ * Returns 0 if the line can't be emitted, 1 otherwise.
+ */
+static int
+gm_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	char			buf[LINE_MAX] = {0};
+	struct gm_selector	s = {0};
+	enum line_type		type;
+
+	/* work on a NUL-terminated, writable copy */
+	memcpy(buf, line, MIN(sizeof(buf)-1, linelen));
+	gm_parse_selector(buf, &s);
+
+	switch (s.type) {
+	case '0':	/* text file */
+	case '1':	/* gopher submenu */
+	case '2':	/* CCSO nameserver */
+	case '4':	/* binhex-encoded file */
+	case '5':	/* DOS file */
+	case '6':	/* uuencoded file */
+	case '7':	/* full-text search */
+	case '8':	/* telnet */
+	case '9':	/* binary file */
+	case '+':	/* mirror or alternate server */
+	case 'g':	/* gif */
+	case 'I':	/* image */
+	case 'T':	/* telnet 3270 */
+	case ':':	/* gopher+: bitmap image */
+	case ';':	/* gopher+: movie file */
+	case 'd':	/* non-canonical: doc */
+	case 'h':	/* non-canonical: html file */
+	case 's':	/* non-canonical: sound file */
+		type = LINE_LINK;
+		break;
+
+	case 'i':	/* non-canonical: message */
+		type = LINE_TEXT;
+		break;
+
+	case '3':	/* error code */
+		type = LINE_QUOTE;
+		break;
+
+	default:	/* anything else is ignored */
+		return 1;
+	}
+
+	if (!emit_line(p, type, &s))
+		return 0;
+	return 1;
+}
+
+/*
+ * Free callback: flush the pending data (a last line without
+ * trailing newline) and release the pending buffer.
+ *
+ * Returns 1 on success, 0 if the last line couldn't be emitted; the
+ * buffer is released in both cases.
+ */
+static int
+gm_free(struct parser *p)
+{
+	int ok = 1;
+
+	/* flush the buffer, reporting failures like the other parsers */
+	if (p->len != 0)
+		ok = gm_foreach_line(p, p->buf, p->len);
+
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return ok;
+}
+
+/*
+ * Extract the item type from the path of a gopher URL.
+ *
+ * An empty path or "/" is a menu: *ret_type is set to '1' and the
+ * path is returned as-is.  A path starting with "/0", "/1" or "/7"
+ * yields that type and a pointer just past it.  In every other case
+ * *ret_type is set to 0 and the path is returned untouched.
+ */
+static inline const char *
+gopher_skip_selector(const char *path, int *ret_type)
+{
+	if (*path == '\0' || strcmp(path, "/") == 0) {
+		*ret_type = '1';
+		return path;
+	}
+
+	if (path[0] == '/' &&
+	    (path[1] == '0' || path[1] == '1' || path[1] == '7')) {
+		*ret_type = path[1];
+		return path + 2;
+	}
+
+	*ret_type = 0;
+	return path;
+}
+
+/*
+ * Write a link line back in gophermap format.
+ *
+ * Links that aren't gopher:// URLs are written as 'h' entries with an
+ * "URL:" selector.  gopher:// URLs are split into host, port
+ * (default "70") and path; the item type is taken from the path, or
+ * is '1' when there is no path at all.
+ *
+ * Returns -1 if the line has no URL, otherwise the result of
+ * evbuffer_add_printf.
+ */
+static int
+serialize_link(struct line *line, const char *text, struct evbuffer *evb)
+{
+	size_t		 portlen;
+	int		 type;
+	const char	*uri, *endhost, *port, *path, *colon;
+
+	if ((uri = line->alt) == NULL)
+		return -1;
+
+	if (!has_prefix(uri, "gopher://"))
+		return evap(evb, "h%s\tURL:%s\terror.host\t1\n",
+		    text, line->alt);
+
+	uri += 9; /* skip gopher:// */
+
+	path = strchr(uri, '/');
+	colon = strchr(uri, ':');
+
+	/* a colon after the first slash belongs to the path */
+	if (colon != NULL && path != NULL && colon > path)
+		colon = NULL;
+
+	/* the host ends at the port, at the path, or with the string */
+	if (colon != NULL)
+		endhost = colon;
+	else if (path != NULL)
+		endhost = path;
+	else
+		endhost = uri + strlen(uri);
+
+	if (colon != NULL) {
+		port = colon+1;
+		portlen = strcspn(port, "/");
+	} else {
+		port = "70";
+		portlen = 2;
+	}
+
+	if (path == NULL) {
+		type = '1';
+		path = "";
+	} else
+		path = gopher_skip_selector(path, &type);
+
+	return evap(evb, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
+	    path, (int)(endhost - uri), uri, (int)portlen, port);
+}
+
+/*
+ * Serialize callback: write the page back to `evb' as a gophermap.
+ * Links go through serialize_link, text lines become 'i' entries and
+ * quotes become '3' entries; missing text is written as the empty
+ * string.  Aborts on a line type this parser never produces.
+ *
+ * Returns 1 on success, 0 as soon as a line fails with -1.
+ */
+static int
+gm_serialize(struct parser *p, struct evbuffer *evb)
+{
+	struct line	*l;
+	const char	*text;
+	int		 r;
+
+	TAILQ_FOREACH(l, &p->head, lines) {
+		text = l->line != NULL ? l->line : "";
+
+		if (l->type == LINE_LINK)
+			r = serialize_link(l, text, evb);
+		else if (l->type == LINE_TEXT)
+			r = evap(evb, "i%s\t\terror.host\t1\n", text);
+		else if (l->type == LINE_QUOTE)
+			r = evap(evb, "3%s\t\terror.host\t1\n", text);
+		else {
+			/* unreachable */
+			abort();
+		}
+
+		if (r == -1)
+			return 0;
+	}
+
+	return 1;
+}
blob - /dev/null
blob + e94d85197fd198edd3e5749080f110f489b2931b (mode 644)
--- /dev/null
+++ parser/parser_textpatch.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming text/x-patch parser
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "telescope.h"
+#include "parser.h"
+#include "utils.h"
+
+static int tpatch_parse(struct parser *, const char *, size_t);
+static int tpatch_emit_line(struct parser *, const char *, size_t);
+static int tpatch_foreach_line(struct parser *, const char *, size_t);
+static int tpatch_free(struct parser *);
+
+/*
+ * Turn `p' into a pristine text/x-patch parser: everything is zeroed,
+ * the line list is emptied, the callbacks are installed and parsing
+ * starts in "patch header" state.
+ */
+void
+textpatch_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->flags = PARSER_IN_PATCH_HDR;
+
+	p->name = "text/x-patch";
+	p->free = &tpatch_free;
+	p->parse = &tpatch_parse;
+}
+
+/*
+ * Parse callback: split the incoming chunk into lines and run
+ * tpatch_foreach_line on each of them.
+ */
+static int
+tpatch_parse(struct parser *p, const char *buf, size_t size)
+{
+	int	ok;
+
+	ok = parser_foreach_line(p, buf, size, tpatch_foreach_line);
+	return ok;
+}
+
+/*
+ * Append one line of a patch to the page, classifying it on the way.
+ *
+ * While in the header every line is a LINE_PATCH_HDR; a line starting
+ * with "+++" ends the header.  Outside of it the first character
+ * decides: '+' added, '-' removed, '@' hunk header, ' ' context;
+ * anything else starts a new header.  Empty lines keep the type of
+ * the current state and carry no text.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int
+tpatch_emit_line(struct parser *p, const char *line, size_t linelen)
+{
+	struct line	*l;
+	int		 in_hdr;
+
+	if ((l = calloc(1, sizeof(*l))) == NULL)
+		return 0;
+
+	in_hdr = (p->flags & PARSER_IN_PATCH_HDR) != 0;
+	l->type = in_hdr ? LINE_PATCH_HDR : LINE_PATCH;
+
+	if (linelen != 0) {
+		if ((l->line = calloc(1, linelen+1)) == NULL) {
+			free(l);
+			return 0;
+		}
+		memcpy(l->line, line, linelen);
+
+		if (!in_hdr) {
+			if (*l->line == '+')
+				l->type = LINE_PATCH_ADD;
+			else if (*l->line == '-')
+				l->type = LINE_PATCH_DEL;
+			else if (*l->line == '@')
+				l->type = LINE_PATCH_HUNK_HDR;
+			else if (*l->line != ' ') {
+				/*
+				 * Not a context line either.  A single
+				 * patch file can have more than one
+				 * "header" if touches more than one
+				 * file.
+				 */
+				l->type = LINE_PATCH_HDR;
+				p->flags |= PARSER_IN_PATCH_HDR;
+			}
+		}
+
+		/* the "+++ file" line is the last one of a header */
+		if (has_prefix(l->line, "+++"))
+			p->flags &= ~PARSER_IN_PATCH_HDR;
+	}
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+
+	return 1;
+}
+
+/*
+ * Per-line callback for parser_foreach_line: every line is simply
+ * classified and emitted.
+ */
+static int
+tpatch_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	int	ok;
+
+	ok = tpatch_emit_line(p, line, linelen);
+	return ok;
+}
+
+/*
+ * Free callback: flush the pending data (a last line without
+ * trailing newline) and release the pending buffer, as the gemtext
+ * and gophermap parsers do.
+ *
+ * Returns 1 on success, 0 if the last line couldn't be emitted; the
+ * buffer is released in both cases.
+ */
+static int
+tpatch_free(struct parser *p)
+{
+	int ok = 1;
+
+	if (p->len != 0)
+		ok = tpatch_emit_line(p, p->buf, p->len);
+
+	/* the emitted line holds its own copy of the text */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return ok;
+}
blob - /dev/null
blob + 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644)
--- /dev/null
+++ parser/parser_textplain.c
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A streaming text/plain "parser."
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "telescope.h"
+#include "parser.h"
+
+static int textplain_parse(struct parser*, const char*, size_t);
+static int textplain_foreach_line(struct parser*, const char*, size_t);
+static int textplain_free(struct parser*);
+
+/*
+ * Append a LINE_TEXT line holding a copy of the `len' bytes at
+ * `line'; empty lines carry no text.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static inline int
+emit_line(struct parser *p, const char *line, size_t len)
+{
+	struct line	*l;
+	char		*copy = NULL;
+
+	if ((l = calloc(1, sizeof(*l))) == NULL)
+		return 0;
+
+	if (len != 0) {
+		/* one extra zeroed byte acts as NUL terminator */
+		if ((copy = calloc(1, len+1)) == NULL) {
+			free(l);
+			return 0;
+		}
+		memcpy(copy, line, len);
+	}
+
+	l->type = LINE_TEXT;
+	l->line = copy;
+
+	TAILQ_INSERT_TAIL(&p->head, l, lines);
+	return 1;
+}
+
+/*
+ * Turn `p' into a pristine text/plain parser: everything is zeroed,
+ * the line list is emptied and the callbacks are installed.
+ */
+void
+textplain_initparser(struct parser *p)
+{
+	memset(p, 0, sizeof(*p));
+	TAILQ_INIT(&p->head);
+
+	p->name = "text/plain";
+	p->free = &textplain_free;
+	p->parse = &textplain_parse;
+}
+
+/*
+ * Parse callback: split the incoming chunk into lines and run
+ * textplain_foreach_line on each of them.
+ */
+static int
+textplain_parse(struct parser *p, const char *buf, size_t size)
+{
+	int	ok;
+
+	ok = parser_foreach_line(p, buf, size, textplain_foreach_line);
+	return ok;
+}
+
+/*
+ * Per-line callback for parser_foreach_line: every line is emitted
+ * as it is.
+ */
+static int
+textplain_foreach_line(struct parser *p, const char *line, size_t linelen)
+{
+	int	ok;
+
+	ok = emit_line(p, line, linelen);
+	return ok;
+}
+
+/*
+ * Free callback: flush the pending data (a last line without
+ * trailing newline) and release the pending buffer, as the gemtext
+ * and gophermap parsers do.
+ *
+ * Returns 1 on success, 0 if the last line couldn't be emitted; the
+ * buffer is released in both cases.
+ */
+static int
+textplain_free(struct parser *p)
+{
+	int ok = 1;
+
+	if (p->len != 0)
+		ok = emit_line(p, p->buf, p->len);
+
+	/* the emitted line holds its own copy of the text */
+	free(p->buf);
+	p->buf = NULL;
+	p->len = 0;
+
+	return ok;
+}
blob - e23b368b5089ed079d433d6d5f8803e3f2480373 (mode 644)
blob + /dev/null
--- parser_gemtext.c
+++ /dev/null
-/*
- * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming gemtext parser.
- *
- * TODO:
- * - handle NULs
- * - UTF8
- */
-
-#include "compat.h"
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "defaults.h"
-#include "parser.h"
-#include "utf8.h"
-
-static int gemtext_parse(struct parser *, const char *, size_t);
-static int gemtext_foreach_line(struct parser *, const char *, size_t);
-static int gemtext_free(struct parser *);
-static int gemtext_serialize(struct parser *, struct evbuffer *);
-
-static int parse_text(struct parser*, enum line_type, const char*, size_t);
-static int parse_link(struct parser*, enum line_type, const char*, size_t);
-static int parse_title(struct parser*, enum line_type, const char*, size_t);
-static int parse_item(struct parser*, enum line_type, const char*, size_t);
-static int parse_quote(struct parser*, enum line_type, const char*, size_t);
-static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
-static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
-static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
-static void search_title(struct parser*, enum line_type);
-
-typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
-
-static parselinefn *parsers[] = {
- [LINE_TEXT] = parse_text,
- [LINE_LINK] = parse_link,
- [LINE_TITLE_1] = parse_title,
- [LINE_TITLE_2] = parse_title,
- [LINE_TITLE_3] = parse_title,
- [LINE_ITEM] = parse_item,
- [LINE_QUOTE] = parse_quote,
- [LINE_PRE_START] = parse_pre_start,
- [LINE_PRE_CONTENT] = parse_pre_cnt,
- [LINE_PRE_END] = parse_pre_end,
-};
-
-void
-gemtext_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "text/gemini";
- p->parse = &gemtext_parse;
- p->free = &gemtext_free;
- p->serialize = &gemtext_serialize;
-
- TAILQ_INIT(&p->head);
-}
-
-static inline int
-emit_line(struct parser *p, enum line_type type, char *line, char *alt)
-{
- struct line *l;
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- return 0;
-
- l->type = type;
- l->line = line;
- l->alt = alt;
-
- switch (l->type) {
- case LINE_PRE_START:
- case LINE_PRE_END:
- if (hide_pre_context)
- l->flags = L_HIDDEN;
- if (l->type == LINE_PRE_END &&
- hide_pre_closing_line)
- l->flags = L_HIDDEN;
- break;
- case LINE_PRE_CONTENT:
- if (hide_pre_blocks)
- l->flags = L_HIDDEN;
- break;
- case LINE_LINK:
- if (emojify_link &&
- !emojied_line(line, (const char **)&l->data))
- l->data = NULL;
- break;
- default:
- break;
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-}
-
-static int
-parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l, *u;
- const char *url_start;
-
- if (len <= 2)
- return emit_line(p, LINE_TEXT, NULL, NULL);
- buf += 2;
- len -= 2;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, LINE_TEXT, NULL, NULL);
-
- url_start = buf;
- while (len > 0 && !isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if ((u = calloc(1, buf - url_start + 1)) == NULL)
- return 0;
- memcpy(u, url_start, buf - url_start);
-
- if (len == 0)
- goto nolabel;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- goto nolabel;
-
- if ((l = calloc(1, len + 1)) == NULL)
- return 0;
-
- memcpy(l, buf, len);
- return emit_line(p, t, l, u);
-
-nolabel:
- if ((l = strdup(u)) == NULL)
- return 0;
- return emit_line(p, t, l, u);
-}
-
-static int
-parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- switch (t) {
- case LINE_TITLE_1:
- if (len <= 1)
- return emit_line(p, t, NULL, NULL);
- buf++;
- len--;
- break;
- case LINE_TITLE_2:
- if (len <= 2)
- return emit_line(p, t, NULL, NULL);
- buf += 2;
- len -= 2;
- break;
- case LINE_TITLE_3:
- if (len <= 3)
- return emit_line(p, t, NULL, NULL);
- buf += 3;
- len -= 3;
- break;
- default:
- /* unreachable */
- abort();
- }
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if (t == LINE_TITLE_1 && *p->title == '\0')
- strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len == 1)
- return emit_line(p, t, NULL, NULL);
-
- buf++;
- len--;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len == 1)
- return emit_line(p, t, NULL, NULL);
-
- buf++;
- len--;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len <= 3)
- return emit_line(p, t, NULL, NULL);
-
- buf += 3;
- len -= 3;
-
- while (len > 0 && isspace(buf[0])) {
- buf++;
- len--;
- }
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
-
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- char *l;
-
- if (len == 0)
- return emit_line(p, t, NULL, NULL);
-
- if ((l = calloc(1, len+1)) == NULL)
- return 0;
- memcpy(l, buf, len);
- return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
- return emit_line(p, t, NULL, NULL);
-}
-
-static inline enum line_type
-detect_line_type(const char *buf, size_t len, int in_pre)
-{
- if (in_pre) {
- if (len >= 3 &&
- buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
- return LINE_PRE_END;
- else
- return LINE_PRE_CONTENT;
- }
-
- if (len == 0)
- return LINE_TEXT;
-
- switch (*buf) {
- case '*':
- if (len > 1 && buf[1] == ' ')
- return LINE_ITEM;
- break;
- case '>': return LINE_QUOTE;
- case '=':
- if (len >= 1 && buf[1] == '>')
- return LINE_LINK;
- break;
- case '#':
- if (len == 1)
- return LINE_TEXT;
- if (buf[1] != '#')
- return LINE_TITLE_1;
- if (len == 2)
- return LINE_TEXT;
- if (buf[2] != '#')
- return LINE_TITLE_2;
- if (len == 3)
- return LINE_TEXT;
- return LINE_TITLE_3;
- case '`':
- if (len < 3)
- return LINE_TEXT;
- if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
- return LINE_PRE_START;
- break;
- }
-
- return LINE_TEXT;
-}
-
-static int
-gemtext_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, gemtext_foreach_line);
-}
-
-static int
-gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- enum line_type t;
-
- t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
- if (t == LINE_PRE_START)
- p->flags ^= PARSER_IN_PRE;
- if (t == LINE_PRE_END)
- p->flags ^= PARSER_IN_PRE;
- return parsers[t](p, t, line, linelen);
-}
-
-static int
-gemtext_free(struct parser *p)
-{
- enum line_type t;
-
- /* flush the buffer */
- if (p->len != 0) {
- t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
- if (!parsers[t](p, t, p->buf, p->len))
- return 0;
- if ((p->flags & PARSER_IN_PRE) &&
- !emit_line(p, LINE_PRE_END, NULL, NULL))
- return 0;
- }
-
- free(p->buf);
-
- /*
- * use the first level 2 or 3 header as page title if none
- * found yet.
- */
- if (*p->title == '\0')
- search_title(p, LINE_TITLE_2);
- if (*p->title == '\0')
- search_title(p, LINE_TITLE_3);
-
- return 1;
-}
-
-static void
-search_title(struct parser *p, enum line_type level)
-{
- struct line *l;
-
- TAILQ_FOREACH(l, &p->head, lines) {
- if (l->type == level) {
- if (l->line == NULL)
- continue;
- strlcpy(p->title, l->line, sizeof(p->title));
- break;
- }
- }
-}
-
-static const char *gemtext_prefixes[] = {
- [LINE_TEXT] = "",
- [LINE_TITLE_1] = "# ",
- [LINE_TITLE_2] = "## ",
- [LINE_TITLE_3] = "### ",
- [LINE_ITEM] = "* ",
- [LINE_QUOTE] = "> ",
- [LINE_PRE_START] = "``` ",
- [LINE_PRE_CONTENT] = "",
- [LINE_PRE_END] = "```",
-};
-
-static int
-gemtext_serialize(struct parser *p, struct evbuffer *evb)
-{
- struct line *line;
- const char *text;
- const char *alt;
- int r;
-
- TAILQ_FOREACH(line, &p->head, lines) {
- if ((text = line->line) == NULL)
- text = "";
-
- if ((alt = line->alt) == NULL)
- alt = "";
-
- switch (line->type) {
- case LINE_TEXT:
- case LINE_TITLE_1:
- case LINE_TITLE_2:
- case LINE_TITLE_3:
- case LINE_ITEM:
- case LINE_QUOTE:
- case LINE_PRE_START:
- case LINE_PRE_CONTENT:
- case LINE_PRE_END:
- r = evbuffer_add_printf(evb, "%s%s\n",
- gemtext_prefixes[line->type], text);
- break;
-
- case LINE_LINK:
- r = evbuffer_add_printf(evb, "=> %s %s\n",
- alt, text);
- break;
-
- default:
- /* not reached */
- abort();
- }
-
- if (r == -1)
- return 0;
- }
-
- return 1;
-}
blob - 27a3142a5eb4ef23844d4074b1022269612c4ffa (mode 644)
blob + /dev/null
--- parser_gophermap.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "parser.h"
-#include "utils.h"
-
-#define evap evbuffer_add_printf
-
-struct gm_selector {
- char type;
- const char *ds;
- const char *selector;
- const char *addr;
- const char *port;
-};
-
-static void gm_parse_selector(char *, struct gm_selector *);
-
-static int gm_parse(struct parser *, const char *, size_t);
-static int gm_foreach_line(struct parser *, const char *, size_t);
-static int gm_free(struct parser *);
-static int gm_serialize(struct parser *, struct evbuffer *);
-
-void
-gophermap_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "gophermap";
- p->parse = &gm_parse;
- p->free = &gm_free;
- p->serialize = &gm_serialize;
-
- TAILQ_INIT(&p->head);
-}
-
-static void
-gm_parse_selector(char *line, struct gm_selector *s)
-{
- s->type = *line++;
- s->ds = line;
- s->selector = "";
- s->addr = "";
- s->port = "";
-
- if ((line = strchr(line, '\t')) == NULL)
- return;
- *line++ = '\0';
- s->selector = line;
-
- if ((line = strchr(line, '\t')) == NULL)
- return;
- *line++ = '\0';
- s->addr = line;
-
- if ((line = strchr(line, '\t')) == NULL)
- return;
- *line++ = '\0';
- s->port = line;
-}
-
-static int
-gm_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, gm_foreach_line);
-}
-
-static inline int
-emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
-{
- struct line *l;
- char buf[LINE_MAX], b[2] = {0};
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- goto err;
-
- if ((l->line = strdup(s->ds)) == NULL)
- goto err;
-
- switch (l->type = type) {
- case LINE_LINK:
- if (s->type == 'h' && has_prefix(s->selector, "URL:")) {
- strlcpy(buf, s->selector+4, sizeof(buf));
- } else {
- strlcpy(buf, "gopher://", sizeof(buf));
- strlcat(buf, s->addr, sizeof(buf));
- strlcat(buf, ":", sizeof(buf));
- strlcat(buf, s->port, sizeof(buf));
- strlcat(buf, "/", sizeof(buf));
- b[0] = s->type;
- strlcat(buf, b, sizeof(buf));
- if (*s->selector != '/')
- strlcat(buf, "/", sizeof(buf));
- strlcat(buf, s->selector, sizeof(buf));
- }
-
- if ((l->alt = strdup(buf)) == NULL)
- goto err;
- break;
-
- default:
- break;
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-
-err:
- if (l != NULL) {
- free(l->line);
- free(l->alt);
- free(l);
- }
- return 0;
-}
-
-static int
-gm_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- char buf[LINE_MAX] = {0};
- struct gm_selector s = {0};
-
- memcpy(buf, line, MIN(sizeof(buf)-1, linelen));
- gm_parse_selector(buf, &s);
-
- switch (s.type) {
- case '0': /* text file */
- case '1': /* gopher submenu */
- case '2': /* CCSO nameserver */
- case '4': /* binhex-encoded file */
- case '5': /* DOS file */
- case '6': /* uuencoded file */
- case '7': /* full-text search */
- case '8': /* telnet */
- case '9': /* binary file */
- case '+': /* mirror or alternate server */
- case 'g': /* gif */
- case 'I': /* image */
- case 'T': /* telnet 3270 */
- case ':': /* gopher+: bitmap image */
- case ';': /* gopher+: movie file */
- case 'd': /* non-canonical: doc */
- case 'h': /* non-canonical: html file */
- case 's': /* non-canonical: sound file */
- if (!emit_line(p, LINE_LINK, &s))
- return 0;
- break;
-
- break;
-
- case 'i': /* non-canonical: message */
- if (!emit_line(p, LINE_TEXT, &s))
- return 0;
- break;
-
- case '3': /* error code */
- if (!emit_line(p, LINE_QUOTE, &s))
- return 0;
- break;
- }
-
- return 1;
-}
-
-static int
-gm_free(struct parser *p)
-{
- /* flush the buffer */
- if (p->len != 0)
- gm_foreach_line(p, p->buf, p->len);
-
- free(p->buf);
-
- return 1;
-}
-
-static inline const char *
-gopher_skip_selector(const char *path, int *ret_type)
-{
- *ret_type = 0;
-
- if (!strcmp(path, "/") || *path == '\0') {
- *ret_type = '1';
- return path;
- }
-
- if (*path != '/')
- return path;
- path++;
-
- switch (*ret_type = *path) {
- case '0':
- case '1':
- case '7':
- break;
-
- default:
- *ret_type = 0;
- path -= 1;
- return path;
- }
-
- return ++path;
-}
-
-static int
-serialize_link(struct line *line, const char *text, struct evbuffer *evb)
-{
- size_t portlen = 0;
- int type;
- const char *uri, *endhost, *port, *path, *colon;
-
- if ((uri = line->alt) == NULL)
- return -1;
-
- if (!has_prefix(uri, "gopher://"))
- return evap(evb, "h%s\tURL:%s\terror.host\t1\n",
- text, line->alt);
-
- uri += 9; /* skip gopher:// */
-
- path = strchr(uri, '/');
- colon = strchr(uri, ':');
-
- if (path != NULL && colon > path)
- colon = NULL;
-
- if ((endhost = colon) == NULL &&
- (endhost = path) == NULL)
- endhost = strchr(path, '\0');
-
- if (colon != NULL) {
- for (port = colon+1; *port && *port != '/'; ++port)
- ++portlen;
- port = colon+1;
- } else {
- port = "70";
- portlen = 2;
- }
-
- if (path == NULL) {
- type = '1';
- path = "";
- } else
- path = gopher_skip_selector(path, &type);
-
- return evap(evb, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
- path, (int)(endhost - uri), uri, (int)portlen, port);
-}
-
-static int
-gm_serialize(struct parser *p, struct evbuffer *evb)
-{
- struct line *line;
- const char *text;
- int r;
-
- TAILQ_FOREACH(line, &p->head, lines) {
- if ((text = line->line) == NULL)
- text = "";
-
- switch (line->type) {
- case LINE_LINK:
- r = serialize_link(line, text, evb);
- break;
-
- case LINE_TEXT:
- r = evap(evb, "i%s\t\terror.host\t1\n",
- text);
- break;
-
- case LINE_QUOTE:
- r = evap(evb, "3%s\t\terror.host\t1\n",
- text);
- break;
-
- default:
- /* unreachable */
- abort();
- }
-
- if (r == -1)
- return 0;
- }
-
- return 1;
-}
blob - e94d85197fd198edd3e5749080f110f489b2931b (mode 644)
blob + /dev/null
--- parser_textpatch.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming text/x-patch parser
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "telescope.h"
-#include "parser.h"
-#include "utils.h"
-
-static int tpatch_parse(struct parser *, const char *, size_t);
-static int tpatch_emit_line(struct parser *, const char *, size_t);
-static int tpatch_foreach_line(struct parser *, const char *, size_t);
-static int tpatch_free(struct parser *);
-
-void
-textpatch_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "text/x-patch";
- p->parse = &tpatch_parse;
- p->free = &tpatch_free;
-
- p->flags = PARSER_IN_PATCH_HDR;
-
- TAILQ_INIT(&p->head);
-}
-
-static int
-tpatch_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, tpatch_foreach_line);
-}
-
-static int
-tpatch_emit_line(struct parser *p, const char *line, size_t linelen)
-{
- struct line *l;
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- return 0;
-
- if (p->flags & PARSER_IN_PATCH_HDR)
- l->type = LINE_PATCH_HDR;
- else
- l->type = LINE_PATCH;
-
- if (linelen != 0) {
- if ((l->line = calloc(1, linelen+1)) == NULL) {
- free(l);
- return 0;
- }
-
- memcpy(l->line, line, linelen);
-
- if (!(p->flags & PARSER_IN_PATCH_HDR))
- switch (*l->line) {
- case '+':
- l->type = LINE_PATCH_ADD;
- break;
- case '-':
- l->type = LINE_PATCH_DEL;
- break;
- case '@':
- l->type = LINE_PATCH_HUNK_HDR;
- break;
- case ' ':
- /* context lines */
- break;
- default:
- /*
- * A single patch file can have more
- * than one "header" if touches more
- * than one file.
- */
- l->type = LINE_PATCH_HDR;
- p->flags |= PARSER_IN_PATCH_HDR;
- break;
- }
-
- if (has_prefix(l->line, "+++"))
- p->flags &= ~PARSER_IN_PATCH_HDR;
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-}
-
-static int
-tpatch_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- return tpatch_emit_line(p, line, linelen);
-}
-
-static int
-tpatch_free(struct parser *p)
-{
- if (p->len != 0)
- return tpatch_emit_line(p, p->buf, p->len);
- return 1;
-}
blob - 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644)
blob + /dev/null
--- parser_textplain.c
+++ /dev/null
-/*
- * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/*
- * A streaming text/plain "parser."
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "telescope.h"
-#include "parser.h"
-
-static int textplain_parse(struct parser*, const char*, size_t);
-static int textplain_foreach_line(struct parser*, const char*, size_t);
-static int textplain_free(struct parser*);
-
-static inline int
-emit_line(struct parser *p, const char *line, size_t len)
-{
- struct line *l;
-
- if ((l = calloc(1, sizeof(*l))) == NULL)
- return 0;
-
- l->type = LINE_TEXT;
-
- if (len != 0) {
- if ((l->line = calloc(1, len+1)) == NULL) {
- free(l);
- return 0;
- }
-
- memcpy(l->line, line, len);
- }
-
- TAILQ_INSERT_TAIL(&p->head, l, lines);
-
- return 1;
-}
-
-void
-textplain_initparser(struct parser *p)
-{
- memset(p, 0, sizeof(*p));
-
- p->name = "text/plain";
- p->parse = &textplain_parse;
- p->free = &textplain_free;
-
- TAILQ_INIT(&p->head);
-}
-
-static int
-textplain_parse(struct parser *p, const char *buf, size_t size)
-{
- return parser_foreach_line(p, buf, size, textplain_foreach_line);
-}
-
-static int
-textplain_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
- return emit_line(p, line, linelen);
-}
-
-static int
-textplain_free(struct parser *p)
-{
- if (p->len != 0)
- return emit_line(p, p->buf, p->len);
- return 1;
-}
blob - c2104ba3bdadf4fd5a481067d7b8b15a10710b7b
blob + e08198e4e7f2f1c4ed4d0ab0727eefbd9d7cd6af
--- test/Makefile.am
+++ test/Makefile.am
gmparser_SOURCES = gmparser.c \
$(top_srcdir)/compat.h \
- $(top_srcdir)/parser.c \
$(top_srcdir)/parser.h \
- $(top_srcdir)/parser_gophermap.c \
+ $(top_srcdir)/parser/parser.c \
+ $(top_srcdir)/parser/parser_gophermap.c \
$(top_srcdir)/utils.c
gmparser_CFLAGS = -I$(top_srcdir)