commit 75a8a1ecac7f6cd7162686de382f16ba0d1f21f2 from: Omar Polo date: Tue Feb 08 21:49:15 2022 UTC move parsers to their own subdir commit - 81a7ff41a7bd5b30c98404719a4dada24651f7e1 commit + 75a8a1ecac7f6cd7162686de382f16ba0d1f21f2 blob - fab17364d6a92dd526452fa24f03a65476350584 blob + 1900f6e86054d439679f189eb54ca84865cbd42b --- Makefile.am +++ Makefile.am @@ -31,12 +31,12 @@ telescope_SOURCES = cmd.c \ pages.c \ pages.h \ parse.y \ - parser.c \ parser.h \ - parser_gemtext.c \ - parser_gophermap.c \ - parser_textpatch.c \ - parser_textplain.c \ + parser/parser.c \ + parser/parser_gemtext.c \ + parser/parser_gophermap.c \ + parser/parser_textpatch.c \ + parser/parser_textplain.c \ sandbox.c \ session.c \ session.h \ blob - a44cdba406d715fdfc5e068f0848918ce48fb8d7 (mode 644) blob + /dev/null --- parser.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "compat.h" - -#include -#include - -#include "parser.h" -#include "telescope.h" - -void -parser_init(struct tab *tab, parserfn fn) -{ - erase_buffer(&tab->buffer); - fn(&tab->buffer.page); - tab->buffer.page.init = fn; -} - -int -parser_parse(struct tab *tab, const char *chunk, size_t len) -{ - return tab->buffer.page.parse(&tab->buffer.page, chunk, len); -} - -int -parser_free(struct tab *tab) -{ - int r; - char *tilde, *slash; - - r = tab->buffer.page.free(&tab->buffer.page); - - if (*tab->buffer.page.title != '\0') - return r; - - /* - * heuristic: see if there is a "tilde user" and use that as - * page title, using the full domain name as fallback. - */ - if ((tilde = strstr(tab->hist_cur->h, "/~")) != NULL) { - strlcpy(tab->buffer.page.title, tilde+1, - sizeof(tab->buffer.page.title)); - - if ((slash = strchr(tab->buffer.page.title, '/')) != NULL) - *slash = '\0'; - } else - strlcpy(tab->buffer.page.title, tab->uri.host, - sizeof(tab->buffer.page.title)); - - return r; -} - -int -parser_serialize(struct tab *tab, struct evbuffer *evb) -{ - struct line *line; - const char *text; - int r; - - if (tab->buffer.page.serialize != NULL) - return tab->buffer.page.serialize(&tab->buffer.page, evb); - - /* a default implementation good enough for plain text */ - TAILQ_FOREACH(line, &tab->buffer.page.head, lines) { - if ((text = line->line) == NULL) - text = ""; - - r = evbuffer_add_printf(evb, "%s\n", text); - if (r == -1) - return 0; - } - - return 1; -} - -int -parser_append(struct parser *p, const char *buf, size_t len) -{ - size_t newlen; - char *t; - - newlen = len + p->len; - if ((t = calloc(1, newlen)) == NULL) - return 0; - memcpy(t, p->buf, p->len); - memcpy(t + p->len, buf, len); - free(p->buf); - p->buf = t; - p->len = newlen; - return 1; -} - -int -parser_set_buf(struct parser *p, const char *buf, size_t len) -{ - char *tmp; - - if (len == 0) { - p->len = 0; - free(p->buf); - p->buf = NULL; - return 1; - } - - /* - * p->buf and buf can (and probably almost always will) - * overlap! - */ - - if ((tmp = calloc(1, len)) == NULL) - return 0; - memcpy(tmp, buf, len); - free(p->buf); - p->buf = tmp; - p->len = len; - return 1; -} - -int -parser_foreach_line(struct parser *p, const char *buf, size_t size, - parsechunkfn fn) -{ - char *b, *e; - unsigned int ch; - size_t i, l, len; - - if (!parser_append(p, buf, size)) - return 0; - b = p->buf; - len = p->len; - - if (!(p->flags & PARSER_IN_BODY) && len < 3) - return 1; - - if (!(p->flags & PARSER_IN_BODY)) { - p->flags |= PARSER_IN_BODY; - - /* - * drop the BOM: only UTF-8 is supported, and there - * it's useless; some editors may still add one - * though. - */ - if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) { - b += 3; - len -= 3; - } - } - - /* drop every "funny" ASCII character */ - for (i = 0; i < len; ) { - ch = b[i]; - if ((ch >= ' ' || ch == '\n' || ch == '\t') - && ch != 127) { /* del */ - ++i; - continue; - } - memmove(&b[i], &b[i+1], len - i - 1); - len--; - } - - while (len > 0) { - if ((e = memmem((char*)b, len, "\n", 1)) == NULL) - break; - l = e - b; - - if (!fn(p, b, l)) - return 0; - - len -= l; - b += l; - - if (len > 0) { - /* skip \n */ - len--; - b++; - } - } - - return parser_set_buf(p, b, len); -} blob - /dev/null blob + a44cdba406d715fdfc5e068f0848918ce48fb8d7 (mode 644) --- /dev/null +++ parser/parser.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "compat.h" + +#include +#include + +#include "parser.h" +#include "telescope.h" + +void +parser_init(struct tab *tab, parserfn fn) +{ + erase_buffer(&tab->buffer); + fn(&tab->buffer.page); + tab->buffer.page.init = fn; +} + +int +parser_parse(struct tab *tab, const char *chunk, size_t len) +{ + return tab->buffer.page.parse(&tab->buffer.page, chunk, len); +} + +int +parser_free(struct tab *tab) +{ + int r; + char *tilde, *slash; + + r = tab->buffer.page.free(&tab->buffer.page); + + if (*tab->buffer.page.title != '\0') + return r; + + /* + * heuristic: see if there is a "tilde user" and use that as + * page title, using the full domain name as fallback. + */ + if ((tilde = strstr(tab->hist_cur->h, "/~")) != NULL) { + strlcpy(tab->buffer.page.title, tilde+1, + sizeof(tab->buffer.page.title)); + + if ((slash = strchr(tab->buffer.page.title, '/')) != NULL) + *slash = '\0'; + } else + strlcpy(tab->buffer.page.title, tab->uri.host, + sizeof(tab->buffer.page.title)); + + return r; +} + +int +parser_serialize(struct tab *tab, struct evbuffer *evb) +{ + struct line *line; + const char *text; + int r; + + if (tab->buffer.page.serialize != NULL) + return tab->buffer.page.serialize(&tab->buffer.page, evb); + + /* a default implementation good enough for plain text */ + TAILQ_FOREACH(line, &tab->buffer.page.head, lines) { + if ((text = line->line) == NULL) + text = ""; + + r = evbuffer_add_printf(evb, "%s\n", text); + if (r == -1) + return 0; + } + + return 1; +} + +int +parser_append(struct parser *p, const char *buf, size_t len) +{ + size_t newlen; + char *t; + + newlen = len + p->len; + if ((t = calloc(1, newlen)) == NULL) + return 0; + memcpy(t, p->buf, p->len); + memcpy(t + p->len, buf, len); + free(p->buf); + p->buf = t; + p->len = newlen; + return 1; +} + +int +parser_set_buf(struct parser *p, const char *buf, size_t len) +{ + char *tmp; + + if (len == 0) { + p->len = 0; + free(p->buf); + p->buf = NULL; + return 1; + } + + /* + * p->buf and buf can (and probably almost always will) + * overlap! + */ + + if ((tmp = calloc(1, len)) == NULL) + return 0; + memcpy(tmp, buf, len); + free(p->buf); + p->buf = tmp; + p->len = len; + return 1; +} + +int +parser_foreach_line(struct parser *p, const char *buf, size_t size, + parsechunkfn fn) +{ + char *b, *e; + unsigned int ch; + size_t i, l, len; + + if (!parser_append(p, buf, size)) + return 0; + b = p->buf; + len = p->len; + + if (!(p->flags & PARSER_IN_BODY) && len < 3) + return 1; + + if (!(p->flags & PARSER_IN_BODY)) { + p->flags |= PARSER_IN_BODY; + + /* + * drop the BOM: only UTF-8 is supported, and there + * it's useless; some editors may still add one + * though. + */ + if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) { + b += 3; + len -= 3; + } + } + + /* drop every "funny" ASCII character */ + for (i = 0; i < len; ) { + ch = b[i]; + if ((ch >= ' ' || ch == '\n' || ch == '\t') + && ch != 127) { /* del */ + ++i; + continue; + } + memmove(&b[i], &b[i+1], len - i - 1); + len--; + } + + while (len > 0) { + if ((e = memmem((char*)b, len, "\n", 1)) == NULL) + break; + l = e - b; + + if (!fn(p, b, l)) + return 0; + + len -= l; + b += l; + + if (len > 0) { + /* skip \n */ + len--; + b++; + } + } + + return parser_set_buf(p, b, len); +} blob - /dev/null blob + e23b368b5089ed079d433d6d5f8803e3f2480373 (mode 644) --- /dev/null +++ parser/parser_gemtext.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2021, 2022 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * A streaming gemtext parser. + * + * TODO: + * - handle NULs + * - UTF8 + */ + +#include "compat.h" + +#include +#include +#include + +#include "defaults.h" +#include "parser.h" +#include "utf8.h" + +static int gemtext_parse(struct parser *, const char *, size_t); +static int gemtext_foreach_line(struct parser *, const char *, size_t); +static int gemtext_free(struct parser *); +static int gemtext_serialize(struct parser *, struct evbuffer *); + +static int parse_text(struct parser*, enum line_type, const char*, size_t); +static int parse_link(struct parser*, enum line_type, const char*, size_t); +static int parse_title(struct parser*, enum line_type, const char*, size_t); +static int parse_item(struct parser*, enum line_type, const char*, size_t); +static int parse_quote(struct parser*, enum line_type, const char*, size_t); +static int parse_pre_start(struct parser*, enum line_type, const char*, size_t); +static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t); +static int parse_pre_end(struct parser*, enum line_type, const char*, size_t); +static void search_title(struct parser*, enum line_type); + +typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t); + +static parselinefn *parsers[] = { + [LINE_TEXT] = parse_text, + [LINE_LINK] = parse_link, + [LINE_TITLE_1] = parse_title, + [LINE_TITLE_2] = parse_title, + [LINE_TITLE_3] = parse_title, + [LINE_ITEM] = parse_item, + [LINE_QUOTE] = parse_quote, + [LINE_PRE_START] = parse_pre_start, + [LINE_PRE_CONTENT] = parse_pre_cnt, + [LINE_PRE_END] = parse_pre_end, +}; + +void +gemtext_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "text/gemini"; + p->parse = &gemtext_parse; + p->free = &gemtext_free; + p->serialize = &gemtext_serialize; + + TAILQ_INIT(&p->head); +} + +static inline int +emit_line(struct parser *p, enum line_type type, char *line, char *alt) +{ + struct line *l; + + if ((l = calloc(1, sizeof(*l))) == NULL) + return 0; + + l->type = type; + l->line = line; + l->alt = alt; + + switch (l->type) { + case LINE_PRE_START: + case LINE_PRE_END: + if (hide_pre_context) + l->flags = L_HIDDEN; + if (l->type == LINE_PRE_END && + hide_pre_closing_line) + l->flags = L_HIDDEN; + break; + case LINE_PRE_CONTENT: + if (hide_pre_blocks) + l->flags = L_HIDDEN; + break; + case LINE_LINK: + if (emojify_link && + !emojied_line(line, (const char **)&l->data)) + l->data = NULL; + break; + default: + break; + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; +} + +static int +parse_text(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_link(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l, *u; + const char *url_start; + + if (len <= 2) + return emit_line(p, LINE_TEXT, NULL, NULL); + buf += 2; + len -= 2; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, LINE_TEXT, NULL, NULL); + + url_start = buf; + while (len > 0 && !isspace(buf[0])) { + buf++; + len--; + } + + if ((u = calloc(1, buf - url_start + 1)) == NULL) + return 0; + memcpy(u, url_start, buf - url_start); + + if (len == 0) + goto nolabel; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + goto nolabel; + + if ((l = calloc(1, len + 1)) == NULL) + return 0; + + memcpy(l, buf, len); + return emit_line(p, t, l, u); + +nolabel: + if ((l = strdup(u)) == NULL) + return 0; + return emit_line(p, t, l, u); +} + +static int +parse_title(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + switch (t) { + case LINE_TITLE_1: + if (len <= 1) + return emit_line(p, t, NULL, NULL); + buf++; + len--; + break; + case LINE_TITLE_2: + if (len <= 2) + return emit_line(p, t, NULL, NULL); + buf += 2; + len -= 2; + break; + case LINE_TITLE_3: + if (len <= 3) + return emit_line(p, t, NULL, NULL); + buf += 3; + len -= 3; + break; + default: + /* unreachable */ + abort(); + } + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if (t == LINE_TITLE_1 && *p->title == '\0') + strncpy(p->title, buf, MIN(sizeof(p->title)-1, len)); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_item(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len == 1) + return emit_line(p, t, NULL, NULL); + + buf++; + len--; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len == 1) + return emit_line(p, t, NULL, NULL); + + buf++; + len--; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len <= 3) + return emit_line(p, t, NULL, NULL); + + buf += 3; + len -= 3; + + while (len > 0 && isspace(buf[0])) { + buf++; + len--; + } + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + char *l; + + if (len == 0) + return emit_line(p, t, NULL, NULL); + + if ((l = calloc(1, len+1)) == NULL) + return 0; + memcpy(l, buf, len); + return emit_line(p, t, l, NULL); +} + +static int +parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len) +{ + return emit_line(p, t, NULL, NULL); +} + +static inline enum line_type +detect_line_type(const char *buf, size_t len, int in_pre) +{ + if (in_pre) { + if (len >= 3 && + buf[0] == '`' && buf[1] == '`' && buf[2] == '`') + return LINE_PRE_END; + else + return LINE_PRE_CONTENT; + } + + if (len == 0) + return LINE_TEXT; + + switch (*buf) { + case '*': + if (len > 1 && buf[1] == ' ') + return LINE_ITEM; + break; + case '>': return LINE_QUOTE; + case '=': + if (len >= 1 && buf[1] == '>') + return LINE_LINK; + break; + case '#': + if (len == 1) + return LINE_TEXT; + if (buf[1] != '#') + return LINE_TITLE_1; + if (len == 2) + return LINE_TEXT; + if (buf[2] != '#') + return LINE_TITLE_2; + if (len == 3) + return LINE_TEXT; + return LINE_TITLE_3; + case '`': + if (len < 3) + return LINE_TEXT; + if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`') + return LINE_PRE_START; + break; + } + + return LINE_TEXT; +} + +static int +gemtext_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, gemtext_foreach_line); +} + +static int +gemtext_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + enum line_type t; + + t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE); + if (t == LINE_PRE_START) + p->flags ^= PARSER_IN_PRE; + if (t == LINE_PRE_END) + p->flags ^= PARSER_IN_PRE; + return parsers[t](p, t, line, linelen); +} + +static int +gemtext_free(struct parser *p) +{ + enum line_type t; + + /* flush the buffer */ + if (p->len != 0) { + t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE); + if (!parsers[t](p, t, p->buf, p->len)) + return 0; + if ((p->flags & PARSER_IN_PRE) && + !emit_line(p, LINE_PRE_END, NULL, NULL)) + return 0; + } + + free(p->buf); + + /* + * use the first level 2 or 3 header as page title if none + * found yet. + */ + if (*p->title == '\0') + search_title(p, LINE_TITLE_2); + if (*p->title == '\0') + search_title(p, LINE_TITLE_3); + + return 1; +} + +static void +search_title(struct parser *p, enum line_type level) +{ + struct line *l; + + TAILQ_FOREACH(l, &p->head, lines) { + if (l->type == level) { + if (l->line == NULL) + continue; + strlcpy(p->title, l->line, sizeof(p->title)); + break; + } + } +} + +static const char *gemtext_prefixes[] = { + [LINE_TEXT] = "", + [LINE_TITLE_1] = "# ", + [LINE_TITLE_2] = "## ", + [LINE_TITLE_3] = "### ", + [LINE_ITEM] = "* ", + [LINE_QUOTE] = "> ", + [LINE_PRE_START] = "``` ", + [LINE_PRE_CONTENT] = "", + [LINE_PRE_END] = "```", +}; + +static int +gemtext_serialize(struct parser *p, struct evbuffer *evb) +{ + struct line *line; + const char *text; + const char *alt; + int r; + + TAILQ_FOREACH(line, &p->head, lines) { + if ((text = line->line) == NULL) + text = ""; + + if ((alt = line->alt) == NULL) + alt = ""; + + switch (line->type) { + case LINE_TEXT: + case LINE_TITLE_1: + case LINE_TITLE_2: + case LINE_TITLE_3: + case LINE_ITEM: + case LINE_QUOTE: + case LINE_PRE_START: + case LINE_PRE_CONTENT: + case LINE_PRE_END: + r = evbuffer_add_printf(evb, "%s%s\n", + gemtext_prefixes[line->type], text); + break; + + case LINE_LINK: + r = evbuffer_add_printf(evb, "=> %s %s\n", + alt, text); + break; + + default: + /* not reached */ + abort(); + } + + if (r == -1) + return 0; + } + + return 1; +} blob - /dev/null blob + 27a3142a5eb4ef23844d4074b1022269612c4ffa (mode 644) --- /dev/null +++ parser/parser_gophermap.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "compat.h" + +#include +#include +#include + +#include "parser.h" +#include "utils.h" + +#define evap evbuffer_add_printf + +struct gm_selector { + char type; + const char *ds; + const char *selector; + const char *addr; + const char *port; +}; + +static void gm_parse_selector(char *, struct gm_selector *); + +static int gm_parse(struct parser *, const char *, size_t); +static int gm_foreach_line(struct parser *, const char *, size_t); +static int gm_free(struct parser *); +static int gm_serialize(struct parser *, struct evbuffer *); + +void +gophermap_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "gophermap"; + p->parse = &gm_parse; + p->free = &gm_free; + p->serialize = &gm_serialize; + + TAILQ_INIT(&p->head); +} + +static void +gm_parse_selector(char *line, struct gm_selector *s) +{ + s->type = *line++; + s->ds = line; + s->selector = ""; + s->addr = ""; + s->port = ""; + + if ((line = strchr(line, '\t')) == NULL) + return; + *line++ = '\0'; + s->selector = line; + + if ((line = strchr(line, '\t')) == NULL) + return; + *line++ = '\0'; + s->addr = line; + + if ((line = strchr(line, '\t')) == NULL) + return; + *line++ = '\0'; + s->port = line; +} + +static int +gm_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, gm_foreach_line); +} + +static inline int +emit_line(struct parser *p, enum line_type type, struct gm_selector *s) +{ + struct line *l; + char buf[LINE_MAX], b[2] = {0}; + + if ((l = calloc(1, sizeof(*l))) == NULL) + goto err; + + if ((l->line = strdup(s->ds)) == NULL) + goto err; + + switch (l->type = type) { + case LINE_LINK: + if (s->type == 'h' && has_prefix(s->selector, "URL:")) { + strlcpy(buf, s->selector+4, sizeof(buf)); + } else { + strlcpy(buf, "gopher://", sizeof(buf)); + strlcat(buf, s->addr, sizeof(buf)); + strlcat(buf, ":", sizeof(buf)); + strlcat(buf, s->port, sizeof(buf)); + strlcat(buf, "/", sizeof(buf)); + b[0] = s->type; + strlcat(buf, b, sizeof(buf)); + if (*s->selector != '/') + strlcat(buf, "/", sizeof(buf)); + strlcat(buf, s->selector, sizeof(buf)); + } + + if ((l->alt = strdup(buf)) == NULL) + goto err; + break; + + default: + break; + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; + +err: + if (l != NULL) { + free(l->line); + free(l->alt); + free(l); + } + return 0; +} + +static int +gm_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + char buf[LINE_MAX] = {0}; + struct gm_selector s = {0}; + + memcpy(buf, line, MIN(sizeof(buf)-1, linelen)); + gm_parse_selector(buf, &s); + + switch (s.type) { + case '0': /* text file */ + case '1': /* gopher submenu */ + case '2': /* CCSO nameserver */ + case '4': /* binhex-encoded file */ + case '5': /* DOS file */ + case '6': /* uuencoded file */ + case '7': /* full-text search */ + case '8': /* telnet */ + case '9': /* binary file */ + case '+': /* mirror or alternate server */ + case 'g': /* gif */ + case 'I': /* image */ + case 'T': /* telnet 3270 */ + case ':': /* gopher+: bitmap image */ + case ';': /* gopher+: movie file */ + case 'd': /* non-canonical: doc */ + case 'h': /* non-canonical: html file */ + case 's': /* non-canonical: sound file */ + if (!emit_line(p, LINE_LINK, &s)) + return 0; + break; + + break; + + case 'i': /* non-canonical: message */ + if (!emit_line(p, LINE_TEXT, &s)) + return 0; + break; + + case '3': /* error code */ + if (!emit_line(p, LINE_QUOTE, &s)) + return 0; + break; + } + + return 1; +} + +static int +gm_free(struct parser *p) +{ + /* flush the buffer */ + if (p->len != 0) + gm_foreach_line(p, p->buf, p->len); + + free(p->buf); + + return 1; +} + +static inline const char * +gopher_skip_selector(const char *path, int *ret_type) +{ + *ret_type = 0; + + if (!strcmp(path, "/") || *path == '\0') { + *ret_type = '1'; + return path; + } + + if (*path != '/') + return path; + path++; + + switch (*ret_type = *path) { + case '0': + case '1': + case '7': + break; + + default: + *ret_type = 0; + path -= 1; + return path; + } + + return ++path; +} + +static int +serialize_link(struct line *line, const char *text, struct evbuffer *evb) +{ + size_t portlen = 0; + int type; + const char *uri, *endhost, *port, *path, *colon; + + if ((uri = line->alt) == NULL) + return -1; + + if (!has_prefix(uri, "gopher://")) + return evap(evb, "h%s\tURL:%s\terror.host\t1\n", + text, line->alt); + + uri += 9; /* skip gopher:// */ + + path = strchr(uri, '/'); + colon = strchr(uri, ':'); + + if (path != NULL && colon > path) + colon = NULL; + + if ((endhost = colon) == NULL && + (endhost = path) == NULL) + endhost = strchr(path, '\0'); + + if (colon != NULL) { + for (port = colon+1; *port && *port != '/'; ++port) + ++portlen; + port = colon+1; + } else { + port = "70"; + portlen = 2; + } + + if (path == NULL) { + type = '1'; + path = ""; + } else + path = gopher_skip_selector(path, &type); + + return evap(evb, "%c%s\t%s\t%.*s\t%.*s\n", type, text, + path, (int)(endhost - uri), uri, (int)portlen, port); +} + +static int +gm_serialize(struct parser *p, struct evbuffer *evb) +{ + struct line *line; + const char *text; + int r; + + TAILQ_FOREACH(line, &p->head, lines) { + if ((text = line->line) == NULL) + text = ""; + + switch (line->type) { + case LINE_LINK: + r = serialize_link(line, text, evb); + break; + + case LINE_TEXT: + r = evap(evb, "i%s\t\terror.host\t1\n", + text); + break; + + case LINE_QUOTE: + r = evap(evb, "3%s\t\terror.host\t1\n", + text); + break; + + default: + /* unreachable */ + abort(); + } + + if (r == -1) + return 0; + } + + return 1; +} blob - /dev/null blob + e94d85197fd198edd3e5749080f110f489b2931b (mode 644) --- /dev/null +++ parser/parser_textpatch.c @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * A streaming text/x-patch parser + */ + +#include +#include + +#include "telescope.h" +#include "parser.h" +#include "utils.h" + +static int tpatch_parse(struct parser *, const char *, size_t); +static int tpatch_emit_line(struct parser *, const char *, size_t); +static int tpatch_foreach_line(struct parser *, const char *, size_t); +static int tpatch_free(struct parser *); + +void +textpatch_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "text/x-patch"; + p->parse = &tpatch_parse; + p->free = &tpatch_free; + + p->flags = PARSER_IN_PATCH_HDR; + + TAILQ_INIT(&p->head); +} + +static int +tpatch_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, tpatch_foreach_line); +} + +static int +tpatch_emit_line(struct parser *p, const char *line, size_t linelen) +{ + struct line *l; + + if ((l = calloc(1, sizeof(*l))) == NULL) + return 0; + + if (p->flags & PARSER_IN_PATCH_HDR) + l->type = LINE_PATCH_HDR; + else + l->type = LINE_PATCH; + + if (linelen != 0) { + if ((l->line = calloc(1, linelen+1)) == NULL) { + free(l); + return 0; + } + + memcpy(l->line, line, linelen); + + if (!(p->flags & PARSER_IN_PATCH_HDR)) + switch (*l->line) { + case '+': + l->type = LINE_PATCH_ADD; + break; + case '-': + l->type = LINE_PATCH_DEL; + break; + case '@': + l->type = LINE_PATCH_HUNK_HDR; + break; + case ' ': + /* context lines */ + break; + default: + /* + * A single patch file can have more + * than one "header" if touches more + * than one file. + */ + l->type = LINE_PATCH_HDR; + p->flags |= PARSER_IN_PATCH_HDR; + break; + } + + if (has_prefix(l->line, "+++")) + p->flags &= ~PARSER_IN_PATCH_HDR; + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; +} + +static int +tpatch_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + return tpatch_emit_line(p, line, linelen); +} + +static int +tpatch_free(struct parser *p) +{ + if (p->len != 0) + return tpatch_emit_line(p, p->buf, p->len); + return 1; +} blob - /dev/null blob + 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644) --- /dev/null +++ parser/parser_textplain.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * A streaming text/plain "parser." + */ + +#include +#include + +#include "telescope.h" +#include "parser.h" + +static int textplain_parse(struct parser*, const char*, size_t); +static int textplain_foreach_line(struct parser*, const char*, size_t); +static int textplain_free(struct parser*); + +static inline int +emit_line(struct parser *p, const char *line, size_t len) +{ + struct line *l; + + if ((l = calloc(1, sizeof(*l))) == NULL) + return 0; + + l->type = LINE_TEXT; + + if (len != 0) { + if ((l->line = calloc(1, len+1)) == NULL) { + free(l); + return 0; + } + + memcpy(l->line, line, len); + } + + TAILQ_INSERT_TAIL(&p->head, l, lines); + + return 1; +} + +void +textplain_initparser(struct parser *p) +{ + memset(p, 0, sizeof(*p)); + + p->name = "text/plain"; + p->parse = &textplain_parse; + p->free = &textplain_free; + + TAILQ_INIT(&p->head); +} + +static int +textplain_parse(struct parser *p, const char *buf, size_t size) +{ + return parser_foreach_line(p, buf, size, textplain_foreach_line); +} + +static int +textplain_foreach_line(struct parser *p, const char *line, size_t linelen) +{ + return emit_line(p, line, linelen); +} + +static int +textplain_free(struct parser *p) +{ + if (p->len != 0) + return emit_line(p, p->buf, p->len); + return 1; +} blob - e23b368b5089ed079d433d6d5f8803e3f2480373 (mode 644) blob + /dev/null --- parser_gemtext.c +++ /dev/null @@ -1,488 +0,0 @@ -/* - * Copyright (c) 2021, 2022 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * A streaming gemtext parser. - * - * TODO: - * - handle NULs - * - UTF8 - */ - -#include "compat.h" - -#include -#include -#include - -#include "defaults.h" -#include "parser.h" -#include "utf8.h" - -static int gemtext_parse(struct parser *, const char *, size_t); -static int gemtext_foreach_line(struct parser *, const char *, size_t); -static int gemtext_free(struct parser *); -static int gemtext_serialize(struct parser *, struct evbuffer *); - -static int parse_text(struct parser*, enum line_type, const char*, size_t); -static int parse_link(struct parser*, enum line_type, const char*, size_t); -static int parse_title(struct parser*, enum line_type, const char*, size_t); -static int parse_item(struct parser*, enum line_type, const char*, size_t); -static int parse_quote(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_start(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_end(struct parser*, enum line_type, const char*, size_t); -static void search_title(struct parser*, enum line_type); - -typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t); - -static parselinefn *parsers[] = { - [LINE_TEXT] = parse_text, - [LINE_LINK] = parse_link, - [LINE_TITLE_1] = parse_title, - [LINE_TITLE_2] = parse_title, - [LINE_TITLE_3] = parse_title, - [LINE_ITEM] = parse_item, - [LINE_QUOTE] = parse_quote, - [LINE_PRE_START] = parse_pre_start, - [LINE_PRE_CONTENT] = parse_pre_cnt, - [LINE_PRE_END] = parse_pre_end, -}; - -void -gemtext_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "text/gemini"; - p->parse = &gemtext_parse; - p->free = &gemtext_free; - p->serialize = &gemtext_serialize; - - TAILQ_INIT(&p->head); -} - -static inline int -emit_line(struct parser *p, enum line_type type, char *line, char *alt) -{ - struct line *l; - - if ((l = calloc(1, sizeof(*l))) == NULL) - return 0; - - l->type = type; - l->line = line; - l->alt = alt; - - switch (l->type) { - case LINE_PRE_START: - case LINE_PRE_END: - if (hide_pre_context) - l->flags = L_HIDDEN; - if (l->type == LINE_PRE_END && - hide_pre_closing_line) - l->flags = L_HIDDEN; - break; - case LINE_PRE_CONTENT: - if (hide_pre_blocks) - l->flags = L_HIDDEN; - break; - case LINE_LINK: - if (emojify_link && - !emojied_line(line, (const char **)&l->data)) - l->data = NULL; - break; - default: - break; - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; -} - -static int -parse_text(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_link(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l, *u; - const char *url_start; - - if (len <= 2) - return emit_line(p, LINE_TEXT, NULL, NULL); - buf += 2; - len -= 2; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, LINE_TEXT, NULL, NULL); - - url_start = buf; - while (len > 0 && !isspace(buf[0])) { - buf++; - len--; - } - - if ((u = calloc(1, buf - url_start + 1)) == NULL) - return 0; - memcpy(u, url_start, buf - url_start); - - if (len == 0) - goto nolabel; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - goto nolabel; - - if ((l = calloc(1, len + 1)) == NULL) - return 0; - - memcpy(l, buf, len); - return emit_line(p, t, l, u); - -nolabel: - if ((l = strdup(u)) == NULL) - return 0; - return emit_line(p, t, l, u); -} - -static int -parse_title(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - switch (t) { - case LINE_TITLE_1: - if (len <= 1) - return emit_line(p, t, NULL, NULL); - buf++; - len--; - break; - case LINE_TITLE_2: - if (len <= 2) - return emit_line(p, t, NULL, NULL); - buf += 2; - len -= 2; - break; - case LINE_TITLE_3: - if (len <= 3) - return emit_line(p, t, NULL, NULL); - buf += 3; - len -= 3; - break; - default: - /* unreachable */ - abort(); - } - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if (t == LINE_TITLE_1 && *p->title == '\0') - strncpy(p->title, buf, MIN(sizeof(p->title)-1, len)); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_item(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 1) - return emit_line(p, t, NULL, NULL); - - buf++; - len--; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 1) - return emit_line(p, t, NULL, NULL); - - buf++; - len--; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len <= 3) - return emit_line(p, t, NULL, NULL); - - buf += 3; - len -= 3; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - return emit_line(p, t, NULL, NULL); -} - -static inline enum line_type -detect_line_type(const char *buf, size_t len, int in_pre) -{ - if (in_pre) { - if (len >= 3 && - buf[0] == '`' && buf[1] == '`' && buf[2] == '`') - return LINE_PRE_END; - else - return LINE_PRE_CONTENT; - } - - if (len == 0) - return LINE_TEXT; - - switch (*buf) { - case '*': - if (len > 1 && buf[1] == ' ') - return LINE_ITEM; - break; - case '>': return LINE_QUOTE; - case '=': - if (len >= 1 && buf[1] == '>') - return LINE_LINK; - break; - case '#': - if (len == 1) - return LINE_TEXT; - if (buf[1] != '#') - return LINE_TITLE_1; - if (len == 2) - return LINE_TEXT; - if (buf[2] != '#') - return LINE_TITLE_2; - if (len == 3) - return LINE_TEXT; - return LINE_TITLE_3; - case '`': - if (len < 3) - return LINE_TEXT; - if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`') - return LINE_PRE_START; - break; - } - - return LINE_TEXT; -} - -static int -gemtext_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, gemtext_foreach_line); -} - -static int -gemtext_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - enum line_type t; - - t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE); - if (t == LINE_PRE_START) - p->flags ^= PARSER_IN_PRE; - if (t == LINE_PRE_END) - p->flags ^= PARSER_IN_PRE; - return parsers[t](p, t, line, linelen); -} - -static int -gemtext_free(struct parser *p) -{ - enum line_type t; - - /* flush the buffer */ - if (p->len != 0) { - t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE); - if (!parsers[t](p, t, p->buf, p->len)) - return 0; - if ((p->flags & PARSER_IN_PRE) && - !emit_line(p, LINE_PRE_END, NULL, NULL)) - return 0; - } - - free(p->buf); - - /* - * use the first level 2 or 3 header as page title if none - * found yet. - */ - if (*p->title == '\0') - search_title(p, LINE_TITLE_2); - if (*p->title == '\0') - search_title(p, LINE_TITLE_3); - - return 1; -} - -static void -search_title(struct parser *p, enum line_type level) -{ - struct line *l; - - TAILQ_FOREACH(l, &p->head, lines) { - if (l->type == level) { - if (l->line == NULL) - continue; - strlcpy(p->title, l->line, sizeof(p->title)); - break; - } - } -} - -static const char *gemtext_prefixes[] = { - [LINE_TEXT] = "", - [LINE_TITLE_1] = "# ", - [LINE_TITLE_2] = "## ", - [LINE_TITLE_3] = "### ", - [LINE_ITEM] = "* ", - [LINE_QUOTE] = "> ", - [LINE_PRE_START] = "``` ", - [LINE_PRE_CONTENT] = "", - [LINE_PRE_END] = "```", -}; - -static int -gemtext_serialize(struct parser *p, struct evbuffer *evb) -{ - struct line *line; - const char *text; - const char *alt; - int r; - - TAILQ_FOREACH(line, &p->head, lines) { - if ((text = line->line) == NULL) - text = ""; - - if ((alt = line->alt) == NULL) - alt = ""; - - switch (line->type) { - case LINE_TEXT: - case LINE_TITLE_1: - case LINE_TITLE_2: - case LINE_TITLE_3: - case LINE_ITEM: - case LINE_QUOTE: - case LINE_PRE_START: - case LINE_PRE_CONTENT: - case LINE_PRE_END: - r = evbuffer_add_printf(evb, "%s%s\n", - gemtext_prefixes[line->type], text); - break; - - case LINE_LINK: - r = evbuffer_add_printf(evb, "=> %s %s\n", - alt, text); - break; - - default: - /* not reached */ - abort(); - } - - if (r == -1) - return 0; - } - - return 1; -} blob - 27a3142a5eb4ef23844d4074b1022269612c4ffa (mode 644) blob + /dev/null --- parser_gophermap.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include "compat.h" - -#include -#include -#include - -#include "parser.h" -#include "utils.h" - -#define evap evbuffer_add_printf - -struct gm_selector { - char type; - const char *ds; - const char *selector; - const char *addr; - const char *port; -}; - -static void gm_parse_selector(char *, struct gm_selector *); - -static int gm_parse(struct parser *, const char *, size_t); -static int gm_foreach_line(struct parser *, const char *, size_t); -static int gm_free(struct parser *); -static int gm_serialize(struct parser *, struct evbuffer *); - -void -gophermap_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "gophermap"; - p->parse = &gm_parse; - p->free = &gm_free; - p->serialize = &gm_serialize; - - TAILQ_INIT(&p->head); -} - -static void -gm_parse_selector(char *line, struct gm_selector *s) -{ - s->type = *line++; - s->ds = line; - s->selector = ""; - s->addr = ""; - s->port = ""; - - if ((line = strchr(line, '\t')) == NULL) - return; - *line++ = '\0'; - s->selector = line; - - if ((line = strchr(line, '\t')) == NULL) - return; - *line++ = '\0'; - s->addr = line; - - if ((line = strchr(line, '\t')) == NULL) - return; - *line++ = '\0'; - s->port = line; -} - -static int -gm_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, gm_foreach_line); -} - -static inline int -emit_line(struct parser *p, enum line_type type, struct gm_selector *s) -{ - struct line *l; - char buf[LINE_MAX], b[2] = {0}; - - if ((l = calloc(1, sizeof(*l))) == NULL) - goto err; - - if ((l->line = strdup(s->ds)) == NULL) - goto err; - - switch (l->type = type) { - case LINE_LINK: - if (s->type == 'h' && has_prefix(s->selector, "URL:")) { - strlcpy(buf, s->selector+4, sizeof(buf)); - } else { - strlcpy(buf, "gopher://", sizeof(buf)); - strlcat(buf, s->addr, sizeof(buf)); - strlcat(buf, ":", sizeof(buf)); - strlcat(buf, s->port, sizeof(buf)); - strlcat(buf, "/", sizeof(buf)); - b[0] = s->type; - strlcat(buf, b, sizeof(buf)); - if (*s->selector != '/') - strlcat(buf, "/", sizeof(buf)); - strlcat(buf, s->selector, sizeof(buf)); - } - - if ((l->alt = strdup(buf)) == NULL) - goto err; - break; - - default: - break; - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; - -err: - if (l != NULL) { - free(l->line); - free(l->alt); - free(l); - } - return 0; -} - -static int -gm_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - char buf[LINE_MAX] = {0}; - struct gm_selector s = {0}; - - memcpy(buf, line, MIN(sizeof(buf)-1, linelen)); - gm_parse_selector(buf, &s); - - switch (s.type) { - case '0': /* text file */ - case '1': /* gopher submenu */ - case '2': /* CCSO nameserver */ - case '4': /* binhex-encoded file */ - case '5': /* DOS file */ - case '6': /* uuencoded file */ - case '7': /* full-text search */ - case '8': /* telnet */ - case '9': /* binary file */ - case '+': /* mirror or alternate server */ - case 'g': /* gif */ - case 'I': /* image */ - case 'T': /* telnet 3270 */ - case ':': /* gopher+: bitmap image */ - case ';': /* gopher+: movie file */ - case 'd': /* non-canonical: doc */ - case 'h': /* non-canonical: html file */ - case 's': /* non-canonical: sound file */ - if (!emit_line(p, LINE_LINK, &s)) - return 0; - break; - - break; - - case 'i': /* non-canonical: message */ - if (!emit_line(p, LINE_TEXT, &s)) - return 0; - break; - - case '3': /* error code */ - if (!emit_line(p, LINE_QUOTE, &s)) - return 0; - break; - } - - return 1; -} - -static int -gm_free(struct parser *p) -{ - /* flush the buffer */ - if (p->len != 0) - gm_foreach_line(p, p->buf, p->len); - - free(p->buf); - - return 1; -} - -static inline const char * -gopher_skip_selector(const char *path, int *ret_type) -{ - *ret_type = 0; - - if (!strcmp(path, "/") || *path == '\0') { - *ret_type = '1'; - return path; - } - - if (*path != '/') - return path; - path++; - - switch (*ret_type = *path) { - case '0': - case '1': - case '7': - break; - - default: - *ret_type = 0; - path -= 1; - return path; - } - - return ++path; -} - -static int -serialize_link(struct line *line, const char *text, struct evbuffer *evb) -{ - size_t portlen = 0; - int type; - const char *uri, *endhost, *port, *path, *colon; - - if ((uri = line->alt) == NULL) - return -1; - - if (!has_prefix(uri, "gopher://")) - return evap(evb, "h%s\tURL:%s\terror.host\t1\n", - text, line->alt); - - uri += 9; /* skip gopher:// */ - - path = strchr(uri, '/'); - colon = strchr(uri, ':'); - - if (path != NULL && colon > path) - colon = NULL; - - if ((endhost = colon) == NULL && - (endhost = path) == NULL) - endhost = strchr(path, '\0'); - - if (colon != NULL) { - for (port = colon+1; *port && *port != '/'; ++port) - ++portlen; - port = colon+1; - } else { - port = "70"; - portlen = 2; - } - - if (path == NULL) { - type = '1'; - path = ""; - } else - path = gopher_skip_selector(path, &type); - - return evap(evb, "%c%s\t%s\t%.*s\t%.*s\n", type, text, - path, (int)(endhost - uri), uri, (int)portlen, port); -} - -static int -gm_serialize(struct parser *p, struct evbuffer *evb) -{ - struct line *line; - const char *text; - int r; - - TAILQ_FOREACH(line, &p->head, lines) { - if ((text = line->line) == NULL) - text = ""; - - switch (line->type) { - case LINE_LINK: - r = serialize_link(line, text, evb); - break; - - case LINE_TEXT: - r = evap(evb, "i%s\t\terror.host\t1\n", - text); - break; - - case LINE_QUOTE: - r = evap(evb, "3%s\t\terror.host\t1\n", - text); - break; - - default: - /* unreachable */ - abort(); - } - - if (r == -1) - return 0; - } - - return 1; -} blob - e94d85197fd198edd3e5749080f110f489b2931b (mode 644) blob + /dev/null --- parser_textpatch.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * A streaming text/x-patch parser - */ - -#include -#include - -#include "telescope.h" -#include "parser.h" -#include "utils.h" - -static int tpatch_parse(struct parser *, const char *, size_t); -static int tpatch_emit_line(struct parser *, const char *, size_t); -static int tpatch_foreach_line(struct parser *, const char *, size_t); -static int tpatch_free(struct parser *); - -void -textpatch_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "text/x-patch"; - p->parse = &tpatch_parse; - p->free = &tpatch_free; - - p->flags = PARSER_IN_PATCH_HDR; - - TAILQ_INIT(&p->head); -} - -static int -tpatch_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, tpatch_foreach_line); -} - -static int -tpatch_emit_line(struct parser *p, const char *line, size_t linelen) -{ - struct line *l; - - if ((l = calloc(1, sizeof(*l))) == NULL) - return 0; - - if (p->flags & PARSER_IN_PATCH_HDR) - l->type = LINE_PATCH_HDR; - else - l->type = LINE_PATCH; - - if (linelen != 0) { - if ((l->line = calloc(1, linelen+1)) == NULL) { - free(l); - return 0; - } - - memcpy(l->line, line, linelen); - - if (!(p->flags & PARSER_IN_PATCH_HDR)) - switch (*l->line) { - case '+': - l->type = LINE_PATCH_ADD; - break; - case '-': - l->type = LINE_PATCH_DEL; - break; - case '@': - l->type = LINE_PATCH_HUNK_HDR; - break; - case ' ': - /* context lines */ - break; - default: - /* - * A single patch file can have more - * than one "header" if touches more - * than one file. - */ - l->type = LINE_PATCH_HDR; - p->flags |= PARSER_IN_PATCH_HDR; - break; - } - - if (has_prefix(l->line, "+++")) - p->flags &= ~PARSER_IN_PATCH_HDR; - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; -} - -static int -tpatch_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - return tpatch_emit_line(p, line, linelen); -} - -static int -tpatch_free(struct parser *p) -{ - if (p->len != 0) - return tpatch_emit_line(p, p->buf, p->len); - return 1; -} blob - 325e06eff619f6c4962579e5ca7b96d7e5118adc (mode 644) blob + /dev/null --- parser_textplain.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2021 Omar Polo - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* - * A streaming text/plain "parser." - */ - -#include -#include - -#include "telescope.h" -#include "parser.h" - -static int textplain_parse(struct parser*, const char*, size_t); -static int textplain_foreach_line(struct parser*, const char*, size_t); -static int textplain_free(struct parser*); - -static inline int -emit_line(struct parser *p, const char *line, size_t len) -{ - struct line *l; - - if ((l = calloc(1, sizeof(*l))) == NULL) - return 0; - - l->type = LINE_TEXT; - - if (len != 0) { - if ((l->line = calloc(1, len+1)) == NULL) { - free(l); - return 0; - } - - memcpy(l->line, line, len); - } - - TAILQ_INSERT_TAIL(&p->head, l, lines); - - return 1; -} - -void -textplain_initparser(struct parser *p) -{ - memset(p, 0, sizeof(*p)); - - p->name = "text/plain"; - p->parse = &textplain_parse; - p->free = &textplain_free; - - TAILQ_INIT(&p->head); -} - -static int -textplain_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, textplain_foreach_line); -} - -static int -textplain_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - return emit_line(p, line, linelen); -} - -static int -textplain_free(struct parser *p) -{ - if (p->len != 0) - return emit_line(p, p->buf, p->len); - return 1; -} blob - c2104ba3bdadf4fd5a481067d7b8b15a10710b7b blob + e08198e4e7f2f1c4ed4d0ab0727eefbd9d7cd6af --- test/Makefile.am +++ test/Makefile.am @@ -2,9 +2,9 @@ check_PROGRAMS = gmparser gmparser_SOURCES = gmparser.c \ $(top_srcdir)/compat.h \ - $(top_srcdir)/parser.c \ $(top_srcdir)/parser.h \ - $(top_srcdir)/parser_gophermap.c \ + $(top_srcdir)/parser/parser.c \ + $(top_srcdir)/parser/parser_gophermap.c \ $(top_srcdir)/utils.c gmparser_CFLAGS = -I$(top_srcdir)