commit 069c9a92f36df9d307d734ffa8903035c7181129
from: Omar Polo
date: Wed Jan 19 23:56:36 2022 UTC

parsers: add serialize function

Add a parser_serialize function and an optional function `serialize'
for parsers.  The default implementation just prints the text line,
which is suitable for parser_textplain and parser_textpatch; gemtext
and gophermap use their own specific unparse functions.

This is intended for an incoming cache refactoring: we'll cache the
page serialized in the correct format, not once it has been translated
to gemtext.  This will allow in the future to provide commands such as
``save page to disk'' or interface with an on-disk cache.

commit - 5a21e0eb51554d72c1b6465c2b4afc8cdef5dcf1
commit + 069c9a92f36df9d307d734ffa8903035c7181129
blob - b89e63d8364e6a429b45b91d9389d6f1c3cc8b46
blob + 44bab64a89cb50c8229446662395b7912dc7e7f6
--- parser.c
+++ parser.c
@@ -82,6 +82,35 @@ parser_free(struct tab *tab)
 }
 
+/*
+ * Serialize the page of tab into evb.  If the parser provides a
+ * serialize callback its result is returned; otherwise every line is
+ * written as plain text, one per row, returning 1 on success and 0
+ * as soon as evbuffer_add_printf fails.
+ */
 int
+parser_serialize(struct tab *tab, struct evbuffer *evb)
+{
+	struct line	*line;
+	const char	*text;
+	int		 r;
+
+	if (tab->buffer.page.serialize != NULL)
+		return tab->buffer.page.serialize(&tab->buffer.page, evb);
+
+	/* a default implementation good enough for plain text */
+	TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		r = evbuffer_add_printf(evb, "%s\n", text);
+		if (r == -1)
+			return 0;
+	}
+
+	return 1;
+}
+
+int
 parser_append(struct parser *p, const char *buf, size_t len)
 {
 	size_t newlen;
blob - 63be7ce5d280b6dd51d837b3fc21ca0132203642
blob + b4cb238fd315805241fb22da61a10bbc6d54b1b7
--- parser.h
+++ parser.h
@@ -27,6 +27,7 @@ int load_page_from_str(struct tab *, const char *);
 void	 parser_init(struct tab *, parserfn);
 int	 parser_parse(struct tab *, const char *, size_t);
 int	 parser_free(struct tab *);
+int	 parser_serialize(struct tab *, struct evbuffer *);
 
 int	 parser_append(struct parser*, const char*, size_t);
 int	 parser_set_buf(struct parser*, const char*, size_t);
blob - e772078332f5f233f43f5e6af086619664ed3d8f
blob + e23b368b5089ed079d433d6d5f8803e3f2480373
--- parser_gemtext.c
+++ parser_gemtext.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Omar Polo
+ * Copyright (c) 2021, 2022 Omar Polo
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -32,9 +32,10 @@
 #include "parser.h"
 #include "utf8.h"
 
-static int	gemtext_parse(struct parser*, const char*, size_t);
-static int	gemtext_foreach_line(struct parser*, const char*, size_t);
-static int	gemtext_free(struct parser*);
+static int	gemtext_parse(struct parser *, const char *, size_t);
+static int	gemtext_foreach_line(struct parser *, const char *, size_t);
+static int	gemtext_free(struct parser *);
+static int	gemtext_serialize(struct parser *, struct evbuffer *);
 
 static int	parse_text(struct parser*, enum line_type, const char*, size_t);
 static int	parse_link(struct parser*, enum line_type, const char*, size_t);
@@ -69,6 +70,7 @@ gemtext_initparser(struct parser *p)
 	p->name = "text/gemini";
 	p->parse = &gemtext_parse;
 	p->free = &gemtext_free;
+	p->serialize = &gemtext_serialize;
 
 	TAILQ_INIT(&p->head);
 }
@@ -422,7 +424,72 @@ search_title(struct parser *p, enum line_type level)
 			if (l->line == NULL)
 				continue;
 			strlcpy(p->title, l->line, sizeof(p->title));
+			break;
+		}
+	}
+}
+
+/* Markup written in front of a line's text, indexed by line type. */
+static const char *gemtext_prefixes[] = {
+	[LINE_TEXT] = "",
+	[LINE_TITLE_1] = "# ",
+	[LINE_TITLE_2] = "## ",
+	[LINE_TITLE_3] = "### ",
+	[LINE_ITEM] = "* ",
+	[LINE_QUOTE] = "> ",
+	[LINE_PRE_START] = "``` ",
+	[LINE_PRE_CONTENT] = "",
+	[LINE_PRE_END] = "```",
+};
+
+/*
+ * Emit every line of the page as gemtext into evb: non-link lines get
+ * the prefix from gemtext_prefixes, links are written as
+ * "=> <alt> <text>".  Returns 1 on success, 0 if evbuffer_add_printf
+ * fails; aborts on an unexpected line type.
+ */
+static int
+gemtext_serialize(struct parser *p, struct evbuffer *evb)
+{
+	struct line	*line;
+	const char	*text;
+	const char	*alt;
+	int		 r;
+
+	TAILQ_FOREACH(line, &p->head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		if ((alt = line->alt) == NULL)
+			alt = "";
+
+		switch (line->type) {
+		case LINE_TEXT:
+		case LINE_TITLE_1:
+		case LINE_TITLE_2:
+		case LINE_TITLE_3:
+		case LINE_ITEM:
+		case LINE_QUOTE:
+		case LINE_PRE_START:
+		case LINE_PRE_CONTENT:
+		case LINE_PRE_END:
+			r = evbuffer_add_printf(evb, "%s%s\n",
+			    gemtext_prefixes[line->type], text);
 			break;
+
+		case LINE_LINK:
+			r = evbuffer_add_printf(evb, "=> %s %s\n",
+			    alt, text);
+			break;
+
+		default:
+			/* not reached */
+			abort();
 		}
+
+		if (r == -1)
+			return 0;
 	}
+
+	return 1;
 }
blob - bc51dcb440974643ff5fa60e8629c61fef3cc857
blob + 27a3142a5eb4ef23844d4074b1022269612c4ffa
--- parser_gophermap.c
+++ parser_gophermap.c
@@ -23,6 +23,8 @@
 #include "parser.h"
 #include "utils.h"
 
+#define evap evbuffer_add_printf
+
 struct gm_selector {
 	char		 type;
 	const char	*ds;
@@ -36,6 +38,7 @@ static void gm_parse_selector(char *, struct gm_select
 static int	gm_parse(struct parser *, const char *, size_t);
 static int	gm_foreach_line(struct parser *, const char *, size_t);
 static int	gm_free(struct parser *);
+static int	gm_serialize(struct parser *, struct evbuffer *);
 
 void
 gophermap_initparser(struct parser *p)
@@ -45,6 +48,7 @@ gophermap_initparser(struct parser *p)
 	p->name = "gophermap";
 	p->parse = &gm_parse;
 	p->free = &gm_free;
+	p->serialize = &gm_serialize;
 
 	TAILQ_INIT(&p->head);
 }
@@ -189,3 +193,140 @@ gm_free(struct parser *p)
 
 	return 1;
 }
+
+/*
+ * Split the leading "/<type>" from a gopher URI path.  On return
+ * *ret_type holds the item type ('0', '1' or '7'; '1' for an empty or
+ * "/" path) or 0 when no known type is present, and the returned
+ * pointer is just past the type character (path itself when nothing
+ * was skipped).
+ */
+static inline const char *
+gopher_skip_selector(const char *path, int *ret_type)
+{
+	*ret_type = 0;
+
+	if (!strcmp(path, "/") || *path == '\0') {
+		*ret_type = '1';
+		return path;
+	}
+
+	if (*path != '/')
+		return path;
+	path++;
+
+	switch (*ret_type = *path) {
+	case '0':
+	case '1':
+	case '7':
+		break;
+
+	default:
+		*ret_type = 0;
+		path -= 1;
+		return path;
+	}
+
+	return ++path;
+}
+
+/*
+ * Write one link line to evb as a gophermap row.  Non-gopher URLs
+ * become an 'h' item with a "URL:" selector; gopher:// URLs are split
+ * into host, port (default "70") and selector.  Returns what
+ * evbuffer_add_printf returns, or -1 if the line has no URL.
+ */
+static int
+serialize_link(struct line *line, const char *text, struct evbuffer *evb)
+{
+	size_t		 portlen = 0;
+	int		 type;
+	const char	*uri, *endhost, *port, *path, *colon;
+
+	if ((uri = line->alt) == NULL)
+		return -1;
+
+	if (!has_prefix(uri, "gopher://"))
+		return evap(evb, "h%s\tURL:%s\terror.host\t1\n",
+		    text, line->alt);
+
+	uri += 9; /* skip gopher:// */
+
+	path = strchr(uri, '/');
+	colon = strchr(uri, ':');
+
+	if (path != NULL && colon > path)
+		colon = NULL;
+
+	/* no port and no path: the host runs to the end of the string */
+	if ((endhost = colon) == NULL &&
+	    (endhost = path) == NULL)
+		endhost = strchr(uri, '\0');
+
+	if (colon != NULL) {
+		for (port = colon+1; *port && *port != '/'; ++port)
+			++portlen;
+		port = colon+1;
+	} else {
+		port = "70";
+		portlen = 2;
+	}
+
+	if (path == NULL) {
+		type = '1';
+		path = "";
+	} else
+		path = gopher_skip_selector(path, &type);
+
+	/*
+	 * NOTE(review): type is 0 when gopher_skip_selector finds no
+	 * known item type, so "%c" emits a NUL byte; confirm intended.
+	 */
+	return evap(evb, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
+	    path, (int)(endhost - uri), uri, (int)portlen, port);
+}
+
+/*
+ * Emit every line of the page as a gophermap row into evb: links via
+ * serialize_link, text as 'i' items and quotes as '3' items, both
+ * with an empty selector and "error.host" / "1" as host and port.
+ * Returns 1 on success, 0 on the first failed write; aborts on any
+ * other line type.
+ */
+static int
+gm_serialize(struct parser *p, struct evbuffer *evb)
+{
+	struct line	*line;
+	const char	*text;
+	int		 r;
+
+	TAILQ_FOREACH(line, &p->head, lines) {
+		if ((text = line->line) == NULL)
+			text = "";
+
+		switch (line->type) {
+		case LINE_LINK:
+			r = serialize_link(line, text, evb);
+			break;
+
+		case LINE_TEXT:
+			r = evap(evb, "i%s\t\terror.host\t1\n",
+			    text);
+			break;
+
+		case LINE_QUOTE:
+			r = evap(evb, "3%s\t\terror.host\t1\n",
+			    text);
+			break;
+
+		default:
+			/* unreachable */
+			abort();
+		}
+
+		if (r == -1)
+			return 0;
+	}
+
+	return 1;
+}
blob - a0c403b048f20cad632c8a14e9a2edd106e24969
blob + 0d382ec23caf170cc4fcefab743e0b2626029bea
--- telescope.h
+++ telescope.h
@@ -137,6 +137,7 @@ struct parser;
 
 typedef int	(*parsechunkfn)(struct parser*, const char*, size_t);
 typedef int	(*parserfreefn)(struct parser*);
+typedef int	(*parserserial)(struct parser*, struct evbuffer *);
 
 typedef void	(imsg_handlerfn)(struct imsg*, size_t);
 
@@ -154,6 +155,7 @@ struct parser {
 	void		(*init)(struct parser *);
 	parsechunkfn	 parse;
 	parserfreefn	 free;
+	parserserial	 serialize;
 
 	TAILQ_HEAD(, line)	 head;
 };