2 * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 * A streaming gemtext parser.
/*
 * Forward declarations.
 *
 * gemtext_parse, gemtext_free and gemtext_serialize are the callbacks
 * that gemtext_initparser stores in struct parser; gemtext_foreach_line
 * is the callback gemtext_parse hands to parser_foreach_line.
 *
 * The parse_* functions are the per-line-type handlers.  They all have
 * the parselinefn signature and are reached through the parsers table.
 *
 * search_title is called from gemtext_free to fill p->title from a
 * lower level heading when the title is still empty.
 */
35 static int gemtext_parse(struct parser *, const char *, size_t);
36 static int gemtext_foreach_line(struct parser *, const char *, size_t);
37 static int gemtext_free(struct parser *);
38 static int gemtext_serialize(struct parser *, struct evbuffer *);
40 static int parse_text(struct parser*, enum line_type, const char*, size_t);
41 static int parse_link(struct parser*, enum line_type, const char*, size_t);
42 static int parse_title(struct parser*, enum line_type, const char*, size_t);
43 static int parse_item(struct parser*, enum line_type, const char*, size_t);
44 static int parse_quote(struct parser*, enum line_type, const char*, size_t);
45 static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
46 static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
47 static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
48 static void search_title(struct parser*, enum line_type);
/*
 * Signature shared by every per-line handler.  The dispatch sites call
 * it as parsers[t](p, t, line, linelen): the parser, the detected line
 * type, the bytes of the line and their length.
 */
50 typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
/*
 * Dispatch table indexed by enum line_type.  The three title levels
 * share parse_title, which receives the actual level as its second
 * argument.
 */
52 static parselinefn *parsers[] = {
53 [LINE_TEXT] = parse_text,
54 [LINE_LINK] = parse_link,
55 [LINE_TITLE_1] = parse_title,
56 [LINE_TITLE_2] = parse_title,
57 [LINE_TITLE_3] = parse_title,
58 [LINE_ITEM] = parse_item,
59 [LINE_QUOTE] = parse_quote,
60 [LINE_PRE_START] = parse_pre_start,
61 [LINE_PRE_CONTENT] = parse_pre_cnt,
62 [LINE_PRE_END] = parse_pre_end,
/*
 * Set up *p as a text/gemini parser.
 *
 * The whole structure is zeroed first, so any previous content of *p
 * is discarded, then the name and the parse, free and serialize
 * callbacks are installed.
 */
66 gemtext_initparser(struct parser *p)
68 memset(p, 0, sizeof(*p));
70 p->name = "text/gemini";
71 p->parse = &gemtext_parse;
72 p->free = &gemtext_free;
73 p->serialize = &gemtext_serialize;
/*
 * Allocate a zeroed line record and append it to the tail of p->head.
 *
 * type is the kind of line; line and alt are the two strings the
 * callers pass along (text and, for links, the URL).
 * NOTE(review): presumably the record takes ownership of line and alt
 * and the function returns zero on failure (callers test !emit_line);
 * confirm against the parts of the body not shown here.
 *
 * The visible lines also show special handling for LINE_PRE_END when
 * hide_pre_closing_line is set, a case for LINE_PRE_CONTENT, and a
 * call to emojied_line that may set l->data.
 */
79 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
83 if ((l = calloc(1, sizeof(*l))) == NULL)
95 if (l->type == LINE_PRE_END &&
96 hide_pre_closing_line)
99 case LINE_PRE_CONTENT:
105 !emojied_line(line, (const char **)&l->data))
112 TAILQ_INSERT_TAIL(&p->head, l, lines);
/*
 * Handler for plain text lines.
 *
 * A zeroed buffer of len+1 bytes is allocated, leaving room for the
 * terminating NUL, and passed to emit_line as the text with no alt.
 * NOTE(review): the copy of buf into the buffer is presumably on a
 * line not shown here.
 */
118 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
122 if ((l = calloc(1, len+1)) == NULL)
125 return emit_line(p, t, l, NULL);
/*
 * Handler for link lines.
 *
 * Leading whitespace is skipped, then the URL is taken to be the run
 * of non-whitespace bytes starting at url_start.  The URL is copied
 * into a fresh NUL-terminated buffer u and passed to emit_line as the
 * alt argument.  After skipping the whitespace that follows the URL,
 * one path allocates a len+1 byte buffer for the label; the other
 * duplicates the URL and uses it as the label.
 * NOTE(review): presumably the first path is taken when some text
 * remains after the URL; the condition is on a line not shown.
 *
 * Two early exits emit an empty LINE_TEXT record instead of a link;
 * their conditions are on lines not shown here.
 *
 * NOTE(review): isspace is called with a plain char.  Where char is
 * signed, bytes above 0x7f become negative, which is undefined for
 * the ctype functions; a cast to unsigned char would avoid that.
 */
129 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
132 const char *url_start;
135 return emit_line(p, LINE_TEXT, NULL, NULL);
139 while (len > 0 && isspace(buf[0])) {
145 return emit_line(p, LINE_TEXT, NULL, NULL);
148 while (len > 0 && !isspace(buf[0])) {
153 if ((u = calloc(1, buf - url_start + 1)) == NULL)
155 memcpy(u, url_start, buf - url_start);
160 while (len > 0 && isspace(buf[0])) {
168 if ((l = calloc(1, len + 1)) == NULL)
172 return emit_line(p, t, l, u);
175 if ((l = strdup(u)) == NULL)
177 return emit_line(p, t, l, u);
/*
 * Handler for the three heading levels; t tells which one.
 *
 * Several early exits emit a heading record with no text; their
 * conditions are on lines not shown here.  Otherwise whitespace is
 * skipped and the remaining len bytes become the heading text, stored
 * in a zeroed len+1 byte buffer.
 *
 * The first level 1 heading seen while p->title is still empty also
 * becomes the page title.  At most sizeof(p->title)-1 bytes are
 * copied, so strncpy never writes the last byte of p->title.
 * NOTE(review): this assumes title is an array member that was zeroed
 * beforehand (gemtext_initparser zeroes the whole struct), which is
 * what keeps the copy NUL-terminated; confirm in the struct
 * definition.
 *
 * NOTE(review): isspace is called with a plain char; see the ctype
 * rules about negative arguments.
 */
181 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
188 return emit_line(p, t, NULL, NULL);
194 return emit_line(p, t, NULL, NULL);
200 return emit_line(p, t, NULL, NULL);
209 while (len > 0 && isspace(buf[0])) {
215 return emit_line(p, t, NULL, NULL);
217 if (t == LINE_TITLE_1 && *p->title == '\0')
218 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
220 if ((l = calloc(1, len+1)) == NULL)
223 return emit_line(p, t, l, NULL);
/*
 * Handler for list item lines.
 *
 * Whitespace is skipped and the remaining len bytes are stored in a
 * zeroed len+1 byte buffer that is emitted as the item text.  Two
 * early exits emit an item with no text; their conditions are on
 * lines not shown here.
 *
 * NOTE(review): isspace is called with a plain char; see the ctype
 * rules about negative arguments.
 */
227 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
232 return emit_line(p, t, NULL, NULL);
237 while (len > 0 && isspace(buf[0])) {
243 return emit_line(p, t, NULL, NULL);
245 if ((l = calloc(1, len+1)) == NULL)
248 return emit_line(p, t, l, NULL);
/*
 * Handler for quote lines.
 *
 * Whitespace is skipped and the remaining len bytes are stored in a
 * zeroed len+1 byte buffer that is emitted as the quote text.  Two
 * early exits emit a quote with no text; their conditions are on
 * lines not shown here.
 *
 * NOTE(review): isspace is called with a plain char; see the ctype
 * rules about negative arguments.
 */
252 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
257 return emit_line(p, t, NULL, NULL);
262 while (len > 0 && isspace(buf[0])) {
268 return emit_line(p, t, NULL, NULL);
270 if ((l = calloc(1, len+1)) == NULL)
273 return emit_line(p, t, l, NULL);
/*
 * Handler for the line that opens a preformatted block.
 *
 * Whitespace is skipped and the remaining len bytes are stored in a
 * zeroed len+1 byte buffer that is emitted as the text of the
 * LINE_PRE_START record.  Two early exits emit the record with no
 * text; their conditions are on lines not shown here.
 *
 * NOTE(review): isspace is called with a plain char; see the ctype
 * rules about negative arguments.
 */
277 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
282 return emit_line(p, t, NULL, NULL);
287 while (len > 0 && isspace(buf[0])) {
293 return emit_line(p, t, NULL, NULL);
295 if ((l = calloc(1, len+1)) == NULL)
299 return emit_line(p, t, l, NULL);
/*
 * Handler for lines inside a preformatted block.
 *
 * Unlike the other handlers, no whitespace skipping is visible here:
 * the line is stored in a zeroed len+1 byte buffer and emitted.  One
 * early exit emits a record with no text; its condition is on a line
 * not shown here.
 */
303 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
308 return emit_line(p, t, NULL, NULL);
310 if ((l = calloc(1, len+1)) == NULL)
313 return emit_line(p, t, l, NULL);
/*
 * Handler for the line that closes a preformatted block: emits a
 * record of type t carrying neither text nor alt.  buf and len are
 * not used by the visible statement.
 */
317 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
319 return emit_line(p, t, NULL, NULL);
/*
 * Classify a line by looking at its first few bytes.
 *
 * buf holds len bytes.  in_pre is nonzero when the caller has
 * PARSER_IN_PRE set, that is while inside a preformatted block; in
 * that state a line is matched against three backticks and LINE_PRE_CONTENT
 * is among the results.  Outside of it the visible cases include
 * quotes, a prefix followed by a space, a prefix followed by a
 * greater-than sign, and three backticks for LINE_PRE_START.
 *
 * NOTE(review): both backtick tests read buf[1] and buf[2]; the
 * length guard for them is presumably on the lines not shown here.
 */
322 static inline enum line_type
323 detect_line_type(const char *buf, size_t len, int in_pre)
327 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
330 return LINE_PRE_CONTENT;
338 if (len > 1 && buf[1] == ' ')
341 case '>': return LINE_QUOTE;
/*
 * NOTE(review): this guard accepts len == 1 yet reads buf[1], one
 * byte past the line; the test two statements above uses len > 1 for
 * the same access.  Likely meant to be len > 1; confirm.
 */
343 if (len >= 1 && buf[1] == '>')
361 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
362 return LINE_PRE_START;
/*
 * parse callback: hand a chunk of size bytes to parser_foreach_line
 * with gemtext_foreach_line as the callback, and return its result.
 * NOTE(review): presumably the helper calls back once per complete
 * line and buffers the rest in p->buf; confirm in its definition.
 */
370 gemtext_parse(struct parser *p, const char *buf, size_t size)
372 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
/*
 * Process one line: detect its type given the current PARSER_IN_PRE
 * state, toggle that flag when the line opens or closes a
 * preformatted block, then dispatch to the handler for the type and
 * return its result.
 */
376 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
380 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
381 if (t == LINE_PRE_START)
382 p->flags ^= PARSER_IN_PRE;
383 if (t == LINE_PRE_END)
384 p->flags ^= PARSER_IN_PRE;
385 return parsers[t](p, t, line, linelen);
/*
 * free callback, run when the input is over.
 *
 * Whatever is left in p->buf (p->len bytes) is classified and parsed
 * like a regular line.  If the parser is still inside a preformatted
 * block, a closing LINE_PRE_END record is emitted.  Finally, when no
 * title was found, the level 2 headings and then the level 3 headings
 * are searched for one.
 *
 * NOTE(review): unlike gemtext_foreach_line, the flush below does not
 * toggle PARSER_IN_PRE on the visible lines; check whether a final
 * unterminated line that opens or closes a block is handled as
 * intended.
 */
389 gemtext_free(struct parser *p)
393 /* flush the buffer */
395 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
396 if (!parsers[t](p, t, p->buf, p->len))
398 if ((p->flags & PARSER_IN_PRE) &&
399 !emit_line(p, LINE_PRE_END, NULL, NULL))
406 * use the first level 2 or 3 header as page title if none
409 if (*p->title == '\0')
410 search_title(p, LINE_TITLE_2);
411 if (*p->title == '\0')
412 search_title(p, LINE_TITLE_3);
/*
 * Walk the parsed lines in p->head looking for one whose type equals
 * level, and copy its text into p->title with strlcpy, which bounds
 * the copy to sizeof(p->title) and always NUL-terminates.
 * NOTE(review): the caller expects the first match to win; the
 * statement that stops the walk is on a line not shown here.
 */
418 search_title(struct parser *p, enum line_type level)
422 TAILQ_FOREACH(l, &p->head, lines) {
423 if (l->type == level) {
426 strlcpy(p->title, l->line, sizeof(p->title));
/*
 * Text written in front of a line when serializing, indexed by line
 * type; gemtext_serialize prints the prefix followed by the text.
 * Preformatted content has an empty prefix so it is written back
 * verbatim, and the opening fence carries a trailing space before its
 * text while the closing fence does not.
 */
432 static const char *gemtext_prefixes[] = {
434 [LINE_TITLE_1] = "# ",
435 [LINE_TITLE_2] = "## ",
436 [LINE_TITLE_3] = "### ",
439 [LINE_PRE_START] = "``` ",
440 [LINE_PRE_CONTENT] = "",
441 [LINE_PRE_END] = "```",
445 gemtext_serialize(struct parser *p, struct evbuffer *evb)
452 TAILQ_FOREACH(line, &p->head, lines) {
453 if ((text = line->line) == NULL)
456 if ((alt = line->alt) == NULL)
459 switch (line->type) {
467 case LINE_PRE_CONTENT:
469 r = evbuffer_add_printf(evb, "%s%s\n",
470 gemtext_prefixes[line->type], text);
474 r = evbuffer_add_printf(evb, "=> %s %s\n",