2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 * A streaming gemtext parser.
26 #include "telescope.h"
/*
 * Forward declarations of the file-local functions.  gemtext_parse and
 * gemtext_free are the callbacks installed by gemtext_initparser();
 * gemtext_foreach_line is the callback gemtext_parse hands to
 * parser_foreach_line().
 */
32 static int gemtext_parse(struct parser*, const char*, size_t);
33 static int gemtext_foreach_line(struct parser*, const char*, size_t);
34 static int gemtext_free(struct parser*);
/*
 * Per-line-type handlers referenced from the parsers[] table; they all
 * share the parselinefn signature (parser, line type, line bytes, length).
 */
36 static int parse_text(struct parser*, enum line_type, const char*, size_t);
37 static int parse_link(struct parser*, enum line_type, const char*, size_t);
38 static int parse_title(struct parser*, enum line_type, const char*, size_t);
39 static int parse_item(struct parser*, enum line_type, const char*, size_t);
40 static int parse_quote(struct parser*, enum line_type, const char*, size_t);
41 static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
42 static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
43 static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
/* Used by gemtext_free() to pick a fallback page title. */
44 static void search_title(struct parser*, enum line_type);
/* Signature of a per-line handler: parser, detected type, line bytes, length. */
46 typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
/*
 * Handler table indexed directly by an enum line_type value (see the
 * parsers[t](...) calls in this file).  The trailing comments name the
 * enumerator each slot is meant for.
 * NOTE(review): enum line_type is declared elsewhere -- confirm that its
 * declaration order matches the order of the entries here.
 */
48 static parselinefn *parsers[] = {
49 parse_text, /* LINE_TEXT */
50 parse_link, /* LINE_LINK */
51 parse_title, /* LINE_TITLE_1 */
52 parse_title, /* LINE_TITLE_2 */
53 parse_title, /* LINE_TITLE_3 */
54 parse_item, /* LINE_ITEM */
55 parse_quote, /* LINE_QUOTE */
56 parse_pre_start, /* LINE_PRE_START */
57 parse_pre_cnt, /* LINE_PRE_CONTENT */
58 parse_pre_end, /* LINE_PRE_END */
/*
 * Set up *p as a gemtext parser: zero the whole structure, set its name to
 * "text/gemini" and install gemtext_parse / gemtext_free as its callbacks.
 */
62 gemtext_initparser(struct parser *p)
/* Whatever *p held before is discarded; every field starts out as zero. */
64 memset(p, 0, sizeof(*p));
66 p->name = "text/gemini";
67 p->parse = &gemtext_parse;
68 p->free = &gemtext_free;
/*
 * Allocate a new line record and link it into the parser's queue of lines
 * (p->head).  The per-line handlers call this with the detected type plus
 * the text and alt strings they built, or with NULL for both when they have
 * nothing to store.
 */
72 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
/* calloc() hands back a zero-filled record. */
76 if ((l = calloc(1, sizeof(*l))) == NULL)
89 case LINE_PRE_CONTENT:
/*
 * NOTE(review): TAILQ_INSERT_TAIL is also valid on an empty queue, so the
 * TAILQ_EMPTY special case looks redundant -- confirm before simplifying.
 */
98 if (TAILQ_EMPTY(&p->head))
99 TAILQ_INSERT_HEAD(&p->head, l, lines);
101 TAILQ_INSERT_TAIL(&p->head, l, lines);
/*
 * Handler for LINE_TEXT (see parsers[]): allocates a buffer for the line and
 * passes it to emit_line() as the line text, with no alt string.
 */
107 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
/* len+1 zeroed bytes: room for len bytes of text plus a terminating NUL. */
111 if ((l = calloc(1, len+1)) == NULL)
114 return emit_line(p, t, l, NULL);
/*
 * Handler for LINE_LINK (see parsers[]).  Skips whitespace, takes the
 * following run of non-whitespace bytes as the URL, skips the whitespace
 * after it and uses what is left as the label.  The line is emitted with the
 * label as its text and the URL as its alt string; on one path the label is
 * a strdup() of the URL instead.  The early returns emit the line with NULL
 * text and alt.
 *
 * The <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.  buf is plain char, so a byte >= 0x80
 * (any non-ASCII text) is negative where char is signed; each byte is
 * therefore cast to unsigned char before it is handed to isspace().
 */
118 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
121 const char *url_start;
124 return emit_line(p, t, NULL, NULL);
128 while (len > 0 && isspace((unsigned char)buf[0])) {
134 return emit_line(p, t, NULL, NULL);
137 while (len > 0 && !isspace((unsigned char)buf[0])) {
/* URL length (buf - url_start) plus one byte for the terminating NUL. */
142 if ((u = calloc(1, buf - url_start + 1)) == NULL)
144 memcpy(u, url_start, buf - url_start);
149 while (len > 0 && isspace((unsigned char)buf[0])) {
/* Remaining len bytes plus one byte for the terminating NUL. */
157 if ((l = calloc(1, len + 1)) == NULL)
161 return emit_line(p, t, l, u);
/* Here the label is an independent copy of the URL string. */
164 if ((l = strdup(u)) == NULL)
166 return emit_line(p, t, l, u);
/*
 * Handler for LINE_TITLE_1, LINE_TITLE_2 and LINE_TITLE_3 (see parsers[]).
 * Skips whitespace, records the first level-1 heading as the page title and
 * emits the remaining bytes as the line text.  The early returns emit the
 * line with NULL text and alt.
 *
 * The <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.  buf is plain char, so a byte >= 0x80
 * (any non-ASCII text) is negative where char is signed; the byte is
 * therefore cast to unsigned char before it is handed to isspace().
 */
170 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
177 return emit_line(p, t, NULL, NULL);
183 return emit_line(p, t, NULL, NULL);
189 return emit_line(p, t, NULL, NULL);
198 while (len > 0 && isspace((unsigned char)buf[0])) {
204 return emit_line(p, t, NULL, NULL);
/*
 * Only a level-1 heading sets the title, and only while the title is still
 * empty.  At most sizeof(p->title)-1 bytes are copied; strncpy() appends no
 * terminator when it copies exactly n bytes, so the result is NUL-terminated
 * only because the rest of p->title is already zero.
 * NOTE(review): gemtext_initparser() zeroes the whole parser -- confirm that
 * nothing else writes p->title before this point.
 */
206 if (t == LINE_TITLE_1 && *p->title == '\0')
207 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
/* len+1 zeroed bytes: room for len bytes of text plus a terminating NUL. */
209 if ((l = calloc(1, len+1)) == NULL)
212 return emit_line(p, t, l, NULL);
/*
 * Handler for LINE_ITEM (see parsers[]).  Skips whitespace and emits the
 * remaining bytes as the line text; the early returns emit the line with
 * NULL text and alt.
 *
 * The <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.  buf is plain char, so a byte >= 0x80
 * (any non-ASCII text) is negative where char is signed; the byte is
 * therefore cast to unsigned char before it is handed to isspace().
 */
216 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
221 return emit_line(p, t, NULL, NULL);
226 while (len > 0 && isspace((unsigned char)buf[0])) {
232 return emit_line(p, t, NULL, NULL);
/* len+1 zeroed bytes: room for len bytes of text plus a terminating NUL. */
234 if ((l = calloc(1, len+1)) == NULL)
237 return emit_line(p, t, l, NULL);
/*
 * Handler for LINE_QUOTE (see parsers[]).  Skips whitespace and emits the
 * remaining bytes as the line text; the early returns emit the line with
 * NULL text and alt.
 *
 * The <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.  buf is plain char, so a byte >= 0x80
 * (any non-ASCII text) is negative where char is signed; the byte is
 * therefore cast to unsigned char before it is handed to isspace().
 */
241 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
246 return emit_line(p, t, NULL, NULL);
251 while (len > 0 && isspace((unsigned char)buf[0])) {
257 return emit_line(p, t, NULL, NULL);
/* len+1 zeroed bytes: room for len bytes of text plus a terminating NUL. */
259 if ((l = calloc(1, len+1)) == NULL)
262 return emit_line(p, t, l, NULL);
/*
 * Handler for LINE_PRE_START (see parsers[]).  Skips whitespace and emits
 * the remaining bytes as the line text; the early returns emit the line with
 * NULL text and alt.
 *
 * The <ctype.h> classifiers are only defined for EOF and values
 * representable as unsigned char.  buf is plain char, so a byte >= 0x80
 * (any non-ASCII text) is negative where char is signed; the byte is
 * therefore cast to unsigned char before it is handed to isspace().
 */
266 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
271 return emit_line(p, t, NULL, NULL);
276 while (len > 0 && isspace((unsigned char)buf[0])) {
282 return emit_line(p, t, NULL, NULL);
/* len+1 zeroed bytes: room for len bytes of text plus a terminating NUL. */
284 if ((l = calloc(1, len+1)) == NULL)
288 return emit_line(p, t, l, NULL);
/*
 * Handler for LINE_PRE_CONTENT (see parsers[]).  No isspace() skipping is
 * visible here, unlike the other handlers.  One path emits the line with
 * NULL text and alt; the other allocates a buffer and emits it as the line
 * text.
 */
292 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
297 return emit_line(p, t, NULL, NULL);
/* len+1 zeroed bytes: room for len bytes of text plus a terminating NUL. */
299 if ((l = calloc(1, len+1)) == NULL)
302 return emit_line(p, t, l, NULL);
/*
 * Handler for LINE_PRE_END (see parsers[]): emits the line with neither text
 * nor alt; buf and len are not used by the visible code.
 */
306 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
308 return emit_line(p, t, NULL, NULL);
/*
 * Classify a line of len bytes starting at buf by its first characters.
 * in_pre is non-zero while the parser is inside a preformatted block (the
 * callers pass p->flags & PARSER_IN_PRE).  buf is not assumed to be
 * NUL-terminated: only bytes below len may be inspected.
 */
311 static inline enum line_type
312 detect_line_type(const char *buf, size_t len, int in_pre)
316 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
319 return LINE_PRE_CONTENT;
326 case '*': return LINE_ITEM;
327 case '>': return LINE_QUOTE;
/*
 * buf[1] belongs to the line only when len >= 2.  The previous guard
 * (len >= 1) let a one-byte line read the byte just past its end.
 */
329 if (len >= 2 && buf[1] == '>')
347 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
348 return LINE_PRE_START;
/*
 * Parse callback installed by gemtext_initparser().  Forwards the chunk
 * (buf, size) to parser_foreach_line() with gemtext_foreach_line as the
 * callback and returns its result.
 * NOTE(review): parser_foreach_line() is defined elsewhere; presumably it
 * splits the input into lines and buffers an incomplete trailing line --
 * confirm against its definition.
 */
356 gemtext_parse(struct parser *p, const char *buf, size_t size)
358 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
/*
 * Callback passed to parser_foreach_line(): classify the line, keep the
 * PARSER_IN_PRE flag in sync, then run the handler for the detected type
 * and return its result.
 */
362 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
366 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
/* Both the opening and the closing marker toggle the flag. */
367 if (t == LINE_PRE_START)
368 p->flags ^= PARSER_IN_PRE;
369 if (t == LINE_PRE_END)
370 p->flags ^= PARSER_IN_PRE;
/* parsers[] is indexed by the line type itself. */
371 return parsers[t](p, t, line, linelen);
/*
 * Free callback installed by gemtext_initparser().  Runs whatever is still
 * held in p->buf (p->len bytes) through the normal classify-and-dispatch
 * path, emits a LINE_PRE_END line if the parser is still inside a
 * preformatted block, and finally, if no title was recorded, looks for one
 * among the level-2 and then the level-3 headings via search_title().
 */
375 gemtext_free(struct parser *p)
379 /* flush the buffer */
381 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
382 if (!parsers[t](p, t, p->buf, p->len))
384 if ((p->flags & PARSER_IN_PRE) &&
385 !emit_line(p, LINE_PRE_END, NULL, NULL))
392 * use the first level 2 or 3 header as page title if none
395 if (*p->title == '\0')
396 search_title(p, LINE_TITLE_2);
397 if (*p->title == '\0')
398 search_title(p, LINE_TITLE_3);
/*
 * Walk the parser's queue of lines looking for one whose type equals level
 * and copy its text into p->title.
 */
404 search_title(struct parser *p, enum line_type level)
408 TAILQ_FOREACH(l, &p->head, lines) {
409 if (l->type == level) {
/* strlcpy() bounds the copy to the title buffer and NUL-terminates it. */
412 strlcpy(p->title, l->line, sizeof(p->title));