2 5e11c00c 2021-03-02 op * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 5e11c00c 2021-03-02 op * Permission to use, copy, modify, and distribute this software for any
5 5e11c00c 2021-03-02 op * purpose with or without fee is hereby granted, provided that the above
6 5e11c00c 2021-03-02 op * copyright notice and this permission notice appear in all copies.
8 5e11c00c 2021-03-02 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 5e11c00c 2021-03-02 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 5e11c00c 2021-03-02 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 5e11c00c 2021-03-02 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 5e11c00c 2021-03-02 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 5e11c00c 2021-03-02 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 5e11c00c 2021-03-02 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 5e11c00c 2021-03-02 op * A streaming gemtext parser.
21 5e11c00c 2021-03-02 op * - handle NULs
25 5f2dc0bd 2021-07-13 op #include "defaults.h"
26 395b9f4e 2021-07-12 op #include "parser.h"
27 a4ec04c5 2021-05-13 op #include "telescope.h"
29 5e11c00c 2021-03-02 op #include <ctype.h>
30 5e11c00c 2021-03-02 op #include <string.h>
31 5e11c00c 2021-03-02 op #include <stdlib.h>
/* Entry points installed into struct parser by gemtext_initparser(). */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/*
 * Per-line-type handlers.  They all share the parselinefn signature so
 * that they can be stored in the parsers[] dispatch table.
 */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Picks a fallback page title among the already-emitted lines. */
static void	search_title(struct parser *p, enum line_type level);

/* Signature shared by every per-line-type handler. */
typedef int (parselinefn)(struct parser *p, enum line_type t,
    const char *buf, size_t len);
49 5e11c00c 2021-03-02 op static parselinefn *parsers[] = {
50 3902f7b9 2021-07-14 op [LINE_TEXT] = parse_text,
51 3902f7b9 2021-07-14 op [LINE_LINK] = parse_link,
52 3902f7b9 2021-07-14 op [LINE_TITLE_1] = parse_title,
53 3902f7b9 2021-07-14 op [LINE_TITLE_3] = parse_title,
54 3902f7b9 2021-07-14 op [LINE_QUOTE] = parse_quote,
55 3902f7b9 2021-07-14 op [LINE_PRE_START] = parse_pre_start,
56 3902f7b9 2021-07-14 op [LINE_PRE_END] = parse_pre_end,
60 5e11c00c 2021-03-02 op gemtext_initparser(struct parser *p)
62 5e11c00c 2021-03-02 op memset(p, 0, sizeof(*p));
64 fc43eadd 2021-03-12 op p->name = "text/gemini";
65 5e11c00c 2021-03-02 op p->parse = &gemtext_parse;
66 5e11c00c 2021-03-02 op p->free = &gemtext_free;
69 5e11c00c 2021-03-02 op static inline int
70 5e11c00c 2021-03-02 op emit_line(struct parser *p, enum line_type type, char *line, char *alt)
72 5e11c00c 2021-03-02 op struct line *l;
74 5e11c00c 2021-03-02 op if ((l = calloc(1, sizeof(*l))) == NULL)
77 5e11c00c 2021-03-02 op l->type = type;
78 5e11c00c 2021-03-02 op l->line = line;
81 b1379f34 2021-07-05 op switch (l->type) {
82 b1379f34 2021-07-05 op case LINE_PRE_START:
83 b1379f34 2021-07-05 op case LINE_PRE_END:
84 b1379f34 2021-07-05 op if (hide_pre_context)
85 b1379f34 2021-07-05 op l->flags = L_HIDDEN;
87 b1379f34 2021-07-05 op case LINE_PRE_CONTENT:
88 b1379f34 2021-07-05 op if (hide_pre_blocks)
89 b1379f34 2021-07-05 op l->flags = L_HIDDEN;
96 5e11c00c 2021-03-02 op if (TAILQ_EMPTY(&p->head))
97 5e11c00c 2021-03-02 op TAILQ_INSERT_HEAD(&p->head, l, lines);
99 5e11c00c 2021-03-02 op TAILQ_INSERT_TAIL(&p->head, l, lines);
105 5e11c00c 2021-03-02 op parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
109 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
111 5e11c00c 2021-03-02 op memcpy(l, buf, len);
112 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
116 5e11c00c 2021-03-02 op parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
119 5e11c00c 2021-03-02 op const char *url_start;
121 5e11c00c 2021-03-02 op if (len <= 2)
122 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
126 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
131 5e11c00c 2021-03-02 op if (len == 0)
132 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
134 5e11c00c 2021-03-02 op url_start = buf;
135 5e11c00c 2021-03-02 op while (len > 0 && !isspace(buf[0])) {
140 5e11c00c 2021-03-02 op if ((u = calloc(1, buf - url_start + 1)) == NULL)
142 5e11c00c 2021-03-02 op memcpy(u, url_start, buf - url_start);
144 5e11c00c 2021-03-02 op if (len == 0)
145 f250a75b 2021-03-06 op goto nolabel;
147 f250a75b 2021-03-06 op while (len > 0 && isspace(buf[0])) {
152 5e11c00c 2021-03-02 op if (len == 0)
153 f250a75b 2021-03-06 op goto nolabel;
155 5e11c00c 2021-03-02 op if ((l = calloc(1, len + 1)) == NULL)
158 5e11c00c 2021-03-02 op memcpy(l, buf, len);
159 f250a75b 2021-03-06 op return emit_line(p, t, l, u);
162 f250a75b 2021-03-06 op if ((l = strdup(u)) == NULL)
164 f250a75b 2021-03-06 op return emit_line(p, t, l, u);
168 5e11c00c 2021-03-02 op parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
173 5e11c00c 2021-03-02 op case LINE_TITLE_1:
174 5e11c00c 2021-03-02 op if (len <= 1)
175 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
179 5e11c00c 2021-03-02 op case LINE_TITLE_2:
180 5e11c00c 2021-03-02 op if (len <= 2)
181 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
185 5e11c00c 2021-03-02 op case LINE_TITLE_3:
186 5e11c00c 2021-03-02 op if (len <= 3)
187 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
192 5e11c00c 2021-03-02 op /* unreachable */
196 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
201 5e11c00c 2021-03-02 op if (len == 0)
202 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
204 dc5df781 2021-03-13 op if (t == LINE_TITLE_1 && *p->title == '\0')
205 dc5df781 2021-03-13 op strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
207 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
209 5e11c00c 2021-03-02 op memcpy(l, buf, len);
210 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
214 5e11c00c 2021-03-02 op parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
218 5e11c00c 2021-03-02 op if (len == 1)
219 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
224 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
229 5e11c00c 2021-03-02 op if (len == 0)
230 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
232 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
234 5e11c00c 2021-03-02 op memcpy(l, buf, len);
235 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
239 5e11c00c 2021-03-02 op parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
243 5e11c00c 2021-03-02 op if (len == 1)
244 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
249 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
254 5e11c00c 2021-03-02 op if (len == 0)
255 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
257 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
259 5e11c00c 2021-03-02 op memcpy(l, buf, len);
260 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
264 5e11c00c 2021-03-02 op parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
268 5e11c00c 2021-03-02 op if (len <= 3)
269 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
274 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
279 5e11c00c 2021-03-02 op if (len == 0)
280 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
282 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
285 5e11c00c 2021-03-02 op memcpy(l, buf, len);
286 e2060f05 2021-03-17 op return emit_line(p, t, l, NULL);
290 5e11c00c 2021-03-02 op parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
294 5e11c00c 2021-03-02 op if (len == 0)
295 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
297 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
299 5e11c00c 2021-03-02 op memcpy(l, buf, len);
300 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
304 5e11c00c 2021-03-02 op parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
306 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
309 5e11c00c 2021-03-02 op static inline enum line_type
310 5e11c00c 2021-03-02 op detect_line_type(const char *buf, size_t len, int in_pre)
312 5e11c00c 2021-03-02 op if (in_pre) {
313 5e11c00c 2021-03-02 op if (len >= 3 &&
314 5e11c00c 2021-03-02 op buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
315 5e11c00c 2021-03-02 op return LINE_PRE_END;
317 5e11c00c 2021-03-02 op return LINE_PRE_CONTENT;
320 1575cb8a 2021-07-07 op if (len == 0)
321 1575cb8a 2021-07-07 op return LINE_TEXT;
323 5e11c00c 2021-03-02 op switch (*buf) {
324 5e11c00c 2021-03-02 op case '*': return LINE_ITEM;
325 5e11c00c 2021-03-02 op case '>': return LINE_QUOTE;
327 5e11c00c 2021-03-02 op if (len >= 1 && buf[1] == '>')
328 5e11c00c 2021-03-02 op return LINE_LINK;
331 5e11c00c 2021-03-02 op if (len == 1)
332 5e11c00c 2021-03-02 op return LINE_TEXT;
333 5e11c00c 2021-03-02 op if (buf[1] != '#')
334 5e11c00c 2021-03-02 op return LINE_TITLE_1;
335 5e11c00c 2021-03-02 op if (len == 2)
336 5e11c00c 2021-03-02 op return LINE_TEXT;
337 5e11c00c 2021-03-02 op if (buf[2] != '#')
338 5e11c00c 2021-03-02 op return LINE_TITLE_2;
339 5e11c00c 2021-03-02 op if (len == 3)
340 5e11c00c 2021-03-02 op return LINE_TEXT;
341 5e11c00c 2021-03-02 op return LINE_TITLE_3;
344 5e11c00c 2021-03-02 op return LINE_TEXT;
345 5e11c00c 2021-03-02 op if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
346 5e11c00c 2021-03-02 op return LINE_PRE_START;
350 5e11c00c 2021-03-02 op return LINE_TEXT;
354 5e11c00c 2021-03-02 op gemtext_parse(struct parser *p, const char *buf, size_t size)
356 a5845bb5 2021-03-20 op return parser_foreach_line(p, buf, size, gemtext_foreach_line);
360 a5845bb5 2021-03-20 op gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
362 a5845bb5 2021-03-20 op enum line_type t;
364 27dce34f 2021-07-06 op t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
365 a5845bb5 2021-03-20 op if (t == LINE_PRE_START)
366 27dce34f 2021-07-06 op p->flags ^= PARSER_IN_PRE;
367 a5845bb5 2021-03-20 op if (t == LINE_PRE_END)
368 27dce34f 2021-07-06 op p->flags ^= PARSER_IN_PRE;
369 a5845bb5 2021-03-20 op return parsers[t](p, t, line, linelen);
373 5e11c00c 2021-03-02 op gemtext_free(struct parser *p)
375 a5c3e03d 2021-03-02 op enum line_type t;
377 a5c3e03d 2021-03-02 op /* flush the buffer */
378 a5c3e03d 2021-03-02 op if (p->len != 0) {
379 27dce34f 2021-07-06 op t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
380 a5c3e03d 2021-03-02 op if (!parsers[t](p, t, p->buf, p->len))
382 27dce34f 2021-07-06 op if ((p->flags & PARSER_IN_PRE) &&
383 27dce34f 2021-07-06 op !emit_line(p, LINE_PRE_END, NULL, NULL))
387 5e11c00c 2021-03-02 op free(p->buf);
390 48e3f26d 2021-05-13 op * use the first level 2 or 3 header as page title if none
393 48e3f26d 2021-05-13 op if (*p->title == '\0')
394 48e3f26d 2021-05-13 op search_title(p, LINE_TITLE_2);
395 48e3f26d 2021-05-13 op if (*p->title == '\0')
396 48e3f26d 2021-05-13 op search_title(p, LINE_TITLE_3);
402 48e3f26d 2021-05-13 op search_title(struct parser *p, enum line_type level)
404 48e3f26d 2021-05-13 op struct line *l;
406 48e3f26d 2021-05-13 op TAILQ_FOREACH(l, &p->head, lines) {
407 48e3f26d 2021-05-13 op if (l->type == level) {
408 48e3f26d 2021-05-13 op if (l->line == NULL)
410 48e3f26d 2021-05-13 op strlcpy(p->title, l->line, sizeof(p->title));