Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include "compat.h"
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
31 #include "defaults.h"
32 #include "parser.h"
33 #include "utf8.h"
35 static int gemtext_parse(struct parser*, const char*, size_t);
36 static int gemtext_foreach_line(struct parser*, const char*, size_t);
37 static int gemtext_free(struct parser*);
39 static int parse_text(struct parser*, enum line_type, const char*, size_t);
40 static int parse_link(struct parser*, enum line_type, const char*, size_t);
41 static int parse_title(struct parser*, enum line_type, const char*, size_t);
42 static int parse_item(struct parser*, enum line_type, const char*, size_t);
43 static int parse_quote(struct parser*, enum line_type, const char*, size_t);
44 static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
45 static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
46 static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
47 static void search_title(struct parser*, enum line_type);
49 typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
51 static parselinefn *parsers[] = {
52 [LINE_TEXT] = parse_text,
53 [LINE_LINK] = parse_link,
54 [LINE_TITLE_1] = parse_title,
55 [LINE_TITLE_2] = parse_title,
56 [LINE_TITLE_3] = parse_title,
57 [LINE_ITEM] = parse_item,
58 [LINE_QUOTE] = parse_quote,
59 [LINE_PRE_START] = parse_pre_start,
60 [LINE_PRE_CONTENT] = parse_pre_cnt,
61 [LINE_PRE_END] = parse_pre_end,
62 };
64 void
65 gemtext_initparser(struct parser *p)
66 {
67 memset(p, 0, sizeof(*p));
69 p->name = "text/gemini";
70 p->parse = &gemtext_parse;
71 p->free = &gemtext_free;
72 }
74 static inline int
75 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
76 {
77 struct line *l;
79 if ((l = calloc(1, sizeof(*l))) == NULL)
80 return 0;
82 l->type = type;
83 l->line = line;
84 l->alt = alt;
86 switch (l->type) {
87 case LINE_PRE_START:
88 case LINE_PRE_END:
89 if (hide_pre_context)
90 l->flags = L_HIDDEN;
91 if (l->type == LINE_PRE_END &&
92 hide_pre_closing_line)
93 l->flags = L_HIDDEN;
94 break;
95 case LINE_PRE_CONTENT:
96 if (hide_pre_blocks)
97 l->flags = L_HIDDEN;
98 break;
99 case LINE_LINK:
100 if (emojify_link &&
101 !emojied_line(line, (const char **)&l->data))
102 l->data = NULL;
103 break;
104 default:
105 break;
108 if (TAILQ_EMPTY(&p->head))
109 TAILQ_INSERT_HEAD(&p->head, l, lines);
110 else
111 TAILQ_INSERT_TAIL(&p->head, l, lines);
113 return 1;
116 static int
117 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
119 char *l;
121 if ((l = calloc(1, len+1)) == NULL)
122 return 0;
123 memcpy(l, buf, len);
124 return emit_line(p, t, l, NULL);
127 static int
128 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
130 char *l, *u;
131 const char *url_start;
133 if (len <= 2)
134 return emit_line(p, t, NULL, NULL);
135 buf += 2;
136 len -= 2;
138 while (len > 0 && isspace(buf[0])) {
139 buf++;
140 len--;
143 if (len == 0)
144 return emit_line(p, t, NULL, NULL);
146 url_start = buf;
147 while (len > 0 && !isspace(buf[0])) {
148 buf++;
149 len--;
152 if ((u = calloc(1, buf - url_start + 1)) == NULL)
153 return 0;
154 memcpy(u, url_start, buf - url_start);
156 if (len == 0)
157 goto nolabel;
159 while (len > 0 && isspace(buf[0])) {
160 buf++;
161 len--;
164 if (len == 0)
165 goto nolabel;
167 if ((l = calloc(1, len + 1)) == NULL)
168 return 0;
170 memcpy(l, buf, len);
171 return emit_line(p, t, l, u);
173 nolabel:
174 if ((l = strdup(u)) == NULL)
175 return 0;
176 return emit_line(p, t, l, u);
179 static int
180 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
182 char *l;
184 switch (t) {
185 case LINE_TITLE_1:
186 if (len <= 1)
187 return emit_line(p, t, NULL, NULL);
188 buf++;
189 len--;
190 break;
191 case LINE_TITLE_2:
192 if (len <= 2)
193 return emit_line(p, t, NULL, NULL);
194 buf += 2;
195 len -= 2;
196 break;
197 case LINE_TITLE_3:
198 if (len <= 3)
199 return emit_line(p, t, NULL, NULL);
200 buf += 3;
201 len -= 3;
202 break;
203 default:
204 /* unreachable */
205 abort();
208 while (len > 0 && isspace(buf[0])) {
209 buf++;
210 len--;
213 if (len == 0)
214 return emit_line(p, t, NULL, NULL);
216 if (t == LINE_TITLE_1 && *p->title == '\0')
217 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
219 if ((l = calloc(1, len+1)) == NULL)
220 return 0;
221 memcpy(l, buf, len);
222 return emit_line(p, t, l, NULL);
225 static int
226 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
228 char *l;
230 if (len == 1)
231 return emit_line(p, t, NULL, NULL);
233 buf++;
234 len--;
236 while (len > 0 && isspace(buf[0])) {
237 buf++;
238 len--;
241 if (len == 0)
242 return emit_line(p, t, NULL, NULL);
244 if ((l = calloc(1, len+1)) == NULL)
245 return 0;
246 memcpy(l, buf, len);
247 return emit_line(p, t, l, NULL);
250 static int
251 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
253 char *l;
255 if (len == 1)
256 return emit_line(p, t, NULL, NULL);
258 buf++;
259 len--;
261 while (len > 0 && isspace(buf[0])) {
262 buf++;
263 len--;
266 if (len == 0)
267 return emit_line(p, t, NULL, NULL);
269 if ((l = calloc(1, len+1)) == NULL)
270 return 0;
271 memcpy(l, buf, len);
272 return emit_line(p, t, l, NULL);
275 static int
276 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
278 char *l;
280 if (len <= 3)
281 return emit_line(p, t, NULL, NULL);
283 buf += 3;
284 len -= 3;
286 while (len > 0 && isspace(buf[0])) {
287 buf++;
288 len--;
291 if (len == 0)
292 return emit_line(p, t, NULL, NULL);
294 if ((l = calloc(1, len+1)) == NULL)
295 return 0;
297 memcpy(l, buf, len);
298 return emit_line(p, t, l, NULL);
301 static int
302 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
304 char *l;
306 if (len == 0)
307 return emit_line(p, t, NULL, NULL);
309 if ((l = calloc(1, len+1)) == NULL)
310 return 0;
311 memcpy(l, buf, len);
312 return emit_line(p, t, l, NULL);
315 static int
316 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
318 return emit_line(p, t, NULL, NULL);
321 static inline enum line_type
322 detect_line_type(const char *buf, size_t len, int in_pre)
324 if (in_pre) {
325 if (len >= 3 &&
326 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
327 return LINE_PRE_END;
328 else
329 return LINE_PRE_CONTENT;
332 if (len == 0)
333 return LINE_TEXT;
335 switch (*buf) {
336 case '*': return LINE_ITEM;
337 case '>': return LINE_QUOTE;
338 case '=':
339 if (len >= 1 && buf[1] == '>')
340 return LINE_LINK;
341 break;
342 case '#':
343 if (len == 1)
344 return LINE_TEXT;
345 if (buf[1] != '#')
346 return LINE_TITLE_1;
347 if (len == 2)
348 return LINE_TEXT;
349 if (buf[2] != '#')
350 return LINE_TITLE_2;
351 if (len == 3)
352 return LINE_TEXT;
353 return LINE_TITLE_3;
354 case '`':
355 if (len < 3)
356 return LINE_TEXT;
357 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
358 return LINE_PRE_START;
359 break;
362 return LINE_TEXT;
365 static int
366 gemtext_parse(struct parser *p, const char *buf, size_t size)
368 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
371 static int
372 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
374 enum line_type t;
376 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
377 if (t == LINE_PRE_START)
378 p->flags ^= PARSER_IN_PRE;
379 if (t == LINE_PRE_END)
380 p->flags ^= PARSER_IN_PRE;
381 return parsers[t](p, t, line, linelen);
384 static int
385 gemtext_free(struct parser *p)
387 enum line_type t;
389 /* flush the buffer */
390 if (p->len != 0) {
391 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
392 if (!parsers[t](p, t, p->buf, p->len))
393 return 0;
394 if ((p->flags & PARSER_IN_PRE) &&
395 !emit_line(p, LINE_PRE_END, NULL, NULL))
396 return 0;
399 free(p->buf);
401 /*
402 * use the first level 2 or 3 header as page title if none
403 * found yet.
404 */
405 if (*p->title == '\0')
406 search_title(p, LINE_TITLE_2);
407 if (*p->title == '\0')
408 search_title(p, LINE_TITLE_3);
410 return 1;
413 static void
414 search_title(struct parser *p, enum line_type level)
416 struct line *l;
418 TAILQ_FOREACH(l, &p->head, lines) {
419 if (l->type == level) {
420 if (l->line == NULL)
421 continue;
422 strlcpy(p->title, l->line, sizeof(p->title));
423 break;