Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
/*
 * A streaming gemtext parser.
 *
 * TODO:
 *  - handle NULs
 *  - UTF8
 */
25 #include "compat.h"
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
31 #include "defaults.h"
32 #include "parser.h"
33 #include "utf8.h"
/*
 * Parser entry points: gemtext_parse and gemtext_free are stored in
 * struct parser by gemtext_initparser(); gemtext_foreach_line is the
 * per-line callback handed to parser_foreach_line().
 */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/*
 * Signature shared by every per-line-type handler, so that they can
 * be stored in one dispatch table.
 */
typedef int (parselinefn)(struct parser *, enum line_type, const char *,
    size_t);

static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_linebreak(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Fallback page-title lookup used when no level-1 heading was seen. */
static void	search_title(struct parser *p, enum line_type level);
52 static parselinefn *parsers[] = {
53 [LINE_TEXT] = parse_text,
54 [LINE_LINK] = parse_link,
55 [LINE_TITLE_1] = parse_title,
56 [LINE_TITLE_2] = parse_title,
57 [LINE_TITLE_3] = parse_title,
58 [LINE_ITEM] = parse_item,
59 [LINE_QUOTE] = parse_quote,
60 [LINE_PRE_START] = parse_pre_start,
61 [LINE_PRE_CONTENT] = parse_pre_cnt,
62 [LINE_PRE_END] = parse_pre_end,
63 [LINE_BREAK] = parse_linebreak,
64 };
66 void
67 gemtext_initparser(struct parser *p)
68 {
69 memset(p, 0, sizeof(*p));
71 p->name = "text/gemini";
72 p->parse = &gemtext_parse;
73 p->free = &gemtext_free;
75 TAILQ_INIT(&p->head);
76 }
78 static inline int
79 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
80 {
81 struct line *l;
83 if ((l = calloc(1, sizeof(*l))) == NULL)
84 return 0;
86 l->type = type;
87 l->line = line;
88 l->alt = alt;
90 switch (l->type) {
91 case LINE_PRE_START:
92 case LINE_PRE_END:
93 if (hide_pre_context)
94 l->flags = L_HIDDEN;
95 if (l->type == LINE_PRE_END &&
96 hide_pre_closing_line)
97 l->flags = L_HIDDEN;
98 break;
99 case LINE_PRE_CONTENT:
100 if (hide_pre_blocks)
101 l->flags = L_HIDDEN;
102 break;
103 case LINE_LINK:
104 if (emojify_link &&
105 !emojied_line(line, (const char **)&l->data))
106 l->data = NULL;
107 break;
108 default:
109 break;
112 TAILQ_INSERT_TAIL(&p->head, l, lines);
114 return 1;
117 static int
118 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
120 char *l;
122 if ((l = calloc(1, len+1)) == NULL)
123 return 0;
124 memcpy(l, buf, len);
125 return emit_line(p, t, l, NULL);
128 static int
129 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
131 char *l, *u;
132 const char *url_start;
134 if (len <= 2)
135 return emit_line(p, t, NULL, NULL);
136 buf += 2;
137 len -= 2;
139 while (len > 0 && isspace(buf[0])) {
140 buf++;
141 len--;
144 if (len == 0)
145 return emit_line(p, t, NULL, NULL);
147 url_start = buf;
148 while (len > 0 && !isspace(buf[0])) {
149 buf++;
150 len--;
153 if ((u = calloc(1, buf - url_start + 1)) == NULL)
154 return 0;
155 memcpy(u, url_start, buf - url_start);
157 if (len == 0)
158 goto nolabel;
160 while (len > 0 && isspace(buf[0])) {
161 buf++;
162 len--;
165 if (len == 0)
166 goto nolabel;
168 if ((l = calloc(1, len + 1)) == NULL)
169 return 0;
171 memcpy(l, buf, len);
172 return emit_line(p, t, l, u);
174 nolabel:
175 if ((l = strdup(u)) == NULL)
176 return 0;
177 return emit_line(p, t, l, u);
180 static int
181 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
183 char *l;
185 switch (t) {
186 case LINE_TITLE_1:
187 if (len <= 1)
188 return emit_line(p, t, NULL, NULL);
189 buf++;
190 len--;
191 break;
192 case LINE_TITLE_2:
193 if (len <= 2)
194 return emit_line(p, t, NULL, NULL);
195 buf += 2;
196 len -= 2;
197 break;
198 case LINE_TITLE_3:
199 if (len <= 3)
200 return emit_line(p, t, NULL, NULL);
201 buf += 3;
202 len -= 3;
203 break;
204 default:
205 /* unreachable */
206 abort();
209 while (len > 0 && isspace(buf[0])) {
210 buf++;
211 len--;
214 if (len == 0)
215 return emit_line(p, t, NULL, NULL);
217 if (t == LINE_TITLE_1 && *p->title == '\0')
218 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
220 if ((l = calloc(1, len+1)) == NULL)
221 return 0;
222 memcpy(l, buf, len);
223 return emit_line(p, t, l, NULL);
226 static int
227 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
229 char *l;
231 if (len == 1)
232 return emit_line(p, t, NULL, NULL);
234 buf++;
235 len--;
237 while (len > 0 && isspace(buf[0])) {
238 buf++;
239 len--;
242 if (len == 0)
243 return emit_line(p, t, NULL, NULL);
245 if ((l = calloc(1, len+1)) == NULL)
246 return 0;
247 memcpy(l, buf, len);
248 return emit_line(p, t, l, NULL);
251 static int
252 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
254 char *l;
256 if (len == 1)
257 return emit_line(p, t, NULL, NULL);
259 buf++;
260 len--;
262 while (len > 0 && isspace(buf[0])) {
263 buf++;
264 len--;
267 if (len == 0)
268 return emit_line(p, t, NULL, NULL);
270 if ((l = calloc(1, len+1)) == NULL)
271 return 0;
272 memcpy(l, buf, len);
273 return emit_line(p, t, l, NULL);
276 static int
277 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
279 char *l;
281 if (len <= 3)
282 return emit_line(p, t, NULL, NULL);
284 buf += 3;
285 len -= 3;
287 while (len > 0 && isspace(buf[0])) {
288 buf++;
289 len--;
292 if (len == 0)
293 return emit_line(p, t, NULL, NULL);
295 if ((l = calloc(1, len+1)) == NULL)
296 return 0;
298 memcpy(l, buf, len);
299 return emit_line(p, t, l, NULL);
302 static int
303 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
305 char *l;
307 if (len == 0)
308 return emit_line(p, t, NULL, NULL);
310 if ((l = calloc(1, len+1)) == NULL)
311 return 0;
312 memcpy(l, buf, len);
313 return emit_line(p, t, l, NULL);
316 static int
317 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
319 return emit_line(p, t, NULL, NULL);
322 static inline enum line_type
323 detect_line_type(const char *buf, size_t len, int in_pre)
325 size_t i;
327 if (in_pre) {
328 if (len >= 3 &&
329 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
330 return LINE_PRE_END;
331 else
332 return LINE_PRE_CONTENT;
335 if (len == 0)
336 return LINE_TEXT;
338 switch (*buf) {
339 case '*': return LINE_ITEM;
340 case '>': return LINE_QUOTE;
341 case '=':
342 if (len >= 1 && buf[1] == '>')
343 return LINE_LINK;
344 break;
345 case '#':
346 if (len == 1)
347 return LINE_TEXT;
348 if (buf[1] != '#')
349 return LINE_TITLE_1;
350 if (len == 2)
351 return LINE_TEXT;
352 if (buf[2] != '#')
353 return LINE_TITLE_2;
354 if (len == 3)
355 return LINE_TEXT;
356 return LINE_TITLE_3;
357 case '`':
358 if (len < 3)
359 return LINE_TEXT;
360 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
361 return LINE_PRE_START;
362 break;
363 case '-':
364 for (i = 0; i < len; ++i)
365 if (buf[i] != '-')
366 return LINE_TEXT;
367 return LINE_BREAK;
370 return LINE_TEXT;
373 static int
374 gemtext_parse(struct parser *p, const char *buf, size_t size)
376 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
379 static int
380 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
382 enum line_type t;
384 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
385 if (t == LINE_PRE_START)
386 p->flags ^= PARSER_IN_PRE;
387 if (t == LINE_PRE_END)
388 p->flags ^= PARSER_IN_PRE;
389 return parsers[t](p, t, line, linelen);
392 static int
393 gemtext_free(struct parser *p)
395 enum line_type t;
397 /* flush the buffer */
398 if (p->len != 0) {
399 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
400 if (!parsers[t](p, t, p->buf, p->len))
401 return 0;
402 if ((p->flags & PARSER_IN_PRE) &&
403 !emit_line(p, LINE_PRE_END, NULL, NULL))
404 return 0;
407 free(p->buf);
409 /*
410 * use the first level 2 or 3 header as page title if none
411 * found yet.
412 */
413 if (*p->title == '\0')
414 search_title(p, LINE_TITLE_2);
415 if (*p->title == '\0')
416 search_title(p, LINE_TITLE_3);
418 return 1;
421 static int
422 parse_linebreak(struct parser *p, enum line_type t, const char *buf, size_t len)
424 return emit_line(p, t, NULL, NULL);
427 static void
428 search_title(struct parser *p, enum line_type level)
430 struct line *l;
432 TAILQ_FOREACH(l, &p->head, lines) {
433 if (l->type == level) {
434 if (l->line == NULL)
435 continue;
436 strlcpy(p->title, l->line, sizeof(p->title));
437 break;