Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
/*
 * A streaming gemtext parser.
 *
 * TODO:
 *  - handle NUL bytes in the input
 *  - UTF-8
 */
25 #include "compat.h"
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
31 #include "defaults.h"
32 #include "parser.h"
33 #include "utf8.h"
/* Entry points installed into struct parser by gemtext_initparser(). */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/* One routine per kind of line; all of them share the same signature. */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

static void	search_title(struct parser *p, enum line_type level);

/* Function type of the per-line routines declared above. */
typedef int (parselinefn)(struct parser *, enum line_type, const char *,
    size_t);
51 static parselinefn *parsers[] = {
52 [LINE_TEXT] = parse_text,
53 [LINE_LINK] = parse_link,
54 [LINE_TITLE_1] = parse_title,
55 [LINE_TITLE_2] = parse_title,
56 [LINE_TITLE_3] = parse_title,
57 [LINE_ITEM] = parse_item,
58 [LINE_QUOTE] = parse_quote,
59 [LINE_PRE_START] = parse_pre_start,
60 [LINE_PRE_CONTENT] = parse_pre_cnt,
61 [LINE_PRE_END] = parse_pre_end,
62 };
64 void
65 gemtext_initparser(struct parser *p)
66 {
67 memset(p, 0, sizeof(*p));
69 p->name = "text/gemini";
70 p->parse = &gemtext_parse;
71 p->free = &gemtext_free;
73 TAILQ_INIT(&p->head);
74 }
76 static inline int
77 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
78 {
79 struct line *l;
81 if ((l = calloc(1, sizeof(*l))) == NULL)
82 return 0;
84 l->type = type;
85 l->line = line;
86 l->alt = alt;
88 switch (l->type) {
89 case LINE_PRE_START:
90 case LINE_PRE_END:
91 if (hide_pre_context)
92 l->flags = L_HIDDEN;
93 if (l->type == LINE_PRE_END &&
94 hide_pre_closing_line)
95 l->flags = L_HIDDEN;
96 break;
97 case LINE_PRE_CONTENT:
98 if (hide_pre_blocks)
99 l->flags = L_HIDDEN;
100 break;
101 case LINE_LINK:
102 if (emojify_link &&
103 !emojied_line(line, (const char **)&l->data))
104 l->data = NULL;
105 break;
106 default:
107 break;
110 if (TAILQ_EMPTY(&p->head))
111 TAILQ_INSERT_HEAD(&p->head, l, lines);
112 else
113 TAILQ_INSERT_TAIL(&p->head, l, lines);
115 return 1;
118 static int
119 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
121 char *l;
123 if ((l = calloc(1, len+1)) == NULL)
124 return 0;
125 memcpy(l, buf, len);
126 return emit_line(p, t, l, NULL);
129 static int
130 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
132 char *l, *u;
133 const char *url_start;
135 if (len <= 2)
136 return emit_line(p, t, NULL, NULL);
137 buf += 2;
138 len -= 2;
140 while (len > 0 && isspace(buf[0])) {
141 buf++;
142 len--;
145 if (len == 0)
146 return emit_line(p, t, NULL, NULL);
148 url_start = buf;
149 while (len > 0 && !isspace(buf[0])) {
150 buf++;
151 len--;
154 if ((u = calloc(1, buf - url_start + 1)) == NULL)
155 return 0;
156 memcpy(u, url_start, buf - url_start);
158 if (len == 0)
159 goto nolabel;
161 while (len > 0 && isspace(buf[0])) {
162 buf++;
163 len--;
166 if (len == 0)
167 goto nolabel;
169 if ((l = calloc(1, len + 1)) == NULL)
170 return 0;
172 memcpy(l, buf, len);
173 return emit_line(p, t, l, u);
175 nolabel:
176 if ((l = strdup(u)) == NULL)
177 return 0;
178 return emit_line(p, t, l, u);
181 static int
182 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
184 char *l;
186 switch (t) {
187 case LINE_TITLE_1:
188 if (len <= 1)
189 return emit_line(p, t, NULL, NULL);
190 buf++;
191 len--;
192 break;
193 case LINE_TITLE_2:
194 if (len <= 2)
195 return emit_line(p, t, NULL, NULL);
196 buf += 2;
197 len -= 2;
198 break;
199 case LINE_TITLE_3:
200 if (len <= 3)
201 return emit_line(p, t, NULL, NULL);
202 buf += 3;
203 len -= 3;
204 break;
205 default:
206 /* unreachable */
207 abort();
210 while (len > 0 && isspace(buf[0])) {
211 buf++;
212 len--;
215 if (len == 0)
216 return emit_line(p, t, NULL, NULL);
218 if (t == LINE_TITLE_1 && *p->title == '\0')
219 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
221 if ((l = calloc(1, len+1)) == NULL)
222 return 0;
223 memcpy(l, buf, len);
224 return emit_line(p, t, l, NULL);
227 static int
228 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
230 char *l;
232 if (len == 1)
233 return emit_line(p, t, NULL, NULL);
235 buf++;
236 len--;
238 while (len > 0 && isspace(buf[0])) {
239 buf++;
240 len--;
243 if (len == 0)
244 return emit_line(p, t, NULL, NULL);
246 if ((l = calloc(1, len+1)) == NULL)
247 return 0;
248 memcpy(l, buf, len);
249 return emit_line(p, t, l, NULL);
252 static int
253 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
255 char *l;
257 if (len == 1)
258 return emit_line(p, t, NULL, NULL);
260 buf++;
261 len--;
263 while (len > 0 && isspace(buf[0])) {
264 buf++;
265 len--;
268 if (len == 0)
269 return emit_line(p, t, NULL, NULL);
271 if ((l = calloc(1, len+1)) == NULL)
272 return 0;
273 memcpy(l, buf, len);
274 return emit_line(p, t, l, NULL);
277 static int
278 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
280 char *l;
282 if (len <= 3)
283 return emit_line(p, t, NULL, NULL);
285 buf += 3;
286 len -= 3;
288 while (len > 0 && isspace(buf[0])) {
289 buf++;
290 len--;
293 if (len == 0)
294 return emit_line(p, t, NULL, NULL);
296 if ((l = calloc(1, len+1)) == NULL)
297 return 0;
299 memcpy(l, buf, len);
300 return emit_line(p, t, l, NULL);
303 static int
304 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
306 char *l;
308 if (len == 0)
309 return emit_line(p, t, NULL, NULL);
311 if ((l = calloc(1, len+1)) == NULL)
312 return 0;
313 memcpy(l, buf, len);
314 return emit_line(p, t, l, NULL);
317 static int
318 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
320 return emit_line(p, t, NULL, NULL);
323 static inline enum line_type
324 detect_line_type(const char *buf, size_t len, int in_pre)
326 if (in_pre) {
327 if (len >= 3 &&
328 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
329 return LINE_PRE_END;
330 else
331 return LINE_PRE_CONTENT;
334 if (len == 0)
335 return LINE_TEXT;
337 switch (*buf) {
338 case '*': return LINE_ITEM;
339 case '>': return LINE_QUOTE;
340 case '=':
341 if (len >= 1 && buf[1] == '>')
342 return LINE_LINK;
343 break;
344 case '#':
345 if (len == 1)
346 return LINE_TEXT;
347 if (buf[1] != '#')
348 return LINE_TITLE_1;
349 if (len == 2)
350 return LINE_TEXT;
351 if (buf[2] != '#')
352 return LINE_TITLE_2;
353 if (len == 3)
354 return LINE_TEXT;
355 return LINE_TITLE_3;
356 case '`':
357 if (len < 3)
358 return LINE_TEXT;
359 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
360 return LINE_PRE_START;
361 break;
364 return LINE_TEXT;
367 static int
368 gemtext_parse(struct parser *p, const char *buf, size_t size)
370 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
373 static int
374 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
376 enum line_type t;
378 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
379 if (t == LINE_PRE_START)
380 p->flags ^= PARSER_IN_PRE;
381 if (t == LINE_PRE_END)
382 p->flags ^= PARSER_IN_PRE;
383 return parsers[t](p, t, line, linelen);
386 static int
387 gemtext_free(struct parser *p)
389 enum line_type t;
391 /* flush the buffer */
392 if (p->len != 0) {
393 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
394 if (!parsers[t](p, t, p->buf, p->len))
395 return 0;
396 if ((p->flags & PARSER_IN_PRE) &&
397 !emit_line(p, LINE_PRE_END, NULL, NULL))
398 return 0;
401 free(p->buf);
403 /*
404 * use the first level 2 or 3 header as page title if none
405 * found yet.
406 */
407 if (*p->title == '\0')
408 search_title(p, LINE_TITLE_2);
409 if (*p->title == '\0')
410 search_title(p, LINE_TITLE_3);
412 return 1;
415 static void
416 search_title(struct parser *p, enum line_type level)
418 struct line *l;
420 TAILQ_FOREACH(l, &p->head, lines) {
421 if (l->type == level) {
422 if (l->line == NULL)
423 continue;
424 strlcpy(p->title, l->line, sizeof(p->title));
425 break;