Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include "parser.h"
26 #include "telescope.h"
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
/*
 * Parser callbacks: gemtext_parse() and gemtext_free() are installed in
 * the parser by gemtext_initparser(); gemtext_foreach_line() is the
 * per-line callback handed to parser_foreach_line().
 */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/*
 * One handler per line type.  Each receives the raw line (not
 * NUL-terminated, `len' bytes long) and returns 1 on success, 0 on
 * failure.
 */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Pick a fallback page title from the already parsed lines. */
static void	search_title(struct parser *p, enum line_type level);
46 typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
48 static parselinefn *parsers[] = {
49 parse_text, /* LINE_TEXT */
50 parse_link, /* LINE_LINK */
51 parse_title, /* LINE_TITLE_1 */
52 parse_title, /* LINE_TITLE_2 */
53 parse_title, /* LINE_TITLE_3 */
54 parse_item, /* LINE_ITEM */
55 parse_quote, /* LINE_QUOTE */
56 parse_pre_start, /* LINE_PRE_START */
57 parse_pre_cnt, /* LINE_PRE_CONTENT */
58 parse_pre_end, /* LINE_PRE_END */
59 };
61 void
62 gemtext_initparser(struct parser *p)
63 {
64 memset(p, 0, sizeof(*p));
66 p->name = "text/gemini";
67 p->parse = &gemtext_parse;
68 p->free = &gemtext_free;
69 }
71 static inline int
72 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
73 {
74 struct line *l;
76 if ((l = calloc(1, sizeof(*l))) == NULL)
77 return 0;
79 l->type = type;
80 l->line = line;
81 l->alt = alt;
83 switch (l->type) {
84 case LINE_PRE_START:
85 case LINE_PRE_END:
86 if (hide_pre_context)
87 l->flags = L_HIDDEN;
88 break;
89 case LINE_PRE_CONTENT:
90 if (hide_pre_blocks)
91 l->flags = L_HIDDEN;
92 break;
93 default:
94 l->flags = 0;
95 break;
96 }
98 if (TAILQ_EMPTY(&p->head))
99 TAILQ_INSERT_HEAD(&p->head, l, lines);
100 else
101 TAILQ_INSERT_TAIL(&p->head, l, lines);
103 return 1;
106 static int
107 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
109 char *l;
111 if ((l = calloc(1, len+1)) == NULL)
112 return 0;
113 memcpy(l, buf, len);
114 return emit_line(p, t, l, NULL);
117 static int
118 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
120 char *l, *u;
121 const char *url_start;
123 if (len <= 2)
124 return emit_line(p, t, NULL, NULL);
125 buf += 2;
126 len -= 2;
128 while (len > 0 && isspace(buf[0])) {
129 buf++;
130 len--;
133 if (len == 0)
134 return emit_line(p, t, NULL, NULL);
136 url_start = buf;
137 while (len > 0 && !isspace(buf[0])) {
138 buf++;
139 len--;
142 if ((u = calloc(1, buf - url_start + 1)) == NULL)
143 return 0;
144 memcpy(u, url_start, buf - url_start);
146 if (len == 0)
147 goto nolabel;
149 while (len > 0 && isspace(buf[0])) {
150 buf++;
151 len--;
154 if (len == 0)
155 goto nolabel;
157 if ((l = calloc(1, len + 1)) == NULL)
158 return 0;
160 memcpy(l, buf, len);
161 return emit_line(p, t, l, u);
163 nolabel:
164 if ((l = strdup(u)) == NULL)
165 return 0;
166 return emit_line(p, t, l, u);
169 static int
170 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
172 char *l;
174 switch (t) {
175 case LINE_TITLE_1:
176 if (len <= 1)
177 return emit_line(p, t, NULL, NULL);
178 buf++;
179 len--;
180 break;
181 case LINE_TITLE_2:
182 if (len <= 2)
183 return emit_line(p, t, NULL, NULL);
184 buf += 2;
185 len -= 2;
186 break;
187 case LINE_TITLE_3:
188 if (len <= 3)
189 return emit_line(p, t, NULL, NULL);
190 buf += 3;
191 len -= 3;
192 break;
193 default:
194 /* unreachable */
195 abort();
198 while (len > 0 && isspace(buf[0])) {
199 buf++;
200 len--;
203 if (len == 0)
204 return emit_line(p, t, NULL, NULL);
206 if (t == LINE_TITLE_1 && *p->title == '\0')
207 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
209 if ((l = calloc(1, len+1)) == NULL)
210 return 0;
211 memcpy(l, buf, len);
212 return emit_line(p, t, l, NULL);
215 static int
216 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
218 char *l;
220 if (len == 1)
221 return emit_line(p, t, NULL, NULL);
223 buf++;
224 len--;
226 while (len > 0 && isspace(buf[0])) {
227 buf++;
228 len--;
231 if (len == 0)
232 return emit_line(p, t, NULL, NULL);
234 if ((l = calloc(1, len+1)) == NULL)
235 return 0;
236 memcpy(l, buf, len);
237 return emit_line(p, t, l, NULL);
240 static int
241 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
243 char *l;
245 if (len == 1)
246 return emit_line(p, t, NULL, NULL);
248 buf++;
249 len--;
251 while (len > 0 && isspace(buf[0])) {
252 buf++;
253 len--;
256 if (len == 0)
257 return emit_line(p, t, NULL, NULL);
259 if ((l = calloc(1, len+1)) == NULL)
260 return 0;
261 memcpy(l, buf, len);
262 return emit_line(p, t, l, NULL);
265 static int
266 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
268 char *l;
270 if (len <= 3)
271 return emit_line(p, t, NULL, NULL);
273 buf += 3;
274 len -= 3;
276 while (len > 0 && isspace(buf[0])) {
277 buf++;
278 len--;
281 if (len == 0)
282 return emit_line(p, t, NULL, NULL);
284 if ((l = calloc(1, len+1)) == NULL)
285 return 0;
287 memcpy(l, buf, len);
288 return emit_line(p, t, l, NULL);
291 static int
292 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
294 char *l;
296 if (len == 0)
297 return emit_line(p, t, NULL, NULL);
299 if ((l = calloc(1, len+1)) == NULL)
300 return 0;
301 memcpy(l, buf, len);
302 return emit_line(p, t, l, NULL);
305 static int
306 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
308 return emit_line(p, t, NULL, NULL);
311 static inline enum line_type
312 detect_line_type(const char *buf, size_t len, int in_pre)
314 if (in_pre) {
315 if (len >= 3 &&
316 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
317 return LINE_PRE_END;
318 else
319 return LINE_PRE_CONTENT;
322 if (len == 0)
323 return LINE_TEXT;
325 switch (*buf) {
326 case '*': return LINE_ITEM;
327 case '>': return LINE_QUOTE;
328 case '=':
329 if (len >= 1 && buf[1] == '>')
330 return LINE_LINK;
331 break;
332 case '#':
333 if (len == 1)
334 return LINE_TEXT;
335 if (buf[1] != '#')
336 return LINE_TITLE_1;
337 if (len == 2)
338 return LINE_TEXT;
339 if (buf[2] != '#')
340 return LINE_TITLE_2;
341 if (len == 3)
342 return LINE_TEXT;
343 return LINE_TITLE_3;
344 case '`':
345 if (len < 3)
346 return LINE_TEXT;
347 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
348 return LINE_PRE_START;
349 break;
352 return LINE_TEXT;
355 static int
356 gemtext_parse(struct parser *p, const char *buf, size_t size)
358 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
361 static int
362 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
364 enum line_type t;
366 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
367 if (t == LINE_PRE_START)
368 p->flags ^= PARSER_IN_PRE;
369 if (t == LINE_PRE_END)
370 p->flags ^= PARSER_IN_PRE;
371 return parsers[t](p, t, line, linelen);
374 static int
375 gemtext_free(struct parser *p)
377 enum line_type t;
379 /* flush the buffer */
380 if (p->len != 0) {
381 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
382 if (!parsers[t](p, t, p->buf, p->len))
383 return 0;
384 if ((p->flags & PARSER_IN_PRE) &&
385 !emit_line(p, LINE_PRE_END, NULL, NULL))
386 return 0;
389 free(p->buf);
391 /*
392 * use the first level 2 or 3 header as page title if none
393 * found yet.
394 */
395 if (*p->title == '\0')
396 search_title(p, LINE_TITLE_2);
397 if (*p->title == '\0')
398 search_title(p, LINE_TITLE_3);
400 return 1;
403 static void
404 search_title(struct parser *p, enum line_type level)
406 struct line *l;
408 TAILQ_FOREACH(l, &p->head, lines) {
409 if (l->type == level) {
410 if (l->line == NULL)
411 continue;
412 strlcpy(p->title, l->line, sizeof(p->title));
413 break;