Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include "defaults.h"
26 #include "parser.h"
27 #include "telescope.h"
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
/*
 * Parser entry points: gemtext_parse and gemtext_free are installed on
 * the struct parser by gemtext_initparser(); gemtext_foreach_line is the
 * per-line callback handed to parser_foreach_line().
 */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/* Per-line-type handlers; they all share the parselinefn signature. */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Fallback page-title lookup used by gemtext_free(). */
static void	search_title(struct parser *p, enum line_type level);

/* Signature of a line handler stored in the parsers[] dispatch table. */
typedef int (parselinefn)(struct parser *, enum line_type, const char *,
    size_t);
49 static parselinefn *parsers[] = {
50 [LINE_TEXT] = parse_text,
51 [LINE_LINK] = parse_link,
52 [LINE_TITLE_1] = parse_title,
53 [LINE_TITLE_2] = parse_title,
54 [LINE_TITLE_3] = parse_title,
55 [LINE_ITEM] = parse_item,
56 [LINE_QUOTE] = parse_quote,
57 [LINE_PRE_START] = parse_pre_start,
58 [LINE_PRE_CONTENT] = parse_pre_cnt,
59 [LINE_PRE_END] = parse_pre_end,
60 };
62 void
63 gemtext_initparser(struct parser *p)
64 {
65 memset(p, 0, sizeof(*p));
67 p->name = "text/gemini";
68 p->parse = &gemtext_parse;
69 p->free = &gemtext_free;
70 }
72 static inline int
73 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
74 {
75 struct line *l;
77 if ((l = calloc(1, sizeof(*l))) == NULL)
78 return 0;
80 l->type = type;
81 l->line = line;
82 l->meta.alt = alt;
84 switch (l->type) {
85 case LINE_PRE_START:
86 case LINE_PRE_END:
87 if (hide_pre_context)
88 l->flags = L_HIDDEN;
89 break;
90 case LINE_PRE_CONTENT:
91 if (hide_pre_blocks)
92 l->flags = L_HIDDEN;
93 break;
94 default:
95 l->flags = 0;
96 break;
97 }
99 if (TAILQ_EMPTY(&p->head))
100 TAILQ_INSERT_HEAD(&p->head, l, lines);
101 else
102 TAILQ_INSERT_TAIL(&p->head, l, lines);
104 return 1;
107 static int
108 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
110 char *l;
112 if ((l = calloc(1, len+1)) == NULL)
113 return 0;
114 memcpy(l, buf, len);
115 return emit_line(p, t, l, NULL);
118 static int
119 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
121 char *l, *u;
122 const char *url_start;
124 if (len <= 2)
125 return emit_line(p, t, NULL, NULL);
126 buf += 2;
127 len -= 2;
129 while (len > 0 && isspace(buf[0])) {
130 buf++;
131 len--;
134 if (len == 0)
135 return emit_line(p, t, NULL, NULL);
137 url_start = buf;
138 while (len > 0 && !isspace(buf[0])) {
139 buf++;
140 len--;
143 if ((u = calloc(1, buf - url_start + 1)) == NULL)
144 return 0;
145 memcpy(u, url_start, buf - url_start);
147 if (len == 0)
148 goto nolabel;
150 while (len > 0 && isspace(buf[0])) {
151 buf++;
152 len--;
155 if (len == 0)
156 goto nolabel;
158 if ((l = calloc(1, len + 1)) == NULL)
159 return 0;
161 memcpy(l, buf, len);
162 return emit_line(p, t, l, u);
164 nolabel:
165 if ((l = strdup(u)) == NULL)
166 return 0;
167 return emit_line(p, t, l, u);
170 static int
171 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
173 char *l;
175 switch (t) {
176 case LINE_TITLE_1:
177 if (len <= 1)
178 return emit_line(p, t, NULL, NULL);
179 buf++;
180 len--;
181 break;
182 case LINE_TITLE_2:
183 if (len <= 2)
184 return emit_line(p, t, NULL, NULL);
185 buf += 2;
186 len -= 2;
187 break;
188 case LINE_TITLE_3:
189 if (len <= 3)
190 return emit_line(p, t, NULL, NULL);
191 buf += 3;
192 len -= 3;
193 break;
194 default:
195 /* unreachable */
196 abort();
199 while (len > 0 && isspace(buf[0])) {
200 buf++;
201 len--;
204 if (len == 0)
205 return emit_line(p, t, NULL, NULL);
207 if (t == LINE_TITLE_1 && *p->title == '\0')
208 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
210 if ((l = calloc(1, len+1)) == NULL)
211 return 0;
212 memcpy(l, buf, len);
213 return emit_line(p, t, l, NULL);
216 static int
217 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
219 char *l;
221 if (len == 1)
222 return emit_line(p, t, NULL, NULL);
224 buf++;
225 len--;
227 while (len > 0 && isspace(buf[0])) {
228 buf++;
229 len--;
232 if (len == 0)
233 return emit_line(p, t, NULL, NULL);
235 if ((l = calloc(1, len+1)) == NULL)
236 return 0;
237 memcpy(l, buf, len);
238 return emit_line(p, t, l, NULL);
241 static int
242 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
244 char *l;
246 if (len == 1)
247 return emit_line(p, t, NULL, NULL);
249 buf++;
250 len--;
252 while (len > 0 && isspace(buf[0])) {
253 buf++;
254 len--;
257 if (len == 0)
258 return emit_line(p, t, NULL, NULL);
260 if ((l = calloc(1, len+1)) == NULL)
261 return 0;
262 memcpy(l, buf, len);
263 return emit_line(p, t, l, NULL);
266 static int
267 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
269 char *l;
271 if (len <= 3)
272 return emit_line(p, t, NULL, NULL);
274 buf += 3;
275 len -= 3;
277 while (len > 0 && isspace(buf[0])) {
278 buf++;
279 len--;
282 if (len == 0)
283 return emit_line(p, t, NULL, NULL);
285 if ((l = calloc(1, len+1)) == NULL)
286 return 0;
288 memcpy(l, buf, len);
289 return emit_line(p, t, l, NULL);
292 static int
293 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
295 char *l;
297 if (len == 0)
298 return emit_line(p, t, NULL, NULL);
300 if ((l = calloc(1, len+1)) == NULL)
301 return 0;
302 memcpy(l, buf, len);
303 return emit_line(p, t, l, NULL);
306 static int
307 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
309 return emit_line(p, t, NULL, NULL);
312 static inline enum line_type
313 detect_line_type(const char *buf, size_t len, int in_pre)
315 if (in_pre) {
316 if (len >= 3 &&
317 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
318 return LINE_PRE_END;
319 else
320 return LINE_PRE_CONTENT;
323 if (len == 0)
324 return LINE_TEXT;
326 switch (*buf) {
327 case '*': return LINE_ITEM;
328 case '>': return LINE_QUOTE;
329 case '=':
330 if (len >= 1 && buf[1] == '>')
331 return LINE_LINK;
332 break;
333 case '#':
334 if (len == 1)
335 return LINE_TEXT;
336 if (buf[1] != '#')
337 return LINE_TITLE_1;
338 if (len == 2)
339 return LINE_TEXT;
340 if (buf[2] != '#')
341 return LINE_TITLE_2;
342 if (len == 3)
343 return LINE_TEXT;
344 return LINE_TITLE_3;
345 case '`':
346 if (len < 3)
347 return LINE_TEXT;
348 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
349 return LINE_PRE_START;
350 break;
353 return LINE_TEXT;
356 static int
357 gemtext_parse(struct parser *p, const char *buf, size_t size)
359 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
362 static int
363 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
365 enum line_type t;
367 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
368 if (t == LINE_PRE_START)
369 p->flags ^= PARSER_IN_PRE;
370 if (t == LINE_PRE_END)
371 p->flags ^= PARSER_IN_PRE;
372 return parsers[t](p, t, line, linelen);
375 static int
376 gemtext_free(struct parser *p)
378 enum line_type t;
380 /* flush the buffer */
381 if (p->len != 0) {
382 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
383 if (!parsers[t](p, t, p->buf, p->len))
384 return 0;
385 if ((p->flags & PARSER_IN_PRE) &&
386 !emit_line(p, LINE_PRE_END, NULL, NULL))
387 return 0;
390 free(p->buf);
392 /*
393 * use the first level 2 or 3 header as page title if none
394 * found yet.
395 */
396 if (*p->title == '\0')
397 search_title(p, LINE_TITLE_2);
398 if (*p->title == '\0')
399 search_title(p, LINE_TITLE_3);
401 return 1;
404 static void
405 search_title(struct parser *p, enum line_type level)
407 struct line *l;
409 TAILQ_FOREACH(l, &p->head, lines) {
410 if (l->type == level) {
411 if (l->line == NULL)
412 continue;
413 strlcpy(p->title, l->line, sizeof(p->title));
414 break;