Blob


/*
 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * A streaming gemtext parser.
 *
 * TODO:
 *	- handle NULs
 *	- UTF8
 */
25 #include "defaults.h"
26 #include "parser.h"
27 #include "telescope.h"
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
/* Callbacks handed to the generic parser machinery. */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/* One handler per kind of gemtext line. */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Fallback lookup of a page title among the already emitted lines. */
static void	search_title(struct parser *p, enum line_type level);

/* Common signature of the per-line handlers listed above. */
typedef int (parselinefn)(struct parser *p, enum line_type t,
    const char *buf, size_t len);
49 static parselinefn *parsers[] = {
50 [LINE_TEXT] = parse_text,
51 [LINE_LINK] = parse_link,
52 [LINE_TITLE_1] = parse_title,
53 [LINE_TITLE_3] = parse_title,
54 [LINE_QUOTE] = parse_quote,
55 [LINE_PRE_START] = parse_pre_start,
56 [LINE_PRE_END] = parse_pre_end,
57 };
59 void
60 gemtext_initparser(struct parser *p)
61 {
62 memset(p, 0, sizeof(*p));
64 p->name = "text/gemini";
65 p->parse = &gemtext_parse;
66 p->free = &gemtext_free;
67 }
69 static inline int
70 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
71 {
72 struct line *l;
74 if ((l = calloc(1, sizeof(*l))) == NULL)
75 return 0;
77 l->type = type;
78 l->line = line;
79 l->alt = alt;
81 switch (l->type) {
82 case LINE_PRE_START:
83 case LINE_PRE_END:
84 if (hide_pre_context)
85 l->flags = L_HIDDEN;
86 break;
87 case LINE_PRE_CONTENT:
88 if (hide_pre_blocks)
89 l->flags = L_HIDDEN;
90 break;
91 default:
92 l->flags = 0;
93 break;
94 }
96 if (TAILQ_EMPTY(&p->head))
97 TAILQ_INSERT_HEAD(&p->head, l, lines);
98 else
99 TAILQ_INSERT_TAIL(&p->head, l, lines);
101 return 1;
104 static int
105 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
107 char *l;
109 if ((l = calloc(1, len+1)) == NULL)
110 return 0;
111 memcpy(l, buf, len);
112 return emit_line(p, t, l, NULL);
115 static int
116 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
118 char *l, *u;
119 const char *url_start;
121 if (len <= 2)
122 return emit_line(p, t, NULL, NULL);
123 buf += 2;
124 len -= 2;
126 while (len > 0 && isspace(buf[0])) {
127 buf++;
128 len--;
131 if (len == 0)
132 return emit_line(p, t, NULL, NULL);
134 url_start = buf;
135 while (len > 0 && !isspace(buf[0])) {
136 buf++;
137 len--;
140 if ((u = calloc(1, buf - url_start + 1)) == NULL)
141 return 0;
142 memcpy(u, url_start, buf - url_start);
144 if (len == 0)
145 goto nolabel;
147 while (len > 0 && isspace(buf[0])) {
148 buf++;
149 len--;
152 if (len == 0)
153 goto nolabel;
155 if ((l = calloc(1, len + 1)) == NULL)
156 return 0;
158 memcpy(l, buf, len);
159 return emit_line(p, t, l, u);
161 nolabel:
162 if ((l = strdup(u)) == NULL)
163 return 0;
164 return emit_line(p, t, l, u);
167 static int
168 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
170 char *l;
172 switch (t) {
173 case LINE_TITLE_1:
174 if (len <= 1)
175 return emit_line(p, t, NULL, NULL);
176 buf++;
177 len--;
178 break;
179 case LINE_TITLE_2:
180 if (len <= 2)
181 return emit_line(p, t, NULL, NULL);
182 buf += 2;
183 len -= 2;
184 break;
185 case LINE_TITLE_3:
186 if (len <= 3)
187 return emit_line(p, t, NULL, NULL);
188 buf += 3;
189 len -= 3;
190 break;
191 default:
192 /* unreachable */
193 abort();
196 while (len > 0 && isspace(buf[0])) {
197 buf++;
198 len--;
201 if (len == 0)
202 return emit_line(p, t, NULL, NULL);
204 if (t == LINE_TITLE_1 && *p->title == '\0')
205 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
207 if ((l = calloc(1, len+1)) == NULL)
208 return 0;
209 memcpy(l, buf, len);
210 return emit_line(p, t, l, NULL);
213 static int
214 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
216 char *l;
218 if (len == 1)
219 return emit_line(p, t, NULL, NULL);
221 buf++;
222 len--;
224 while (len > 0 && isspace(buf[0])) {
225 buf++;
226 len--;
229 if (len == 0)
230 return emit_line(p, t, NULL, NULL);
232 if ((l = calloc(1, len+1)) == NULL)
233 return 0;
234 memcpy(l, buf, len);
235 return emit_line(p, t, l, NULL);
238 static int
239 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
241 char *l;
243 if (len == 1)
244 return emit_line(p, t, NULL, NULL);
246 buf++;
247 len--;
249 while (len > 0 && isspace(buf[0])) {
250 buf++;
251 len--;
254 if (len == 0)
255 return emit_line(p, t, NULL, NULL);
257 if ((l = calloc(1, len+1)) == NULL)
258 return 0;
259 memcpy(l, buf, len);
260 return emit_line(p, t, l, NULL);
263 static int
264 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
266 char *l;
268 if (len <= 3)
269 return emit_line(p, t, NULL, NULL);
271 buf += 3;
272 len -= 3;
274 while (len > 0 && isspace(buf[0])) {
275 buf++;
276 len--;
279 if (len == 0)
280 return emit_line(p, t, NULL, NULL);
282 if ((l = calloc(1, len+1)) == NULL)
283 return 0;
285 memcpy(l, buf, len);
286 return emit_line(p, t, l, NULL);
289 static int
290 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
292 char *l;
294 if (len == 0)
295 return emit_line(p, t, NULL, NULL);
297 if ((l = calloc(1, len+1)) == NULL)
298 return 0;
299 memcpy(l, buf, len);
300 return emit_line(p, t, l, NULL);
303 static int
304 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
306 return emit_line(p, t, NULL, NULL);
309 static inline enum line_type
310 detect_line_type(const char *buf, size_t len, int in_pre)
312 if (in_pre) {
313 if (len >= 3 &&
314 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
315 return LINE_PRE_END;
316 else
317 return LINE_PRE_CONTENT;
320 if (len == 0)
321 return LINE_TEXT;
323 switch (*buf) {
324 case '*': return LINE_ITEM;
325 case '>': return LINE_QUOTE;
326 case '=':
327 if (len >= 1 && buf[1] == '>')
328 return LINE_LINK;
329 break;
330 case '#':
331 if (len == 1)
332 return LINE_TEXT;
333 if (buf[1] != '#')
334 return LINE_TITLE_1;
335 if (len == 2)
336 return LINE_TEXT;
337 if (buf[2] != '#')
338 return LINE_TITLE_2;
339 if (len == 3)
340 return LINE_TEXT;
341 return LINE_TITLE_3;
342 case '`':
343 if (len < 3)
344 return LINE_TEXT;
345 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
346 return LINE_PRE_START;
347 break;
350 return LINE_TEXT;
353 static int
354 gemtext_parse(struct parser *p, const char *buf, size_t size)
356 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
359 static int
360 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
362 enum line_type t;
364 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
365 if (t == LINE_PRE_START)
366 p->flags ^= PARSER_IN_PRE;
367 if (t == LINE_PRE_END)
368 p->flags ^= PARSER_IN_PRE;
369 return parsers[t](p, t, line, linelen);
372 static int
373 gemtext_free(struct parser *p)
375 enum line_type t;
377 /* flush the buffer */
378 if (p->len != 0) {
379 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
380 if (!parsers[t](p, t, p->buf, p->len))
381 return 0;
382 if ((p->flags & PARSER_IN_PRE) &&
383 !emit_line(p, LINE_PRE_END, NULL, NULL))
384 return 0;
387 free(p->buf);
389 /*
390 * use the first level 2 or 3 header as page title if none
391 * found yet.
392 */
393 if (*p->title == '\0')
394 search_title(p, LINE_TITLE_2);
395 if (*p->title == '\0')
396 search_title(p, LINE_TITLE_3);
398 return 1;
401 static void
402 search_title(struct parser *p, enum line_type level)
404 struct line *l;
406 TAILQ_FOREACH(l, &p->head, lines) {
407 if (l->type == level) {
408 if (l->line == NULL)
409 continue;
410 strlcpy(p->title, l->line, sizeof(p->title));
411 break;