Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include "defaults.h"
26 #include "parser.h"
27 #include "telescope.h"
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
33 static int gemtext_parse(struct parser*, const char*, size_t);
34 static int gemtext_foreach_line(struct parser*, const char*, size_t);
35 static int gemtext_free(struct parser*);
37 static int parse_text(struct parser*, enum line_type, const char*, size_t);
38 static int parse_link(struct parser*, enum line_type, const char*, size_t);
39 static int parse_title(struct parser*, enum line_type, const char*, size_t);
40 static int parse_item(struct parser*, enum line_type, const char*, size_t);
41 static int parse_quote(struct parser*, enum line_type, const char*, size_t);
42 static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
43 static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
44 static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
45 static void search_title(struct parser*, enum line_type);
47 typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
49 static parselinefn *parsers[] = {
50 [LINE_TEXT] = parse_text,
51 [LINE_LINK] = parse_link,
52 [LINE_TITLE_1] = parse_title,
53 [LINE_TITLE_2] = parse_title,
54 [LINE_TITLE_3] = parse_title,
55 [LINE_ITEM] = parse_item,
56 [LINE_QUOTE] = parse_quote,
57 [LINE_PRE_START] = parse_pre_start,
58 [LINE_PRE_CONTENT] = parse_pre_cnt,
59 [LINE_PRE_END] = parse_pre_end,
60 };
62 void
63 gemtext_initparser(struct parser *p)
64 {
65 memset(p, 0, sizeof(*p));
67 p->name = "text/gemini";
68 p->parse = &gemtext_parse;
69 p->free = &gemtext_free;
70 }
72 static inline int
73 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
74 {
75 struct line *l;
77 if ((l = calloc(1, sizeof(*l))) == NULL)
78 return 0;
80 l->type = type;
81 l->line = line;
82 l->meta.alt = alt;
84 switch (l->type) {
85 case LINE_PRE_START:
86 case LINE_PRE_END:
87 if (hide_pre_context)
88 l->flags = L_HIDDEN;
89 if (l->type == LINE_PRE_END &&
90 hide_pre_closing_line)
91 l->flags = L_HIDDEN;
92 break;
93 case LINE_PRE_CONTENT:
94 if (hide_pre_blocks)
95 l->flags = L_HIDDEN;
96 break;
97 default:
98 l->flags = 0;
99 break;
102 if (TAILQ_EMPTY(&p->head))
103 TAILQ_INSERT_HEAD(&p->head, l, lines);
104 else
105 TAILQ_INSERT_TAIL(&p->head, l, lines);
107 return 1;
110 static int
111 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
113 char *l;
115 if ((l = calloc(1, len+1)) == NULL)
116 return 0;
117 memcpy(l, buf, len);
118 return emit_line(p, t, l, NULL);
121 static int
122 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
124 char *l, *u;
125 const char *url_start;
127 if (len <= 2)
128 return emit_line(p, t, NULL, NULL);
129 buf += 2;
130 len -= 2;
132 while (len > 0 && isspace(buf[0])) {
133 buf++;
134 len--;
137 if (len == 0)
138 return emit_line(p, t, NULL, NULL);
140 url_start = buf;
141 while (len > 0 && !isspace(buf[0])) {
142 buf++;
143 len--;
146 if ((u = calloc(1, buf - url_start + 1)) == NULL)
147 return 0;
148 memcpy(u, url_start, buf - url_start);
150 if (len == 0)
151 goto nolabel;
153 while (len > 0 && isspace(buf[0])) {
154 buf++;
155 len--;
158 if (len == 0)
159 goto nolabel;
161 if ((l = calloc(1, len + 1)) == NULL)
162 return 0;
164 memcpy(l, buf, len);
165 return emit_line(p, t, l, u);
167 nolabel:
168 if ((l = strdup(u)) == NULL)
169 return 0;
170 return emit_line(p, t, l, u);
173 static int
174 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
176 char *l;
178 switch (t) {
179 case LINE_TITLE_1:
180 if (len <= 1)
181 return emit_line(p, t, NULL, NULL);
182 buf++;
183 len--;
184 break;
185 case LINE_TITLE_2:
186 if (len <= 2)
187 return emit_line(p, t, NULL, NULL);
188 buf += 2;
189 len -= 2;
190 break;
191 case LINE_TITLE_3:
192 if (len <= 3)
193 return emit_line(p, t, NULL, NULL);
194 buf += 3;
195 len -= 3;
196 break;
197 default:
198 /* unreachable */
199 abort();
202 while (len > 0 && isspace(buf[0])) {
203 buf++;
204 len--;
207 if (len == 0)
208 return emit_line(p, t, NULL, NULL);
210 if (t == LINE_TITLE_1 && *p->title == '\0')
211 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
213 if ((l = calloc(1, len+1)) == NULL)
214 return 0;
215 memcpy(l, buf, len);
216 return emit_line(p, t, l, NULL);
219 static int
220 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
222 char *l;
224 if (len == 1)
225 return emit_line(p, t, NULL, NULL);
227 buf++;
228 len--;
230 while (len > 0 && isspace(buf[0])) {
231 buf++;
232 len--;
235 if (len == 0)
236 return emit_line(p, t, NULL, NULL);
238 if ((l = calloc(1, len+1)) == NULL)
239 return 0;
240 memcpy(l, buf, len);
241 return emit_line(p, t, l, NULL);
244 static int
245 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
247 char *l;
249 if (len == 1)
250 return emit_line(p, t, NULL, NULL);
252 buf++;
253 len--;
255 while (len > 0 && isspace(buf[0])) {
256 buf++;
257 len--;
260 if (len == 0)
261 return emit_line(p, t, NULL, NULL);
263 if ((l = calloc(1, len+1)) == NULL)
264 return 0;
265 memcpy(l, buf, len);
266 return emit_line(p, t, l, NULL);
269 static int
270 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
272 char *l;
274 if (len <= 3)
275 return emit_line(p, t, NULL, NULL);
277 buf += 3;
278 len -= 3;
280 while (len > 0 && isspace(buf[0])) {
281 buf++;
282 len--;
285 if (len == 0)
286 return emit_line(p, t, NULL, NULL);
288 if ((l = calloc(1, len+1)) == NULL)
289 return 0;
291 memcpy(l, buf, len);
292 return emit_line(p, t, l, NULL);
295 static int
296 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
298 char *l;
300 if (len == 0)
301 return emit_line(p, t, NULL, NULL);
303 if ((l = calloc(1, len+1)) == NULL)
304 return 0;
305 memcpy(l, buf, len);
306 return emit_line(p, t, l, NULL);
309 static int
310 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
312 return emit_line(p, t, NULL, NULL);
315 static inline enum line_type
316 detect_line_type(const char *buf, size_t len, int in_pre)
318 if (in_pre) {
319 if (len >= 3 &&
320 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
321 return LINE_PRE_END;
322 else
323 return LINE_PRE_CONTENT;
326 if (len == 0)
327 return LINE_TEXT;
329 switch (*buf) {
330 case '*': return LINE_ITEM;
331 case '>': return LINE_QUOTE;
332 case '=':
333 if (len >= 1 && buf[1] == '>')
334 return LINE_LINK;
335 break;
336 case '#':
337 if (len == 1)
338 return LINE_TEXT;
339 if (buf[1] != '#')
340 return LINE_TITLE_1;
341 if (len == 2)
342 return LINE_TEXT;
343 if (buf[2] != '#')
344 return LINE_TITLE_2;
345 if (len == 3)
346 return LINE_TEXT;
347 return LINE_TITLE_3;
348 case '`':
349 if (len < 3)
350 return LINE_TEXT;
351 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
352 return LINE_PRE_START;
353 break;
356 return LINE_TEXT;
359 static int
360 gemtext_parse(struct parser *p, const char *buf, size_t size)
362 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
365 static int
366 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
368 enum line_type t;
370 t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
371 if (t == LINE_PRE_START)
372 p->flags ^= PARSER_IN_PRE;
373 if (t == LINE_PRE_END)
374 p->flags ^= PARSER_IN_PRE;
375 return parsers[t](p, t, line, linelen);
378 static int
379 gemtext_free(struct parser *p)
381 enum line_type t;
383 /* flush the buffer */
384 if (p->len != 0) {
385 t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
386 if (!parsers[t](p, t, p->buf, p->len))
387 return 0;
388 if ((p->flags & PARSER_IN_PRE) &&
389 !emit_line(p, LINE_PRE_END, NULL, NULL))
390 return 0;
393 free(p->buf);
395 /*
396 * use the first level 2 or 3 header as page title if none
397 * found yet.
398 */
399 if (*p->title == '\0')
400 search_title(p, LINE_TITLE_2);
401 if (*p->title == '\0')
402 search_title(p, LINE_TITLE_3);
404 return 1;
407 static void
408 search_title(struct parser *p, enum line_type level)
410 struct line *l;
412 TAILQ_FOREACH(l, &p->head, lines) {
413 if (l->type == level) {
414 if (l->line == NULL)
415 continue;
416 strlcpy(p->title, l->line, sizeof(p->title));
417 break;