Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include "telescope.h"
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/* Entry points installed into struct parser by gemtext_initparser(). */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/*
 * Per-line-type handlers.  They all share the parselinefn signature:
 * the parser, the detected line type, and the raw line as a
 * (pointer, length) pair.
 */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Copies the text of the first non-empty line of type `level' into p->title. */
static void	search_title(struct parser *p, enum line_type level);

/* Function type shared by every parse_* handler above. */
typedef int (parselinefn)(struct parser *p, enum line_type t,
    const char *buf, size_t len);
47 static parselinefn *parsers[] = {
48 parse_text, /* LINE_TEXT */
49 parse_link, /* LINE_LINK */
50 parse_title, /* LINE_TITLE_1 */
51 parse_title, /* LINE_TITLE_2 */
52 parse_title, /* LINE_TITLE_3 */
53 parse_item, /* LINE_ITEM */
54 parse_quote, /* LINE_QUOTE */
55 parse_pre_start, /* LINE_PRE_START */
56 parse_pre_cnt, /* LINE_PRE_CONTENT */
57 parse_pre_end, /* LINE_PRE_END */
58 };
60 void
61 gemtext_initparser(struct parser *p)
62 {
63 memset(p, 0, sizeof(*p));
65 p->name = "text/gemini";
66 p->parse = &gemtext_parse;
67 p->free = &gemtext_free;
68 }
70 static inline int
71 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
72 {
73 struct line *l;
75 if ((l = calloc(1, sizeof(*l))) == NULL)
76 return 0;
78 l->type = type;
79 l->line = line;
80 l->alt = alt;
82 switch (l->type) {
83 case LINE_PRE_START:
84 case LINE_PRE_END:
85 if (hide_pre_context)
86 l->flags = L_HIDDEN;
87 break;
88 case LINE_PRE_CONTENT:
89 if (hide_pre_blocks)
90 l->flags = L_HIDDEN;
91 break;
92 default:
93 l->flags = 0;
94 break;
95 }
97 if (TAILQ_EMPTY(&p->head))
98 TAILQ_INSERT_HEAD(&p->head, l, lines);
99 else
100 TAILQ_INSERT_TAIL(&p->head, l, lines);
102 return 1;
105 static int
106 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
108 char *l;
110 if ((l = calloc(1, len+1)) == NULL)
111 return 0;
112 memcpy(l, buf, len);
113 return emit_line(p, t, l, NULL);
116 static int
117 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
119 char *l, *u;
120 const char *url_start;
122 if (len <= 2)
123 return emit_line(p, t, NULL, NULL);
124 buf += 2;
125 len -= 2;
127 while (len > 0 && isspace(buf[0])) {
128 buf++;
129 len--;
132 if (len == 0)
133 return emit_line(p, t, NULL, NULL);
135 url_start = buf;
136 while (len > 0 && !isspace(buf[0])) {
137 buf++;
138 len--;
141 if ((u = calloc(1, buf - url_start + 1)) == NULL)
142 return 0;
143 memcpy(u, url_start, buf - url_start);
145 if (len == 0)
146 goto nolabel;
148 while (len > 0 && isspace(buf[0])) {
149 buf++;
150 len--;
153 if (len == 0)
154 goto nolabel;
156 if ((l = calloc(1, len + 1)) == NULL)
157 return 0;
159 memcpy(l, buf, len);
160 return emit_line(p, t, l, u);
162 nolabel:
163 if ((l = strdup(u)) == NULL)
164 return 0;
165 return emit_line(p, t, l, u);
168 static int
169 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
171 char *l;
173 switch (t) {
174 case LINE_TITLE_1:
175 if (len <= 1)
176 return emit_line(p, t, NULL, NULL);
177 buf++;
178 len--;
179 break;
180 case LINE_TITLE_2:
181 if (len <= 2)
182 return emit_line(p, t, NULL, NULL);
183 buf += 2;
184 len -= 2;
185 break;
186 case LINE_TITLE_3:
187 if (len <= 3)
188 return emit_line(p, t, NULL, NULL);
189 buf += 3;
190 len -= 3;
191 break;
192 default:
193 /* unreachable */
194 abort();
197 while (len > 0 && isspace(buf[0])) {
198 buf++;
199 len--;
202 if (len == 0)
203 return emit_line(p, t, NULL, NULL);
205 if (t == LINE_TITLE_1 && *p->title == '\0')
206 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
208 if ((l = calloc(1, len+1)) == NULL)
209 return 0;
210 memcpy(l, buf, len);
211 return emit_line(p, t, l, NULL);
214 static int
215 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
217 char *l;
219 if (len == 1)
220 return emit_line(p, t, NULL, NULL);
222 buf++;
223 len--;
225 while (len > 0 && isspace(buf[0])) {
226 buf++;
227 len--;
230 if (len == 0)
231 return emit_line(p, t, NULL, NULL);
233 if ((l = calloc(1, len+1)) == NULL)
234 return 0;
235 memcpy(l, buf, len);
236 return emit_line(p, t, l, NULL);
239 static int
240 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
242 char *l;
244 if (len == 1)
245 return emit_line(p, t, NULL, NULL);
247 buf++;
248 len--;
250 while (len > 0 && isspace(buf[0])) {
251 buf++;
252 len--;
255 if (len == 0)
256 return emit_line(p, t, NULL, NULL);
258 if ((l = calloc(1, len+1)) == NULL)
259 return 0;
260 memcpy(l, buf, len);
261 return emit_line(p, t, l, NULL);
264 static int
265 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
267 char *l;
269 if (len <= 3)
270 return emit_line(p, t, NULL, NULL);
272 buf += 3;
273 len -= 3;
275 while (len > 0 && isspace(buf[0])) {
276 buf++;
277 len--;
280 if (len == 0)
281 return emit_line(p, t, NULL, NULL);
283 if ((l = calloc(1, len+1)) == NULL)
284 return 0;
286 memcpy(l, buf, len);
287 return emit_line(p, t, l, NULL);
290 static int
291 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
293 char *l;
295 if (len == 0)
296 return emit_line(p, t, NULL, NULL);
298 if ((l = calloc(1, len+1)) == NULL)
299 return 0;
300 memcpy(l, buf, len);
301 return emit_line(p, t, l, NULL);
304 static int
305 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
307 return emit_line(p, t, NULL, NULL);
310 static inline enum line_type
311 detect_line_type(const char *buf, size_t len, int in_pre)
313 if (len == 0)
314 return LINE_TEXT;
316 if (in_pre) {
317 if (len >= 3 &&
318 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
319 return LINE_PRE_END;
320 else
321 return LINE_PRE_CONTENT;
324 switch (*buf) {
325 case '*': return LINE_ITEM;
326 case '>': return LINE_QUOTE;
327 case '=':
328 if (len >= 1 && buf[1] == '>')
329 return LINE_LINK;
330 break;
331 case '#':
332 if (len == 1)
333 return LINE_TEXT;
334 if (buf[1] != '#')
335 return LINE_TITLE_1;
336 if (len == 2)
337 return LINE_TEXT;
338 if (buf[2] != '#')
339 return LINE_TITLE_2;
340 if (len == 3)
341 return LINE_TEXT;
342 return LINE_TITLE_3;
343 case '`':
344 if (len < 3)
345 return LINE_TEXT;
346 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
347 return LINE_PRE_START;
348 break;
351 return LINE_TEXT;
354 static int
355 gemtext_parse(struct parser *p, const char *buf, size_t size)
357 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
360 static int
361 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
363 enum line_type t;
365 t = detect_line_type(line, linelen, p->flags);
366 if (t == LINE_PRE_START)
367 p->flags = 1;
368 if (t == LINE_PRE_END)
369 p->flags = 0;
370 return parsers[t](p, t, line, linelen);
373 static int
374 gemtext_free(struct parser *p)
376 enum line_type t;
378 /* flush the buffer */
379 if (p->len != 0) {
380 t = detect_line_type(p->buf, p->len, p->flags);
381 if (!parsers[t](p, t, p->buf, p->len))
382 return 0;
383 if (p->flags && !emit_line(p, LINE_PRE_END, NULL, NULL))
384 return 0;
387 free(p->buf);
389 /*
390 * use the first level 2 or 3 header as page title if none
391 * found yet.
392 */
393 if (*p->title == '\0')
394 search_title(p, LINE_TITLE_2);
395 if (*p->title == '\0')
396 search_title(p, LINE_TITLE_3);
398 return 1;
401 static void
402 search_title(struct parser *p, enum line_type level)
404 struct line *l;
406 TAILQ_FOREACH(l, &p->head, lines) {
407 if (l->type == level) {
408 if (l->line == NULL)
409 continue;
410 strlcpy(p->title, l->line, sizeof(p->title));
411 break;