Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include "telescope.h"
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/* parser entry points and the per-line callback */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/* one handler per kind of gemtext line */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* fallback page-title lookup among the already emitted lines */
static void	search_title(struct parser *p, enum line_type level);
45 typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
47 static parselinefn *parsers[] = {
48 parse_text, /* LINE_TEXT */
49 parse_link, /* LINE_LINK */
50 parse_title, /* LINE_TITLE_1 */
51 parse_title, /* LINE_TITLE_2 */
52 parse_title, /* LINE_TITLE_3 */
53 parse_item, /* LINE_ITEM */
54 parse_quote, /* LINE_QUOTE */
55 parse_pre_start, /* LINE_PRE_START */
56 parse_pre_cnt, /* LINE_PRE_CONTENT */
57 parse_pre_end, /* LINE_PRE_END */
58 };
60 void
61 gemtext_initparser(struct parser *p)
62 {
63 memset(p, 0, sizeof(*p));
65 p->name = "text/gemini";
66 p->parse = &gemtext_parse;
67 p->free = &gemtext_free;
68 }
70 static inline int
71 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
72 {
73 struct line *l;
75 if ((l = calloc(1, sizeof(*l))) == NULL)
76 return 0;
78 l->type = type;
79 l->line = line;
80 l->alt = alt;
82 if (TAILQ_EMPTY(&p->head))
83 TAILQ_INSERT_HEAD(&p->head, l, lines);
84 else
85 TAILQ_INSERT_TAIL(&p->head, l, lines);
87 return 1;
88 }
90 static int
91 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
92 {
93 char *l;
95 if ((l = calloc(1, len+1)) == NULL)
96 return 0;
97 memcpy(l, buf, len);
98 return emit_line(p, t, l, NULL);
99 }
101 static int
102 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
104 char *l, *u;
105 const char *url_start;
107 if (len <= 2)
108 return emit_line(p, t, NULL, NULL);
109 buf += 2;
110 len -= 2;
112 while (len > 0 && isspace(buf[0])) {
113 buf++;
114 len--;
117 if (len == 0)
118 return emit_line(p, t, NULL, NULL);
120 url_start = buf;
121 while (len > 0 && !isspace(buf[0])) {
122 buf++;
123 len--;
126 if ((u = calloc(1, buf - url_start + 1)) == NULL)
127 return 0;
128 memcpy(u, url_start, buf - url_start);
130 if (len == 0)
131 goto nolabel;
133 while (len > 0 && isspace(buf[0])) {
134 buf++;
135 len--;
138 if (len == 0)
139 goto nolabel;
141 if ((l = calloc(1, len + 1)) == NULL)
142 return 0;
144 memcpy(l, buf, len);
145 return emit_line(p, t, l, u);
147 nolabel:
148 if ((l = strdup(u)) == NULL)
149 return 0;
150 return emit_line(p, t, l, u);
153 static int
154 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
156 char *l;
158 switch (t) {
159 case LINE_TITLE_1:
160 if (len <= 1)
161 return emit_line(p, t, NULL, NULL);
162 buf++;
163 len--;
164 break;
165 case LINE_TITLE_2:
166 if (len <= 2)
167 return emit_line(p, t, NULL, NULL);
168 buf += 2;
169 len -= 2;
170 break;
171 case LINE_TITLE_3:
172 if (len <= 3)
173 return emit_line(p, t, NULL, NULL);
174 buf += 3;
175 len -= 3;
176 break;
177 default:
178 /* unreachable */
179 abort();
182 while (len > 0 && isspace(buf[0])) {
183 buf++;
184 len--;
187 if (len == 0)
188 return emit_line(p, t, NULL, NULL);
190 if (t == LINE_TITLE_1 && *p->title == '\0')
191 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
193 if ((l = calloc(1, len+1)) == NULL)
194 return 0;
195 memcpy(l, buf, len);
196 return emit_line(p, t, l, NULL);
199 static int
200 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
202 char *l;
204 if (len == 1)
205 return emit_line(p, t, NULL, NULL);
207 buf++;
208 len--;
210 while (len > 0 && isspace(buf[0])) {
211 buf++;
212 len--;
215 if (len == 0)
216 return emit_line(p, t, NULL, NULL);
218 if ((l = calloc(1, len+1)) == NULL)
219 return 0;
220 memcpy(l, buf, len);
221 return emit_line(p, t, l, NULL);
224 static int
225 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
227 char *l;
229 if (len == 1)
230 return emit_line(p, t, NULL, NULL);
232 buf++;
233 len--;
235 while (len > 0 && isspace(buf[0])) {
236 buf++;
237 len--;
240 if (len == 0)
241 return emit_line(p, t, NULL, NULL);
243 if ((l = calloc(1, len+1)) == NULL)
244 return 0;
245 memcpy(l, buf, len);
246 return emit_line(p, t, l, NULL);
249 static int
250 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
252 char *l;
254 if (len <= 3)
255 return emit_line(p, t, NULL, NULL);
257 buf += 3;
258 len -= 3;
260 while (len > 0 && isspace(buf[0])) {
261 buf++;
262 len--;
265 if (len == 0)
266 return emit_line(p, t, NULL, NULL);
268 if ((l = calloc(1, len+1)) == NULL)
269 return 0;
271 memcpy(l, buf, len);
272 return emit_line(p, t, l, NULL);
275 static int
276 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
278 char *l;
280 if (len == 0)
281 return emit_line(p, t, NULL, NULL);
283 if ((l = calloc(1, len+1)) == NULL)
284 return 0;
285 memcpy(l, buf, len);
286 return emit_line(p, t, l, NULL);
289 static int
290 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
292 return emit_line(p, t, NULL, NULL);
295 static inline enum line_type
296 detect_line_type(const char *buf, size_t len, int in_pre)
298 if (len == 0)
299 return LINE_TEXT;
301 if (in_pre) {
302 if (len >= 3 &&
303 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
304 return LINE_PRE_END;
305 else
306 return LINE_PRE_CONTENT;
309 switch (*buf) {
310 case '*': return LINE_ITEM;
311 case '>': return LINE_QUOTE;
312 case '=':
313 if (len >= 1 && buf[1] == '>')
314 return LINE_LINK;
315 break;
316 case '#':
317 if (len == 1)
318 return LINE_TEXT;
319 if (buf[1] != '#')
320 return LINE_TITLE_1;
321 if (len == 2)
322 return LINE_TEXT;
323 if (buf[2] != '#')
324 return LINE_TITLE_2;
325 if (len == 3)
326 return LINE_TEXT;
327 return LINE_TITLE_3;
328 case '`':
329 if (len < 3)
330 return LINE_TEXT;
331 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
332 return LINE_PRE_START;
333 break;
336 return LINE_TEXT;
339 static int
340 gemtext_parse(struct parser *p, const char *buf, size_t size)
342 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
345 static int
346 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
348 enum line_type t;
350 t = detect_line_type(line, linelen, p->flags);
351 if (t == LINE_PRE_START)
352 p->flags = 1;
353 if (t == LINE_PRE_END)
354 p->flags = 0;
355 return parsers[t](p, t, line, linelen);
358 static int
359 gemtext_free(struct parser *p)
361 enum line_type t;
363 /* flush the buffer */
364 if (p->len != 0) {
365 t = detect_line_type(p->buf, p->len, p->flags);
366 if (!parsers[t](p, t, p->buf, p->len))
367 return 0;
368 if (p->flags && !emit_line(p, LINE_PRE_END, NULL, NULL))
369 return 0;
372 free(p->buf);
374 /*
375 * use the first level 2 or 3 header as page title if none
376 * found yet.
377 */
378 if (*p->title == '\0')
379 search_title(p, LINE_TITLE_2);
380 if (*p->title == '\0')
381 search_title(p, LINE_TITLE_3);
383 return 1;
386 static void
387 search_title(struct parser *p, enum line_type level)
389 struct line *l;
391 TAILQ_FOREACH(l, &p->head, lines) {
392 if (l->type == level) {
393 if (l->line == NULL)
394 continue;
395 strlcpy(p->title, l->line, sizeof(p->title));