Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include <telescope.h>
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/*
 * Signature shared by every per-line handler: the parser being fed, the
 * detected type of the line, and the line bytes (not NUL-terminated,
 * newline excluded) with their length.  Handlers return 1 on success
 * and 0 on failure.
 */
typedef int (parselinefn)(struct parser *p, enum line_type t,
    const char *buf, size_t len);

/* Callbacks installed into struct parser by gemtext_initparser(). */
static int gemtext_parse(struct parser *p, const char *buf, size_t size);
static int gemtext_free(struct parser *p);

/* One handler per kind of gemtext line. */
static int parse_text(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_link(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_title(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_item(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_quote(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_pre_start(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_pre_cnt(struct parser *p, enum line_type t,
    const char *buf, size_t len);
static int parse_pre_end(struct parser *p, enum line_type t,
    const char *buf, size_t len);

/*
 * Dispatch table indexed directly by an enum line_type value.
 * NOTE(review): the entry order is assumed to mirror the declaration
 * order of enum line_type (defined outside this file) -- keep in sync.
 */
static parselinefn *parsers[] = {
	parse_text,		/* LINE_TEXT */
	parse_link,		/* LINE_LINK */
	parse_title,		/* LINE_TITLE_1 */
	parse_title,		/* LINE_TITLE_2 */
	parse_title,		/* LINE_TITLE_3 */
	parse_item,		/* LINE_ITEM */
	parse_quote,		/* LINE_QUOTE */
	parse_pre_start,	/* LINE_PRE_START */
	parse_pre_cnt,		/* LINE_PRE_CONTENT */
	parse_pre_end,		/* LINE_PRE_END */
};
58 void
59 gemtext_initparser(struct parser *p)
60 {
61 memset(p, 0, sizeof(*p));
63 p->name = "text/gemini";
64 p->parse = &gemtext_parse;
65 p->free = &gemtext_free;
66 }
68 static inline int
69 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
70 {
71 struct line *l;
73 if ((l = calloc(1, sizeof(*l))) == NULL)
74 return 0;
76 l->type = type;
77 l->line = line;
78 l->alt = alt;
80 if (TAILQ_EMPTY(&p->head))
81 TAILQ_INSERT_HEAD(&p->head, l, lines);
82 else
83 TAILQ_INSERT_TAIL(&p->head, l, lines);
85 return 1;
86 }
88 static int
89 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
90 {
91 char *l;
93 if ((l = calloc(1, len+1)) == NULL)
94 return 0;
95 memcpy(l, buf, len);
96 return emit_line(p, t, l, NULL);
97 }
99 static int
100 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
102 char *l, *u;
103 const char *url_start;
105 if (len <= 2)
106 return emit_line(p, t, NULL, NULL);
107 buf += 2;
108 len -= 2;
110 while (len > 0 && isspace(buf[0])) {
111 buf++;
112 len--;
115 if (len == 0)
116 return emit_line(p, t, NULL, NULL);
118 url_start = buf;
119 while (len > 0 && !isspace(buf[0])) {
120 buf++;
121 len--;
124 if ((u = calloc(1, buf - url_start + 1)) == NULL)
125 return 0;
126 memcpy(u, url_start, buf - url_start);
128 if (len == 0)
129 goto nolabel;
131 while (len > 0 && isspace(buf[0])) {
132 buf++;
133 len--;
136 if (len == 0)
137 goto nolabel;
139 if ((l = calloc(1, len + 1)) == NULL)
140 return 0;
142 memcpy(l, buf, len);
143 return emit_line(p, t, l, u);
145 nolabel:
146 if ((l = strdup(u)) == NULL)
147 return 0;
148 return emit_line(p, t, l, u);
151 static int
152 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
154 char *l;
156 switch (t) {
157 case LINE_TITLE_1:
158 if (len <= 1)
159 return emit_line(p, t, NULL, NULL);
160 buf++;
161 len--;
162 break;
163 case LINE_TITLE_2:
164 if (len <= 2)
165 return emit_line(p, t, NULL, NULL);
166 buf += 2;
167 len -= 2;
168 break;
169 case LINE_TITLE_3:
170 if (len <= 3)
171 return emit_line(p, t, NULL, NULL);
172 buf += 3;
173 len -= 3;
174 break;
175 default:
176 /* unreachable */
177 abort();
180 while (len > 0 && isspace(buf[0])) {
181 buf++;
182 len--;
185 if (len == 0)
186 return emit_line(p, t, NULL, NULL);
188 if (t == LINE_TITLE_1 && *p->title == '\0')
189 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
191 if ((l = calloc(1, len+1)) == NULL)
192 return 0;
193 memcpy(l, buf, len);
194 return emit_line(p, t, l, NULL);
197 static int
198 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
200 char *l;
202 if (len == 1)
203 return emit_line(p, t, NULL, NULL);
205 buf++;
206 len--;
208 while (len > 0 && isspace(buf[0])) {
209 buf++;
210 len--;
213 if (len == 0)
214 return emit_line(p, t, NULL, NULL);
216 if ((l = calloc(1, len+1)) == NULL)
217 return 0;
218 memcpy(l, buf, len);
219 return emit_line(p, t, l, NULL);
222 static int
223 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
225 char *l;
227 if (len == 1)
228 return emit_line(p, t, NULL, NULL);
230 buf++;
231 len--;
233 while (len > 0 && isspace(buf[0])) {
234 buf++;
235 len--;
238 if (len == 0)
239 return emit_line(p, t, NULL, NULL);
241 if ((l = calloc(1, len+1)) == NULL)
242 return 0;
243 memcpy(l, buf, len);
244 return emit_line(p, t, l, NULL);
247 static int
248 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
250 char *l;
252 if (len <= 3)
253 return emit_line(p, t, NULL, NULL);
255 buf += 3;
256 len += 3;
258 while (len > 0 && isspace(buf[0])) {
259 buf++;
260 len--;
263 if (len == 0)
264 return emit_line(p, t, NULL, NULL);
266 if ((l = calloc(1, len+1)) == NULL)
267 return 0;
269 memcpy(l, buf, len);
270 return emit_line(p, t, NULL, l);
273 static int
274 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
276 char *l;
278 if (len == 0)
279 return emit_line(p, t, NULL, NULL);
281 if ((l = calloc(1, len+1)) == NULL)
282 return 0;
283 memcpy(l, buf, len);
284 return emit_line(p, t, l, NULL);
287 static int
288 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
290 return emit_line(p, t, NULL, NULL);
293 static inline enum line_type
294 detect_line_type(const char *buf, size_t len, int in_pre)
296 size_t i;
298 if (len == 0)
299 return LINE_TEXT;
301 if (in_pre) {
302 if (len >= 3 &&
303 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
304 return LINE_PRE_END;
305 else
306 return LINE_PRE_CONTENT;
309 switch (*buf) {
310 case '*': return LINE_ITEM;
311 case '>': return LINE_QUOTE;
312 case '=':
313 if (len >= 1 && buf[1] == '>')
314 return LINE_LINK;
315 break;
316 case '#':
317 if (len == 1)
318 return LINE_TEXT;
319 if (buf[1] != '#')
320 return LINE_TITLE_1;
321 if (len == 2)
322 return LINE_TEXT;
323 if (buf[2] != '#')
324 return LINE_TITLE_2;
325 if (len == 3)
326 return LINE_TEXT;
327 return LINE_TITLE_3;
328 case '`':
329 if (len < 3)
330 return LINE_TEXT;
331 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
332 return LINE_PRE_START;
333 break;
336 return LINE_TEXT;
339 static int
340 gemtext_parse(struct parser *p, const char *buf, size_t size)
342 const char *b, *e;
343 enum line_type t;
344 size_t len, l;
346 if (p->len == 0) {
347 b = buf;
348 len = size;
349 } else {
350 if (!parser_append(p, buf, size))
351 return 0;
352 b = p->buf;
353 len = p->len;
356 while (len > 0) {
357 if ((e = telescope_strnchr((char*)b, '\n', len)) == NULL)
358 break;
359 l = e - b;
360 t = detect_line_type(b, l, p->flags);
361 if (t == LINE_PRE_START)
362 p->flags = 1;
363 if (t == LINE_PRE_END)
364 p->flags = 0;
365 if (!parsers[t](p, t, b, l))
366 return 0;
368 len -= l;
369 b += l;
371 if (len > 0) {
372 /* skip \n */
373 len--;
374 b++;
378 return parser_set_buf(p, b, len);
381 static int
382 gemtext_free(struct parser *p)
384 enum line_type t;
386 /* flush the buffer */
387 if (p->len != 0) {
388 t = detect_line_type(p->buf, p->len, p->flags);
389 if (!parsers[t](p, t, p->buf, p->len))
390 return 0;
391 if (p->flags && !emit_line(p, LINE_PRE_END, NULL, NULL))
392 return 0;
395 free(p->buf);
396 return 1;