Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include <telescope.h>
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/*
 * Signature shared by every per-line handler: the handler receives the
 * parser, the detected line type, and the raw line (newline excluded)
 * with its length.  A zero return is treated as failure by the caller.
 */
typedef int (parselinefn)(struct parser *, enum line_type, const char *, size_t);

/* parser entry points installed by gemtext_initparser() */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static void	gemtext_free(struct parser *p);

/* one handler per kind of gemtext line */
static int	parse_text(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len);

/*
 * Dispatch table indexed by enum line_type (see gemtext_parse).  The
 * entries are positional; the trailing comments name the enumerator
 * each slot is meant for -- presumably this mirrors the declaration
 * order of enum line_type, confirm against telescope.h.
 */
static parselinefn *parsers[] = {
	parse_text,		/* LINE_TEXT */
	parse_link,		/* LINE_LINK */
	parse_title,		/* LINE_TITLE_1 */
	parse_title,		/* LINE_TITLE_2 */
	parse_title,		/* LINE_TITLE_3 */
	parse_item,		/* LINE_ITEM */
	parse_quote,		/* LINE_QUOTE */
	parse_pre_start,	/* LINE_PRE_START */
	parse_pre_cnt,		/* LINE_PRE_CONTENT */
	parse_pre_end,		/* LINE_PRE_END */
};
58 void
59 gemtext_initparser(struct parser *p)
60 {
61 memset(p, 0, sizeof(*p));
63 p->parse = &gemtext_parse;
64 p->free = &gemtext_free;
65 }
67 static inline int
68 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
69 {
70 struct line *l;
72 if ((l = calloc(1, sizeof(*l))) == NULL)
73 return 0;
75 l->type = type;
76 l->line = line;
77 l->alt = alt;
79 if (TAILQ_EMPTY(&p->head))
80 TAILQ_INSERT_HEAD(&p->head, l, lines);
81 else
82 TAILQ_INSERT_TAIL(&p->head, l, lines);
84 return 1;
85 }
87 static int
88 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
89 {
90 char *l;
92 if ((l = calloc(1, len+1)) == NULL)
93 return 0;
94 memcpy(l, buf, len);
95 return emit_line(p, t, l, NULL);
96 }
98 static int
99 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
101 char *l, *u;
102 const char *url_start;
104 if (len <= 2)
105 return emit_line(p, t, NULL, NULL);
106 buf += 2;
107 len -= 2;
109 while (len > 0 && isspace(buf[0])) {
110 buf++;
111 len--;
114 if (len == 0)
115 return emit_line(p, t, NULL, NULL);
117 url_start = buf;
118 while (len > 0 && !isspace(buf[0])) {
119 buf++;
120 len--;
123 if ((u = calloc(1, buf - url_start + 1)) == NULL)
124 return 0;
125 memcpy(u, url_start, buf - url_start);
127 if (len == 0)
128 return emit_line(p, t, u, NULL);
130 while (len > 0) {
131 buf++;
132 len--;
135 if (len == 0)
136 return emit_line(p, t, u, NULL);
138 if ((l = calloc(1, len + 1)) == NULL)
139 return 0;
141 memcpy(l, buf, len);
142 return emit_line(p, t, u, l);
145 static int
146 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
148 char *l;
150 switch (t) {
151 case LINE_TITLE_1:
152 if (len <= 1)
153 return emit_line(p, t, NULL, NULL);
154 buf++;
155 len--;
156 break;
157 case LINE_TITLE_2:
158 if (len <= 2)
159 return emit_line(p, t, NULL, NULL);
160 buf += 2;
161 len -= 2;
162 break;
163 case LINE_TITLE_3:
164 if (len <= 3)
165 return emit_line(p, t, NULL, NULL);
166 buf += 3;
167 len -= 3;
168 break;
169 default:
170 /* unreachable */
171 abort();
174 while (len > 0 && isspace(buf[0])) {
175 buf++;
176 len--;
179 if (len == 0)
180 return emit_line(p, t, NULL, NULL);
182 if ((l = calloc(1, len+1)) == NULL)
183 return 0;
184 memcpy(l, buf, len);
185 return emit_line(p, t, l, NULL);
188 static int
189 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
191 char *l;
193 if (len == 1)
194 return emit_line(p, t, NULL, NULL);
196 buf++;
197 len--;
199 while (len > 0 && isspace(buf[0])) {
200 buf++;
201 len--;
204 if (len == 0)
205 return emit_line(p, t, NULL, NULL);
207 if ((l = calloc(1, len+1)) == NULL)
208 return 0;
209 memcpy(l, buf, len);
210 return emit_line(p, t, l, NULL);
213 static int
214 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
216 char *l;
218 if (len == 1)
219 return emit_line(p, t, NULL, NULL);
221 buf++;
222 len--;
224 while (len > 0 && isspace(buf[0])) {
225 buf++;
226 len--;
229 if (len == 0)
230 return emit_line(p, t, NULL, NULL);
232 if ((l = calloc(1, len+1)) == NULL)
233 return 0;
234 memcpy(l, buf, len);
235 return emit_line(p, t, l, NULL);
238 static int
239 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
241 char *l;
243 if (len <= 3)
244 return emit_line(p, t, NULL, NULL);
246 buf += 3;
247 len += 3;
249 while (len > 0 && isspace(buf[0])) {
250 buf++;
251 len--;
254 if (len == 0)
255 return emit_line(p, t, NULL, NULL);
257 if ((l = calloc(1, len+1)) == NULL)
258 return 0;
260 memcpy(l, buf, len);
261 return emit_line(p, t, NULL, l);
264 static int
265 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
267 char *l;
269 if (len == 0)
270 return emit_line(p, t, NULL, NULL);
272 if ((l = calloc(1, len+1)) == NULL)
273 return 0;
274 memcpy(l, buf, len);
275 return emit_line(p, t, l, NULL);
278 static int
279 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
281 return emit_line(p, t, NULL, NULL);
284 static inline enum line_type
285 detect_line_type(const char *buf, size_t len, int in_pre)
287 size_t i;
289 if (len == 0)
290 return LINE_TEXT;
292 if (in_pre) {
293 if (len >= 3 &&
294 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
295 return LINE_PRE_END;
296 else
297 return LINE_PRE_CONTENT;
300 switch (*buf) {
301 case '*': return LINE_ITEM;
302 case '>': return LINE_QUOTE;
303 case '=':
304 if (len >= 1 && buf[1] == '>')
305 return LINE_LINK;
306 break;
307 case '#':
308 if (len == 1)
309 return LINE_TEXT;
310 if (buf[1] != '#')
311 return LINE_TITLE_1;
312 if (len == 2)
313 return LINE_TEXT;
314 if (buf[2] != '#')
315 return LINE_TITLE_2;
316 if (len == 3)
317 return LINE_TEXT;
318 return LINE_TITLE_3;
319 case '`':
320 if (len < 3)
321 return LINE_TEXT;
322 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
323 return LINE_PRE_START;
324 break;
327 return LINE_TEXT;
330 static inline int
331 append(struct parser *p, const char *buf, size_t len)
333 size_t newlen;
334 char *t;
336 newlen = len + p->len;
337 if ((t = calloc(1, newlen)) == NULL)
338 return 0;
339 free(p->buf);
340 p->buf = t;
341 p->len = newlen;
342 return 1;
345 static inline int
346 set_buf(struct parser *p, const char *buf, size_t len)
348 free(p->buf);
349 p->buf = NULL;
351 if (len == 0)
352 return 1;
354 if ((p->buf = calloc(1, len)) == NULL)
355 return 0;
356 memcpy(p->buf, buf, len);
357 return 1;
360 static int
361 gemtext_parse(struct parser *p, const char *buf, size_t size)
363 const char *b, *e;
364 enum line_type t;
365 size_t len, l;
367 if (p->len == 0) {
368 b = buf;
369 len = size;
370 } else {
371 if (!append(p, buf, size))
372 return 0;
373 b = p->buf;
374 len = p->len;
377 while (len > 0) {
378 if ((e = telescope_strnchr((char*)b, '\n', len)) == NULL)
379 break;
380 l = e - b;
381 t = detect_line_type(b, l, p->flags);
382 if (t == LINE_PRE_START)
383 p->flags = 1;
384 if (t == LINE_PRE_END)
385 p->flags = 0;
386 if (!parsers[t](p, t, b, l))
387 return 0;
389 len -= l;
390 b += l;
392 if (len > 0) {
393 /* skip \n */
394 len--;
395 b++;
399 return set_buf(p, b, len);
402 static void
403 gemtext_free(struct parser *p)
405 free(p->buf);