Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include <telescope.h>
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/*
 * Common signature of every per-line handler: parser state, the
 * detected line type, and the raw line (not NUL-terminated) with its
 * length.
 */
typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);

/* Entry points installed into struct parser by gemtext_initparser. */
static int	gemtext_parse(struct parser *, const char *, size_t);
static int	gemtext_free(struct parser *);

/* One handler per kind of line; definitions follow below. */
static parselinefn	parse_text;
static parselinefn	parse_link;
static parselinefn	parse_title;
static parselinefn	parse_item;
static parselinefn	parse_quote;
static parselinefn	parse_pre_start;
static parselinefn	parse_pre_cnt;
static parselinefn	parse_pre_end;

/*
 * Dispatch table indexed by the value of enum line_type (it is used
 * as parsers[t] by gemtext_parse and gemtext_free).  The enum is
 * declared elsewhere; the entries are assumed to follow its
 * declaration order, as the per-entry comments indicate.
 */
static parselinefn *parsers[] = {
	parse_text,		/* LINE_TEXT */
	parse_link,		/* LINE_LINK */
	parse_title,		/* LINE_TITLE_1 */
	parse_title,		/* LINE_TITLE_2 */
	parse_title,		/* LINE_TITLE_3 */
	parse_item,		/* LINE_ITEM */
	parse_quote,		/* LINE_QUOTE */
	parse_pre_start,	/* LINE_PRE_START */
	parse_pre_cnt,		/* LINE_PRE_CONTENT */
	parse_pre_end,		/* LINE_PRE_END */
};
58 void
59 gemtext_initparser(struct parser *p)
60 {
61 memset(p, 0, sizeof(*p));
63 p->parse = &gemtext_parse;
64 p->free = &gemtext_free;
65 }
67 static inline int
68 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
69 {
70 struct line *l;
72 if ((l = calloc(1, sizeof(*l))) == NULL)
73 return 0;
75 l->type = type;
76 l->line = line;
77 l->alt = alt;
79 if (TAILQ_EMPTY(&p->head))
80 TAILQ_INSERT_HEAD(&p->head, l, lines);
81 else
82 TAILQ_INSERT_TAIL(&p->head, l, lines);
84 return 1;
85 }
87 static int
88 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
89 {
90 char *l;
92 if ((l = calloc(1, len+1)) == NULL)
93 return 0;
94 memcpy(l, buf, len);
95 return emit_line(p, t, l, NULL);
96 }
98 static int
99 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
101 char *l, *u;
102 const char *url_start;
104 if (len <= 2)
105 return emit_line(p, t, NULL, NULL);
106 buf += 2;
107 len -= 2;
109 while (len > 0 && isspace(buf[0])) {
110 buf++;
111 len--;
114 if (len == 0)
115 return emit_line(p, t, NULL, NULL);
117 url_start = buf;
118 while (len > 0 && !isspace(buf[0])) {
119 buf++;
120 len--;
123 if ((u = calloc(1, buf - url_start + 1)) == NULL)
124 return 0;
125 memcpy(u, url_start, buf - url_start);
127 if (len == 0)
128 goto nolabel;
130 while (len > 0 && isspace(buf[0])) {
131 buf++;
132 len--;
135 if (len == 0)
136 goto nolabel;
138 if ((l = calloc(1, len + 1)) == NULL)
139 return 0;
141 memcpy(l, buf, len);
142 return emit_line(p, t, l, u);
144 nolabel:
145 if ((l = strdup(u)) == NULL)
146 return 0;
147 return emit_line(p, t, l, u);
150 static int
151 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
153 char *l;
155 switch (t) {
156 case LINE_TITLE_1:
157 if (len <= 1)
158 return emit_line(p, t, NULL, NULL);
159 buf++;
160 len--;
161 break;
162 case LINE_TITLE_2:
163 if (len <= 2)
164 return emit_line(p, t, NULL, NULL);
165 buf += 2;
166 len -= 2;
167 break;
168 case LINE_TITLE_3:
169 if (len <= 3)
170 return emit_line(p, t, NULL, NULL);
171 buf += 3;
172 len -= 3;
173 break;
174 default:
175 /* unreachable */
176 abort();
179 while (len > 0 && isspace(buf[0])) {
180 buf++;
181 len--;
184 if (len == 0)
185 return emit_line(p, t, NULL, NULL);
187 if ((l = calloc(1, len+1)) == NULL)
188 return 0;
189 memcpy(l, buf, len);
190 return emit_line(p, t, l, NULL);
193 static int
194 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
196 char *l;
198 if (len == 1)
199 return emit_line(p, t, NULL, NULL);
201 buf++;
202 len--;
204 while (len > 0 && isspace(buf[0])) {
205 buf++;
206 len--;
209 if (len == 0)
210 return emit_line(p, t, NULL, NULL);
212 if ((l = calloc(1, len+1)) == NULL)
213 return 0;
214 memcpy(l, buf, len);
215 return emit_line(p, t, l, NULL);
218 static int
219 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
221 char *l;
223 if (len == 1)
224 return emit_line(p, t, NULL, NULL);
226 buf++;
227 len--;
229 while (len > 0 && isspace(buf[0])) {
230 buf++;
231 len--;
234 if (len == 0)
235 return emit_line(p, t, NULL, NULL);
237 if ((l = calloc(1, len+1)) == NULL)
238 return 0;
239 memcpy(l, buf, len);
240 return emit_line(p, t, l, NULL);
243 static int
244 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
246 char *l;
248 if (len <= 3)
249 return emit_line(p, t, NULL, NULL);
251 buf += 3;
252 len += 3;
254 while (len > 0 && isspace(buf[0])) {
255 buf++;
256 len--;
259 if (len == 0)
260 return emit_line(p, t, NULL, NULL);
262 if ((l = calloc(1, len+1)) == NULL)
263 return 0;
265 memcpy(l, buf, len);
266 return emit_line(p, t, NULL, l);
269 static int
270 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
272 char *l;
274 if (len == 0)
275 return emit_line(p, t, NULL, NULL);
277 if ((l = calloc(1, len+1)) == NULL)
278 return 0;
279 memcpy(l, buf, len);
280 return emit_line(p, t, l, NULL);
283 static int
284 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
286 return emit_line(p, t, NULL, NULL);
289 static inline enum line_type
290 detect_line_type(const char *buf, size_t len, int in_pre)
292 size_t i;
294 if (len == 0)
295 return LINE_TEXT;
297 if (in_pre) {
298 if (len >= 3 &&
299 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
300 return LINE_PRE_END;
301 else
302 return LINE_PRE_CONTENT;
305 switch (*buf) {
306 case '*': return LINE_ITEM;
307 case '>': return LINE_QUOTE;
308 case '=':
309 if (len >= 1 && buf[1] == '>')
310 return LINE_LINK;
311 break;
312 case '#':
313 if (len == 1)
314 return LINE_TEXT;
315 if (buf[1] != '#')
316 return LINE_TITLE_1;
317 if (len == 2)
318 return LINE_TEXT;
319 if (buf[2] != '#')
320 return LINE_TITLE_2;
321 if (len == 3)
322 return LINE_TEXT;
323 return LINE_TITLE_3;
324 case '`':
325 if (len < 3)
326 return LINE_TEXT;
327 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
328 return LINE_PRE_START;
329 break;
332 return LINE_TEXT;
335 static inline int
336 append(struct parser *p, const char *buf, size_t len)
338 size_t newlen;
339 char *t;
341 newlen = len + p->len;
342 if ((t = calloc(1, newlen)) == NULL)
343 return 0;
344 memcpy(t, p->buf, p->len);
345 memcpy(t + p->len, buf, len);
346 free(p->buf);
347 p->buf = t;
348 p->len = newlen;
349 return 1;
352 static inline int
353 set_buf(struct parser *p, const char *buf, size_t len)
355 free(p->buf);
356 p->buf = NULL;
358 if (len == 0) {
359 p->len = 0;
360 return 1;
363 if ((p->buf = calloc(1, len)) == NULL)
364 return 0;
365 memcpy(p->buf, buf, len);
366 p->len = len;
367 return 1;
370 static int
371 gemtext_parse(struct parser *p, const char *buf, size_t size)
373 const char *b, *e;
374 enum line_type t;
375 size_t len, l;
377 if (p->len == 0) {
378 b = buf;
379 len = size;
380 } else {
381 if (!append(p, buf, size))
382 return 0;
383 b = p->buf;
384 len = p->len;
387 while (len > 0) {
388 if ((e = telescope_strnchr((char*)b, '\n', len)) == NULL)
389 break;
390 l = e - b;
391 t = detect_line_type(b, l, p->flags);
392 if (t == LINE_PRE_START)
393 p->flags = 1;
394 if (t == LINE_PRE_END)
395 p->flags = 0;
396 if (!parsers[t](p, t, b, l))
397 return 0;
399 len -= l;
400 b += l;
402 if (len > 0) {
403 /* skip \n */
404 len--;
405 b++;
409 return set_buf(p, b, len);
412 static int
413 gemtext_free(struct parser *p)
415 enum line_type t;
417 /* flush the buffer */
418 if (p->len != 0) {
419 t = detect_line_type(p->buf, p->len, p->flags);
420 if (!parsers[t](p, t, p->buf, p->len))
421 return 0;
422 if (p->flags && !emit_line(p, LINE_PRE_END, NULL, NULL))
423 return 0;
426 free(p->buf);
427 return 1;