2 5e11c00c 2021-03-02 op * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 5e11c00c 2021-03-02 op * Permission to use, copy, modify, and distribute this software for any
5 5e11c00c 2021-03-02 op * purpose with or without fee is hereby granted, provided that the above
6 5e11c00c 2021-03-02 op * copyright notice and this permission notice appear in all copies.
8 5e11c00c 2021-03-02 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 5e11c00c 2021-03-02 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 5e11c00c 2021-03-02 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 5e11c00c 2021-03-02 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 5e11c00c 2021-03-02 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 5e11c00c 2021-03-02 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 5e11c00c 2021-03-02 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 5e11c00c 2021-03-02 op * A streaming gemtext parser.
21 5e11c00c 2021-03-02 op * - handle NULs
25 5e11c00c 2021-03-02 op #include <telescope.h>
27 5e11c00c 2021-03-02 op #include <ctype.h>
28 5e11c00c 2021-03-02 op #include <string.h>
29 5e11c00c 2021-03-02 op #include <stdlib.h>
/* Callbacks installed into struct parser by gemtext_initparser(). */
static int	gemtext_parse(struct parser *, const char *, size_t);
static void	gemtext_free(struct parser *);

/*
 * Signature shared by every per-line handler: the parser, the detected
 * line type, and the line as a (pointer, length) pair.
 */
typedef int (parselinefn)(struct parser *, enum line_type, const char *, size_t);

/* One handler per kind of gemtext line; see the parsers[] table. */
static parselinefn	parse_text;
static parselinefn	parse_link;
static parselinefn	parse_title;
static parselinefn	parse_item;
static parselinefn	parse_quote;
static parselinefn	parse_pre_start;
static parselinefn	parse_pre_cnt;
static parselinefn	parse_pre_end;
45 5e11c00c 2021-03-02 op static parselinefn *parsers[] = {
46 5e11c00c 2021-03-02 op parse_text, /* LINE_TEXT */
47 5e11c00c 2021-03-02 op parse_link, /* LINE_LINK */
48 5e11c00c 2021-03-02 op parse_title, /* LINE_TITLE_1 */
49 5e11c00c 2021-03-02 op parse_title, /* LINE_TITLE_2 */
50 5e11c00c 2021-03-02 op parse_title, /* LINE_TITLE_3 */
51 5e11c00c 2021-03-02 op parse_item, /* LINE_ITEM */
52 5e11c00c 2021-03-02 op parse_quote, /* LINE_QUOTE */
53 5e11c00c 2021-03-02 op parse_pre_start, /* LINE_PRE_START */
54 5e11c00c 2021-03-02 op parse_pre_cnt, /* LINE_PRE_CONTENT */
55 5e11c00c 2021-03-02 op parse_pre_end, /* LINE_PRE_END */
59 5e11c00c 2021-03-02 op gemtext_initparser(struct parser *p)
61 5e11c00c 2021-03-02 op memset(p, 0, sizeof(*p));
63 5e11c00c 2021-03-02 op p->parse = &gemtext_parse;
64 5e11c00c 2021-03-02 op p->free = &gemtext_free;
67 5e11c00c 2021-03-02 op static inline int
68 5e11c00c 2021-03-02 op emit_line(struct parser *p, enum line_type type, char *line, char *alt)
70 5e11c00c 2021-03-02 op struct line *l;
72 5e11c00c 2021-03-02 op if ((l = calloc(1, sizeof(*l))) == NULL)
75 5e11c00c 2021-03-02 op l->type = type;
76 5e11c00c 2021-03-02 op l->line = line;
79 5e11c00c 2021-03-02 op if (TAILQ_EMPTY(&p->head))
80 5e11c00c 2021-03-02 op TAILQ_INSERT_HEAD(&p->head, l, lines);
82 5e11c00c 2021-03-02 op TAILQ_INSERT_TAIL(&p->head, l, lines);
88 5e11c00c 2021-03-02 op parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
92 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
94 5e11c00c 2021-03-02 op memcpy(l, buf, len);
95 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
99 5e11c00c 2021-03-02 op parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
102 5e11c00c 2021-03-02 op const char *url_start;
104 5e11c00c 2021-03-02 op if (len <= 2)
105 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
109 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
114 5e11c00c 2021-03-02 op if (len == 0)
115 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
117 5e11c00c 2021-03-02 op url_start = buf;
118 5e11c00c 2021-03-02 op while (len > 0 && !isspace(buf[0])) {
123 5e11c00c 2021-03-02 op if ((u = calloc(1, buf - url_start + 1)) == NULL)
125 5e11c00c 2021-03-02 op memcpy(u, url_start, buf - url_start);
127 5e11c00c 2021-03-02 op if (len == 0)
128 5e11c00c 2021-03-02 op return emit_line(p, t, u, NULL);
130 5e11c00c 2021-03-02 op while (len > 0) {
135 5e11c00c 2021-03-02 op if (len == 0)
136 5e11c00c 2021-03-02 op return emit_line(p, t, u, NULL);
138 5e11c00c 2021-03-02 op if ((l = calloc(1, len + 1)) == NULL)
141 5e11c00c 2021-03-02 op memcpy(l, buf, len);
142 5e11c00c 2021-03-02 op return emit_line(p, t, u, l);
146 5e11c00c 2021-03-02 op parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
151 5e11c00c 2021-03-02 op case LINE_TITLE_1:
152 5e11c00c 2021-03-02 op if (len <= 1)
153 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
157 5e11c00c 2021-03-02 op case LINE_TITLE_2:
158 5e11c00c 2021-03-02 op if (len <= 2)
159 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
163 5e11c00c 2021-03-02 op case LINE_TITLE_3:
164 5e11c00c 2021-03-02 op if (len <= 3)
165 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
170 5e11c00c 2021-03-02 op /* unreachable */
174 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
179 5e11c00c 2021-03-02 op if (len == 0)
180 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
182 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
184 5e11c00c 2021-03-02 op memcpy(l, buf, len);
185 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
189 5e11c00c 2021-03-02 op parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
193 5e11c00c 2021-03-02 op if (len == 1)
194 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
199 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
204 5e11c00c 2021-03-02 op if (len == 0)
205 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
207 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
209 5e11c00c 2021-03-02 op memcpy(l, buf, len);
210 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
214 5e11c00c 2021-03-02 op parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
218 5e11c00c 2021-03-02 op if (len == 1)
219 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
224 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
229 5e11c00c 2021-03-02 op if (len == 0)
230 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
232 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
234 5e11c00c 2021-03-02 op memcpy(l, buf, len);
235 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
239 5e11c00c 2021-03-02 op parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
243 5e11c00c 2021-03-02 op if (len <= 3)
244 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
249 5e11c00c 2021-03-02 op while (len > 0 && isspace(buf[0])) {
254 5e11c00c 2021-03-02 op if (len == 0)
255 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
257 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
260 5e11c00c 2021-03-02 op memcpy(l, buf, len);
261 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, l);
265 5e11c00c 2021-03-02 op parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
269 5e11c00c 2021-03-02 op if (len == 0)
270 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
272 5e11c00c 2021-03-02 op if ((l = calloc(1, len+1)) == NULL)
274 5e11c00c 2021-03-02 op memcpy(l, buf, len);
275 5e11c00c 2021-03-02 op return emit_line(p, t, l, NULL);
279 5e11c00c 2021-03-02 op parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
281 5e11c00c 2021-03-02 op return emit_line(p, t, NULL, NULL);
284 5e11c00c 2021-03-02 op static inline enum line_type
285 5e11c00c 2021-03-02 op detect_line_type(const char *buf, size_t len, int in_pre)
289 5e11c00c 2021-03-02 op if (len == 0)
290 5e11c00c 2021-03-02 op return LINE_TEXT;
292 5e11c00c 2021-03-02 op if (in_pre) {
293 5e11c00c 2021-03-02 op if (len >= 3 &&
294 5e11c00c 2021-03-02 op buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
295 5e11c00c 2021-03-02 op return LINE_PRE_END;
297 5e11c00c 2021-03-02 op return LINE_PRE_CONTENT;
300 5e11c00c 2021-03-02 op switch (*buf) {
301 5e11c00c 2021-03-02 op case '*': return LINE_ITEM;
302 5e11c00c 2021-03-02 op case '>': return LINE_QUOTE;
304 5e11c00c 2021-03-02 op if (len >= 1 && buf[1] == '>')
305 5e11c00c 2021-03-02 op return LINE_LINK;
308 5e11c00c 2021-03-02 op if (len == 1)
309 5e11c00c 2021-03-02 op return LINE_TEXT;
310 5e11c00c 2021-03-02 op if (buf[1] != '#')
311 5e11c00c 2021-03-02 op return LINE_TITLE_1;
312 5e11c00c 2021-03-02 op if (len == 2)
313 5e11c00c 2021-03-02 op return LINE_TEXT;
314 5e11c00c 2021-03-02 op if (buf[2] != '#')
315 5e11c00c 2021-03-02 op return LINE_TITLE_2;
316 5e11c00c 2021-03-02 op if (len == 3)
317 5e11c00c 2021-03-02 op return LINE_TEXT;
318 5e11c00c 2021-03-02 op return LINE_TITLE_3;
321 5e11c00c 2021-03-02 op return LINE_TEXT;
322 5e11c00c 2021-03-02 op if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
323 5e11c00c 2021-03-02 op return LINE_PRE_START;
327 5e11c00c 2021-03-02 op return LINE_TEXT;
330 5e11c00c 2021-03-02 op static inline int
331 5e11c00c 2021-03-02 op append(struct parser *p, const char *buf, size_t len)
333 5e11c00c 2021-03-02 op size_t newlen;
336 5e11c00c 2021-03-02 op newlen = len + p->len;
337 5e11c00c 2021-03-02 op if ((t = calloc(1, newlen)) == NULL)
339 5e11c00c 2021-03-02 op free(p->buf);
341 5e11c00c 2021-03-02 op p->len = newlen;
345 5e11c00c 2021-03-02 op static inline int
346 5e11c00c 2021-03-02 op set_buf(struct parser *p, const char *buf, size_t len)
348 5e11c00c 2021-03-02 op free(p->buf);
349 5e11c00c 2021-03-02 op p->buf = NULL;
351 5e11c00c 2021-03-02 op if (len == 0)
354 5e11c00c 2021-03-02 op if ((p->buf = calloc(1, len)) == NULL)
356 5e11c00c 2021-03-02 op memcpy(p->buf, buf, len);
361 5e11c00c 2021-03-02 op gemtext_parse(struct parser *p, const char *buf, size_t size)
363 5e11c00c 2021-03-02 op const char *b, *e;
364 5e11c00c 2021-03-02 op enum line_type t;
365 5e11c00c 2021-03-02 op size_t len, l;
367 5e11c00c 2021-03-02 op if (p->len == 0) {
371 5e11c00c 2021-03-02 op if (!append(p, buf, size))
374 5e11c00c 2021-03-02 op len = p->len;
377 5e11c00c 2021-03-02 op while (len > 0) {
378 5e11c00c 2021-03-02 op if ((e = telescope_strnchr((char*)b, '\n', len)) == NULL)
381 5e11c00c 2021-03-02 op t = detect_line_type(b, l, p->flags);
382 5e11c00c 2021-03-02 op if (t == LINE_PRE_START)
383 5e11c00c 2021-03-02 op p->flags = 1;
384 5e11c00c 2021-03-02 op if (t == LINE_PRE_END)
385 5e11c00c 2021-03-02 op p->flags = 0;
386 5e11c00c 2021-03-02 op if (!parsers[t](p, t, b, l))
392 5e11c00c 2021-03-02 op if (len > 0) {
393 5e11c00c 2021-03-02 op /* skip \n */
399 5e11c00c 2021-03-02 op return set_buf(p, b, len);
403 5e11c00c 2021-03-02 op gemtext_free(struct parser *p)
405 5e11c00c 2021-03-02 op free(p->buf);