Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include <telescope.h>
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/*
 * Signature shared by all the per-line handlers: the parser, the
 * detected type of the line and the raw line given as a pointer plus
 * length (the line is not NUL-terminated).
 */
typedef int (parselinefn)(struct parser *p, enum line_type t,
    const char *buf, size_t len);

/* Callbacks installed into struct parser by gemtext_initparser(). */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_free(struct parser *p);

/* One handler per kind of line, dispatched through parsers[]. */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
45 static parselinefn *parsers[] = {
46 parse_text, /* LINE_TEXT */
47 parse_link, /* LINE_LINK */
48 parse_title, /* LINE_TITLE_1 */
49 parse_title, /* LINE_TITLE_2 */
50 parse_title, /* LINE_TITLE_3 */
51 parse_item, /* LINE_ITEM */
52 parse_quote, /* LINE_QUOTE */
53 parse_pre_start, /* LINE_PRE_START */
54 parse_pre_cnt, /* LINE_PRE_CONTENT */
55 parse_pre_end, /* LINE_PRE_END */
56 };
58 void
59 gemtext_initparser(struct parser *p)
60 {
61 memset(p, 0, sizeof(*p));
63 p->name = "text/gemini";
64 p->parse = &gemtext_parse;
65 p->free = &gemtext_free;
66 }
68 static inline int
69 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
70 {
71 struct line *l;
73 if ((l = calloc(1, sizeof(*l))) == NULL)
74 return 0;
76 l->type = type;
77 l->line = line;
78 l->alt = alt;
80 if (TAILQ_EMPTY(&p->head))
81 TAILQ_INSERT_HEAD(&p->head, l, lines);
82 else
83 TAILQ_INSERT_TAIL(&p->head, l, lines);
85 return 1;
86 }
88 static int
89 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
90 {
91 char *l;
93 if ((l = calloc(1, len+1)) == NULL)
94 return 0;
95 memcpy(l, buf, len);
96 return emit_line(p, t, l, NULL);
97 }
99 static int
100 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
102 char *l, *u;
103 const char *url_start;
105 if (len <= 2)
106 return emit_line(p, t, NULL, NULL);
107 buf += 2;
108 len -= 2;
110 while (len > 0 && isspace(buf[0])) {
111 buf++;
112 len--;
115 if (len == 0)
116 return emit_line(p, t, NULL, NULL);
118 url_start = buf;
119 while (len > 0 && !isspace(buf[0])) {
120 buf++;
121 len--;
124 if ((u = calloc(1, buf - url_start + 1)) == NULL)
125 return 0;
126 memcpy(u, url_start, buf - url_start);
128 if (len == 0)
129 goto nolabel;
131 while (len > 0 && isspace(buf[0])) {
132 buf++;
133 len--;
136 if (len == 0)
137 goto nolabel;
139 if ((l = calloc(1, len + 1)) == NULL)
140 return 0;
142 memcpy(l, buf, len);
143 return emit_line(p, t, l, u);
145 nolabel:
146 if ((l = strdup(u)) == NULL)
147 return 0;
148 return emit_line(p, t, l, u);
151 static int
152 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
154 char *l;
156 switch (t) {
157 case LINE_TITLE_1:
158 if (len <= 1)
159 return emit_line(p, t, NULL, NULL);
160 buf++;
161 len--;
162 break;
163 case LINE_TITLE_2:
164 if (len <= 2)
165 return emit_line(p, t, NULL, NULL);
166 buf += 2;
167 len -= 2;
168 break;
169 case LINE_TITLE_3:
170 if (len <= 3)
171 return emit_line(p, t, NULL, NULL);
172 buf += 3;
173 len -= 3;
174 break;
175 default:
176 /* unreachable */
177 abort();
180 while (len > 0 && isspace(buf[0])) {
181 buf++;
182 len--;
185 if (len == 0)
186 return emit_line(p, t, NULL, NULL);
188 if ((l = calloc(1, len+1)) == NULL)
189 return 0;
190 memcpy(l, buf, len);
191 return emit_line(p, t, l, NULL);
194 static int
195 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
197 char *l;
199 if (len == 1)
200 return emit_line(p, t, NULL, NULL);
202 buf++;
203 len--;
205 while (len > 0 && isspace(buf[0])) {
206 buf++;
207 len--;
210 if (len == 0)
211 return emit_line(p, t, NULL, NULL);
213 if ((l = calloc(1, len+1)) == NULL)
214 return 0;
215 memcpy(l, buf, len);
216 return emit_line(p, t, l, NULL);
219 static int
220 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
222 char *l;
224 if (len == 1)
225 return emit_line(p, t, NULL, NULL);
227 buf++;
228 len--;
230 while (len > 0 && isspace(buf[0])) {
231 buf++;
232 len--;
235 if (len == 0)
236 return emit_line(p, t, NULL, NULL);
238 if ((l = calloc(1, len+1)) == NULL)
239 return 0;
240 memcpy(l, buf, len);
241 return emit_line(p, t, l, NULL);
244 static int
245 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
247 char *l;
249 if (len <= 3)
250 return emit_line(p, t, NULL, NULL);
252 buf += 3;
253 len += 3;
255 while (len > 0 && isspace(buf[0])) {
256 buf++;
257 len--;
260 if (len == 0)
261 return emit_line(p, t, NULL, NULL);
263 if ((l = calloc(1, len+1)) == NULL)
264 return 0;
266 memcpy(l, buf, len);
267 return emit_line(p, t, NULL, l);
270 static int
271 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
273 char *l;
275 if (len == 0)
276 return emit_line(p, t, NULL, NULL);
278 if ((l = calloc(1, len+1)) == NULL)
279 return 0;
280 memcpy(l, buf, len);
281 return emit_line(p, t, l, NULL);
284 static int
285 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
287 return emit_line(p, t, NULL, NULL);
290 static inline enum line_type
291 detect_line_type(const char *buf, size_t len, int in_pre)
293 size_t i;
295 if (len == 0)
296 return LINE_TEXT;
298 if (in_pre) {
299 if (len >= 3 &&
300 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
301 return LINE_PRE_END;
302 else
303 return LINE_PRE_CONTENT;
306 switch (*buf) {
307 case '*': return LINE_ITEM;
308 case '>': return LINE_QUOTE;
309 case '=':
310 if (len >= 1 && buf[1] == '>')
311 return LINE_LINK;
312 break;
313 case '#':
314 if (len == 1)
315 return LINE_TEXT;
316 if (buf[1] != '#')
317 return LINE_TITLE_1;
318 if (len == 2)
319 return LINE_TEXT;
320 if (buf[2] != '#')
321 return LINE_TITLE_2;
322 if (len == 3)
323 return LINE_TEXT;
324 return LINE_TITLE_3;
325 case '`':
326 if (len < 3)
327 return LINE_TEXT;
328 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
329 return LINE_PRE_START;
330 break;
333 return LINE_TEXT;
336 static int
337 gemtext_parse(struct parser *p, const char *buf, size_t size)
339 const char *b, *e;
340 enum line_type t;
341 size_t len, l;
343 if (p->len == 0) {
344 b = buf;
345 len = size;
346 } else {
347 if (!parser_append(p, buf, size))
348 return 0;
349 b = p->buf;
350 len = p->len;
353 while (len > 0) {
354 if ((e = telescope_strnchr((char*)b, '\n', len)) == NULL)
355 break;
356 l = e - b;
357 t = detect_line_type(b, l, p->flags);
358 if (t == LINE_PRE_START)
359 p->flags = 1;
360 if (t == LINE_PRE_END)
361 p->flags = 0;
362 if (!parsers[t](p, t, b, l))
363 return 0;
365 len -= l;
366 b += l;
368 if (len > 0) {
369 /* skip \n */
370 len--;
371 b++;
375 return parser_set_buf(p, b, len);
378 static int
379 gemtext_free(struct parser *p)
381 enum line_type t;
383 /* flush the buffer */
384 if (p->len != 0) {
385 t = detect_line_type(p->buf, p->len, p->flags);
386 if (!parsers[t](p, t, p->buf, p->len))
387 return 0;
388 if (p->flags && !emit_line(p, LINE_PRE_END, NULL, NULL))
389 return 0;
392 free(p->buf);
393 return 1;