Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 /*
18 * A streaming gemtext parser.
19 *
20 * TODO:
21 * - handle NULs
22 * - UTF8
23 */
25 #include <telescope.h>
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
/* Parser-level entry points. */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);

/*
 * Per-line-type handlers.  Each one receives the raw line (not
 * NUL-terminated, `len' bytes long) and returns 1 on success, 0 on
 * failure.
 */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);

/* Common signature shared by all the per-line-type handlers. */
typedef int (parselinefn)(struct parser *, enum line_type, const char *,
    size_t);
46 static parselinefn *parsers[] = {
47 parse_text, /* LINE_TEXT */
48 parse_link, /* LINE_LINK */
49 parse_title, /* LINE_TITLE_1 */
50 parse_title, /* LINE_TITLE_2 */
51 parse_title, /* LINE_TITLE_3 */
52 parse_item, /* LINE_ITEM */
53 parse_quote, /* LINE_QUOTE */
54 parse_pre_start, /* LINE_PRE_START */
55 parse_pre_cnt, /* LINE_PRE_CONTENT */
56 parse_pre_end, /* LINE_PRE_END */
57 };
59 void
60 gemtext_initparser(struct parser *p)
61 {
62 memset(p, 0, sizeof(*p));
64 p->name = "text/gemini";
65 p->parse = &gemtext_parse;
66 p->free = &gemtext_free;
67 }
69 static inline int
70 emit_line(struct parser *p, enum line_type type, char *line, char *alt)
71 {
72 struct line *l;
74 if ((l = calloc(1, sizeof(*l))) == NULL)
75 return 0;
77 l->type = type;
78 l->line = line;
79 l->alt = alt;
81 if (TAILQ_EMPTY(&p->head))
82 TAILQ_INSERT_HEAD(&p->head, l, lines);
83 else
84 TAILQ_INSERT_TAIL(&p->head, l, lines);
86 return 1;
87 }
89 static int
90 parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
91 {
92 char *l;
94 if ((l = calloc(1, len+1)) == NULL)
95 return 0;
96 memcpy(l, buf, len);
97 return emit_line(p, t, l, NULL);
98 }
100 static int
101 parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
103 char *l, *u;
104 const char *url_start;
106 if (len <= 2)
107 return emit_line(p, t, NULL, NULL);
108 buf += 2;
109 len -= 2;
111 while (len > 0 && isspace(buf[0])) {
112 buf++;
113 len--;
116 if (len == 0)
117 return emit_line(p, t, NULL, NULL);
119 url_start = buf;
120 while (len > 0 && !isspace(buf[0])) {
121 buf++;
122 len--;
125 if ((u = calloc(1, buf - url_start + 1)) == NULL)
126 return 0;
127 memcpy(u, url_start, buf - url_start);
129 if (len == 0)
130 goto nolabel;
132 while (len > 0 && isspace(buf[0])) {
133 buf++;
134 len--;
137 if (len == 0)
138 goto nolabel;
140 if ((l = calloc(1, len + 1)) == NULL)
141 return 0;
143 memcpy(l, buf, len);
144 return emit_line(p, t, l, u);
146 nolabel:
147 if ((l = strdup(u)) == NULL)
148 return 0;
149 return emit_line(p, t, l, u);
152 static int
153 parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
155 char *l;
157 switch (t) {
158 case LINE_TITLE_1:
159 if (len <= 1)
160 return emit_line(p, t, NULL, NULL);
161 buf++;
162 len--;
163 break;
164 case LINE_TITLE_2:
165 if (len <= 2)
166 return emit_line(p, t, NULL, NULL);
167 buf += 2;
168 len -= 2;
169 break;
170 case LINE_TITLE_3:
171 if (len <= 3)
172 return emit_line(p, t, NULL, NULL);
173 buf += 3;
174 len -= 3;
175 break;
176 default:
177 /* unreachable */
178 abort();
181 while (len > 0 && isspace(buf[0])) {
182 buf++;
183 len--;
186 if (len == 0)
187 return emit_line(p, t, NULL, NULL);
189 if (t == LINE_TITLE_1 && *p->title == '\0')
190 strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
192 if ((l = calloc(1, len+1)) == NULL)
193 return 0;
194 memcpy(l, buf, len);
195 return emit_line(p, t, l, NULL);
198 static int
199 parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
201 char *l;
203 if (len == 1)
204 return emit_line(p, t, NULL, NULL);
206 buf++;
207 len--;
209 while (len > 0 && isspace(buf[0])) {
210 buf++;
211 len--;
214 if (len == 0)
215 return emit_line(p, t, NULL, NULL);
217 if ((l = calloc(1, len+1)) == NULL)
218 return 0;
219 memcpy(l, buf, len);
220 return emit_line(p, t, l, NULL);
223 static int
224 parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
226 char *l;
228 if (len == 1)
229 return emit_line(p, t, NULL, NULL);
231 buf++;
232 len--;
234 while (len > 0 && isspace(buf[0])) {
235 buf++;
236 len--;
239 if (len == 0)
240 return emit_line(p, t, NULL, NULL);
242 if ((l = calloc(1, len+1)) == NULL)
243 return 0;
244 memcpy(l, buf, len);
245 return emit_line(p, t, l, NULL);
248 static int
249 parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
251 char *l;
253 if (len <= 3)
254 return emit_line(p, t, NULL, NULL);
256 buf += 3;
257 len -= 3;
259 while (len > 0 && isspace(buf[0])) {
260 buf++;
261 len--;
264 if (len == 0)
265 return emit_line(p, t, NULL, NULL);
267 if ((l = calloc(1, len+1)) == NULL)
268 return 0;
270 memcpy(l, buf, len);
271 return emit_line(p, t, l, NULL);
274 static int
275 parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
277 char *l;
279 if (len == 0)
280 return emit_line(p, t, NULL, NULL);
282 if ((l = calloc(1, len+1)) == NULL)
283 return 0;
284 memcpy(l, buf, len);
285 return emit_line(p, t, l, NULL);
288 static int
289 parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
291 return emit_line(p, t, NULL, NULL);
294 static inline enum line_type
295 detect_line_type(const char *buf, size_t len, int in_pre)
297 if (len == 0)
298 return LINE_TEXT;
300 if (in_pre) {
301 if (len >= 3 &&
302 buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
303 return LINE_PRE_END;
304 else
305 return LINE_PRE_CONTENT;
308 switch (*buf) {
309 case '*': return LINE_ITEM;
310 case '>': return LINE_QUOTE;
311 case '=':
312 if (len >= 1 && buf[1] == '>')
313 return LINE_LINK;
314 break;
315 case '#':
316 if (len == 1)
317 return LINE_TEXT;
318 if (buf[1] != '#')
319 return LINE_TITLE_1;
320 if (len == 2)
321 return LINE_TEXT;
322 if (buf[2] != '#')
323 return LINE_TITLE_2;
324 if (len == 3)
325 return LINE_TEXT;
326 return LINE_TITLE_3;
327 case '`':
328 if (len < 3)
329 return LINE_TEXT;
330 if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
331 return LINE_PRE_START;
332 break;
335 return LINE_TEXT;
338 static int
339 gemtext_parse(struct parser *p, const char *buf, size_t size)
341 return parser_foreach_line(p, buf, size, gemtext_foreach_line);
344 static int
345 gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
347 enum line_type t;
349 t = detect_line_type(line, linelen, p->flags);
350 if (t == LINE_PRE_START)
351 p->flags = 1;
352 if (t == LINE_PRE_END)
353 p->flags = 0;
354 return parsers[t](p, t, line, linelen);
357 static int
358 gemtext_free(struct parser *p)
360 enum line_type t;
362 /* flush the buffer */
363 if (p->len != 0) {
364 t = detect_line_type(p->buf, p->len, p->flags);
365 if (!parsers[t](p, t, p->buf, p->len))
366 return 0;
367 if (p->flags && !emit_line(p, LINE_PRE_END, NULL, NULL))
368 return 0;
371 free(p->buf);
372 return 1;