Blame


1 75a8a1ec 2022-02-08 op /*
2 75a8a1ec 2022-02-08 op * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
3 75a8a1ec 2022-02-08 op *
4 75a8a1ec 2022-02-08 op * Permission to use, copy, modify, and distribute this software for any
5 75a8a1ec 2022-02-08 op * purpose with or without fee is hereby granted, provided that the above
6 75a8a1ec 2022-02-08 op * copyright notice and this permission notice appear in all copies.
7 75a8a1ec 2022-02-08 op *
8 75a8a1ec 2022-02-08 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 75a8a1ec 2022-02-08 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 75a8a1ec 2022-02-08 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 75a8a1ec 2022-02-08 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 75a8a1ec 2022-02-08 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 75a8a1ec 2022-02-08 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 75a8a1ec 2022-02-08 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 75a8a1ec 2022-02-08 op */
16 75a8a1ec 2022-02-08 op
17 75a8a1ec 2022-02-08 op /*
18 75a8a1ec 2022-02-08 op * A streaming gemtext parser.
19 75a8a1ec 2022-02-08 op *
20 75a8a1ec 2022-02-08 op * TODO:
21 75a8a1ec 2022-02-08 op * - handle NULs
22 75a8a1ec 2022-02-08 op * - UTF8
23 75a8a1ec 2022-02-08 op */
24 75a8a1ec 2022-02-08 op
25 75a8a1ec 2022-02-08 op #include "compat.h"
26 75a8a1ec 2022-02-08 op
27 75a8a1ec 2022-02-08 op #include <ctype.h>
28 75a8a1ec 2022-02-08 op #include <string.h>
29 75a8a1ec 2022-02-08 op #include <stdlib.h>
30 75a8a1ec 2022-02-08 op
31 75a8a1ec 2022-02-08 op #include "defaults.h"
32 75a8a1ec 2022-02-08 op #include "parser.h"
33 75a8a1ec 2022-02-08 op #include "utf8.h"
34 75a8a1ec 2022-02-08 op
/* Parser entry points; three of them are installed by gemtext_initparser(). */
static int	gemtext_parse(struct parser *p, const char *buf, size_t size);
static int	gemtext_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	gemtext_free(struct parser *p);
static int	gemtext_serialize(struct parser *p, FILE *fp);

/* Per-line-type handlers; all share the parselinefn signature. */
static int	parse_text(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_link(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_title(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_item(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_quote(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_start(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_cnt(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static int	parse_pre_end(struct parser *p, enum line_type t,
		    const char *buf, size_t len);
static void	search_title(struct parser *p, enum line_type level);

/* Signature of a handler that turns one raw line into a struct line. */
typedef int (parselinefn)(struct parser *p, enum line_type t,
    const char *buf, size_t len);
51 75a8a1ec 2022-02-08 op
52 75a8a1ec 2022-02-08 op static parselinefn *parsers[] = {
53 75a8a1ec 2022-02-08 op [LINE_TEXT] = parse_text,
54 75a8a1ec 2022-02-08 op [LINE_LINK] = parse_link,
55 75a8a1ec 2022-02-08 op [LINE_TITLE_1] = parse_title,
56 75a8a1ec 2022-02-08 op [LINE_TITLE_2] = parse_title,
57 75a8a1ec 2022-02-08 op [LINE_TITLE_3] = parse_title,
58 75a8a1ec 2022-02-08 op [LINE_ITEM] = parse_item,
59 75a8a1ec 2022-02-08 op [LINE_QUOTE] = parse_quote,
60 75a8a1ec 2022-02-08 op [LINE_PRE_START] = parse_pre_start,
61 75a8a1ec 2022-02-08 op [LINE_PRE_CONTENT] = parse_pre_cnt,
62 75a8a1ec 2022-02-08 op [LINE_PRE_END] = parse_pre_end,
63 75a8a1ec 2022-02-08 op };
64 75a8a1ec 2022-02-08 op
65 75a8a1ec 2022-02-08 op void
66 75a8a1ec 2022-02-08 op gemtext_initparser(struct parser *p)
67 75a8a1ec 2022-02-08 op {
68 75a8a1ec 2022-02-08 op memset(p, 0, sizeof(*p));
69 75a8a1ec 2022-02-08 op
70 75a8a1ec 2022-02-08 op p->name = "text/gemini";
71 75a8a1ec 2022-02-08 op p->parse = &gemtext_parse;
72 75a8a1ec 2022-02-08 op p->free = &gemtext_free;
73 75a8a1ec 2022-02-08 op p->serialize = &gemtext_serialize;
74 75a8a1ec 2022-02-08 op
75 75a8a1ec 2022-02-08 op TAILQ_INIT(&p->head);
76 75a8a1ec 2022-02-08 op }
77 75a8a1ec 2022-02-08 op
78 75a8a1ec 2022-02-08 op static inline int
79 75a8a1ec 2022-02-08 op emit_line(struct parser *p, enum line_type type, char *line, char *alt)
80 75a8a1ec 2022-02-08 op {
81 75a8a1ec 2022-02-08 op struct line *l;
82 75a8a1ec 2022-02-08 op
83 75a8a1ec 2022-02-08 op if ((l = calloc(1, sizeof(*l))) == NULL)
84 75a8a1ec 2022-02-08 op return 0;
85 75a8a1ec 2022-02-08 op
86 75a8a1ec 2022-02-08 op l->type = type;
87 75a8a1ec 2022-02-08 op l->line = line;
88 75a8a1ec 2022-02-08 op l->alt = alt;
89 75a8a1ec 2022-02-08 op
90 75a8a1ec 2022-02-08 op switch (l->type) {
91 75a8a1ec 2022-02-08 op case LINE_PRE_START:
92 75a8a1ec 2022-02-08 op case LINE_PRE_END:
93 75a8a1ec 2022-02-08 op if (hide_pre_context)
94 75a8a1ec 2022-02-08 op l->flags = L_HIDDEN;
95 75a8a1ec 2022-02-08 op if (l->type == LINE_PRE_END &&
96 75a8a1ec 2022-02-08 op hide_pre_closing_line)
97 75a8a1ec 2022-02-08 op l->flags = L_HIDDEN;
98 75a8a1ec 2022-02-08 op break;
99 75a8a1ec 2022-02-08 op case LINE_PRE_CONTENT:
100 75a8a1ec 2022-02-08 op if (hide_pre_blocks)
101 75a8a1ec 2022-02-08 op l->flags = L_HIDDEN;
102 75a8a1ec 2022-02-08 op break;
103 75a8a1ec 2022-02-08 op case LINE_LINK:
104 75a8a1ec 2022-02-08 op if (emojify_link &&
105 75a8a1ec 2022-02-08 op !emojied_line(line, (const char **)&l->data))
106 75a8a1ec 2022-02-08 op l->data = NULL;
107 75a8a1ec 2022-02-08 op break;
108 75a8a1ec 2022-02-08 op default:
109 75a8a1ec 2022-02-08 op break;
110 75a8a1ec 2022-02-08 op }
111 75a8a1ec 2022-02-08 op
112 75a8a1ec 2022-02-08 op TAILQ_INSERT_TAIL(&p->head, l, lines);
113 75a8a1ec 2022-02-08 op
114 75a8a1ec 2022-02-08 op return 1;
115 75a8a1ec 2022-02-08 op }
116 75a8a1ec 2022-02-08 op
117 75a8a1ec 2022-02-08 op static int
118 75a8a1ec 2022-02-08 op parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
119 75a8a1ec 2022-02-08 op {
120 75a8a1ec 2022-02-08 op char *l;
121 75a8a1ec 2022-02-08 op
122 75a8a1ec 2022-02-08 op if ((l = calloc(1, len+1)) == NULL)
123 75a8a1ec 2022-02-08 op return 0;
124 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
125 75a8a1ec 2022-02-08 op return emit_line(p, t, l, NULL);
126 75a8a1ec 2022-02-08 op }
127 75a8a1ec 2022-02-08 op
128 75a8a1ec 2022-02-08 op static int
129 75a8a1ec 2022-02-08 op parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
130 75a8a1ec 2022-02-08 op {
131 75a8a1ec 2022-02-08 op char *l, *u;
132 75a8a1ec 2022-02-08 op const char *url_start;
133 75a8a1ec 2022-02-08 op
134 75a8a1ec 2022-02-08 op if (len <= 2)
135 75a8a1ec 2022-02-08 op return emit_line(p, LINE_TEXT, NULL, NULL);
136 75a8a1ec 2022-02-08 op buf += 2;
137 75a8a1ec 2022-02-08 op len -= 2;
138 75a8a1ec 2022-02-08 op
139 75a8a1ec 2022-02-08 op while (len > 0 && isspace(buf[0])) {
140 75a8a1ec 2022-02-08 op buf++;
141 75a8a1ec 2022-02-08 op len--;
142 75a8a1ec 2022-02-08 op }
143 75a8a1ec 2022-02-08 op
144 75a8a1ec 2022-02-08 op if (len == 0)
145 75a8a1ec 2022-02-08 op return emit_line(p, LINE_TEXT, NULL, NULL);
146 75a8a1ec 2022-02-08 op
147 75a8a1ec 2022-02-08 op url_start = buf;
148 75a8a1ec 2022-02-08 op while (len > 0 && !isspace(buf[0])) {
149 75a8a1ec 2022-02-08 op buf++;
150 75a8a1ec 2022-02-08 op len--;
151 75a8a1ec 2022-02-08 op }
152 75a8a1ec 2022-02-08 op
153 75a8a1ec 2022-02-08 op if ((u = calloc(1, buf - url_start + 1)) == NULL)
154 75a8a1ec 2022-02-08 op return 0;
155 75a8a1ec 2022-02-08 op memcpy(u, url_start, buf - url_start);
156 75a8a1ec 2022-02-08 op
157 75a8a1ec 2022-02-08 op if (len == 0)
158 75a8a1ec 2022-02-08 op goto nolabel;
159 75a8a1ec 2022-02-08 op
160 75a8a1ec 2022-02-08 op while (len > 0 && isspace(buf[0])) {
161 75a8a1ec 2022-02-08 op buf++;
162 75a8a1ec 2022-02-08 op len--;
163 75a8a1ec 2022-02-08 op }
164 75a8a1ec 2022-02-08 op
165 75a8a1ec 2022-02-08 op if (len == 0)
166 75a8a1ec 2022-02-08 op goto nolabel;
167 75a8a1ec 2022-02-08 op
168 75a8a1ec 2022-02-08 op if ((l = calloc(1, len + 1)) == NULL)
169 75a8a1ec 2022-02-08 op return 0;
170 75a8a1ec 2022-02-08 op
171 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
172 75a8a1ec 2022-02-08 op return emit_line(p, t, l, u);
173 75a8a1ec 2022-02-08 op
174 75a8a1ec 2022-02-08 op nolabel:
175 75a8a1ec 2022-02-08 op if ((l = strdup(u)) == NULL)
176 75a8a1ec 2022-02-08 op return 0;
177 75a8a1ec 2022-02-08 op return emit_line(p, t, l, u);
178 75a8a1ec 2022-02-08 op }
179 75a8a1ec 2022-02-08 op
180 75a8a1ec 2022-02-08 op static int
181 75a8a1ec 2022-02-08 op parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
182 75a8a1ec 2022-02-08 op {
183 75a8a1ec 2022-02-08 op char *l;
184 75a8a1ec 2022-02-08 op
185 75a8a1ec 2022-02-08 op switch (t) {
186 75a8a1ec 2022-02-08 op case LINE_TITLE_1:
187 75a8a1ec 2022-02-08 op if (len <= 1)
188 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
189 75a8a1ec 2022-02-08 op buf++;
190 75a8a1ec 2022-02-08 op len--;
191 75a8a1ec 2022-02-08 op break;
192 75a8a1ec 2022-02-08 op case LINE_TITLE_2:
193 75a8a1ec 2022-02-08 op if (len <= 2)
194 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
195 75a8a1ec 2022-02-08 op buf += 2;
196 75a8a1ec 2022-02-08 op len -= 2;
197 75a8a1ec 2022-02-08 op break;
198 75a8a1ec 2022-02-08 op case LINE_TITLE_3:
199 75a8a1ec 2022-02-08 op if (len <= 3)
200 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
201 75a8a1ec 2022-02-08 op buf += 3;
202 75a8a1ec 2022-02-08 op len -= 3;
203 75a8a1ec 2022-02-08 op break;
204 75a8a1ec 2022-02-08 op default:
205 75a8a1ec 2022-02-08 op /* unreachable */
206 75a8a1ec 2022-02-08 op abort();
207 75a8a1ec 2022-02-08 op }
208 75a8a1ec 2022-02-08 op
209 75a8a1ec 2022-02-08 op while (len > 0 && isspace(buf[0])) {
210 75a8a1ec 2022-02-08 op buf++;
211 75a8a1ec 2022-02-08 op len--;
212 75a8a1ec 2022-02-08 op }
213 75a8a1ec 2022-02-08 op
214 75a8a1ec 2022-02-08 op if (len == 0)
215 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
216 75a8a1ec 2022-02-08 op
217 75a8a1ec 2022-02-08 op if (t == LINE_TITLE_1 && *p->title == '\0')
218 75a8a1ec 2022-02-08 op strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
219 75a8a1ec 2022-02-08 op
220 75a8a1ec 2022-02-08 op if ((l = calloc(1, len+1)) == NULL)
221 75a8a1ec 2022-02-08 op return 0;
222 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
223 75a8a1ec 2022-02-08 op return emit_line(p, t, l, NULL);
224 75a8a1ec 2022-02-08 op }
225 75a8a1ec 2022-02-08 op
226 75a8a1ec 2022-02-08 op static int
227 75a8a1ec 2022-02-08 op parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
228 75a8a1ec 2022-02-08 op {
229 75a8a1ec 2022-02-08 op char *l;
230 75a8a1ec 2022-02-08 op
231 75a8a1ec 2022-02-08 op if (len == 1)
232 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
233 75a8a1ec 2022-02-08 op
234 75a8a1ec 2022-02-08 op buf++;
235 75a8a1ec 2022-02-08 op len--;
236 75a8a1ec 2022-02-08 op
237 75a8a1ec 2022-02-08 op while (len > 0 && isspace(buf[0])) {
238 75a8a1ec 2022-02-08 op buf++;
239 75a8a1ec 2022-02-08 op len--;
240 75a8a1ec 2022-02-08 op }
241 75a8a1ec 2022-02-08 op
242 75a8a1ec 2022-02-08 op if (len == 0)
243 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
244 75a8a1ec 2022-02-08 op
245 75a8a1ec 2022-02-08 op if ((l = calloc(1, len+1)) == NULL)
246 75a8a1ec 2022-02-08 op return 0;
247 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
248 75a8a1ec 2022-02-08 op return emit_line(p, t, l, NULL);
249 75a8a1ec 2022-02-08 op }
250 75a8a1ec 2022-02-08 op
251 75a8a1ec 2022-02-08 op static int
252 75a8a1ec 2022-02-08 op parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
253 75a8a1ec 2022-02-08 op {
254 75a8a1ec 2022-02-08 op char *l;
255 75a8a1ec 2022-02-08 op
256 75a8a1ec 2022-02-08 op if (len == 1)
257 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
258 75a8a1ec 2022-02-08 op
259 75a8a1ec 2022-02-08 op buf++;
260 75a8a1ec 2022-02-08 op len--;
261 75a8a1ec 2022-02-08 op
262 75a8a1ec 2022-02-08 op while (len > 0 && isspace(buf[0])) {
263 75a8a1ec 2022-02-08 op buf++;
264 75a8a1ec 2022-02-08 op len--;
265 75a8a1ec 2022-02-08 op }
266 75a8a1ec 2022-02-08 op
267 75a8a1ec 2022-02-08 op if (len == 0)
268 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
269 75a8a1ec 2022-02-08 op
270 75a8a1ec 2022-02-08 op if ((l = calloc(1, len+1)) == NULL)
271 75a8a1ec 2022-02-08 op return 0;
272 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
273 75a8a1ec 2022-02-08 op return emit_line(p, t, l, NULL);
274 75a8a1ec 2022-02-08 op }
275 75a8a1ec 2022-02-08 op
276 75a8a1ec 2022-02-08 op static int
277 75a8a1ec 2022-02-08 op parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
278 75a8a1ec 2022-02-08 op {
279 75a8a1ec 2022-02-08 op char *l;
280 75a8a1ec 2022-02-08 op
281 75a8a1ec 2022-02-08 op if (len <= 3)
282 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
283 75a8a1ec 2022-02-08 op
284 75a8a1ec 2022-02-08 op buf += 3;
285 75a8a1ec 2022-02-08 op len -= 3;
286 75a8a1ec 2022-02-08 op
287 75a8a1ec 2022-02-08 op while (len > 0 && isspace(buf[0])) {
288 75a8a1ec 2022-02-08 op buf++;
289 75a8a1ec 2022-02-08 op len--;
290 75a8a1ec 2022-02-08 op }
291 75a8a1ec 2022-02-08 op
292 75a8a1ec 2022-02-08 op if (len == 0)
293 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
294 75a8a1ec 2022-02-08 op
295 75a8a1ec 2022-02-08 op if ((l = calloc(1, len+1)) == NULL)
296 75a8a1ec 2022-02-08 op return 0;
297 75a8a1ec 2022-02-08 op
298 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
299 75a8a1ec 2022-02-08 op return emit_line(p, t, l, NULL);
300 75a8a1ec 2022-02-08 op }
301 75a8a1ec 2022-02-08 op
302 75a8a1ec 2022-02-08 op static int
303 75a8a1ec 2022-02-08 op parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
304 75a8a1ec 2022-02-08 op {
305 75a8a1ec 2022-02-08 op char *l;
306 75a8a1ec 2022-02-08 op
307 75a8a1ec 2022-02-08 op if (len == 0)
308 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
309 75a8a1ec 2022-02-08 op
310 75a8a1ec 2022-02-08 op if ((l = calloc(1, len+1)) == NULL)
311 75a8a1ec 2022-02-08 op return 0;
312 75a8a1ec 2022-02-08 op memcpy(l, buf, len);
313 75a8a1ec 2022-02-08 op return emit_line(p, t, l, NULL);
314 75a8a1ec 2022-02-08 op }
315 75a8a1ec 2022-02-08 op
316 75a8a1ec 2022-02-08 op static int
317 75a8a1ec 2022-02-08 op parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
318 75a8a1ec 2022-02-08 op {
319 75a8a1ec 2022-02-08 op return emit_line(p, t, NULL, NULL);
320 75a8a1ec 2022-02-08 op }
321 75a8a1ec 2022-02-08 op
322 75a8a1ec 2022-02-08 op static inline enum line_type
323 75a8a1ec 2022-02-08 op detect_line_type(const char *buf, size_t len, int in_pre)
324 75a8a1ec 2022-02-08 op {
325 75a8a1ec 2022-02-08 op if (in_pre) {
326 75a8a1ec 2022-02-08 op if (len >= 3 &&
327 75a8a1ec 2022-02-08 op buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
328 75a8a1ec 2022-02-08 op return LINE_PRE_END;
329 75a8a1ec 2022-02-08 op else
330 75a8a1ec 2022-02-08 op return LINE_PRE_CONTENT;
331 75a8a1ec 2022-02-08 op }
332 75a8a1ec 2022-02-08 op
333 75a8a1ec 2022-02-08 op if (len == 0)
334 75a8a1ec 2022-02-08 op return LINE_TEXT;
335 75a8a1ec 2022-02-08 op
336 75a8a1ec 2022-02-08 op switch (*buf) {
337 75a8a1ec 2022-02-08 op case '*':
338 75a8a1ec 2022-02-08 op if (len > 1 && buf[1] == ' ')
339 75a8a1ec 2022-02-08 op return LINE_ITEM;
340 75a8a1ec 2022-02-08 op break;
341 75a8a1ec 2022-02-08 op case '>': return LINE_QUOTE;
342 75a8a1ec 2022-02-08 op case '=':
343 75a8a1ec 2022-02-08 op if (len >= 1 && buf[1] == '>')
344 75a8a1ec 2022-02-08 op return LINE_LINK;
345 75a8a1ec 2022-02-08 op break;
346 75a8a1ec 2022-02-08 op case '#':
347 75a8a1ec 2022-02-08 op if (len == 1)
348 75a8a1ec 2022-02-08 op return LINE_TEXT;
349 75a8a1ec 2022-02-08 op if (buf[1] != '#')
350 75a8a1ec 2022-02-08 op return LINE_TITLE_1;
351 75a8a1ec 2022-02-08 op if (len == 2)
352 75a8a1ec 2022-02-08 op return LINE_TEXT;
353 75a8a1ec 2022-02-08 op if (buf[2] != '#')
354 75a8a1ec 2022-02-08 op return LINE_TITLE_2;
355 75a8a1ec 2022-02-08 op if (len == 3)
356 75a8a1ec 2022-02-08 op return LINE_TEXT;
357 75a8a1ec 2022-02-08 op return LINE_TITLE_3;
358 75a8a1ec 2022-02-08 op case '`':
359 75a8a1ec 2022-02-08 op if (len < 3)
360 75a8a1ec 2022-02-08 op return LINE_TEXT;
361 75a8a1ec 2022-02-08 op if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
362 75a8a1ec 2022-02-08 op return LINE_PRE_START;
363 75a8a1ec 2022-02-08 op break;
364 75a8a1ec 2022-02-08 op }
365 75a8a1ec 2022-02-08 op
366 75a8a1ec 2022-02-08 op return LINE_TEXT;
367 75a8a1ec 2022-02-08 op }
368 75a8a1ec 2022-02-08 op
369 75a8a1ec 2022-02-08 op static int
370 75a8a1ec 2022-02-08 op gemtext_parse(struct parser *p, const char *buf, size_t size)
371 75a8a1ec 2022-02-08 op {
372 75a8a1ec 2022-02-08 op return parser_foreach_line(p, buf, size, gemtext_foreach_line);
373 75a8a1ec 2022-02-08 op }
374 75a8a1ec 2022-02-08 op
375 75a8a1ec 2022-02-08 op static int
376 75a8a1ec 2022-02-08 op gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
377 75a8a1ec 2022-02-08 op {
378 75a8a1ec 2022-02-08 op enum line_type t;
379 75a8a1ec 2022-02-08 op
380 75a8a1ec 2022-02-08 op t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
381 75a8a1ec 2022-02-08 op if (t == LINE_PRE_START)
382 75a8a1ec 2022-02-08 op p->flags ^= PARSER_IN_PRE;
383 75a8a1ec 2022-02-08 op if (t == LINE_PRE_END)
384 75a8a1ec 2022-02-08 op p->flags ^= PARSER_IN_PRE;
385 75a8a1ec 2022-02-08 op return parsers[t](p, t, line, linelen);
386 75a8a1ec 2022-02-08 op }
387 75a8a1ec 2022-02-08 op
388 75a8a1ec 2022-02-08 op static int
389 75a8a1ec 2022-02-08 op gemtext_free(struct parser *p)
390 75a8a1ec 2022-02-08 op {
391 75a8a1ec 2022-02-08 op enum line_type t;
392 75a8a1ec 2022-02-08 op
393 75a8a1ec 2022-02-08 op /* flush the buffer */
394 75a8a1ec 2022-02-08 op if (p->len != 0) {
395 75a8a1ec 2022-02-08 op t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
396 75a8a1ec 2022-02-08 op if (!parsers[t](p, t, p->buf, p->len))
397 75a8a1ec 2022-02-08 op return 0;
398 75a8a1ec 2022-02-08 op if ((p->flags & PARSER_IN_PRE) &&
399 75a8a1ec 2022-02-08 op !emit_line(p, LINE_PRE_END, NULL, NULL))
400 75a8a1ec 2022-02-08 op return 0;
401 75a8a1ec 2022-02-08 op }
402 75a8a1ec 2022-02-08 op
403 75a8a1ec 2022-02-08 op free(p->buf);
404 75a8a1ec 2022-02-08 op
405 75a8a1ec 2022-02-08 op /*
406 75a8a1ec 2022-02-08 op * use the first level 2 or 3 header as page title if none
407 75a8a1ec 2022-02-08 op * found yet.
408 75a8a1ec 2022-02-08 op */
409 75a8a1ec 2022-02-08 op if (*p->title == '\0')
410 75a8a1ec 2022-02-08 op search_title(p, LINE_TITLE_2);
411 75a8a1ec 2022-02-08 op if (*p->title == '\0')
412 75a8a1ec 2022-02-08 op search_title(p, LINE_TITLE_3);
413 75a8a1ec 2022-02-08 op
414 75a8a1ec 2022-02-08 op return 1;
415 75a8a1ec 2022-02-08 op }
416 75a8a1ec 2022-02-08 op
417 75a8a1ec 2022-02-08 op static void
418 75a8a1ec 2022-02-08 op search_title(struct parser *p, enum line_type level)
419 75a8a1ec 2022-02-08 op {
420 75a8a1ec 2022-02-08 op struct line *l;
421 75a8a1ec 2022-02-08 op
422 75a8a1ec 2022-02-08 op TAILQ_FOREACH(l, &p->head, lines) {
423 75a8a1ec 2022-02-08 op if (l->type == level) {
424 75a8a1ec 2022-02-08 op if (l->line == NULL)
425 75a8a1ec 2022-02-08 op continue;
426 75a8a1ec 2022-02-08 op strlcpy(p->title, l->line, sizeof(p->title));
427 75a8a1ec 2022-02-08 op break;
428 75a8a1ec 2022-02-08 op }
429 75a8a1ec 2022-02-08 op }
430 75a8a1ec 2022-02-08 op }
431 75a8a1ec 2022-02-08 op
432 75a8a1ec 2022-02-08 op static const char *gemtext_prefixes[] = {
433 75a8a1ec 2022-02-08 op [LINE_TEXT] = "",
434 75a8a1ec 2022-02-08 op [LINE_TITLE_1] = "# ",
435 75a8a1ec 2022-02-08 op [LINE_TITLE_2] = "## ",
436 75a8a1ec 2022-02-08 op [LINE_TITLE_3] = "### ",
437 75a8a1ec 2022-02-08 op [LINE_ITEM] = "* ",
438 75a8a1ec 2022-02-08 op [LINE_QUOTE] = "> ",
439 75a8a1ec 2022-02-08 op [LINE_PRE_START] = "``` ",
440 75a8a1ec 2022-02-08 op [LINE_PRE_CONTENT] = "",
441 75a8a1ec 2022-02-08 op [LINE_PRE_END] = "```",
442 75a8a1ec 2022-02-08 op };
443 75a8a1ec 2022-02-08 op
444 75a8a1ec 2022-02-08 op static int
445 0110411e 2022-04-13 op gemtext_serialize(struct parser *p, FILE *fp)
446 75a8a1ec 2022-02-08 op {
447 75a8a1ec 2022-02-08 op struct line *line;
448 75a8a1ec 2022-02-08 op const char *text;
449 75a8a1ec 2022-02-08 op const char *alt;
450 75a8a1ec 2022-02-08 op int r;
451 75a8a1ec 2022-02-08 op
452 75a8a1ec 2022-02-08 op TAILQ_FOREACH(line, &p->head, lines) {
453 75a8a1ec 2022-02-08 op if ((text = line->line) == NULL)
454 75a8a1ec 2022-02-08 op text = "";
455 75a8a1ec 2022-02-08 op
456 75a8a1ec 2022-02-08 op if ((alt = line->alt) == NULL)
457 75a8a1ec 2022-02-08 op alt = "";
458 75a8a1ec 2022-02-08 op
459 75a8a1ec 2022-02-08 op switch (line->type) {
460 75a8a1ec 2022-02-08 op case LINE_TEXT:
461 75a8a1ec 2022-02-08 op case LINE_TITLE_1:
462 75a8a1ec 2022-02-08 op case LINE_TITLE_2:
463 75a8a1ec 2022-02-08 op case LINE_TITLE_3:
464 75a8a1ec 2022-02-08 op case LINE_ITEM:
465 75a8a1ec 2022-02-08 op case LINE_QUOTE:
466 75a8a1ec 2022-02-08 op case LINE_PRE_START:
467 75a8a1ec 2022-02-08 op case LINE_PRE_CONTENT:
468 75a8a1ec 2022-02-08 op case LINE_PRE_END:
469 0110411e 2022-04-13 op r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
470 d54dd816 2022-04-13 op text);
471 75a8a1ec 2022-02-08 op break;
472 75a8a1ec 2022-02-08 op
473 75a8a1ec 2022-02-08 op case LINE_LINK:
474 0110411e 2022-04-13 op r = fprintf(fp, "=> %s %s\n", alt, text);
475 75a8a1ec 2022-02-08 op break;
476 75a8a1ec 2022-02-08 op
477 75a8a1ec 2022-02-08 op default:
478 75a8a1ec 2022-02-08 op /* not reached */
479 75a8a1ec 2022-02-08 op abort();
480 75a8a1ec 2022-02-08 op }
481 75a8a1ec 2022-02-08 op
482 75a8a1ec 2022-02-08 op if (r == -1)
483 75a8a1ec 2022-02-08 op return 0;
484 75a8a1ec 2022-02-08 op }
485 75a8a1ec 2022-02-08 op
486 75a8a1ec 2022-02-08 op return 1;
487 75a8a1ec 2022-02-08 op }