Blame


1 1ac119fb 2024-01-23 op /*
2 1ac119fb 2024-01-23 op * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
3 1ac119fb 2024-01-23 op *
4 1ac119fb 2024-01-23 op * Permission to use, copy, modify, and distribute this software for any
5 1ac119fb 2024-01-23 op * purpose with or without fee is hereby granted, provided that the above
6 1ac119fb 2024-01-23 op * copyright notice and this permission notice appear in all copies.
7 1ac119fb 2024-01-23 op *
8 1ac119fb 2024-01-23 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 1ac119fb 2024-01-23 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 1ac119fb 2024-01-23 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 1ac119fb 2024-01-23 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 1ac119fb 2024-01-23 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 1ac119fb 2024-01-23 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 1ac119fb 2024-01-23 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 1ac119fb 2024-01-23 op */
16 1ac119fb 2024-01-23 op
17 1ac119fb 2024-01-23 op /*
18 1ac119fb 2024-01-23 op * A streaming gemtext parser.
19 1ac119fb 2024-01-23 op *
20 1ac119fb 2024-01-23 op * TODO:
21 1ac119fb 2024-01-23 op * - handle NULs
22 1ac119fb 2024-01-23 op * - UTF8
23 1ac119fb 2024-01-23 op */
24 1ac119fb 2024-01-23 op
25 1ac119fb 2024-01-23 op #include "compat.h"
26 1ac119fb 2024-01-23 op
27 1ac119fb 2024-01-23 op #include <ctype.h>
28 1ac119fb 2024-01-23 op #include <string.h>
29 1ac119fb 2024-01-23 op #include <stdlib.h>
30 1ac119fb 2024-01-23 op
31 1ac119fb 2024-01-23 op #include "defaults.h"
32 1ac119fb 2024-01-23 op #include "parser.h"
33 1ac119fb 2024-01-23 op #include "utf8.h"
34 1ac119fb 2024-01-23 op
35 1ac119fb 2024-01-23 op static int gemtext_parse(struct parser *, const char *, size_t);
36 1ac119fb 2024-01-23 op static int gemtext_foreach_line(struct parser *, const char *, size_t);
37 1ac119fb 2024-01-23 op static int gemtext_free(struct parser *);
38 1ac119fb 2024-01-23 op static int gemtext_serialize(struct parser *, FILE *);
39 1ac119fb 2024-01-23 op
40 1ac119fb 2024-01-23 op static int parse_text(struct parser*, enum line_type, const char*, size_t);
41 1ac119fb 2024-01-23 op static int parse_link(struct parser*, enum line_type, const char*, size_t);
42 1ac119fb 2024-01-23 op static int parse_title(struct parser*, enum line_type, const char*, size_t);
43 1ac119fb 2024-01-23 op static int parse_item(struct parser*, enum line_type, const char*, size_t);
44 1ac119fb 2024-01-23 op static int parse_quote(struct parser*, enum line_type, const char*, size_t);
45 1ac119fb 2024-01-23 op static int parse_pre_start(struct parser*, enum line_type, const char*, size_t);
46 1ac119fb 2024-01-23 op static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
47 1ac119fb 2024-01-23 op static int parse_pre_end(struct parser*, enum line_type, const char*, size_t);
48 1ac119fb 2024-01-23 op static void search_title(struct parser*, enum line_type);
49 1ac119fb 2024-01-23 op
50 1ac119fb 2024-01-23 op typedef int (parselinefn)(struct parser*, enum line_type, const char*, size_t);
51 1ac119fb 2024-01-23 op
52 1ac119fb 2024-01-23 op static parselinefn *parsers[] = {
53 1ac119fb 2024-01-23 op [LINE_TEXT] = parse_text,
54 1ac119fb 2024-01-23 op [LINE_LINK] = parse_link,
55 1ac119fb 2024-01-23 op [LINE_TITLE_1] = parse_title,
56 1ac119fb 2024-01-23 op [LINE_TITLE_2] = parse_title,
57 1ac119fb 2024-01-23 op [LINE_TITLE_3] = parse_title,
58 1ac119fb 2024-01-23 op [LINE_ITEM] = parse_item,
59 1ac119fb 2024-01-23 op [LINE_QUOTE] = parse_quote,
60 1ac119fb 2024-01-23 op [LINE_PRE_START] = parse_pre_start,
61 1ac119fb 2024-01-23 op [LINE_PRE_CONTENT] = parse_pre_cnt,
62 1ac119fb 2024-01-23 op [LINE_PRE_END] = parse_pre_end,
63 1ac119fb 2024-01-23 op };
64 1ac119fb 2024-01-23 op
65 1ac119fb 2024-01-23 op void
66 1ac119fb 2024-01-23 op gemtext_initparser(struct parser *p)
67 1ac119fb 2024-01-23 op {
68 1ac119fb 2024-01-23 op memset(p, 0, sizeof(*p));
69 1ac119fb 2024-01-23 op
70 1ac119fb 2024-01-23 op p->name = "text/gemini";
71 1ac119fb 2024-01-23 op p->parse = &gemtext_parse;
72 1ac119fb 2024-01-23 op p->free = &gemtext_free;
73 1ac119fb 2024-01-23 op p->serialize = &gemtext_serialize;
74 1ac119fb 2024-01-23 op
75 1ac119fb 2024-01-23 op TAILQ_INIT(&p->head);
76 1ac119fb 2024-01-23 op }
77 1ac119fb 2024-01-23 op
78 1ac119fb 2024-01-23 op static inline int
79 1ac119fb 2024-01-23 op emit_line(struct parser *p, enum line_type type, char *line, char *alt)
80 1ac119fb 2024-01-23 op {
81 1ac119fb 2024-01-23 op struct line *l;
82 1ac119fb 2024-01-23 op
83 1ac119fb 2024-01-23 op if ((l = calloc(1, sizeof(*l))) == NULL)
84 1ac119fb 2024-01-23 op return 0;
85 1ac119fb 2024-01-23 op
86 1ac119fb 2024-01-23 op l->type = type;
87 1ac119fb 2024-01-23 op l->line = line;
88 1ac119fb 2024-01-23 op l->alt = alt;
89 1ac119fb 2024-01-23 op
90 1ac119fb 2024-01-23 op switch (l->type) {
91 1ac119fb 2024-01-23 op case LINE_PRE_START:
92 1ac119fb 2024-01-23 op case LINE_PRE_END:
93 1ac119fb 2024-01-23 op if (hide_pre_context)
94 1ac119fb 2024-01-23 op l->flags = L_HIDDEN;
95 1ac119fb 2024-01-23 op if (l->type == LINE_PRE_END &&
96 1ac119fb 2024-01-23 op hide_pre_closing_line)
97 1ac119fb 2024-01-23 op l->flags = L_HIDDEN;
98 1ac119fb 2024-01-23 op break;
99 1ac119fb 2024-01-23 op case LINE_PRE_CONTENT:
100 1ac119fb 2024-01-23 op if (hide_pre_blocks)
101 1ac119fb 2024-01-23 op l->flags = L_HIDDEN;
102 1ac119fb 2024-01-23 op break;
103 1ac119fb 2024-01-23 op case LINE_LINK:
104 1ac119fb 2024-01-23 op if (emojify_link &&
105 1ac119fb 2024-01-23 op !emojied_line(line, (const char **)&l->data))
106 1ac119fb 2024-01-23 op l->data = NULL;
107 1ac119fb 2024-01-23 op break;
108 1ac119fb 2024-01-23 op default:
109 1ac119fb 2024-01-23 op break;
110 1ac119fb 2024-01-23 op }
111 1ac119fb 2024-01-23 op
112 1ac119fb 2024-01-23 op TAILQ_INSERT_TAIL(&p->head, l, lines);
113 1ac119fb 2024-01-23 op
114 1ac119fb 2024-01-23 op return 1;
115 1ac119fb 2024-01-23 op }
116 1ac119fb 2024-01-23 op
117 1ac119fb 2024-01-23 op static int
118 1ac119fb 2024-01-23 op parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
119 1ac119fb 2024-01-23 op {
120 1ac119fb 2024-01-23 op char *l;
121 1ac119fb 2024-01-23 op
122 1ac119fb 2024-01-23 op if ((l = calloc(1, len+1)) == NULL)
123 1ac119fb 2024-01-23 op return 0;
124 1ac119fb 2024-01-23 op memcpy(l, buf, len);
125 1ac119fb 2024-01-23 op return emit_line(p, t, l, NULL);
126 1ac119fb 2024-01-23 op }
127 1ac119fb 2024-01-23 op
128 1ac119fb 2024-01-23 op static int
129 1ac119fb 2024-01-23 op parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
130 1ac119fb 2024-01-23 op {
131 1ac119fb 2024-01-23 op char *l, *u;
132 1ac119fb 2024-01-23 op const char *url_start;
133 1ac119fb 2024-01-23 op
134 1ac119fb 2024-01-23 op if (len <= 2)
135 1ac119fb 2024-01-23 op return emit_line(p, LINE_TEXT, NULL, NULL);
136 1ac119fb 2024-01-23 op buf += 2;
137 1ac119fb 2024-01-23 op len -= 2;
138 1ac119fb 2024-01-23 op
139 1ac119fb 2024-01-23 op while (len > 0 && isspace(buf[0])) {
140 1ac119fb 2024-01-23 op buf++;
141 1ac119fb 2024-01-23 op len--;
142 1ac119fb 2024-01-23 op }
143 1ac119fb 2024-01-23 op
144 1ac119fb 2024-01-23 op if (len == 0)
145 1ac119fb 2024-01-23 op return emit_line(p, LINE_TEXT, NULL, NULL);
146 1ac119fb 2024-01-23 op
147 1ac119fb 2024-01-23 op url_start = buf;
148 1ac119fb 2024-01-23 op while (len > 0 && !isspace(buf[0])) {
149 1ac119fb 2024-01-23 op buf++;
150 1ac119fb 2024-01-23 op len--;
151 1ac119fb 2024-01-23 op }
152 1ac119fb 2024-01-23 op
153 1ac119fb 2024-01-23 op if ((u = calloc(1, buf - url_start + 1)) == NULL)
154 1ac119fb 2024-01-23 op return 0;
155 1ac119fb 2024-01-23 op memcpy(u, url_start, buf - url_start);
156 1ac119fb 2024-01-23 op
157 1ac119fb 2024-01-23 op if (len == 0)
158 1ac119fb 2024-01-23 op goto nolabel;
159 1ac119fb 2024-01-23 op
160 1ac119fb 2024-01-23 op while (len > 0 && isspace(buf[0])) {
161 1ac119fb 2024-01-23 op buf++;
162 1ac119fb 2024-01-23 op len--;
163 1ac119fb 2024-01-23 op }
164 1ac119fb 2024-01-23 op
165 1ac119fb 2024-01-23 op if (len == 0)
166 1ac119fb 2024-01-23 op goto nolabel;
167 1ac119fb 2024-01-23 op
168 1ac119fb 2024-01-23 op if ((l = calloc(1, len + 1)) == NULL)
169 1ac119fb 2024-01-23 op return 0;
170 1ac119fb 2024-01-23 op
171 1ac119fb 2024-01-23 op memcpy(l, buf, len);
172 1ac119fb 2024-01-23 op return emit_line(p, t, l, u);
173 1ac119fb 2024-01-23 op
174 1ac119fb 2024-01-23 op nolabel:
175 1ac119fb 2024-01-23 op if ((l = strdup(u)) == NULL)
176 1ac119fb 2024-01-23 op return 0;
177 1ac119fb 2024-01-23 op return emit_line(p, t, l, u);
178 1ac119fb 2024-01-23 op }
179 1ac119fb 2024-01-23 op
180 1ac119fb 2024-01-23 op static int
181 1ac119fb 2024-01-23 op parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
182 1ac119fb 2024-01-23 op {
183 1ac119fb 2024-01-23 op char *l;
184 1ac119fb 2024-01-23 op
185 1ac119fb 2024-01-23 op switch (t) {
186 1ac119fb 2024-01-23 op case LINE_TITLE_1:
187 1ac119fb 2024-01-23 op if (len <= 1)
188 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
189 1ac119fb 2024-01-23 op buf++;
190 1ac119fb 2024-01-23 op len--;
191 1ac119fb 2024-01-23 op break;
192 1ac119fb 2024-01-23 op case LINE_TITLE_2:
193 1ac119fb 2024-01-23 op if (len <= 2)
194 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
195 1ac119fb 2024-01-23 op buf += 2;
196 1ac119fb 2024-01-23 op len -= 2;
197 1ac119fb 2024-01-23 op break;
198 1ac119fb 2024-01-23 op case LINE_TITLE_3:
199 1ac119fb 2024-01-23 op if (len <= 3)
200 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
201 1ac119fb 2024-01-23 op buf += 3;
202 1ac119fb 2024-01-23 op len -= 3;
203 1ac119fb 2024-01-23 op break;
204 1ac119fb 2024-01-23 op default:
205 1ac119fb 2024-01-23 op /* unreachable */
206 1ac119fb 2024-01-23 op abort();
207 1ac119fb 2024-01-23 op }
208 1ac119fb 2024-01-23 op
209 1ac119fb 2024-01-23 op while (len > 0 && isspace(buf[0])) {
210 1ac119fb 2024-01-23 op buf++;
211 1ac119fb 2024-01-23 op len--;
212 1ac119fb 2024-01-23 op }
213 1ac119fb 2024-01-23 op
214 1ac119fb 2024-01-23 op if (len == 0)
215 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
216 1ac119fb 2024-01-23 op
217 1ac119fb 2024-01-23 op if (t == LINE_TITLE_1 && *p->title == '\0')
218 1ac119fb 2024-01-23 op strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
219 1ac119fb 2024-01-23 op
220 1ac119fb 2024-01-23 op if ((l = calloc(1, len+1)) == NULL)
221 1ac119fb 2024-01-23 op return 0;
222 1ac119fb 2024-01-23 op memcpy(l, buf, len);
223 1ac119fb 2024-01-23 op return emit_line(p, t, l, NULL);
224 1ac119fb 2024-01-23 op }
225 1ac119fb 2024-01-23 op
226 1ac119fb 2024-01-23 op static int
227 1ac119fb 2024-01-23 op parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
228 1ac119fb 2024-01-23 op {
229 1ac119fb 2024-01-23 op char *l;
230 1ac119fb 2024-01-23 op
231 1ac119fb 2024-01-23 op if (len == 1)
232 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
233 1ac119fb 2024-01-23 op
234 1ac119fb 2024-01-23 op buf++;
235 1ac119fb 2024-01-23 op len--;
236 1ac119fb 2024-01-23 op
237 1ac119fb 2024-01-23 op while (len > 0 && isspace(buf[0])) {
238 1ac119fb 2024-01-23 op buf++;
239 1ac119fb 2024-01-23 op len--;
240 1ac119fb 2024-01-23 op }
241 1ac119fb 2024-01-23 op
242 1ac119fb 2024-01-23 op if (len == 0)
243 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
244 1ac119fb 2024-01-23 op
245 1ac119fb 2024-01-23 op if ((l = calloc(1, len+1)) == NULL)
246 1ac119fb 2024-01-23 op return 0;
247 1ac119fb 2024-01-23 op memcpy(l, buf, len);
248 1ac119fb 2024-01-23 op return emit_line(p, t, l, NULL);
249 1ac119fb 2024-01-23 op }
250 1ac119fb 2024-01-23 op
251 1ac119fb 2024-01-23 op static int
252 1ac119fb 2024-01-23 op parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
253 1ac119fb 2024-01-23 op {
254 1ac119fb 2024-01-23 op char *l;
255 1ac119fb 2024-01-23 op
256 1ac119fb 2024-01-23 op if (len == 1)
257 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
258 1ac119fb 2024-01-23 op
259 1ac119fb 2024-01-23 op buf++;
260 1ac119fb 2024-01-23 op len--;
261 1ac119fb 2024-01-23 op
262 1ac119fb 2024-01-23 op while (len > 0 && isspace(buf[0])) {
263 1ac119fb 2024-01-23 op buf++;
264 1ac119fb 2024-01-23 op len--;
265 1ac119fb 2024-01-23 op }
266 1ac119fb 2024-01-23 op
267 1ac119fb 2024-01-23 op if (len == 0)
268 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
269 1ac119fb 2024-01-23 op
270 1ac119fb 2024-01-23 op if ((l = calloc(1, len+1)) == NULL)
271 1ac119fb 2024-01-23 op return 0;
272 1ac119fb 2024-01-23 op memcpy(l, buf, len);
273 1ac119fb 2024-01-23 op return emit_line(p, t, l, NULL);
274 1ac119fb 2024-01-23 op }
275 1ac119fb 2024-01-23 op
276 1ac119fb 2024-01-23 op static int
277 1ac119fb 2024-01-23 op parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
278 1ac119fb 2024-01-23 op {
279 1ac119fb 2024-01-23 op char *l;
280 1ac119fb 2024-01-23 op
281 1ac119fb 2024-01-23 op if (len <= 3)
282 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
283 1ac119fb 2024-01-23 op
284 1ac119fb 2024-01-23 op buf += 3;
285 1ac119fb 2024-01-23 op len -= 3;
286 1ac119fb 2024-01-23 op
287 1ac119fb 2024-01-23 op while (len > 0 && isspace(buf[0])) {
288 1ac119fb 2024-01-23 op buf++;
289 1ac119fb 2024-01-23 op len--;
290 1ac119fb 2024-01-23 op }
291 1ac119fb 2024-01-23 op
292 1ac119fb 2024-01-23 op if (len == 0)
293 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
294 1ac119fb 2024-01-23 op
295 1ac119fb 2024-01-23 op if ((l = calloc(1, len+1)) == NULL)
296 1ac119fb 2024-01-23 op return 0;
297 1ac119fb 2024-01-23 op
298 1ac119fb 2024-01-23 op memcpy(l, buf, len);
299 1ac119fb 2024-01-23 op return emit_line(p, t, l, NULL);
300 1ac119fb 2024-01-23 op }
301 1ac119fb 2024-01-23 op
302 1ac119fb 2024-01-23 op static int
303 1ac119fb 2024-01-23 op parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
304 1ac119fb 2024-01-23 op {
305 1ac119fb 2024-01-23 op char *l;
306 1ac119fb 2024-01-23 op
307 1ac119fb 2024-01-23 op if (len == 0)
308 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
309 1ac119fb 2024-01-23 op
310 1ac119fb 2024-01-23 op if ((l = calloc(1, len+1)) == NULL)
311 1ac119fb 2024-01-23 op return 0;
312 1ac119fb 2024-01-23 op memcpy(l, buf, len);
313 1ac119fb 2024-01-23 op return emit_line(p, t, l, NULL);
314 1ac119fb 2024-01-23 op }
315 1ac119fb 2024-01-23 op
316 1ac119fb 2024-01-23 op static int
317 1ac119fb 2024-01-23 op parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
318 1ac119fb 2024-01-23 op {
319 1ac119fb 2024-01-23 op return emit_line(p, t, NULL, NULL);
320 1ac119fb 2024-01-23 op }
321 1ac119fb 2024-01-23 op
322 1ac119fb 2024-01-23 op static inline enum line_type
323 1ac119fb 2024-01-23 op detect_line_type(const char *buf, size_t len, int in_pre)
324 1ac119fb 2024-01-23 op {
325 1ac119fb 2024-01-23 op if (in_pre) {
326 1ac119fb 2024-01-23 op if (len >= 3 &&
327 1ac119fb 2024-01-23 op buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
328 1ac119fb 2024-01-23 op return LINE_PRE_END;
329 1ac119fb 2024-01-23 op else
330 1ac119fb 2024-01-23 op return LINE_PRE_CONTENT;
331 1ac119fb 2024-01-23 op }
332 1ac119fb 2024-01-23 op
333 1ac119fb 2024-01-23 op if (len == 0)
334 1ac119fb 2024-01-23 op return LINE_TEXT;
335 1ac119fb 2024-01-23 op
336 1ac119fb 2024-01-23 op switch (*buf) {
337 1ac119fb 2024-01-23 op case '*':
338 1ac119fb 2024-01-23 op if (len > 1 && buf[1] == ' ')
339 1ac119fb 2024-01-23 op return LINE_ITEM;
340 1ac119fb 2024-01-23 op break;
341 1ac119fb 2024-01-23 op case '>': return LINE_QUOTE;
342 1ac119fb 2024-01-23 op case '=':
343 1ac119fb 2024-01-23 op if (len >= 1 && buf[1] == '>')
344 1ac119fb 2024-01-23 op return LINE_LINK;
345 1ac119fb 2024-01-23 op break;
346 1ac119fb 2024-01-23 op case '#':
347 1ac119fb 2024-01-23 op if (len == 1)
348 1ac119fb 2024-01-23 op return LINE_TEXT;
349 1ac119fb 2024-01-23 op if (buf[1] != '#')
350 1ac119fb 2024-01-23 op return LINE_TITLE_1;
351 1ac119fb 2024-01-23 op if (len == 2)
352 1ac119fb 2024-01-23 op return LINE_TEXT;
353 1ac119fb 2024-01-23 op if (buf[2] != '#')
354 1ac119fb 2024-01-23 op return LINE_TITLE_2;
355 1ac119fb 2024-01-23 op if (len == 3)
356 1ac119fb 2024-01-23 op return LINE_TEXT;
357 1ac119fb 2024-01-23 op return LINE_TITLE_3;
358 1ac119fb 2024-01-23 op case '`':
359 1ac119fb 2024-01-23 op if (len < 3)
360 1ac119fb 2024-01-23 op return LINE_TEXT;
361 1ac119fb 2024-01-23 op if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
362 1ac119fb 2024-01-23 op return LINE_PRE_START;
363 1ac119fb 2024-01-23 op break;
364 1ac119fb 2024-01-23 op }
365 1ac119fb 2024-01-23 op
366 1ac119fb 2024-01-23 op return LINE_TEXT;
367 1ac119fb 2024-01-23 op }
368 1ac119fb 2024-01-23 op
369 1ac119fb 2024-01-23 op static int
370 1ac119fb 2024-01-23 op gemtext_parse(struct parser *p, const char *buf, size_t size)
371 1ac119fb 2024-01-23 op {
372 1ac119fb 2024-01-23 op return parser_foreach_line(p, buf, size, gemtext_foreach_line);
373 1ac119fb 2024-01-23 op }
374 1ac119fb 2024-01-23 op
375 1ac119fb 2024-01-23 op static int
376 1ac119fb 2024-01-23 op gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
377 1ac119fb 2024-01-23 op {
378 1ac119fb 2024-01-23 op enum line_type t;
379 1ac119fb 2024-01-23 op
380 1ac119fb 2024-01-23 op t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
381 1ac119fb 2024-01-23 op if (t == LINE_PRE_START)
382 1ac119fb 2024-01-23 op p->flags ^= PARSER_IN_PRE;
383 1ac119fb 2024-01-23 op if (t == LINE_PRE_END)
384 1ac119fb 2024-01-23 op p->flags ^= PARSER_IN_PRE;
385 1ac119fb 2024-01-23 op return parsers[t](p, t, line, linelen);
386 1ac119fb 2024-01-23 op }
387 1ac119fb 2024-01-23 op
388 1ac119fb 2024-01-23 op static int
389 1ac119fb 2024-01-23 op gemtext_free(struct parser *p)
390 1ac119fb 2024-01-23 op {
391 1ac119fb 2024-01-23 op enum line_type t;
392 1ac119fb 2024-01-23 op
393 1ac119fb 2024-01-23 op /* flush the buffer */
394 1ac119fb 2024-01-23 op if (p->len != 0) {
395 1ac119fb 2024-01-23 op t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
396 1ac119fb 2024-01-23 op if (!parsers[t](p, t, p->buf, p->len))
397 1ac119fb 2024-01-23 op return 0;
398 1ac119fb 2024-01-23 op if ((p->flags & PARSER_IN_PRE) &&
399 1ac119fb 2024-01-23 op !emit_line(p, LINE_PRE_END, NULL, NULL))
400 1ac119fb 2024-01-23 op return 0;
401 1ac119fb 2024-01-23 op }
402 1ac119fb 2024-01-23 op
403 1ac119fb 2024-01-23 op free(p->buf);
404 1ac119fb 2024-01-23 op
405 1ac119fb 2024-01-23 op /*
406 1ac119fb 2024-01-23 op * use the first level 2 or 3 header as page title if none
407 1ac119fb 2024-01-23 op * found yet.
408 1ac119fb 2024-01-23 op */
409 1ac119fb 2024-01-23 op if (*p->title == '\0')
410 1ac119fb 2024-01-23 op search_title(p, LINE_TITLE_2);
411 1ac119fb 2024-01-23 op if (*p->title == '\0')
412 1ac119fb 2024-01-23 op search_title(p, LINE_TITLE_3);
413 1ac119fb 2024-01-23 op
414 1ac119fb 2024-01-23 op return 1;
415 1ac119fb 2024-01-23 op }
416 1ac119fb 2024-01-23 op
417 1ac119fb 2024-01-23 op static void
418 1ac119fb 2024-01-23 op search_title(struct parser *p, enum line_type level)
419 1ac119fb 2024-01-23 op {
420 1ac119fb 2024-01-23 op struct line *l;
421 1ac119fb 2024-01-23 op
422 1ac119fb 2024-01-23 op TAILQ_FOREACH(l, &p->head, lines) {
423 1ac119fb 2024-01-23 op if (l->type == level) {
424 1ac119fb 2024-01-23 op if (l->line == NULL)
425 1ac119fb 2024-01-23 op continue;
426 1ac119fb 2024-01-23 op strlcpy(p->title, l->line, sizeof(p->title));
427 1ac119fb 2024-01-23 op break;
428 1ac119fb 2024-01-23 op }
429 1ac119fb 2024-01-23 op }
430 1ac119fb 2024-01-23 op }
431 1ac119fb 2024-01-23 op
432 1ac119fb 2024-01-23 op static const char *gemtext_prefixes[] = {
433 1ac119fb 2024-01-23 op [LINE_TEXT] = "",
434 1ac119fb 2024-01-23 op [LINE_TITLE_1] = "# ",
435 1ac119fb 2024-01-23 op [LINE_TITLE_2] = "## ",
436 1ac119fb 2024-01-23 op [LINE_TITLE_3] = "### ",
437 1ac119fb 2024-01-23 op [LINE_ITEM] = "* ",
438 1ac119fb 2024-01-23 op [LINE_QUOTE] = "> ",
439 1ac119fb 2024-01-23 op [LINE_PRE_START] = "``` ",
440 1ac119fb 2024-01-23 op [LINE_PRE_CONTENT] = "",
441 1ac119fb 2024-01-23 op [LINE_PRE_END] = "```",
442 1ac119fb 2024-01-23 op };
443 1ac119fb 2024-01-23 op
444 1ac119fb 2024-01-23 op static int
445 1ac119fb 2024-01-23 op gemtext_serialize(struct parser *p, FILE *fp)
446 1ac119fb 2024-01-23 op {
447 1ac119fb 2024-01-23 op struct line *line;
448 1ac119fb 2024-01-23 op const char *text;
449 1ac119fb 2024-01-23 op const char *alt;
450 1ac119fb 2024-01-23 op int r;
451 1ac119fb 2024-01-23 op
452 1ac119fb 2024-01-23 op TAILQ_FOREACH(line, &p->head, lines) {
453 1ac119fb 2024-01-23 op if ((text = line->line) == NULL)
454 1ac119fb 2024-01-23 op text = "";
455 1ac119fb 2024-01-23 op
456 1ac119fb 2024-01-23 op if ((alt = line->alt) == NULL)
457 1ac119fb 2024-01-23 op alt = "";
458 1ac119fb 2024-01-23 op
459 1ac119fb 2024-01-23 op switch (line->type) {
460 1ac119fb 2024-01-23 op case LINE_TEXT:
461 1ac119fb 2024-01-23 op case LINE_TITLE_1:
462 1ac119fb 2024-01-23 op case LINE_TITLE_2:
463 1ac119fb 2024-01-23 op case LINE_TITLE_3:
464 1ac119fb 2024-01-23 op case LINE_ITEM:
465 1ac119fb 2024-01-23 op case LINE_QUOTE:
466 1ac119fb 2024-01-23 op case LINE_PRE_START:
467 1ac119fb 2024-01-23 op case LINE_PRE_CONTENT:
468 1ac119fb 2024-01-23 op case LINE_PRE_END:
469 1ac119fb 2024-01-23 op r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
470 1ac119fb 2024-01-23 op text);
471 1ac119fb 2024-01-23 op break;
472 1ac119fb 2024-01-23 op
473 1ac119fb 2024-01-23 op case LINE_LINK:
474 1ac119fb 2024-01-23 op r = fprintf(fp, "=> %s %s\n", alt, text);
475 1ac119fb 2024-01-23 op break;
476 1ac119fb 2024-01-23 op
477 1ac119fb 2024-01-23 op default:
478 1ac119fb 2024-01-23 op /* not reached */
479 1ac119fb 2024-01-23 op abort();
480 1ac119fb 2024-01-23 op }
481 1ac119fb 2024-01-23 op
482 1ac119fb 2024-01-23 op if (r == -1)
483 1ac119fb 2024-01-23 op return 0;
484 1ac119fb 2024-01-23 op }
485 1ac119fb 2024-01-23 op
486 1ac119fb 2024-01-23 op return 1;
487 1ac119fb 2024-01-23 op }