Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include "compat.h"
19 #include <stdlib.h>
20 #include <string.h>
22 #include "parser.h"
23 #include "telescope.h"
24 #include "ui.h"
26 /*
27 * Load a text/gemini page given the string page. Always returns 0.
28 */
29 int
30 load_page_from_str(struct tab *tab, const char *page)
31 {
32 parser_init(tab, gemtext_initparser);
33 if (!tab->buffer.page.parse(&tab->buffer.page, page, strlen(page)))
34 abort();
35 if (!tab->buffer.page.free(&tab->buffer.page))
36 abort();
37 ui_on_tab_refresh(tab);
38 ui_on_tab_loaded(tab);
39 return 0;
40 }
42 void
43 parser_init(struct tab *tab, parserfn fn)
44 {
45 erase_buffer(&tab->buffer);
46 fn(&tab->buffer.page);
47 tab->buffer.page.init = fn;
48 }
50 int
51 parser_parse(struct tab *tab, const char *chunk, size_t len)
52 {
53 return tab->buffer.page.parse(&tab->buffer.page, chunk, len);
54 }
56 int
57 parser_free(struct tab *tab)
58 {
59 int r;
60 char *tilde, *slash;
62 r = tab->buffer.page.free(&tab->buffer.page);
64 if (*tab->buffer.page.title != '\0')
65 return r;
67 /*
68 * heuristic: see if there is a "tilde user" and use that as
69 * page title, using the full domain name as fallback.
70 */
71 if ((tilde = strstr(tab->hist_cur->h, "/~")) != NULL) {
72 strlcpy(tab->buffer.page.title, tilde+1,
73 sizeof(tab->buffer.page.title));
75 if ((slash = strchr(tab->buffer.page.title, '/')) != NULL)
76 *slash = '\0';
77 } else
78 strlcpy(tab->buffer.page.title, tab->uri.host,
79 sizeof(tab->buffer.page.title));
81 return r;
82 }
84 int
85 parser_serialize(struct tab *tab, struct evbuffer *evb)
86 {
87 struct line *line;
88 const char *text;
89 int r;
91 if (tab->buffer.page.serialize != NULL)
92 return tab->buffer.page.serialize(&tab->buffer.page, evb);
94 /* a default implementation good enough for plain text */
95 TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
96 if ((text = line->line) == NULL)
97 text = "";
99 r = evbuffer_add_printf(evb, "%s\n", text);
100 if (r == -1)
101 return 0;
104 return 1;
107 int
108 parser_append(struct parser *p, const char *buf, size_t len)
110 size_t newlen;
111 char *t;
113 newlen = len + p->len;
114 if ((t = calloc(1, newlen)) == NULL)
115 return 0;
116 memcpy(t, p->buf, p->len);
117 memcpy(t + p->len, buf, len);
118 free(p->buf);
119 p->buf = t;
120 p->len = newlen;
121 return 1;
124 int
125 parser_set_buf(struct parser *p, const char *buf, size_t len)
127 char *tmp;
129 if (len == 0) {
130 p->len = 0;
131 free(p->buf);
132 p->buf = NULL;
133 return 1;
136 /*
137 * p->buf and buf can (and probably almost always will)
138 * overlap!
139 */
141 if ((tmp = calloc(1, len)) == NULL)
142 return 0;
143 memcpy(tmp, buf, len);
144 free(p->buf);
145 p->buf = tmp;
146 p->len = len;
147 return 1;
150 int
151 parser_foreach_line(struct parser *p, const char *buf, size_t size,
152 parsechunkfn fn)
154 char *b, *e;
155 unsigned int ch;
156 size_t i, l, len;
158 if (!parser_append(p, buf, size))
159 return 0;
160 b = p->buf;
161 len = p->len;
163 if (!(p->flags & PARSER_IN_BODY) && len < 3)
164 return 1;
166 if (!(p->flags & PARSER_IN_BODY)) {
167 p->flags |= PARSER_IN_BODY;
169 /*
170 * drop the BOM: only UTF-8 is supported, and there
171 * it's useless; some editors may still add one
172 * though.
173 */
174 if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
175 b += 3;
176 len -= 3;
180 /* drop every "funny" ASCII character */
181 for (i = 0; i < len; ) {
182 ch = b[i];
183 if ((ch >= ' ' || ch == '\n' || ch == '\t')
184 && ch != 127) { /* del */
185 ++i;
186 continue;
188 memmove(&b[i], &b[i+1], len - i - 1);
189 len--;
192 while (len > 0) {
193 if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
194 break;
195 l = e - b;
197 if (!fn(p, b, l))
198 return 0;
200 len -= l;
201 b += l;
203 if (len > 0) {
204 /* skip \n */
205 len--;
206 b++;
210 return parser_set_buf(p, b, len);