Blame


1 75a8a1ec 2022-02-08 op /*
2 75a8a1ec 2022-02-08 op * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 75a8a1ec 2022-02-08 op *
4 75a8a1ec 2022-02-08 op * Permission to use, copy, modify, and distribute this software for any
5 75a8a1ec 2022-02-08 op * purpose with or without fee is hereby granted, provided that the above
6 75a8a1ec 2022-02-08 op * copyright notice and this permission notice appear in all copies.
7 75a8a1ec 2022-02-08 op *
8 75a8a1ec 2022-02-08 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 75a8a1ec 2022-02-08 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 75a8a1ec 2022-02-08 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 75a8a1ec 2022-02-08 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 75a8a1ec 2022-02-08 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 75a8a1ec 2022-02-08 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 75a8a1ec 2022-02-08 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 75a8a1ec 2022-02-08 op */
16 75a8a1ec 2022-02-08 op
17 75a8a1ec 2022-02-08 op #include "compat.h"
18 75a8a1ec 2022-02-08 op
19 75a8a1ec 2022-02-08 op #include <stdlib.h>
20 75a8a1ec 2022-02-08 op #include <string.h>
21 75a8a1ec 2022-02-08 op
22 75a8a1ec 2022-02-08 op #include "parser.h"
23 75a8a1ec 2022-02-08 op #include "telescope.h"
24 75a8a1ec 2022-02-08 op
25 75a8a1ec 2022-02-08 op void
26 75a8a1ec 2022-02-08 op parser_init(struct tab *tab, parserfn fn)
27 75a8a1ec 2022-02-08 op {
28 75a8a1ec 2022-02-08 op erase_buffer(&tab->buffer);
29 75a8a1ec 2022-02-08 op fn(&tab->buffer.page);
30 75a8a1ec 2022-02-08 op tab->buffer.page.init = fn;
31 75a8a1ec 2022-02-08 op }
32 75a8a1ec 2022-02-08 op
33 75a8a1ec 2022-02-08 op int
34 75a8a1ec 2022-02-08 op parser_parse(struct tab *tab, const char *chunk, size_t len)
35 75a8a1ec 2022-02-08 op {
36 75a8a1ec 2022-02-08 op return tab->buffer.page.parse(&tab->buffer.page, chunk, len);
37 75a8a1ec 2022-02-08 op }
38 75a8a1ec 2022-02-08 op
/*
 * printf-like front end for parser_parse(): format the arguments into
 * a temporary string and feed that string to the parser.
 *
 * Returns 0 if the string cannot be built, otherwise the value
 * returned by parser_parse().
 */
int
parser_parsef(struct tab *tab, const char *fmt, ...)
{
	va_list	 args;
	char	*text;
	int	 ret;

	va_start(args, fmt);
	ret = vasprintf(&text, fmt, args);
	va_end(args);

	if (ret == -1)
		return 0;

	/* the formatted text is fed as a NUL-terminated string */
	ret = parser_parse(tab, text, strlen(text));

	free(text);
	return ret;
}
57 55aa433f 2022-04-24 op
58 55aa433f 2022-04-24 op int
59 75a8a1ec 2022-02-08 op parser_free(struct tab *tab)
60 75a8a1ec 2022-02-08 op {
61 75a8a1ec 2022-02-08 op int r;
62 75a8a1ec 2022-02-08 op char *tilde, *slash;
63 75a8a1ec 2022-02-08 op
64 75a8a1ec 2022-02-08 op r = tab->buffer.page.free(&tab->buffer.page);
65 75a8a1ec 2022-02-08 op
66 75a8a1ec 2022-02-08 op if (*tab->buffer.page.title != '\0')
67 75a8a1ec 2022-02-08 op return r;
68 75a8a1ec 2022-02-08 op
69 75a8a1ec 2022-02-08 op /*
70 75a8a1ec 2022-02-08 op * heuristic: see if there is a "tilde user" and use that as
71 75a8a1ec 2022-02-08 op * page title, using the full domain name as fallback.
72 75a8a1ec 2022-02-08 op */
73 75a8a1ec 2022-02-08 op if ((tilde = strstr(tab->hist_cur->h, "/~")) != NULL) {
74 75a8a1ec 2022-02-08 op strlcpy(tab->buffer.page.title, tilde+1,
75 75a8a1ec 2022-02-08 op sizeof(tab->buffer.page.title));
76 75a8a1ec 2022-02-08 op
77 75a8a1ec 2022-02-08 op if ((slash = strchr(tab->buffer.page.title, '/')) != NULL)
78 75a8a1ec 2022-02-08 op *slash = '\0';
79 75a8a1ec 2022-02-08 op } else
80 75a8a1ec 2022-02-08 op strlcpy(tab->buffer.page.title, tab->uri.host,
81 75a8a1ec 2022-02-08 op sizeof(tab->buffer.page.title));
82 75a8a1ec 2022-02-08 op
83 75a8a1ec 2022-02-08 op return r;
84 75a8a1ec 2022-02-08 op }
85 75a8a1ec 2022-02-08 op
86 75a8a1ec 2022-02-08 op int
87 0110411e 2022-04-13 op parser_serialize(struct tab *tab, FILE *fp)
88 75a8a1ec 2022-02-08 op {
89 75a8a1ec 2022-02-08 op struct line *line;
90 75a8a1ec 2022-02-08 op const char *text;
91 75a8a1ec 2022-02-08 op int r;
92 75a8a1ec 2022-02-08 op
93 75a8a1ec 2022-02-08 op if (tab->buffer.page.serialize != NULL)
94 0110411e 2022-04-13 op return tab->buffer.page.serialize(&tab->buffer.page, fp);
95 75a8a1ec 2022-02-08 op
96 75a8a1ec 2022-02-08 op /* a default implementation good enough for plain text */
97 75a8a1ec 2022-02-08 op TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
98 75a8a1ec 2022-02-08 op if ((text = line->line) == NULL)
99 75a8a1ec 2022-02-08 op text = "";
100 75a8a1ec 2022-02-08 op
101 0110411e 2022-04-13 op r = fprintf(fp, "%s\n", text);
102 75a8a1ec 2022-02-08 op if (r == -1)
103 75a8a1ec 2022-02-08 op return 0;
104 75a8a1ec 2022-02-08 op }
105 75a8a1ec 2022-02-08 op
106 75a8a1ec 2022-02-08 op return 1;
107 75a8a1ec 2022-02-08 op }
108 75a8a1ec 2022-02-08 op
109 75a8a1ec 2022-02-08 op int
110 75a8a1ec 2022-02-08 op parser_append(struct parser *p, const char *buf, size_t len)
111 75a8a1ec 2022-02-08 op {
112 75a8a1ec 2022-02-08 op size_t newlen;
113 75a8a1ec 2022-02-08 op char *t;
114 75a8a1ec 2022-02-08 op
115 75a8a1ec 2022-02-08 op newlen = len + p->len;
116 75a8a1ec 2022-02-08 op if ((t = calloc(1, newlen)) == NULL)
117 75a8a1ec 2022-02-08 op return 0;
118 75a8a1ec 2022-02-08 op memcpy(t, p->buf, p->len);
119 75a8a1ec 2022-02-08 op memcpy(t + p->len, buf, len);
120 75a8a1ec 2022-02-08 op free(p->buf);
121 75a8a1ec 2022-02-08 op p->buf = t;
122 75a8a1ec 2022-02-08 op p->len = newlen;
123 75a8a1ec 2022-02-08 op return 1;
124 75a8a1ec 2022-02-08 op }
125 75a8a1ec 2022-02-08 op
126 75a8a1ec 2022-02-08 op int
127 75a8a1ec 2022-02-08 op parser_set_buf(struct parser *p, const char *buf, size_t len)
128 75a8a1ec 2022-02-08 op {
129 75a8a1ec 2022-02-08 op char *tmp;
130 75a8a1ec 2022-02-08 op
131 75a8a1ec 2022-02-08 op if (len == 0) {
132 75a8a1ec 2022-02-08 op p->len = 0;
133 75a8a1ec 2022-02-08 op free(p->buf);
134 75a8a1ec 2022-02-08 op p->buf = NULL;
135 75a8a1ec 2022-02-08 op return 1;
136 75a8a1ec 2022-02-08 op }
137 75a8a1ec 2022-02-08 op
138 75a8a1ec 2022-02-08 op /*
139 75a8a1ec 2022-02-08 op * p->buf and buf can (and probably almost always will)
140 75a8a1ec 2022-02-08 op * overlap!
141 75a8a1ec 2022-02-08 op */
142 75a8a1ec 2022-02-08 op
143 75a8a1ec 2022-02-08 op if ((tmp = calloc(1, len)) == NULL)
144 75a8a1ec 2022-02-08 op return 0;
145 75a8a1ec 2022-02-08 op memcpy(tmp, buf, len);
146 75a8a1ec 2022-02-08 op free(p->buf);
147 75a8a1ec 2022-02-08 op p->buf = tmp;
148 75a8a1ec 2022-02-08 op p->len = len;
149 75a8a1ec 2022-02-08 op return 1;
150 75a8a1ec 2022-02-08 op }
151 75a8a1ec 2022-02-08 op
152 75a8a1ec 2022-02-08 op int
153 75a8a1ec 2022-02-08 op parser_foreach_line(struct parser *p, const char *buf, size_t size,
154 75a8a1ec 2022-02-08 op parsechunkfn fn)
155 75a8a1ec 2022-02-08 op {
156 75a8a1ec 2022-02-08 op char *b, *e;
157 75a8a1ec 2022-02-08 op unsigned int ch;
158 75a8a1ec 2022-02-08 op size_t i, l, len;
159 75a8a1ec 2022-02-08 op
160 75a8a1ec 2022-02-08 op if (!parser_append(p, buf, size))
161 75a8a1ec 2022-02-08 op return 0;
162 75a8a1ec 2022-02-08 op b = p->buf;
163 75a8a1ec 2022-02-08 op len = p->len;
164 75a8a1ec 2022-02-08 op
165 75a8a1ec 2022-02-08 op if (!(p->flags & PARSER_IN_BODY) && len < 3)
166 75a8a1ec 2022-02-08 op return 1;
167 75a8a1ec 2022-02-08 op
168 75a8a1ec 2022-02-08 op if (!(p->flags & PARSER_IN_BODY)) {
169 75a8a1ec 2022-02-08 op p->flags |= PARSER_IN_BODY;
170 75a8a1ec 2022-02-08 op
171 75a8a1ec 2022-02-08 op /*
172 75a8a1ec 2022-02-08 op * drop the BOM: only UTF-8 is supported, and there
173 75a8a1ec 2022-02-08 op * it's useless; some editors may still add one
174 75a8a1ec 2022-02-08 op * though.
175 75a8a1ec 2022-02-08 op */
176 75a8a1ec 2022-02-08 op if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
177 75a8a1ec 2022-02-08 op b += 3;
178 75a8a1ec 2022-02-08 op len -= 3;
179 75a8a1ec 2022-02-08 op }
180 75a8a1ec 2022-02-08 op }
181 75a8a1ec 2022-02-08 op
182 75a8a1ec 2022-02-08 op /* drop every "funny" ASCII character */
183 75a8a1ec 2022-02-08 op for (i = 0; i < len; ) {
184 75a8a1ec 2022-02-08 op ch = b[i];
185 75a8a1ec 2022-02-08 op if ((ch >= ' ' || ch == '\n' || ch == '\t')
186 75a8a1ec 2022-02-08 op && ch != 127) { /* del */
187 75a8a1ec 2022-02-08 op ++i;
188 75a8a1ec 2022-02-08 op continue;
189 75a8a1ec 2022-02-08 op }
190 75a8a1ec 2022-02-08 op memmove(&b[i], &b[i+1], len - i - 1);
191 75a8a1ec 2022-02-08 op len--;
192 75a8a1ec 2022-02-08 op }
193 75a8a1ec 2022-02-08 op
194 75a8a1ec 2022-02-08 op while (len > 0) {
195 75a8a1ec 2022-02-08 op if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
196 75a8a1ec 2022-02-08 op break;
197 75a8a1ec 2022-02-08 op l = e - b;
198 75a8a1ec 2022-02-08 op
199 75a8a1ec 2022-02-08 op if (!fn(p, b, l))
200 75a8a1ec 2022-02-08 op return 0;
201 75a8a1ec 2022-02-08 op
202 75a8a1ec 2022-02-08 op len -= l;
203 75a8a1ec 2022-02-08 op b += l;
204 75a8a1ec 2022-02-08 op
205 75a8a1ec 2022-02-08 op if (len > 0) {
206 75a8a1ec 2022-02-08 op /* skip \n */
207 75a8a1ec 2022-02-08 op len--;
208 75a8a1ec 2022-02-08 op b++;
209 75a8a1ec 2022-02-08 op }
210 75a8a1ec 2022-02-08 op }
211 75a8a1ec 2022-02-08 op
212 75a8a1ec 2022-02-08 op return parser_set_buf(p, b, len);
213 75a8a1ec 2022-02-08 op }