Blame


1 1ac119fb 2024-01-23 op /*
2 1ac119fb 2024-01-23 op * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 1ac119fb 2024-01-23 op *
4 1ac119fb 2024-01-23 op * Permission to use, copy, modify, and distribute this software for any
5 1ac119fb 2024-01-23 op * purpose with or without fee is hereby granted, provided that the above
6 1ac119fb 2024-01-23 op * copyright notice and this permission notice appear in all copies.
7 1ac119fb 2024-01-23 op *
8 1ac119fb 2024-01-23 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 1ac119fb 2024-01-23 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 1ac119fb 2024-01-23 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 1ac119fb 2024-01-23 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 1ac119fb 2024-01-23 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 1ac119fb 2024-01-23 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 1ac119fb 2024-01-23 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 1ac119fb 2024-01-23 op */
16 1ac119fb 2024-01-23 op
17 1ac119fb 2024-01-23 op #include "compat.h"
18 1ac119fb 2024-01-23 op
19 1ac119fb 2024-01-23 op #include <stdlib.h>
20 1ac119fb 2024-01-23 op #include <string.h>
21 1ac119fb 2024-01-23 op
22 1ac119fb 2024-01-23 op #include "hist.h"
23 1ac119fb 2024-01-23 op #include "parser.h"
24 1ac119fb 2024-01-23 op #include "telescope.h"
25 1ac119fb 2024-01-23 op
26 1ac119fb 2024-01-23 op void
27 1ac119fb 2024-01-23 op parser_init(struct tab *tab, parserfn fn)
28 1ac119fb 2024-01-23 op {
29 1ac119fb 2024-01-23 op erase_buffer(&tab->buffer);
30 1ac119fb 2024-01-23 op fn(&tab->buffer.page);
31 1ac119fb 2024-01-23 op tab->buffer.page.init = fn;
32 1ac119fb 2024-01-23 op }
33 1ac119fb 2024-01-23 op
34 1ac119fb 2024-01-23 op int
35 1ac119fb 2024-01-23 op parser_parse(struct tab *tab, const char *chunk, size_t len)
36 1ac119fb 2024-01-23 op {
37 1ac119fb 2024-01-23 op return tab->buffer.page.parse(&tab->buffer.page, chunk, len);
38 1ac119fb 2024-01-23 op }
39 1ac119fb 2024-01-23 op
/*
 * printf-like front end for parser_parse(): format the arguments
 * according to FMT into a temporary heap string and feed that string
 * (up to its NUL terminator) to the parser of TAB.
 *
 * Returns 0 if the string could not be formatted, otherwise the
 * value returned by parser_parse().
 */
int
parser_parsef(struct tab *tab, const char *fmt, ...)
{
	va_list	 args;
	char	*text;
	int	 res;

	va_start(args, fmt);
	res = vasprintf(&text, fmt, args);
	va_end(args);

	if (res != -1) {
		res = parser_parse(tab, text, strlen(text));
		free(text);
	} else {
		/* formatting failed: there is nothing to parse or free */
		res = 0;
	}

	return res;
}
58 1ac119fb 2024-01-23 op
59 1ac119fb 2024-01-23 op int
60 1ac119fb 2024-01-23 op parser_free(struct tab *tab)
61 1ac119fb 2024-01-23 op {
62 1ac119fb 2024-01-23 op int r;
63 1ac119fb 2024-01-23 op char *tilde, *slash;
64 1ac119fb 2024-01-23 op
65 1ac119fb 2024-01-23 op r = tab->buffer.page.free(&tab->buffer.page);
66 1ac119fb 2024-01-23 op
67 1ac119fb 2024-01-23 op if (*tab->buffer.page.title != '\0')
68 1ac119fb 2024-01-23 op return r;
69 1ac119fb 2024-01-23 op
70 1ac119fb 2024-01-23 op /*
71 1ac119fb 2024-01-23 op * heuristic: see if there is a "tilde user" and use that as
72 1ac119fb 2024-01-23 op * page title, using the full domain name as fallback.
73 1ac119fb 2024-01-23 op */
74 1ac119fb 2024-01-23 op if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) {
75 1ac119fb 2024-01-23 op strlcpy(tab->buffer.page.title, tilde+1,
76 1ac119fb 2024-01-23 op sizeof(tab->buffer.page.title));
77 1ac119fb 2024-01-23 op
78 1ac119fb 2024-01-23 op if ((slash = strchr(tab->buffer.page.title, '/')) != NULL)
79 1ac119fb 2024-01-23 op *slash = '\0';
80 1ac119fb 2024-01-23 op } else
81 1ac119fb 2024-01-23 op strlcpy(tab->buffer.page.title, tab->iri.iri_host,
82 1ac119fb 2024-01-23 op sizeof(tab->buffer.page.title));
83 1ac119fb 2024-01-23 op
84 1ac119fb 2024-01-23 op return r;
85 1ac119fb 2024-01-23 op }
86 1ac119fb 2024-01-23 op
87 1ac119fb 2024-01-23 op int
88 1ac119fb 2024-01-23 op parser_serialize(struct tab *tab, FILE *fp)
89 1ac119fb 2024-01-23 op {
90 1ac119fb 2024-01-23 op struct line *line;
91 1ac119fb 2024-01-23 op const char *text;
92 1ac119fb 2024-01-23 op int r;
93 1ac119fb 2024-01-23 op
94 1ac119fb 2024-01-23 op if (tab->buffer.page.serialize != NULL)
95 1ac119fb 2024-01-23 op return tab->buffer.page.serialize(&tab->buffer.page, fp);
96 1ac119fb 2024-01-23 op
97 1ac119fb 2024-01-23 op /* a default implementation good enough for plain text */
98 1ac119fb 2024-01-23 op TAILQ_FOREACH(line, &tab->buffer.page.head, lines) {
99 1ac119fb 2024-01-23 op if ((text = line->line) == NULL)
100 1ac119fb 2024-01-23 op text = "";
101 1ac119fb 2024-01-23 op
102 1ac119fb 2024-01-23 op r = fprintf(fp, "%s\n", text);
103 1ac119fb 2024-01-23 op if (r == -1)
104 1ac119fb 2024-01-23 op return 0;
105 1ac119fb 2024-01-23 op }
106 1ac119fb 2024-01-23 op
107 1ac119fb 2024-01-23 op return 1;
108 1ac119fb 2024-01-23 op }
109 1ac119fb 2024-01-23 op
110 1ac119fb 2024-01-23 op int
111 1ac119fb 2024-01-23 op parser_append(struct parser *p, const char *buf, size_t len)
112 1ac119fb 2024-01-23 op {
113 1ac119fb 2024-01-23 op size_t newlen;
114 1ac119fb 2024-01-23 op char *t;
115 1ac119fb 2024-01-23 op
116 1ac119fb 2024-01-23 op newlen = len + p->len;
117 1ac119fb 2024-01-23 op if ((t = calloc(1, newlen)) == NULL)
118 1ac119fb 2024-01-23 op return 0;
119 1ac119fb 2024-01-23 op memcpy(t, p->buf, p->len);
120 1ac119fb 2024-01-23 op memcpy(t + p->len, buf, len);
121 1ac119fb 2024-01-23 op free(p->buf);
122 1ac119fb 2024-01-23 op p->buf = t;
123 1ac119fb 2024-01-23 op p->len = newlen;
124 1ac119fb 2024-01-23 op return 1;
125 1ac119fb 2024-01-23 op }
126 1ac119fb 2024-01-23 op
127 1ac119fb 2024-01-23 op int
128 1ac119fb 2024-01-23 op parser_set_buf(struct parser *p, const char *buf, size_t len)
129 1ac119fb 2024-01-23 op {
130 1ac119fb 2024-01-23 op char *tmp;
131 1ac119fb 2024-01-23 op
132 1ac119fb 2024-01-23 op if (len == 0) {
133 1ac119fb 2024-01-23 op p->len = 0;
134 1ac119fb 2024-01-23 op free(p->buf);
135 1ac119fb 2024-01-23 op p->buf = NULL;
136 1ac119fb 2024-01-23 op return 1;
137 1ac119fb 2024-01-23 op }
138 1ac119fb 2024-01-23 op
139 1ac119fb 2024-01-23 op /*
140 1ac119fb 2024-01-23 op * p->buf and buf can (and probably almost always will)
141 1ac119fb 2024-01-23 op * overlap!
142 1ac119fb 2024-01-23 op */
143 1ac119fb 2024-01-23 op
144 1ac119fb 2024-01-23 op if ((tmp = calloc(1, len)) == NULL)
145 1ac119fb 2024-01-23 op return 0;
146 1ac119fb 2024-01-23 op memcpy(tmp, buf, len);
147 1ac119fb 2024-01-23 op free(p->buf);
148 1ac119fb 2024-01-23 op p->buf = tmp;
149 1ac119fb 2024-01-23 op p->len = len;
150 1ac119fb 2024-01-23 op return 1;
151 1ac119fb 2024-01-23 op }
152 1ac119fb 2024-01-23 op
153 1ac119fb 2024-01-23 op int
154 1ac119fb 2024-01-23 op parser_foreach_line(struct parser *p, const char *buf, size_t size,
155 1ac119fb 2024-01-23 op parsechunkfn fn)
156 1ac119fb 2024-01-23 op {
157 1ac119fb 2024-01-23 op char *b, *e;
158 1ac119fb 2024-01-23 op unsigned int ch;
159 1ac119fb 2024-01-23 op size_t i, l, len;
160 1ac119fb 2024-01-23 op
161 1ac119fb 2024-01-23 op if (!parser_append(p, buf, size))
162 1ac119fb 2024-01-23 op return 0;
163 1ac119fb 2024-01-23 op b = p->buf;
164 1ac119fb 2024-01-23 op len = p->len;
165 1ac119fb 2024-01-23 op
166 1ac119fb 2024-01-23 op if (!(p->flags & PARSER_IN_BODY) && len < 3)
167 1ac119fb 2024-01-23 op return 1;
168 1ac119fb 2024-01-23 op
169 1ac119fb 2024-01-23 op if (!(p->flags & PARSER_IN_BODY)) {
170 1ac119fb 2024-01-23 op p->flags |= PARSER_IN_BODY;
171 1ac119fb 2024-01-23 op
172 1ac119fb 2024-01-23 op /*
173 1ac119fb 2024-01-23 op * drop the BOM: only UTF-8 is supported, and there
174 1ac119fb 2024-01-23 op * it's useless; some editors may still add one
175 1ac119fb 2024-01-23 op * though.
176 1ac119fb 2024-01-23 op */
177 1ac119fb 2024-01-23 op if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
178 1ac119fb 2024-01-23 op b += 3;
179 1ac119fb 2024-01-23 op len -= 3;
180 1ac119fb 2024-01-23 op }
181 1ac119fb 2024-01-23 op }
182 1ac119fb 2024-01-23 op
183 1ac119fb 2024-01-23 op /* drop every "funny" ASCII character */
184 1ac119fb 2024-01-23 op for (i = 0; i < len; ) {
185 1ac119fb 2024-01-23 op ch = b[i];
186 1ac119fb 2024-01-23 op if ((ch >= ' ' || ch == '\n' || ch == '\t')
187 1ac119fb 2024-01-23 op && ch != 127) { /* del */
188 1ac119fb 2024-01-23 op ++i;
189 1ac119fb 2024-01-23 op continue;
190 1ac119fb 2024-01-23 op }
191 1ac119fb 2024-01-23 op memmove(&b[i], &b[i+1], len - i - 1);
192 1ac119fb 2024-01-23 op len--;
193 1ac119fb 2024-01-23 op }
194 1ac119fb 2024-01-23 op
195 1ac119fb 2024-01-23 op while (len > 0) {
196 1ac119fb 2024-01-23 op if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
197 1ac119fb 2024-01-23 op break;
198 1ac119fb 2024-01-23 op l = e - b;
199 1ac119fb 2024-01-23 op
200 1ac119fb 2024-01-23 op if (!fn(p, b, l))
201 1ac119fb 2024-01-23 op return 0;
202 1ac119fb 2024-01-23 op
203 1ac119fb 2024-01-23 op len -= l;
204 1ac119fb 2024-01-23 op b += l;
205 1ac119fb 2024-01-23 op
206 1ac119fb 2024-01-23 op if (len > 0) {
207 1ac119fb 2024-01-23 op /* skip \n */
208 1ac119fb 2024-01-23 op len--;
209 1ac119fb 2024-01-23 op b++;
210 1ac119fb 2024-01-23 op }
211 1ac119fb 2024-01-23 op }
212 1ac119fb 2024-01-23 op
213 1ac119fb 2024-01-23 op return parser_set_buf(p, b, len);
214 1ac119fb 2024-01-23 op }