Blame


1 1ac119fb 2024-01-23 op /*
2 62c0f697 2024-02-21 op * Copyright (c) 2021, 2024 Omar Polo <op@omarpolo.com>
3 1ac119fb 2024-01-23 op *
4 1ac119fb 2024-01-23 op * Permission to use, copy, modify, and distribute this software for any
5 1ac119fb 2024-01-23 op * purpose with or without fee is hereby granted, provided that the above
6 1ac119fb 2024-01-23 op * copyright notice and this permission notice appear in all copies.
7 1ac119fb 2024-01-23 op *
8 1ac119fb 2024-01-23 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 1ac119fb 2024-01-23 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 1ac119fb 2024-01-23 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 1ac119fb 2024-01-23 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 1ac119fb 2024-01-23 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 1ac119fb 2024-01-23 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 1ac119fb 2024-01-23 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 1ac119fb 2024-01-23 op */
16 1ac119fb 2024-01-23 op
17 1ac119fb 2024-01-23 op #include "compat.h"
18 1ac119fb 2024-01-23 op
19 1ac119fb 2024-01-23 op #include <stdio.h>
20 1ac119fb 2024-01-23 op #include <stdlib.h>
21 1ac119fb 2024-01-23 op #include <string.h>
22 1ac119fb 2024-01-23 op
23 1ac119fb 2024-01-23 op #include "parser.h"
24 62c0f697 2024-02-21 op #include "iri.h"
25 1ac119fb 2024-01-23 op #include "utils.h"
26 e5e04904 2024-02-06 op
/*
 * Fallback for systems whose headers do not provide LINE_MAX; it sizes
 * the fixed line buffers used in this file.
 */
#if !defined(LINE_MAX)
# define LINE_MAX 2048
#endif
30 1ac119fb 2024-01-23 op
/*
 * The fields of a single gophermap line as split by gm_parse_selector().
 * The string members point either into the caller's line buffer or to
 * string literals; the struct owns no memory.
 */
struct gm_selector {
	char		 type;		/* item type, first byte of the line */
	const char	*ds;		/* display string */
	const char	*selector;	/* selector; "" when absent */
	const char	*addr;		/* host field; "" when absent */
	const char	*port;		/* port field; "" when absent */
};
38 1ac119fb 2024-01-23 op
/* Splits a gophermap line, in place, into the fields of *s. */
static void	 gm_parse_selector(char *line, struct gm_selector *s);

/*
 * Parser entry points: gm_parse, gm_free and gm_serialize are stored in
 * struct parser by gophermap_initparser(); gm_foreach_line is the
 * per-line worker used by gm_parse and gm_free.
 */
static int	 gm_parse(struct parser *p, const char *buf, size_t size);
static int	 gm_foreach_line(struct parser *p, const char *line,
		    size_t linelen);
static int	 gm_free(struct parser *p);
static int	 gm_serialize(struct parser *p, FILE *fp);
45 1ac119fb 2024-01-23 op
46 1ac119fb 2024-01-23 op void
47 1ac119fb 2024-01-23 op gophermap_initparser(struct parser *p)
48 1ac119fb 2024-01-23 op {
49 1ac119fb 2024-01-23 op memset(p, 0, sizeof(*p));
50 1ac119fb 2024-01-23 op
51 1ac119fb 2024-01-23 op p->name = "gophermap";
52 1ac119fb 2024-01-23 op p->parse = &gm_parse;
53 1ac119fb 2024-01-23 op p->free = &gm_free;
54 1ac119fb 2024-01-23 op p->serialize = &gm_serialize;
55 1ac119fb 2024-01-23 op
56 1ac119fb 2024-01-23 op TAILQ_INIT(&p->head);
57 1ac119fb 2024-01-23 op }
58 1ac119fb 2024-01-23 op
59 1ac119fb 2024-01-23 op static void
60 1ac119fb 2024-01-23 op gm_parse_selector(char *line, struct gm_selector *s)
61 1ac119fb 2024-01-23 op {
62 1ac119fb 2024-01-23 op s->type = *line++;
63 1ac119fb 2024-01-23 op s->ds = line;
64 1ac119fb 2024-01-23 op s->selector = "";
65 1ac119fb 2024-01-23 op s->addr = "";
66 1ac119fb 2024-01-23 op s->port = "";
67 1ac119fb 2024-01-23 op
68 1ac119fb 2024-01-23 op if ((line = strchr(line, '\t')) == NULL)
69 1ac119fb 2024-01-23 op return;
70 1ac119fb 2024-01-23 op *line++ = '\0';
71 1ac119fb 2024-01-23 op s->selector = line;
72 1ac119fb 2024-01-23 op
73 1ac119fb 2024-01-23 op if ((line = strchr(line, '\t')) == NULL)
74 1ac119fb 2024-01-23 op return;
75 1ac119fb 2024-01-23 op *line++ = '\0';
76 1ac119fb 2024-01-23 op s->addr = line;
77 1ac119fb 2024-01-23 op
78 1ac119fb 2024-01-23 op if ((line = strchr(line, '\t')) == NULL)
79 1ac119fb 2024-01-23 op return;
80 1ac119fb 2024-01-23 op *line++ = '\0';
81 1ac119fb 2024-01-23 op s->port = line;
82 1ac119fb 2024-01-23 op }
83 1ac119fb 2024-01-23 op
84 1ac119fb 2024-01-23 op static int
85 1ac119fb 2024-01-23 op gm_parse(struct parser *p, const char *buf, size_t size)
86 1ac119fb 2024-01-23 op {
87 1ac119fb 2024-01-23 op return parser_foreach_line(p, buf, size, gm_foreach_line);
88 62c0f697 2024-02-21 op }
89 62c0f697 2024-02-21 op
90 62c0f697 2024-02-21 op static int
91 62c0f697 2024-02-21 op selector2uri(struct gm_selector *s, char *buf, size_t len)
92 62c0f697 2024-02-21 op {
93 62c0f697 2024-02-21 op int r;
94 62c0f697 2024-02-21 op
95 62c0f697 2024-02-21 op r = snprintf(buf, len, "gopher://%s:%s/%c%s",
96 62c0f697 2024-02-21 op s->addr, s->port, s->type, *s->selector != '/' ? "/" : "");
97 62c0f697 2024-02-21 op if (r < 0 || (size_t)r >= len)
98 62c0f697 2024-02-21 op return (-1);
99 62c0f697 2024-02-21 op
100 62c0f697 2024-02-21 op buf += r;
101 62c0f697 2024-02-21 op len -= r;
102 62c0f697 2024-02-21 op return (iri_urlescape(s->selector, buf, len));
103 1ac119fb 2024-01-23 op }
104 1ac119fb 2024-01-23 op
105 1ac119fb 2024-01-23 op static inline int
106 1ac119fb 2024-01-23 op emit_line(struct parser *p, enum line_type type, struct gm_selector *s)
107 1ac119fb 2024-01-23 op {
108 1ac119fb 2024-01-23 op struct line *l;
109 62c0f697 2024-02-21 op char buf[LINE_MAX];
110 1ac119fb 2024-01-23 op
111 1ac119fb 2024-01-23 op if ((l = calloc(1, sizeof(*l))) == NULL)
112 1ac119fb 2024-01-23 op goto err;
113 1ac119fb 2024-01-23 op
114 1ac119fb 2024-01-23 op if ((l->line = strdup(s->ds)) == NULL)
115 1ac119fb 2024-01-23 op goto err;
116 1ac119fb 2024-01-23 op
117 1ac119fb 2024-01-23 op switch (l->type = type) {
118 1ac119fb 2024-01-23 op case LINE_LINK:
119 1ac119fb 2024-01-23 op if (s->type == 'h' && !strncmp(s->selector, "URL:", 4)) {
120 1ac119fb 2024-01-23 op strlcpy(buf, s->selector+4, sizeof(buf));
121 62c0f697 2024-02-21 op } else if (selector2uri(s, buf, sizeof(buf)) == -1)
122 62c0f697 2024-02-21 op goto err;
123 1ac119fb 2024-01-23 op
124 1ac119fb 2024-01-23 op if ((l->alt = strdup(buf)) == NULL)
125 1ac119fb 2024-01-23 op goto err;
126 1ac119fb 2024-01-23 op break;
127 1ac119fb 2024-01-23 op
128 1ac119fb 2024-01-23 op default:
129 1ac119fb 2024-01-23 op break;
130 1ac119fb 2024-01-23 op }
131 1ac119fb 2024-01-23 op
132 1ac119fb 2024-01-23 op TAILQ_INSERT_TAIL(&p->head, l, lines);
133 1ac119fb 2024-01-23 op
134 1ac119fb 2024-01-23 op return 1;
135 1ac119fb 2024-01-23 op
136 1ac119fb 2024-01-23 op err:
137 1ac119fb 2024-01-23 op if (l != NULL) {
138 1ac119fb 2024-01-23 op free(l->line);
139 1ac119fb 2024-01-23 op free(l->alt);
140 1ac119fb 2024-01-23 op free(l);
141 1ac119fb 2024-01-23 op }
142 1ac119fb 2024-01-23 op return 0;
143 1ac119fb 2024-01-23 op }
144 1ac119fb 2024-01-23 op
145 1ac119fb 2024-01-23 op static int
146 1ac119fb 2024-01-23 op gm_foreach_line(struct parser *p, const char *line, size_t linelen)
147 1ac119fb 2024-01-23 op {
148 1ac119fb 2024-01-23 op char buf[LINE_MAX] = {0};
149 1ac119fb 2024-01-23 op struct gm_selector s = {0};
150 1ac119fb 2024-01-23 op
151 1ac119fb 2024-01-23 op memcpy(buf, line, MIN(sizeof(buf)-1, linelen));
152 1ac119fb 2024-01-23 op gm_parse_selector(buf, &s);
153 1ac119fb 2024-01-23 op
154 1ac119fb 2024-01-23 op switch (s.type) {
155 1ac119fb 2024-01-23 op case '0': /* text file */
156 1ac119fb 2024-01-23 op case '1': /* gopher submenu */
157 1ac119fb 2024-01-23 op case '2': /* CCSO nameserver */
158 1ac119fb 2024-01-23 op case '4': /* binhex-encoded file */
159 1ac119fb 2024-01-23 op case '5': /* DOS file */
160 1ac119fb 2024-01-23 op case '6': /* uuencoded file */
161 1ac119fb 2024-01-23 op case '7': /* full-text search */
162 1ac119fb 2024-01-23 op case '8': /* telnet */
163 1ac119fb 2024-01-23 op case '9': /* binary file */
164 1ac119fb 2024-01-23 op case '+': /* mirror or alternate server */
165 1ac119fb 2024-01-23 op case 'g': /* gif */
166 1ac119fb 2024-01-23 op case 'I': /* image */
167 1ac119fb 2024-01-23 op case 'T': /* telnet 3270 */
168 1ac119fb 2024-01-23 op case ':': /* gopher+: bitmap image */
169 1ac119fb 2024-01-23 op case ';': /* gopher+: movie file */
170 1ac119fb 2024-01-23 op case 'd': /* non-canonical: doc */
171 1ac119fb 2024-01-23 op case 'h': /* non-canonical: html file */
172 1ac119fb 2024-01-23 op case 's': /* non-canonical: sound file */
173 1ac119fb 2024-01-23 op if (!emit_line(p, LINE_LINK, &s))
174 1ac119fb 2024-01-23 op return 0;
175 1ac119fb 2024-01-23 op break;
176 1ac119fb 2024-01-23 op
177 1ac119fb 2024-01-23 op case 'i': /* non-canonical: message */
178 1ac119fb 2024-01-23 op if (!emit_line(p, LINE_TEXT, &s))
179 1ac119fb 2024-01-23 op return 0;
180 1ac119fb 2024-01-23 op break;
181 1ac119fb 2024-01-23 op
182 1ac119fb 2024-01-23 op case '3': /* error code */
183 1ac119fb 2024-01-23 op if (!emit_line(p, LINE_QUOTE, &s))
184 1ac119fb 2024-01-23 op return 0;
185 1ac119fb 2024-01-23 op break;
186 1ac119fb 2024-01-23 op }
187 1ac119fb 2024-01-23 op
188 1ac119fb 2024-01-23 op return 1;
189 1ac119fb 2024-01-23 op }
190 1ac119fb 2024-01-23 op
191 1ac119fb 2024-01-23 op static int
192 1ac119fb 2024-01-23 op gm_free(struct parser *p)
193 1ac119fb 2024-01-23 op {
194 1ac119fb 2024-01-23 op /* flush the buffer */
195 1ac119fb 2024-01-23 op if (p->len != 0)
196 1ac119fb 2024-01-23 op gm_foreach_line(p, p->buf, p->len);
197 1ac119fb 2024-01-23 op
198 1ac119fb 2024-01-23 op free(p->buf);
199 1ac119fb 2024-01-23 op
200 1ac119fb 2024-01-23 op return 1;
201 1ac119fb 2024-01-23 op }
202 1ac119fb 2024-01-23 op
/*
 * Split the path of a gopher URI ("/<type><selector>") into item type
 * and selector.
 *
 * - An empty path or a bare "/" is a menu: *ret_type is '1' and the
 *   path is returned as is.
 * - If the path starts with '/' followed by one of the item types that
 *   gm_foreach_line() turns into links, *ret_type is that type and the
 *   returned pointer is the selector right after it.
 * - Otherwise *ret_type is 0 and the path is returned untouched.
 *
 * The returned pointer always points into `path'.
 */
static inline const char *
gopher_skip_selector(const char *path, int *ret_type)
{
	*ret_type = 0;

	if (path[0] == '\0' || strcmp(path, "/") == 0) {
		*ret_type = '1';
		return path;
	}

	if (path[0] != '/')
		return path;

	/* keep this list in sync with the link types of the parser */
	switch (path[1]) {
	case '0':	/* text file */
	case '1':	/* gopher submenu */
	case '2':	/* CCSO nameserver */
	case '4':	/* binhex-encoded file */
	case '5':	/* DOS file */
	case '6':	/* uuencoded file */
	case '7':	/* full-text search */
	case '8':	/* telnet */
	case '9':	/* binary file */
	case '+':	/* mirror or alternate server */
	case 'g':	/* gif */
	case 'I':	/* image */
	case 'T':	/* telnet 3270 */
	case ':':	/* gopher+: bitmap image */
	case ';':	/* gopher+: movie file */
	case 'd':	/* non-canonical: doc */
	case 'h':	/* non-canonical: html file */
	case 's':	/* non-canonical: sound file */
		*ret_type = path[1];
		return path + 2;

	default:
		return path;
	}
}
231 1ac119fb 2024-01-23 op
232 1ac119fb 2024-01-23 op static int
233 1ac119fb 2024-01-23 op serialize_link(struct line *line, const char *text, FILE *fp)
234 1ac119fb 2024-01-23 op {
235 1ac119fb 2024-01-23 op size_t portlen = 0;
236 1ac119fb 2024-01-23 op int type;
237 1ac119fb 2024-01-23 op const char *uri, *endhost, *port, *path, *colon;
238 1ac119fb 2024-01-23 op
239 1ac119fb 2024-01-23 op if ((uri = line->alt) == NULL)
240 1ac119fb 2024-01-23 op return -1;
241 1ac119fb 2024-01-23 op
242 1ac119fb 2024-01-23 op if (strncmp(uri, "gopher://", 9) != 0)
243 1ac119fb 2024-01-23 op return fprintf(fp, "h%s\tURL:%s\terror.host\t1\n",
244 1ac119fb 2024-01-23 op text, line->alt);
245 1ac119fb 2024-01-23 op
246 1ac119fb 2024-01-23 op uri += 9; /* skip gopher:// */
247 1ac119fb 2024-01-23 op
248 1ac119fb 2024-01-23 op path = strchr(uri, '/');
249 1ac119fb 2024-01-23 op colon = strchr(uri, ':');
250 1ac119fb 2024-01-23 op
251 1ac119fb 2024-01-23 op if (path != NULL && colon > path)
252 1ac119fb 2024-01-23 op colon = NULL;
253 1ac119fb 2024-01-23 op
254 1ac119fb 2024-01-23 op if ((endhost = colon) == NULL &&
255 1ac119fb 2024-01-23 op (endhost = path) == NULL)
256 1ac119fb 2024-01-23 op endhost = strchr(uri, '\0');
257 1ac119fb 2024-01-23 op
258 1ac119fb 2024-01-23 op if (colon != NULL) {
259 1ac119fb 2024-01-23 op for (port = colon+1; *port && *port != '/'; ++port)
260 1ac119fb 2024-01-23 op ++portlen;
261 1ac119fb 2024-01-23 op port = colon+1;
262 1ac119fb 2024-01-23 op } else {
263 1ac119fb 2024-01-23 op port = "70";
264 1ac119fb 2024-01-23 op portlen = 2;
265 1ac119fb 2024-01-23 op }
266 1ac119fb 2024-01-23 op
267 1ac119fb 2024-01-23 op if (path == NULL) {
268 1ac119fb 2024-01-23 op type = '1';
269 1ac119fb 2024-01-23 op path = "";
270 1ac119fb 2024-01-23 op } else
271 1ac119fb 2024-01-23 op path = gopher_skip_selector(path, &type);
272 1ac119fb 2024-01-23 op
273 1ac119fb 2024-01-23 op return fprintf(fp, "%c%s\t%s\t%.*s\t%.*s\n", type, text,
274 1ac119fb 2024-01-23 op path, (int)(endhost - uri), uri, (int)portlen, port);
275 1ac119fb 2024-01-23 op }
276 1ac119fb 2024-01-23 op
277 1ac119fb 2024-01-23 op static int
278 1ac119fb 2024-01-23 op gm_serialize(struct parser *p, FILE *fp)
279 1ac119fb 2024-01-23 op {
280 1ac119fb 2024-01-23 op struct line *line;
281 1ac119fb 2024-01-23 op const char *text;
282 1ac119fb 2024-01-23 op int r;
283 1ac119fb 2024-01-23 op
284 1ac119fb 2024-01-23 op TAILQ_FOREACH(line, &p->head, lines) {
285 1ac119fb 2024-01-23 op if ((text = line->line) == NULL)
286 1ac119fb 2024-01-23 op text = "";
287 1ac119fb 2024-01-23 op
288 1ac119fb 2024-01-23 op switch (line->type) {
289 1ac119fb 2024-01-23 op case LINE_LINK:
290 1ac119fb 2024-01-23 op r = serialize_link(line, text, fp);
291 1ac119fb 2024-01-23 op break;
292 1ac119fb 2024-01-23 op
293 1ac119fb 2024-01-23 op case LINE_TEXT:
294 1ac119fb 2024-01-23 op r = fprintf(fp, "i%s\t\terror.host\t1\n", text);
295 1ac119fb 2024-01-23 op break;
296 1ac119fb 2024-01-23 op
297 1ac119fb 2024-01-23 op case LINE_QUOTE:
298 1ac119fb 2024-01-23 op r = fprintf(fp, "3%s\t\terror.host\t1\n", text);
299 1ac119fb 2024-01-23 op break;
300 1ac119fb 2024-01-23 op
301 1ac119fb 2024-01-23 op default:
302 1ac119fb 2024-01-23 op /* unreachable */
303 1ac119fb 2024-01-23 op abort();
304 1ac119fb 2024-01-23 op }
305 1ac119fb 2024-01-23 op
306 1ac119fb 2024-01-23 op if (r == -1)
307 1ac119fb 2024-01-23 op return 0;
308 1ac119fb 2024-01-23 op }
309 1ac119fb 2024-01-23 op
310 1ac119fb 2024-01-23 op return 1;
311 1ac119fb 2024-01-23 op }