Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include "parser.h"
18 #include "telescope.h"
20 #include <stdlib.h>
21 #include <string.h>
23 int
24 parser_append(struct parser *p, const char *buf, size_t len)
25 {
26 size_t newlen;
27 char *t;
29 newlen = len + p->len;
30 if ((t = calloc(1, newlen)) == NULL)
31 return 0;
32 memcpy(t, p->buf, p->len);
33 memcpy(t + p->len, buf, len);
34 free(p->buf);
35 p->buf = t;
36 p->len = newlen;
37 return 1;
38 }
40 int
41 parser_set_buf(struct parser *p, const char *buf, size_t len)
42 {
43 char *tmp;
45 if (len == 0) {
46 p->len = 0;
47 free(p->buf);
48 p->buf = NULL;
49 return 1;
50 }
52 /*
53 * p->buf and buf can (and probably almost always will)
54 * overlap!
55 */
57 if ((tmp = calloc(1, len)) == NULL)
58 return 0;
59 memcpy(tmp, buf, len);
60 free(p->buf);
61 p->buf = tmp;
62 p->len = len;
63 return 1;
64 }
66 int
67 parser_foreach_line(struct parser *p, const char *buf, size_t size,
68 parsechunkfn fn)
69 {
70 char *b, *e;
71 unsigned int ch;
72 size_t i, l, len;
74 if (!parser_append(p, buf, size))
75 return 0;
76 b = p->buf;
77 len = p->len;
79 if (!(p->flags & PARSER_IN_BODY) && len < 3)
80 return 1;
82 if (!(p->flags & PARSER_IN_BODY)) {
83 p->flags |= PARSER_IN_BODY;
85 /*
86 * drop the BOM: only UTF-8 is supported, and there
87 * it's useless; some editors may still add one
88 * though.
89 */
90 if (memmem(b, len, "\xEF\xBB\xBF", 3) == b) {
91 b += 3;
92 len -= 3;
93 }
94 }
96 /* drop every "funny" ASCII character */
97 for (i = 0; i < len; ) {
98 ch = b[i];
99 if ((ch >= ' ' || ch == '\n' || ch == '\t')
100 && ch != 127) { /* del */
101 ++i;
102 continue;
104 memmove(&b[i], &b[i+1], len - i);
105 len--;
108 while (len > 0) {
109 if ((e = memmem((char*)b, len, "\n", 1)) == NULL)
110 break;
111 l = e - b;
113 if (!fn(p, b, l))
114 return 0;
116 len -= l;
117 b += l;
119 if (len > 0) {
120 /* skip \n */
121 len--;
122 b++;
126 return parser_set_buf(p, b, len);