Blob


1 #include <u.h>
2 #include <limits.h>
3 #include <libc.h>
4 #include <draw.h>
5 #include <html.h>
6 #include "impl.h"
8 Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
9 Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
11 /* All lists start out like List structure. */
12 /* List itself can be used as list of int. */
13 int
14 _listlen(List* l)
15 {
16 int n = 0;
18 while(l != nil) {
19 l = l->next;
20 n++;
21 }
22 return n;
23 }
25 /* Cons */
26 List*
27 _newlist(int val, List* rest)
28 {
29 List* ans;
31 ans = (List*)emalloc(sizeof(List));
32 ans->val = val;
33 ans->next = rest;
34 return ans;
35 }
37 /* Reverse a list in place */
38 List*
39 _revlist(List* l)
40 {
41 List* newl;
42 List* nextl;
44 newl = nil;
45 while(l != nil) {
46 nextl = l->next;
47 l->next = newl;
48 newl = l;
49 l = nextl;
50 }
51 return newl;
52 }
54 /* The next few routines take a "character class" as argument. */
55 /* e.g., "a-zA-Z", or "^ \t\n" */
56 /* (ranges indicated by - except in first position; */
57 /* ^ is first position means "not in" the following class) */
59 /* Splitl splits s[0:n] just before first character of class cl. */
60 /* Answers go in (p1, n1) and (p2, n2). */
61 /* If no split, the whole thing goes in the first component. */
62 /* Note: answers contain pointers into original string. */
63 void
64 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
65 {
66 Rune* p;
68 p = _Strnclass(s, cl, n);
69 *p1 = s;
70 if(p == nil) {
71 *n1 = n;
72 *p2 = nil;
73 *n2 = 0;
74 }
75 else {
76 *p2 = p;
77 *n1 = p-s;
78 *n2 = n-*n1;
79 }
80 }
82 /* Splitr splits s[0:n] just after last character of class cl. */
83 /* Answers go in (p1, n1) and (p2, n2). */
84 /* If no split, the whole thing goes in the last component. */
85 /* Note: answers contain pointers into original string. */
86 void
87 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
88 {
89 Rune* p;
91 p = _Strnrclass(s, cl, n);
92 if(p == nil) {
93 *p1 = nil;
94 *n1 = 0;
95 *p2 = s;
96 *n2 = n;
97 }
98 else {
99 *p1 = s;
100 *p2 = p+1;
101 *n1 = *p2-s;
102 *n2 = n-*n1;
106 /* Splitall splits s[0:n] into parts that are separated by characters from class cl. */
107 /* Each part will have nonzero length. */
108 /* At most alen parts are found, and pointers to their starts go into */
109 /* the strarr array, while their lengths go into the lenarr array. */
110 /* The return value is the number of parts found. */
111 int
112 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
114 int i;
115 Rune* p;
116 Rune* q;
117 Rune* slast;
119 if(s == nil || n == 0)
120 return 0;
121 i = 0;
122 p = s;
123 slast = s+n;
124 while(p < slast && i < alen) {
125 while(p < slast && _inclass(*p, cl))
126 p++;
127 if(p == slast)
128 break;
129 q = _Strnclass(p, cl, slast-p);
130 if(q == nil)
131 q = slast;
132 assert(q > p && q <= slast);
133 strarr[i] = p;
134 lenarr[i] = q-p;
135 i++;
136 p = q;
138 return i;
141 /* Find part of s that excludes leading and trailing whitespace, */
142 /* and return that part in *pans (and its length in *panslen). */
143 void
144 _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
146 Rune* p;
147 Rune* q;
149 p = nil;
150 if(n > 0) {
151 p = _Strnclass(s, notwhitespace, n);
152 if(p != nil) {
153 q = _Strnrclass(s, notwhitespace, n);
154 assert(q != nil);
155 n = q+1-p;
158 *pans = p;
159 *panslen = n;
162 /* _Strclass returns a pointer to the first element of s that is */
163 /* a member of class cl, nil if none. */
164 Rune*
165 _Strclass(Rune* s, Rune* cl)
167 Rune* p;
169 for(p = s; *p != 0; p++)
170 if(_inclass(*p, cl))
171 return p;
172 return nil;
175 /* _Strnclass returns a pointer to the first element of s[0:n] that is */
176 /* a member of class cl, nil if none. */
177 Rune*
178 _Strnclass(Rune* s, Rune* cl, int n)
180 Rune* p;
182 for(p = s; n-- && *p != 0; p++)
183 if(_inclass(*p, cl))
184 return p;
185 return nil;
188 /* _Strrclass returns a pointer to the last element of s that is */
189 /* a member of class cl, nil if none */
190 Rune*
191 _Strrclass(Rune* s, Rune* cl)
193 Rune* p;
195 if(s == nil || *s == 0)
196 return nil;
197 p = s + runestrlen(s) - 1;
198 while(p >= s) {
199 if(_inclass(*p, cl))
200 return p;
201 p--;
202 };
203 return nil;
206 /* _Strnrclass returns a pointer to the last element of s[0:n] that is */
207 /* a member of class cl, nil if none */
208 Rune*
209 _Strnrclass(Rune* s, Rune* cl, int n)
211 Rune* p;
213 if(s == nil || *s == 0 || n == 0)
214 return nil;
215 p = s + n - 1;
216 while(p >= s) {
217 if(_inclass(*p, cl))
218 return p;
219 p--;
220 };
221 return nil;
224 /* Is c in the class cl? */
225 int
226 _inclass(Rune c, Rune* cl)
228 int n;
229 int ans;
230 int negate;
231 int i;
233 n = _Strlen(cl);
234 if(n == 0)
235 return 0;
236 ans = 0;
237 negate = 0;
238 if(cl[0] == '^') {
239 negate = 1;
240 cl++;
241 n--;
243 for(i = 0; i < n; i++) {
244 if(cl[i] == '-' && i > 0 && i < n - 1) {
245 if(c >= cl[i - 1] && c <= cl[i + 1]) {
246 ans = 1;
247 break;
249 i++;
251 else if(c == cl[i]) {
252 ans = 1;
253 break;
256 if(negate)
257 ans = !ans;
258 return ans;
261 /* Is pre a prefix of s? */
262 int
263 _prefix(Rune* pre, Rune* s)
265 int ns;
266 int n;
267 int k;
269 ns = _Strlen(s);
270 n = _Strlen(pre);
271 if(ns < n)
272 return 0;
273 for(k = 0; k < n; k++) {
274 if(pre[k] != s[k])
275 return 0;
277 return 1;
280 /* Number of runes in (null-terminated) s */
281 int
282 _Strlen(Rune* s)
284 if(s == nil)
285 return 0;
286 return runestrlen(s);
289 /* -1, 0, 1 as s1 is lexicographically less, equal greater than s2 */
290 int
291 _Strcmp(Rune *s1, Rune *s2)
293 if(s1 == nil)
294 return (s2 == nil || *s2 == 0) ? 0 : -1;
295 if(s2 == nil)
296 return (*s1 == 0) ? 0 : 1;
297 return runestrcmp(s1, s2);
300 /* Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars). */
301 /* Also, do a case-insensitive match, assuming s2 */
302 /* has no chars in [A-Z], only their lowercase versions. */
303 /* (This routine is used for in-place keyword lookup, where s2 is in a keyword */
304 /* list and s1 is some substring, possibly mixed-case, in a buffer.) */
305 int
306 _Strncmpci(Rune *s1, int n1, Rune *s2)
308 Rune c1, c2;
310 for(;;) {
311 if(n1-- == 0) {
312 if(*s2 == 0)
313 return 0;
314 return -1;
316 c1 = *s1++;
317 c2 = *s2++;
318 if(c1 >= 'A' && c1 <= 'Z')
319 c1 = c1 - 'A' + 'a';
320 if(c1 != c2) {
321 if(c1 > c2)
322 return 1;
323 return -1;
328 /* emalloc and copy */
329 Rune*
330 _Strdup(Rune* s)
332 if(s == nil)
333 return nil;
334 return _Strndup(s, runestrlen(s));
337 /* emalloc and copy n chars of s (assume s is at least that long), */
338 /* and add 0 terminator. */
339 /* Return nil if n==0. */
340 Rune*
341 _Strndup(Rune* s, int n)
343 Rune* ans;
345 if(n <= 0)
346 return nil;
347 ans = _newstr(n);
348 memmove(ans, s, n*sizeof(Rune));
349 ans[n] = 0;
350 return ans;
352 /* emalloc enough room for n Runes, plus 1 null terminator. */
353 /* (Not initialized to anything.) */
354 Rune*
355 _newstr(int n)
357 return (Rune*)emalloc((n+1)*sizeof(Rune));
360 /* emalloc and copy s+t */
361 Rune*
362 _Strdup2(Rune* s, Rune* t)
364 int ns, nt;
365 Rune* ans;
366 Rune* p;
368 ns = _Strlen(s);
369 nt = _Strlen(t);
370 if(ns+nt == 0)
371 return nil;
372 ans = _newstr(ns+nt);
373 p = _Stradd(ans, s, ns);
374 p = _Stradd(p, t, nt);
375 *p = 0;
376 return ans;
379 /* Return emalloc'd substring s[start:stop], */
380 Rune*
381 _Strsubstr(Rune* s, int start, int stop)
383 Rune* t;
385 if(start == stop)
386 return nil;
387 t = _Strndup(s+start, stop-start);
388 return t;
391 /* Copy n chars to s1 from s2, and return s1+n */
392 Rune*
393 _Stradd(Rune* s1, Rune* s2, int n)
395 if(n == 0)
396 return s1;
397 memmove(s1, s2, n*sizeof(Rune));
398 return s1+n;
401 /* Like strtol, but converting from Rune* string */
403 /*#define LONG_MAX 2147483647L */
404 /*#define LONG_MIN -2147483648L */
406 long
407 _Strtol(Rune* nptr, Rune** endptr, int base)
409 Rune* p;
410 long n, nn;
411 int c, ovfl, v, neg, ndig;
413 p = nptr;
414 neg = 0;
415 n = 0;
416 ndig = 0;
417 ovfl = 0;
419 /*
420 * White space
421 */
422 for(;;p++){
423 switch(*p){
424 case ' ':
425 case '\t':
426 case '\n':
427 case '\f':
428 case '\r':
429 case '\v':
430 continue;
432 break;
435 /*
436 * Sign
437 */
438 if(*p=='-' || *p=='+')
439 if(*p++ == '-')
440 neg = 1;
442 /*
443 * Base
444 */
445 if(base==0){
446 if(*p != '0')
447 base = 10;
448 else{
449 base = 8;
450 if(p[1]=='x' || p[1]=='X'){
451 p += 2;
452 base = 16;
455 }else if(base==16 && *p=='0'){
456 if(p[1]=='x' || p[1]=='X')
457 p += 2;
458 }else if(base<0 || 36<base)
459 goto Return;
461 /*
462 * Non-empty sequence of digits
463 */
464 for(;; p++,ndig++){
465 c = *p;
466 v = base;
467 if('0'<=c && c<='9')
468 v = c - '0';
469 else if('a'<=c && c<='z')
470 v = c - 'a' + 10;
471 else if('A'<=c && c<='Z')
472 v = c - 'A' + 10;
473 if(v >= base)
474 break;
475 nn = n*base + v;
476 if(nn < n)
477 ovfl = 1;
478 n = nn;
481 Return:
482 if(ndig == 0)
483 p = nptr;
484 if(endptr)
485 *endptr = p;
486 if(ovfl){
487 if(neg)
488 return LONG_MIN;
489 return LONG_MAX;
491 if(neg)
492 return -n;
493 return n;
496 /* Convert buf[0:n], bytes whose character set is chset, */
497 /* into a emalloc'd null-terminated Unicode string. */
498 Rune*
499 toStr(uchar* buf, int n, int chset)
501 int i;
502 int m;
503 Rune ch;
504 Rune* ans;
506 switch(chset) {
507 case US_Ascii:
508 case ISO_8859_1:
509 ans = (Rune*)emalloc((n+1)*sizeof(Rune));
510 for(i = 0; i < n; i++)
511 ans[i] = buf[i];
512 ans[n] = 0;
513 break;
515 case UTF_8:
516 m = 0;
517 for(i = 0; i < n; ) {
518 i += chartorune(&ch, (char*)(buf+i));
519 m++;
521 ans = (Rune*)emalloc((m+1)*sizeof(Rune));
522 m = 0;
523 for(i = 0; i < n; ) {
524 i += chartorune(&ch, (char*)(buf+i));
525 ans[m++] = ch;
527 ans[m] = 0;
528 break;
530 default:
531 ans = nil;
532 assert(0);
534 return ans;
537 /* Convert buf[0:n], Unicode characters, */
538 /* into an emalloc'd null-terminated string in character set chset. */
539 /* Use 0x80 for unconvertable characters. */
540 uchar*
541 fromStr(Rune* buf, int n, int chset)
543 uchar* ans;
544 int i, lim, m;
545 Rune ch;
546 uchar* p;
547 uchar s[UTFmax];
549 ans = nil;
550 switch(chset) {
551 case US_Ascii:
552 case ISO_8859_1:
553 ans = (uchar*)emalloc(n+1);
554 lim = (chset==US_Ascii)? 127 : 255;
555 for(i = 0; i < n; i++) {
556 ch = buf[i];
557 if(ch > lim)
558 ch = 0x80;
559 ans[i] = ch;
561 ans[n] = 0;
562 break;
564 case UTF_8:
565 m = 0;
566 for(i = 0; i < n; i++) {
567 m += runetochar((char*)s, &buf[i]);
569 ans = (uchar*)emalloc(m+1);
570 p = ans;
571 for(i = 0; i < n; i++)
572 p += runetochar((char*)p, &buf[i]);
573 *p = 0;
574 break;
576 default:
577 assert(0);
579 return ans;
583 /* Convert n to emalloc'd String. */
584 Rune*
585 _ltoStr(int n)
587 int m;
588 uchar buf[20];
590 m = snprint((char*)buf, sizeof(buf), "%d", n);
591 return toStr(buf, m, US_Ascii);