// Character class (in the form accepted by _inclass) matching whitespace:
// space, tab, newline and carriage return.  Zero-terminated.
8 Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
// Negated form of the same class: the leading '^' means "not in" the
// characters that follow, so this matches any non-whitespace rune.
// Zero-terminated.
9 Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
11 // All lists start out like the List structure.
12 // List itself can be used as a list of ints.
27 _newlist(int val, List* rest)
31 ans = (List*)emalloc(sizeof(List));
37 // Reverse a list in place
54 // The next few routines take a "character class" as argument.
55 // e.g., "a-zA-Z", or "^ \t\n"
56 // (ranges indicated by - except in first position;
57 // ^ in first position means "not in" the following class)
59 // Splitl splits s[0:n] just before first character of class cl.
60 // Answers go in (p1, n1) and (p2, n2).
61 // If no split, the whole thing goes in the first component.
62 // Note: answers contain pointers into original string.
64 _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
68 p = _Strnclass(s, cl, n);
82 // Splitr splits s[0:n] just after last character of class cl.
83 // Answers go in (p1, n1) and (p2, n2).
84 // If no split, the whole thing goes in the last component.
85 // Note: answers contain pointers into original string.
87 _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
91 p = _Strnrclass(s, cl, n);
106 // Splitall splits s[0:n] into parts that are separated by characters from class cl.
107 // Each part will have nonzero length.
108 // At most alen parts are found, and pointers to their starts go into
109 // the strarr array, while their lengths go into the lenarr array.
110 // The return value is the number of parts found.
112 _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
119 if(s == nil || n == 0)
124 while(p < slast && i < alen) {
125 while(p < slast && _inclass(*p, cl))
129 q = _Strnclass(p, cl, slast-p);
132 assert(q > p && q <= slast);
141 // Find part of s that excludes leading and trailing whitespace,
142 // and return that part in *pans (and its length in *panslen).
144 _trimwhite(Rune* s, int n, Rune** pans, int* panslen)
151 p = _Strnclass(s, notwhitespace, n);
153 q = _Strnrclass(s, notwhitespace, n);
162 // _Strclass returns a pointer to the first element of s that is
163 // a member of class cl, nil if none.
165 _Strclass(Rune* s, Rune* cl)
169 for(p = s; *p != 0; p++)
175 // _Strnclass returns a pointer to the first element of s[0:n] that is
176 // a member of class cl, nil if none.
178 _Strnclass(Rune* s, Rune* cl, int n)
182 for(p = s; n-- && *p != 0; p++)
188 // _Strrclass returns a pointer to the last element of s that is
189 // a member of class cl, nil if none
191 _Strrclass(Rune* s, Rune* cl)
195 if(s == nil || *s == 0)
197 p = s + runestrlen(s) - 1;
206 // _Strnrclass returns a pointer to the last element of s[0:n] that is
207 // a member of class cl, nil if none
209 _Strnrclass(Rune* s, Rune* cl, int n)
213 if(s == nil || *s == 0 || n == 0)
224 // Is c in the class cl?
226 _inclass(Rune c, Rune* cl)
243 for(i = 0; i < n; i++) {
244 if(cl[i] == '-' && i > 0 && i < n - 1) {
245 if(c >= cl[i - 1] && c <= cl[i + 1]) {
251 else if(c == cl[i]) {
261 // Is pre a prefix of s?
263 _prefix(Rune* pre, Rune* s)
273 for(k = 0; k < n; k++) {
280 // Number of runes in (null-terminated) s
286 return runestrlen(s);
289 // -1, 0, 1 as s1 is lexicographically less than, equal to, or greater than s2
291 _Strcmp(Rune *s1, Rune *s2)
294 return (s2 == nil || *s2 == 0) ? 0 : -1;
296 return (*s1 == 0) ? 0 : 1;
297 return runestrcmp(s1, s2);
300 // Like Strcmp, but use exactly n1 chars of s1 (assume s1 has at least n1 chars).
301 // Also, do a case-insensitive match, assuming s2
302 // has no chars in [A-Z], only their lowercase versions.
303 // (This routine is used for in-place keyword lookup, where s2 is in a keyword
304 // list and s1 is some substring, possibly mixed-case, in a buffer.)
306 _Strncmpci(Rune *s1, int n1, Rune *s2)
318 if(c1 >= 'A' && c1 <= 'Z')
334 return _Strndup(s, runestrlen(s));
337 // emalloc and copy n chars of s (assume s is at least that long),
338 // and add 0 terminator.
339 // Return nil if n==0.
341 _Strndup(Rune* s, int n)
348 memmove(ans, s, n*sizeof(Rune));
352 // emalloc enough room for n Runes, plus 1 null terminator.
353 // (Not initialized to anything.)
357 return (Rune*)emalloc((n+1)*sizeof(Rune));
360 // emalloc a new string and copy into it the concatenation of s and t
362 _Strdup2(Rune* s, Rune* t)
372 ans = _newstr(ns+nt);
373 p = _Stradd(ans, s, ns);
374 p = _Stradd(p, t, nt);
379 // Return emalloc'd substring s[start:stop].
381 _Strsubstr(Rune* s, int start, int stop)
387 t = _Strndup(s+start, stop-start);
391 // Copy n chars to s1 from s2, and return s1+n
393 _Stradd(Rune* s1, Rune* s2, int n)
397 memmove(s1, s2, n*sizeof(Rune));
401 // Like strtol, but converting from Rune* string
403 //#define LONG_MAX 2147483647L
404 //#define LONG_MIN -2147483648L
407 _Strtol(Rune* nptr, Rune** endptr, int base)
411 int c, ovfl, v, neg, ndig;
438 if(*p=='-' || *p=='+')
450 if(p[1]=='x' || p[1]=='X'){
455 }else if(base==16 && *p=='0'){
456 if(p[1]=='x' || p[1]=='X')
458 }else if(base<0 || 36<base)
462 * Non-empty sequence of digits
469 else if('a'<=c && c<='z')
471 else if('A'<=c && c<='Z')
496 // Convert buf[0:n], bytes whose character set is chset,
497 // into an emalloc'd null-terminated Unicode string.
499 toStr(uchar* buf, int n, int chset)
509 ans = (Rune*)emalloc((n+1)*sizeof(Rune));
510 for(i = 0; i < n; i++)
517 for(i = 0; i < n; ) {
518 i += chartorune(&ch, (char*)(buf+i));
521 ans = (Rune*)emalloc((m+1)*sizeof(Rune));
523 for(i = 0; i < n; ) {
524 i += chartorune(&ch, (char*)(buf+i));
537 // Convert buf[0:n], Unicode characters,
538 // into an emalloc'd null-terminated string in character set chset.
539 // Use 0x80 for unconvertible characters.
541 fromStr(Rune* buf, int n, int chset)
553 ans = (uchar*)emalloc(n+1);
554 lim = (chset==US_Ascii)? 127 : 255;
555 for(i = 0; i < n; i++) {
566 for(i = 0; i < n; i++) {
567 m += runetochar((char*)s, &buf[i]);
569 ans = (uchar*)emalloc(m+1);
571 for(i = 0; i < n; i++)
572 p += runetochar((char*)p, &buf[i]);
583 // Convert n to an emalloc'd String (its decimal representation).
590 m = snprint((char*)buf, sizeof(buf), "%d", n);
591 return toStr(buf, m, US_Ascii);