Blob
1 #include <u.h>2 #include <libc.h>3 #include <bio.h>4 /* Macros for Rune support of ctype.h-like functions */6 #undef isupper7 #undef islower8 #undef isalpha9 #undef isdigit10 #undef isalnum11 #undef isspace12 #undef tolower13 #define isupper(r) ('A' <= (r) && (r) <= 'Z')14 #define islower(r) ('a' <= (r) && (r) <= 'z')15 #define isalpha(r) (isupper(r) || islower(r))16 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)18 #define isdigit(r) ('0' <= (r) && (r) <= '9')20 #define isalnum(r) (isalpha(r) || isdigit(r))22 #define isspace(r) ((r) == ' ' || (r) == '\t' \23 || (0x0A <= (r) && (r) <= 0x0D))25 #define tolower(r) ((r)-'A'+'a')27 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))29 #define WORDSIZ 400030 char *filename = "#9/lib/words";31 Biobuf *dfile;32 Biobuf bout;33 Biobuf bin;35 int fold;36 int direc;37 int exact;38 int iflag;39 int rev = 1; /*-1 for reverse-ordered file, not implemented*/40 int (*compare)(Rune*, Rune*);41 Rune tab = '\t';42 Rune entry[WORDSIZ];43 Rune word[WORDSIZ];44 Rune key[50], orig[50];45 Rune latin_fold_tab[] =46 {47 /* Table to fold latin 1 characters to ASCII equivalents48 based at Rune value 0xc050 À Á Â Ã Ä Å Æ Ç51 È É Ê Ë Ì Í Î Ï52 Ð Ñ Ò Ó Ô Õ Ö ×53 Ø Ù Ú Û Ü Ý Þ ß54 à á â ã ä å æ ç55 è é ê ë ì í î ï56 ð ñ ò ó ô õ ö ÷57 ø ù ú û ü ý þ ÿ58 */59 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',60 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',61 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,62 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,63 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',64 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',65 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,66 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',67 };69 int locate(void);70 int acomp(Rune*, Rune*);71 int getword(Biobuf*, Rune *rp, int n);72 void torune(char*, Rune*);73 void rcanon(Rune*, Rune*);74 int ncomp(Rune*, Rune*);76 void77 main(int argc, char *argv[])78 {79 int n;81 filename = unsharp(filename);83 Binit(&bin, 0, OREAD);84 Binit(&bout, 1, OWRITE);85 compare = acomp;86 ARGBEGIN{87 case 'd':88 direc++;89 break;90 case 'f':91 fold++;92 break;93 case 'i':94 iflag++;95 break;96 case 'n':97 compare = ncomp;98 break;99 case 't':100 chartorune(&tab,ARGF());101 break;102 case 'x':103 exact++;104 break;105 default:106 fprint(2, "%s: bad option %c\n", argv0, ARGC());107 fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);108 exits("usage");109 } ARGEND110 if(!iflag){111 if(argc >= 1) {112 torune(argv[0], orig);113 argv++;114 argc--;115 } else116 iflag++;117 }118 if(argc < 1) {119 direc++;120 fold++;121 } else122 filename = argv[0];123 if (!iflag)124 rcanon(orig, key);125 dfile = Bopen(filename, OREAD);126 if(dfile == 0) {127 fprint(2, "look: can't open %s\n", filename);128 exits("no dictionary");129 }130 if(!iflag)131 if(!locate())132 exits("not found");133 do {134 if(iflag) {135 Bflush(&bout);136 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))137 exits(0);138 rcanon(orig, key);139 if(!locate())140 continue;141 }142 if (!exact || !acomp(word, key))143 Bprint(&bout, "%S\n", entry);144 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {145 rcanon(entry, word);146 n = compare(key, word);147 switch(n) {148 case -1:149 if(exact)150 break;151 case 0:152 if (!exact || !acomp(word, orig))153 Bprint(&bout, "%S\n", entry);154 continue;155 }156 break;157 }158 } while(iflag);159 exits(0);160 }162 int163 locate(void)164 {165 vlong top, bot, mid;166 int c;167 int n;169 bot = 0;170 top = Bseek(dfile, 0L, 2);171 for(;;) {172 mid = (top+bot) / 2;173 Bseek(dfile, mid, 0);174 do175 c = Bgetrune(dfile);176 while(c>=0 && c!='\n');177 mid = Boffset(dfile);178 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))179 break;180 rcanon(entry, word);181 n = compare(key, word);182 switch(n) {183 case -2:184 case -1:185 case 0:186 if(top <= mid)187 break;188 top = mid;189 continue;190 case 1:191 case 2:192 bot = mid;193 continue;194 }195 break;196 }197 Bseek(dfile, bot, 0);198 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {199 rcanon(entry, word);200 n = compare(key, word);201 switch(n) {202 case -2:203 return 0;204 case -1:205 if(exact)206 return 0;207 case 0:208 return 1;209 case 1:210 case 2:211 continue;212 }213 }214 return 0;215 }217 /*218 * acomp(s, t) returns:219 * -2 if s strictly precedes t220 * -1 if s is a prefix of t221 * 0 if s is the same as t222 * 1 if t is a prefix of s223 * 2 if t strictly precedes s224 */226 int227 acomp(Rune *s, Rune *t)228 {229 int cs, ct;231 for(;;) {232 cs = *s;233 ct = *t;234 if(cs != ct)235 break;236 if(cs == 0)237 return 0;238 s++;239 t++;240 }241 if(cs == 0)242 return -1;243 if(ct == 0)244 return 1;245 if(cs < ct)246 return -2;247 return 2;248 }250 void251 torune(char *old, Rune *new)252 {253 do old += chartorune(new, old);254 while(*new++);255 }257 void258 rcanon(Rune *old, Rune *new)259 {260 Rune r;262 while((r = *old++) && r != tab) {263 if (islatin1(r) && latin_fold_tab[r-0xc0])264 r = latin_fold_tab[r-0xc0];265 if(direc)266 if(!(isalnum(r) || r == ' ' || r == '\t'))267 continue;268 if(fold)269 if(isupper(r))270 r = tolower(r);271 *new++ = r;272 }273 *new = 0;274 }276 int277 ncomp(Rune *s, Rune *t)278 {279 Rune *is, *it, *js, *jt;280 int a, b;281 int ssgn, tsgn;283 while(isspace(*s))284 s++;285 while(isspace(*t))286 t++;287 ssgn = tsgn = -2*rev;288 if(*s == '-') {289 s++;290 ssgn = -ssgn;291 }292 if(*t == '-') {293 t++;294 tsgn = -tsgn;295 }296 for(is = s; isdigit(*is); is++)297 ;298 for(it = t; isdigit(*it); it++)299 ;300 js = is;301 jt = it;302 a = 0;303 if(ssgn == tsgn)304 while(it>t && is>s)305 if(b = *--it - *--is)306 a = b;307 while(is > s)308 if(*--is != '0')309 return -ssgn;310 while(it > t)311 if(*--it != '0')312 return tsgn;313 if(a)314 return sgn(a)*ssgn;315 if(*(s=js) == '.')316 s++;317 if(*(t=jt) == '.')318 t++;319 if(ssgn == tsgn)320 while(isdigit(*s) && isdigit(*t))321 if(a = *t++ - *s++)322 return sgn(a)*ssgn;323 while(isdigit(*s))324 if(*s++ != '0')325 return -ssgn;326 while(isdigit(*t))327 if(*t++ != '0')328 return tsgn;329 return 0;330 }332 int333 getword(Biobuf *f, Rune *rp, int n)334 {335 long c;337 while(n-- > 0) {338 c = Bgetrune(f);339 if(c < 0)340 return 0;341 if(c == '\n') {342 *rp = '\0';343 return 1;344 }345 *rp++ = c;346 }347 fprint(2, "Look: word too long. Bailing out.\n");348 return 0;349 }