Blob
1 #include <u.h>2 #include <libc.h>3 #include <bio.h>4 /* Macros for Rune support of ctype.h-like functions */6 #define isupper(r) (L'A' <= (r) && (r) <= L'Z')7 #define islower(r) (L'a' <= (r) && (r) <= L'z')8 #define isalpha(r) (isupper(r) || islower(r))9 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)11 #define isdigit(r) (L'0' <= (r) && (r) <= L'9')13 #define isalnum(r) (isalpha(r) || isdigit(r))15 #define isspace(r) ((r) == L' ' || (r) == L'\t' \16 || (0x0A <= (r) && (r) <= 0x0D))18 #define tolower(r) ((r)-'A'+'a')20 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))22 #define WORDSIZ 400023 char *filename = "#9/lib/words";24 Biobuf *dfile;25 Biobuf bout;26 Biobuf bin;28 int fold;29 int direc;30 int exact;31 int iflag;32 int rev = 1; /*-1 for reverse-ordered file, not implemented*/33 int (*compare)(Rune*, Rune*);34 Rune tab = '\t';35 Rune entry[WORDSIZ];36 Rune word[WORDSIZ];37 Rune key[50], orig[50];38 Rune latin_fold_tab[] =39 {40 /* Table to fold latin 1 characters to ASCII equivalents41 based at Rune value 0xc043 À Á Â Ã Ä Å Æ Ç44 È É Ê Ë Ì Í Î Ï45 Ð Ñ Ò Ó Ô Õ Ö ×46 Ø Ù Ú Û Ü Ý Þ ß47 à á â ã ä å æ ç48 è é ê ë ì í î ï49 ð ñ ò ó ô õ ö ÷50 ø ù ú û ü ý þ ÿ51 */52 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',53 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',54 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,55 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,56 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',57 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',58 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,59 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',60 };62 int locate(void);63 int acomp(Rune*, Rune*);64 int getword(Biobuf*, Rune *rp, int n);65 void torune(char*, Rune*);66 void rcanon(Rune*, Rune*);67 int ncomp(Rune*, Rune*);69 void70 main(int argc, char *argv[])71 {72 int n;74 filename = unsharp(filename);76 Binit(&bin, 0, OREAD);77 Binit(&bout, 1, OWRITE);78 compare = acomp;79 ARGBEGIN{80 case 'd':81 direc++;82 break;83 case 'f':84 fold++;85 break;86 case 'i':87 iflag++;88 break;89 case 'n':90 compare = ncomp;91 break;92 case 't':93 chartorune(&tab,ARGF());94 break;95 case 'x':96 exact++;97 break;98 default:99 fprint(2, "%s: bad option %c\n", argv0, ARGC());100 fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);101 exits("usage");102 } ARGEND103 if(!iflag){104 if(argc >= 1) {105 torune(argv[0], orig);106 argv++;107 argc--;108 } else109 iflag++;110 }111 if(argc < 1) {112 direc++;113 fold++;114 } else115 filename = argv[0];116 if (!iflag)117 rcanon(orig, key);118 dfile = Bopen(filename, OREAD);119 if(dfile == 0) {120 fprint(2, "look: can't open %s\n", filename);121 exits("no dictionary");122 }123 if(!iflag)124 if(!locate())125 exits("not found");126 do {127 if(iflag) {128 Bflush(&bout);129 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))130 exits(0);131 rcanon(orig, key);132 if(!locate())133 continue;134 }135 if (!exact || !acomp(word, key))136 Bprint(&bout, "%S\n", entry);137 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {138 rcanon(entry, word);139 n = compare(key, word);140 switch(n) {141 case -1:142 if(exact)143 break;144 case 0:145 if (!exact || !acomp(word, orig))146 Bprint(&bout, "%S\n", entry);147 continue;148 }149 break;150 }151 } while(iflag);152 exits(0);153 }155 int156 locate(void)157 {158 long top, bot, mid;159 long c;160 int n;162 bot = 0;163 top = Bseek(dfile, 0L, 2);164 for(;;) {165 mid = (top+bot) / 2;166 Bseek(dfile, mid, 0);167 do168 c = Bgetrune(dfile);169 while(c>=0 && c!='\n');170 mid = Boffset(dfile);171 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))172 break;173 rcanon(entry, word);174 n = compare(key, word);175 switch(n) {176 case -2:177 case -1:178 case 0:179 if(top <= mid)180 break;181 top = mid;182 continue;183 case 1:184 case 2:185 bot = mid;186 continue;187 }188 break;189 }190 Bseek(dfile, bot, 0);191 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {192 rcanon(entry, word);193 n = compare(key, word);194 switch(n) {195 case -2:196 return 0;197 case -1:198 if(exact)199 return 0;200 case 0:201 return 1;202 case 1:203 case 2:204 continue;205 }206 }207 return 0;208 }210 /*211 * acomp(s, t) returns:212 * -2 if s strictly precedes t213 * -1 if s is a prefix of t214 * 0 if s is the same as t215 * 1 if t is a prefix of s216 * 2 if t strictly precedes s217 */219 int220 acomp(Rune *s, Rune *t)221 {222 int cs, ct;224 for(;;) {225 cs = *s;226 ct = *t;227 if(cs != ct)228 break;229 if(cs == 0)230 return 0;231 s++;232 t++;233 }234 if(cs == 0)235 return -1;236 if(ct == 0)237 return 1;238 if(cs < ct)239 return -2;240 return 2;241 }243 void244 torune(char *old, Rune *new)245 {246 do old += chartorune(new, old);247 while(*new++);248 }250 void251 rcanon(Rune *old, Rune *new)252 {253 Rune r;255 while((r = *old++) && r != tab) {256 if (islatin1(r) && latin_fold_tab[r-0xc0])257 r = latin_fold_tab[r-0xc0];258 if(direc)259 if(!(isalnum(r) || r == L' ' || r == L'\t'))260 continue;261 if(fold)262 if(isupper(r))263 r = tolower(r);264 *new++ = r;265 }266 *new = 0;267 }269 int270 ncomp(Rune *s, Rune *t)271 {272 Rune *is, *it, *js, *jt;273 int a, b;274 int ssgn, tsgn;276 while(isspace(*s))277 s++;278 while(isspace(*t))279 t++;280 ssgn = tsgn = -2*rev;281 if(*s == '-') {282 s++;283 ssgn = -ssgn;284 }285 if(*t == '-') {286 t++;287 tsgn = -tsgn;288 }289 for(is = s; isdigit(*is); is++)290 ;291 for(it = t; isdigit(*it); it++)292 ;293 js = is;294 jt = it;295 a = 0;296 if(ssgn == tsgn)297 while(it>t && is>s)298 if(b = *--it - *--is)299 a = b;300 while(is > s)301 if(*--is != '0')302 return -ssgn;303 while(it > t)304 if(*--it != '0')305 return tsgn;306 if(a)307 return sgn(a)*ssgn;308 if(*(s=js) == '.')309 s++;310 if(*(t=jt) == '.')311 t++;312 if(ssgn == tsgn)313 while(isdigit(*s) && isdigit(*t))314 if(a = *t++ - *s++)315 return sgn(a)*ssgn;316 while(isdigit(*s))317 if(*s++ != '0')318 return -ssgn;319 while(isdigit(*t))320 if(*t++ != '0')321 return tsgn;322 return 0;323 }325 int326 getword(Biobuf *f, Rune *rp, int n)327 {328 long c;330 while(n-- > 0) {331 c = Bgetrune(f);332 if(c < 0)333 return 0;334 if(c == '\n') {335 *rp = L'\0';336 return 1;337 }338 *rp++ = c;339 }340 fprint(2, "Look: word too long. Bailing out.\n");341 return 0;342 }