7 /* fig leaves for possibly signed char quantities: every argument is masked with 0xff so a negative char value never reaches the classifier or the table index */
8 #define ISUPPER(c) isupper((c)&0xff)
9 #define ISLOWER(c) islower((c)&0xff)
10 #define ISALPHA(c) isalpha((c)&0xff)
11 #define ISDIGIT(c) isdigit((c)&0xff)
12 #define ISVOWEL(c) voweltab[(c)&0xff] /* table lookup indexed by the masked byte */
13 #define Tolower(c) (ISUPPER(c)? (c)-'A'+'a': (c)) /* expands c more than once: do not pass an argument with side effects */
14 #define pair(a,b) (((a)<<8) | (b)) /* a shifted left 8 bits, OR'd with b; note b is not masked here */
19 #define Set(h, f) ((long)(h) & (f)) /* nonzero when any bit of f is also set in h; h is cast to long before the AND */
21 Bits nop(char*, char*, char*, int, int); /* suffix handlers from here through VCe all take (ep, d, a, lev, flag); the common signature lets them be called through function pointers such as p1/p2 */
22 Bits strip(char*, char*, char*, int, int);
23 Bits ize(char*, char*, char*, int, int);
24 Bits i_to_y(char*, char*, char*, int, int);
25 Bits ily(char*, char*, char*, int, int);
26 Bits subst(char*, char*, char*, int, int);
27 Bits CCe(char*, char*, char*, int, int);
28 Bits tion(char*, char*, char*, int, int);
29 Bits an(char*, char*, char*, int, int);
30 Bits s(char*, char*, char*, int, int);
31 Bits es(char*, char*, char*, int, int);
32 Bits bility(char*, char*, char*, int, int);
33 Bits y_to_e(char*, char*, char*, int, int);
34 Bits VCe(char*, char*, char*, int, int);
36 Bits trypref(char*, char*, int, int); /* (ep, a, lev, flag) */
37 Bits tryword(char*, char*, int, int); /* (bp, ep, lev, flag) */
38 Bits trysuff(char*, int, int); /* (ep, lev, flag) */
39 Bits dict(char*, char*); /* (bp, ep) */
46 int inun(char*, Bits); /* (bp, h) */
50 typedef struct Ptab Ptab;
57 typedef struct Suftab Suftab;
61 Bits (*p1)(char*, char*, char*, int, int);
67 Bits (*p2)(char*, char*, char*, int, int);
74 {"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
80 {"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
81 {"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
82 {"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
83 {"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
84 {"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
85 {"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
86 {"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
87 {"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
88 {"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
93 {"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
94 {"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
100 * V_affix for comment ->commence->commentment??
102 {"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
103 {"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
104 {"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
105 {"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
106 {"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
107 {"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
108 {"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
113 {"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
114 {"gnikam",strip,6,"","+making",NOUN,NOUN},
115 {"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
116 {"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
121 {"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
122 {"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
123 {"latnem",strip,2,"","+al",N_AFFIX,ADJ},
124 {"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
125 {"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
130 /* congregational + ism */
131 {"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
132 {"margo",subst,-1,"-ph+m","",NOUN,NOUN},
137 {"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
138 {"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
139 {"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
140 {"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
141 {"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
142 {"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
143 {"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
144 {"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
145 {"nem",strip,3,"","+man",MAN,PROP_COLLECT},
146 {"nosrep",strip,6,"","+person",MAN,PROP_COLLECT},
151 {"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
156 {"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
157 {"reyhparg",nop,0,"","",0,NOUN},
158 {"reyl",nop,0,"","",0,NOUN},
159 {"rekam",strip,5,"","+maker",NOUN,NOUN},
160 {"repeek",strip,6,"","+keeper",NOUN,NOUN},
161 {"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"},
162 {"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
163 {"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
164 {"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
169 {"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
170 {"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
171 {"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"},
172 {"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
173 {"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
178 {"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
179 {"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" },
180 {"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
181 {"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
186 {"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
187 {"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
188 {"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
189 {"ytisuo",nop,0,"","",NOUN},
190 {"ytilb",nop,0,"","",0,NOUN},
191 {"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
192 {"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
193 {"ylc",nop,0,"","",0},
194 {"ylelb",nop,0,"","",0},
195 {"ylelp",nop,0,"","",0},
196 {"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
197 {"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
198 {"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
368 "under", 0, /*must precede un*/
425 enum { NONE, SUFF, PREF} type;
438 char affix[DSIZ*10]; /* 10 is longest affix message */
442 char space[300000]; /* must be as large as "words"+"space" in pcode run */
443 Bits encode[2048]; /* must be as long as "codes" in pcode run */
446 char* spacep[128*128+1]; /* pointer to words starting with 'xx' */
450 char* codefile = "#9/lib/amspell";
451 char* brfile = "#9/lib/brspell";
452 char* Usage = "usage";
455 main(int argc, char *argv[])
463 codefile = unsharp(codefile);
464 brfile = unsharp(brfile);
466 Binit(&bin, 0, OREAD);
467 Binit(&bout, 1, OWRITE);
468 for(i=0; c = "aeiouyAEIOUY"[i]; i++)
471 if(argv[1][0] != '-')
473 for(i=1; c = argv[1][i]; i++)
476 fprint(2, "usage: spell [-bcCvx] [-f file]\n");
489 case 'C': /* for "correct" */
491 case 'c': /* for ocr */
505 fprint(2, "spell: -f requires another argument\n");
520 fprint(2, "usage: spell [-bcCvx] [-f file]\n");
528 original = Brdline(&bin, '\n');
531 original[Blinelen(&bin)-1] = 0;
536 while(*original != ':')
539 while(*++original != ':')
544 for(ep=word,dp=original; j = *dp; ep++,dp++) {
547 if(ep >= word+sizeof(word)-1)
553 if(ISDIGIT(word[0]) && ordinal())
557 if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
558 for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
561 for(;;) { /* at most twice */
562 if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
564 if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
566 if(!ISUPPER(word[0]))
575 word[0] = Tolower(word[0]);
579 if(!h || Set(h,STOP))
584 print("%c",'0' + (suffcount>0) +
585 (prefcount>4? 8: 2*prefcount));
586 } else if(!h || Set(h,STOP)) {
588 Bprint(&bout, "%s:%s\n", acmeid, original);
590 Bprint(&bout, "%s\n", original);
591 } else if(affix[0] != 0 && affix[0] != '.')
592 print("%s\t%s\n", affix, original);
597 /* strip exactly one suffix and do
598 * indicated routine(s), which may recursively
602 trysuff(char* ep, int lev, int flag)
607 int initchar = ep[-1];
612 deriv[lev] = emptyderiv;
613 deriv[lev-1] = emptyderiv;
615 if(!ISLOWER(initchar))
617 for(t=suftab[initchar-'a']; sp=t->suf; t++) {
622 for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);)
626 if(!(t->affixable & flag))
628 h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
631 deriv[lev] = emptyderiv;
632 deriv[lev+1] = emptyderiv;
634 h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
643 nop(char* ep, char* d, char* a, int lev, int flag)
654 cstrip(char* ep, char* d, char* a, int lev, int flag)
658 if(ISVOWEL(temp) && ISVOWEL(ep[-1])) {
659 switch(pair(ep[-1],ep[0])) {
671 if(temp==ep[-1]&&temp==ep[-2])
673 return strip(ep,d,a,lev,flag);
677 strip(char* ep, char* d, char* a, int lev, int flag)
679 Bits h = trypref(ep, a, lev, flag);
682 if(Set(h,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2]))
686 if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1]==ep[-2]) {
687 h = trypref(ep-1,a,lev,flag|MONO);
691 return trysuff(ep,lev,flag);
695 s(char* ep, char* d, char* a, int lev, int flag)
702 if(ISVOWEL(ep[-2])||ISUPPER(*word))
703 break; /*says Kennedys*/
716 return strip(ep,d,a,lev,flag);
720 an(char* ep, char* d, char* a, int lev, int flag)
723 if(!ISUPPER(*word)) /*must be proper name*/
725 return trypref(ep,a,lev,flag);
729 ize(char* ep, char* d, char* a, int lev, int flag)
736 h = strip(ep,"",d,lev,flag);
742 y_to_e(char* ep, char* d, char* a, int lev, int flag)
756 h = strip(ep,"",d,lev,flag);
762 ily(char* ep, char* d, char* a, int lev, int flag)
767 if(temp==ep[-1]&&temp==ep[-2]) /* sillly */
769 if(*--cp=='y' && !ISVOWEL(*--cp)) /* happyly */
771 if(ISVOWEL(*--cp)) /* shyness */
774 return i_to_y(ep,d,a,lev,flag);
775 return cstrip(ep,d,a,lev,flag);
779 bility(char* ep, char* d, char* a, int lev, int flag)
782 return y_to_e(ep,d,a,lev,flag);
786 i_to_y(char* ep, char* d, char* a, int lev, int flag)
793 if((temp=ep[-1])=='i' && !ISVOWEL(ep[-2])) {
797 h = cstrip(ep,"",a,lev,flag);
803 es(char* ep, char* d, char* a, int lev, int flag)
811 return i_to_y(ep,d,a,lev,flag);
823 return strip(ep,d,a,lev,flag);
828 subst(char* ep, char* d, char* a, int lev, int flag)
834 if(skipv(skipv(ep-1)) < word)
836 for(t=d; *t!='+'; t++)
838 for(u=ep; *--t!='-';)
840 h = strip(ep,"",d,lev,flag);
849 tion(char* ep, char* d, char* a, int lev, int flag)
853 return trypref(ep,a,lev,flag);
859 return y_to_e(ep,d,a,lev,flag);
864 * possible consonant-consonant-e ending
867 CCe(char* ep, char* d, char* a, int lev, int flag)
881 return y_to_e(ep,d,a,lev,flag);
886 if(*ep == 'a') /* prevent -able for -eable */
896 if(h = y_to_e(ep,d,a,lev,flag))
898 if(!(ep[-2]=='n' && ep[-1]=='g'))
901 return VCe(ep,d,a,lev,flag);
905 * possible consonant-vowel-consonant-e ending
908 VCe(char* ep, char* d, char* a, int lev, int flag)
916 if(!ISVOWEL(c) && ISVOWEL(ep[-2])) {
919 h = trypref(ep,d,lev,flag);
921 h = trysuff(ep,lev,flag);
927 return cstrip(ep,d,a,lev,flag);
931 lookuppref(uchar** wp, char* ep)
935 unsigned int initchar = Tolower(**wp);
937 if(!ISALPHA(initchar))
939 for(sp=preftab[initchar-'a'];sp->s;sp++) {
941 for(cp= (uchar*)sp->s;*cp; )
944 for(cp=bp;cp<(uchar*)ep;cp++)
954 /* while word is not in dictionary try stripping
955 * prefixes. Fail if no more prefixes.
958 trypref(char* ep, char* a, int lev, int flag)
968 deriv[lev].type = *a=='.'? NONE: SUFF;
970 if(h = tryword(word,ep,lev,flag)) {
971 if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
978 deriv[lev+1].mesg = pp;
979 deriv[lev+1].type = 0;
981 while(tp=lookuppref((uchar**)(void*)&bp,ep)) {
984 while(pp<space+sizeof(space) && (*pp = *cp++))
986 deriv[lev+1].type += PREF;
987 h = tryword(bp,ep,lev+1,flag);
989 ((tp->flag&IN) && inun(bp-2,h)==0)) {
993 if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
998 deriv[lev+1] = emptyderiv;
999 deriv[lev+2] = emptyderiv;
1005 tryword(char* bp, char* ep, int lev, int flag)
1015 deriv[++lev].mesg = duple;
1016 deriv[lev].type = SUFF;
1023 if(vflag==0 || h==0)
1026 * when derivations are wanted, collect them
1030 prefcount = suffcount = 0;
1032 if(j<DSIZ && deriv[j].type) {
1033 strcat(affix, deriv[j].mesg);
1034 if(deriv[j].type == SUFF)
1036 else if(deriv[j].type != NONE)
1037 prefcount = deriv[j].type/PREF;
1044 inun(char* bp, Bits h)
1047 return Set(h, IN) == 0;
1053 return bp[1] == 'r';
1056 return bp[1] == 'm';
1058 return bp[1] == 'n';
1064 if(s >= word && ISVOWEL(*s))
1066 while(s >= word && !ISVOWEL(*s))
1072 * crummy way to Britishise
1081 for(p = suftab[i]; p->suf; p++) {
1082 p->suf = ztos(p->suf);
1083 p->d1 = ztos(p->d1);
1084 p->a1 = ztos(p->a1);
1107 dict(char* bp, char* ep)
1109 char *cp, *cp1, *w, *wp, *we;
1127 fprint(2, "=%.*s\n", utfnlen(w, n), w);
1131 * find the beginning of some word in the middle
1133 cp = bp + (ep-bp)/2;
1135 while(cp > bp && !(*cp & 0x80))
1137 while(cp > bp && (cp[-1] & 0x80))
1140 wp = w + 2; /* skip two letters */
1141 cp1 = cp + 2; /* skip affix code */
1160 while(!(*cp1 & 0x80))
1169 f = ((cp[0] & 0x7) << 8) |
1172 fprint(2, "=%.*s ", utfnlen(w, n), w);
1173 typeprint(encode[f]);
1185 if(h & PROP_COLLECT)
1188 if((h & VERB) == VERB)
1191 if((h & VERB) == V_IRREG)
1200 if((h & COMP) == ACTOR)
1243 fprint(2, ",%s", s);
1247 * is the word one of the following
1252 * called knowing word[0] is a digit
1263 if(ISUPPER(cp[0]) && ISUPPER(cp[1])) {
1264 sp[0] = Tolower(cp[0]);
1265 sp[1] = Tolower(cp[1]);
1267 return 0 == strncmp(sp,
1268 cp[-2]=='1'? "th": /* out of bounds if 1 digit */
1269 *--cp=='1'? "st": /* harmless */
1276 * read in the dictionary.
1280 * long encode[nencode];
1284 * the encodings are a table all different
1286 * the dictionary proper has 2 bytes
1287 * that demark and then the rest of the
1288 * word. the 2 bytes have the following
1290 * 0x78 0x00 count of prefix bytes
1291 * common with prev word
1292 * 0x07 0xff affix code
1294 * all ints are stored big-endian in the file.
1297 readdict(char *file)
1299 char *s, *is, *lasts, *ls;
1307 fprint(2, "cannot open %s\n", file);
1310 if(read(f, space, 2) != 2)
1312 nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
1313 if(read(f, space, 4*nencode) != 4*nencode)
1316 for(i=0; i<nencode; i++) {
1317 l = (long)(s[0] & 0xff) << 24;
1318 l |= (s[1] & 0xff) << 16;
1319 l |= (s[2] & 0xff) << 8;
1321 encode[i] = (Bits)l;
1324 l = read(f, space, sizeof(space));
1325 if(l == sizeof(space))
1327 is = space + (sizeof(space) - l);
1328 memmove(is, space, l);
1342 *s = 0x80; /* fence */
1347 *s++ = *is++ & 0xff;
1349 i = (*is++ & 0xff)*128;
1352 i = i/128*128 + (*is++ & 0xff);
1354 fprint(2, "the dict isnt sorted or \n");
1355 fprint(2, "memmove didn't work\n");
1366 if(is >= space+sizeof(space)) {
1379 fprint(2, "trouble reading %s\n", file);
1382 fprint(2, "not enough space for dictionary\n");