1 d1f529f4 2005-10-29 devnull #include <u.h>
2 d1f529f4 2005-10-29 devnull #include <libc.h>
3 d1f529f4 2005-10-29 devnull #include <bio.h>
4 d1f529f4 2005-10-29 devnull #include <ctype.h>
5 d1f529f4 2005-10-29 devnull #include "msgdb.h"
/*
 * usage: write the command synopsis to file descriptor 2 and terminate
 * the process with the exit string "usage".
 * (The return-type line and the braces are not part of this listing;
 * the embedded line numbers jump from 8 to 10.)
 */
8 d1f529f4 2005-10-29 devnull usage(void)
10 d1f529f4 2005-10-29 devnull fprint(2, "usage: upas/msgclass [-a] [-d name dbfile]... [-l lockfile] [-m mul] [-t thresh] [tokenfile ...]\n");
11 d1f529f4 2005-10-29 devnull exits("usage");
/*
 * Fixed table sizes used throughout the file:
 *   MAXBEST - capacity of the best[] word table,
 *   MAXLEN  - size of the word text buffer Word.s,
 *   MAXTAB  - capacity of db[] and of the per-class arrays in Word.
 */
16 d1f529f4 2005-10-29 devnull MAXBEST = 32,
17 d1f529f4 2005-10-29 devnull MAXLEN = 64,
18 cbeb0b26 2006-04-01 devnull MAXTAB = 256
/*
 * Ndb: one message class supplied with a -d option.
 */
21 d1f529f4 2005-10-29 devnull typedef struct Ndb Ndb;
22 d1f529f4 2005-10-29 devnull struct Ndb
/* class label; first argument of -d, printed in the results */
24 d1f529f4 2005-10-29 devnull char *name;
/* database path; second argument of -d, passed to mdopen */
25 d1f529f4 2005-10-29 devnull char *file;
/* handle returned by mdopen(file, 0) */
26 d1f529f4 2005-10-29 devnull Msgdb *db;
/* class score computed in main: product of the kept words' p[], divided by the sum over classes */
27 d1f529f4 2005-10-29 devnull double p;
/* value stored under the key "*From*"; divisor for word counts (presumably the message count - confirm) */
28 d1f529f4 2005-10-29 devnull long nmsg;
/*
 * Word: per-class statistics for one input token.  The arrays are
 * indexed by class number, i.e. by position in db[].
 */
31 d1f529f4 2005-10-29 devnull typedef struct Word Word;
32 d1f529f4 2005-10-29 devnull struct Word
/* token text; filled in by noteword with strecpy, so truncated to fit MAXLEN */
34 d1f529f4 2005-10-29 devnull char s[MAXLEN];
/* count fetched from each class database (class 0 is weighted by 3 in main) */
35 d1f529f4 2005-10-29 devnull int count[MAXTAB];
/* per-class value: count/nmsg capped at 1.0, then divided by the sum and clamped to [0.001, 0.999] */
36 d1f529f4 2005-10-29 devnull double p[MAXTAB];
/* largest entry of p[]; noteword orders best[] by this field */
37 d1f529f4 2005-10-29 devnull double mp;
38 d1f529f4 2005-10-29 devnull int mi; /* w.p[w.mi] = w.mp */
/* not referenced by any line visible in this listing */
39 d1f529f4 2005-10-29 devnull int nmsg;
/* Class table filled by the -d options; main indexes it with 0..ndb-1. */
42 d1f529f4 2005-10-29 devnull Ndb db[MAXTAB];
/* Database opened with mdopen(nil, 1); process() accumulates the input token counts in it. */
47 d1f529f4 2005-10-29 devnull Msgdb *indb;
/* Words kept by noteword; entries 0..nbest-1 are the ones main reads. */
49 d1f529f4 2005-10-29 devnull Word best[MAXBEST];
/* Limit on how many words noteword keeps (compared against the insert position and nbest). */
50 d1f529f4 2005-10-29 devnull int mbest = 15;
/* Number of entries of best[] currently in use. */
51 d1f529f4 2005-10-29 devnull int nbest;
/* Forward declarations for functions defined after main. */
53 d1f529f4 2005-10-29 devnull void process(Biobuf*, char*);
54 d1f529f4 2005-10-29 devnull void lockfile(char*);
/*
 * noteword: offer word statistics *w, with text s, to the best[] table.
 *
 * The table is searched from its last used entry toward the front for
 * an entry whose mp is greater than w->mp; entries from the chosen
 * position onward are shifted up by one slot and *w is stored there,
 * with s copied into its text buffer (bounded by MAXLEN).
 *
 * NOTE(review): the statements on embedded lines 63-64, 67, 69 and 74
 * are not part of this listing.  Presumably they stop the scan, step i
 * to the insert position, return when the position is past mbest, drop
 * the last entry when the table is full, and increment nbest - confirm
 * against the full source.
 */
57 d1f529f4 2005-10-29 devnull noteword(Word *w, char *s)
/* scan backwards over the entries in use */
61 d1f529f4 2005-10-29 devnull for(i=nbest-1; i>=0; i--)
62 d1f529f4 2005-10-29 devnull if(w->mp < best[i].mp)
/* position at or beyond the limit */
66 d1f529f4 2005-10-29 devnull if(i >= mbest)
/* table already holds mbest words */
68 d1f529f4 2005-10-29 devnull if(nbest == mbest)
/* open a hole at position i; memmove because the source and destination overlap */
70 d1f529f4 2005-10-29 devnull if(i < nbest)
71 d1f529f4 2005-10-29 devnull memmove(&best[i+1], &best[i], (nbest-i)*sizeof(best[0]));
72 d1f529f4 2005-10-29 devnull best[i] = *w;
/* the struct copy above does not carry the caller's text, so store s explicitly */
73 d1f529f4 2005-10-29 devnull strecpy(best[i].s, best[i].s+MAXLEN, s);
/*
 * main: classify a set of token counts against the class databases
 * given with -d.
 *
 * Steps visible in this listing:
 *   1. parse options (-a, -d name dbfile, -l lockfile, -m mul, -t thresh);
 *   2. read token counts from standard input or the named token files
 *      into indb via process();
 *   3. take the lock file and open every class database;
 *   4. for each input word compute a per-class value and offer the word
 *      to noteword(), which keeps a bounded table in best[];
 *   5. multiply the kept words' values per class, normalize, pick the
 *      largest class, and print the verdict plus per-word details on
 *      file descriptor 1;
 *   6. if a class was chosen and add is nonzero, add the input counts
 *      (times mul) to that class's database.
 *
 * NOTE(review): the embedded line numbers skip, so closing braces,
 * break statements, some declarations (n, w) and a few assignments are
 * not shown here; comments below hedge wherever they depend on those.
 */
78 d1f529f4 2005-10-29 devnull main(int argc, char **argv)
80 d1f529f4 2005-10-29 devnull int i, bad, m, tot, nn, j;
81 d1f529f4 2005-10-29 devnull Biobuf bin, *b, bout;
82 d1f529f4 2005-10-29 devnull char *s, *lf;
83 d1f529f4 2005-10-29 devnull double totp, p, thresh;
/* defaults: no lock file, threshold 0 */
87 d1f529f4 2005-10-29 devnull lf = nil;
88 d1f529f4 2005-10-29 devnull thresh = 0;
89 d1f529f4 2005-10-29 devnull ARGBEGIN{
/* -a: body not shown; presumably sets the global add tested near the end of main */
90 d1f529f4 2005-10-29 devnull case 'a':
/* -d name dbfile: record one class; rejected once MAXTAB classes exist */
93 d1f529f4 2005-10-29 devnull case 'd':
94 d1f529f4 2005-10-29 devnull if(ndb >= MAXTAB)
95 d1f529f4 2005-10-29 devnull sysfatal("too many db classes");
96 d1f529f4 2005-10-29 devnull db[ndb].name = EARGF(usage());
97 d1f529f4 2005-10-29 devnull db[ndb].file = EARGF(usage());
/* -l lockfile: path later handed to lockfile() */
100 d1f529f4 2005-10-29 devnull case 'l':
101 d1f529f4 2005-10-29 devnull lf = EARGF(usage());
/* -m mul: multiplier applied to the input counts when updating a database */
103 d1f529f4 2005-10-29 devnull case 'm':
104 d1f529f4 2005-10-29 devnull mul = atoi(EARGF(usage()));
/* -t thresh: score compared against the winning class's p below */
106 d1f529f4 2005-10-29 devnull case 't':
107 d1f529f4 2005-10-29 devnull thresh = atof(EARGF(usage()));
109 d1f529f4 2005-10-29 devnull default:
110 d1f529f4 2005-10-29 devnull usage();
/* at least one class database is required */
113 d1f529f4 2005-10-29 devnull if(ndb == 0){
114 d1f529f4 2005-10-29 devnull fprint(2, "must have at least one -d option\n");
115 d1f529f4 2005-10-29 devnull usage();
/* accumulate the input token counts in indb: from fd 0 when no file arguments remain ... */
118 d1f529f4 2005-10-29 devnull indb = mdopen(nil, 1);
119 d1f529f4 2005-10-29 devnull if(argc == 0){
120 d1f529f4 2005-10-29 devnull Binit(&bin, 0, OREAD);
121 d1f529f4 2005-10-29 devnull process(&bin, "<stdin>");
122 d1f529f4 2005-10-29 devnull Bterm(&bin);
/* ... otherwise from each named file; a file that cannot be opened is reported and skipped */
124 d1f529f4 2005-10-29 devnull bad = 0;
125 d1f529f4 2005-10-29 devnull for(i=0; i<argc; i++){
126 d1f529f4 2005-10-29 devnull if((b = Bopen(argv[i], OREAD)) == nil){
127 d1f529f4 2005-10-29 devnull fprint(2, "opening %s: %r\n", argv[i]);
128 d1f529f4 2005-10-29 devnull bad = 1;
129 d1f529f4 2005-10-29 devnull continue;
131 d1f529f4 2005-10-29 devnull process(b, argv[i]);
132 d1f529f4 2005-10-29 devnull Bterm(b);
/* presumably guarded by a test of bad on the line not shown - confirm */
135 d1f529f4 2005-10-29 devnull exits("open inputs");
/* take the lock before touching the class databases, then open each one */
138 d1f529f4 2005-10-29 devnull lockfile(lf);
139 d1f529f4 2005-10-29 devnull bad = 0;
140 d1f529f4 2005-10-29 devnull for(i=0; i<ndb; i++){
141 d1f529f4 2005-10-29 devnull if((db[i].db = mdopen(db[i].file, 0)) == nil){
142 d1f529f4 2005-10-29 devnull fprint(2, "opendb %s: %r\n", db[i].file);
143 d1f529f4 2005-10-29 devnull bad = 1;
/*
 * NOTE(review): as listed, this mdget also runs when the mdopen above
 * returned nil; confirm that mdget accepts a nil handle or that the
 * line not shown (144) prevents reaching here in that case.
 */
145 d1f529f4 2005-10-29 devnull db[i].nmsg = mdget(db[i].db, "*From*");
/* presumably guarded by a test of bad on the line not shown - confirm */
148 d1f529f4 2005-10-29 devnull exits("open databases");
150 d1f529f4 2005-10-29 devnull /* run conditional probabilities of input words, getting 15 most specific */
151 d1f529f4 2005-10-29 devnull mdenum(indb);
152 d1f529f4 2005-10-29 devnull nbest = 0;
153 d1f529f4 2005-10-29 devnull while(mdnext(indb, &s, &n) >= 0){
154 d1f529f4 2005-10-29 devnull tot = 0;
155 d1f529f4 2005-10-29 devnull totp = 0.0;
156 d1f529f4 2005-10-29 devnull for(i=0; i<ndb; i++){
/* counts from class 0 are weighted by 3 */
157 d1f529f4 2005-10-29 devnull nn = mdget(db[i].db, s)*(i==0 ? 3 : 1);
158 d1f529f4 2005-10-29 devnull tot += nn;
159 d1f529f4 2005-10-29 devnull w.count[i] = nn;
/*
 * count relative to the class's "*From*" value, capped at 1.0.
 * NOTE(review): db[i].nmsg == 0 is not guarded on any visible line.
 */
160 d1f529f4 2005-10-29 devnull p = w.count[i]/(double)db[i].nmsg;
161 d1f529f4 2005-10-29 devnull if(p >= 1.0)
162 d1f529f4 2005-10-29 devnull p = 1.0;
163 d1f529f4 2005-10-29 devnull w.p[i] = p;
164 d1f529f4 2005-10-29 devnull totp += p;
166 cbeb0b26 2006-04-01 devnull /*fprint(2, "%s tot %d totp %g\n", s, tot, totp); */
/* skip words whose weighted total count over all classes is below 2 */
167 d1f529f4 2005-10-29 devnull if(tot < 2)
168 d1f529f4 2005-10-29 devnull continue;
/*
 * divide each class value by the sum over classes, clamp it to
 * [0.001, 0.999], and remember the largest value and its class index
 */
169 d1f529f4 2005-10-29 devnull w.mp = 0.0;
170 d1f529f4 2005-10-29 devnull for(i=0; i<ndb; i++){
171 d1f529f4 2005-10-29 devnull p = w.p[i];
172 d1f529f4 2005-10-29 devnull p /= totp;
173 d1f529f4 2005-10-29 devnull if(p < 0.001)
174 d1f529f4 2005-10-29 devnull p = 0.001;
175 d1f529f4 2005-10-29 devnull else if(p > 0.999)
176 d1f529f4 2005-10-29 devnull p = 0.999;
177 d1f529f4 2005-10-29 devnull if(p > w.mp){
178 d1f529f4 2005-10-29 devnull w.mp = p;
179 d1f529f4 2005-10-29 devnull w.mi = i;
181 d1f529f4 2005-10-29 devnull w.p[i] = p;
/* offer the word to the best[] table */
183 d1f529f4 2005-10-29 devnull noteword(&w, s);
186 d1f529f4 2005-10-29 devnull /* compute conditional probabilities of message classes using 15 most specific */
187 d1f529f4 2005-10-29 devnull totp = 0.0;
188 d1f529f4 2005-10-29 devnull for(i=0; i<ndb; i++){
189 d1f529f4 2005-10-29 devnull p = 1.0;
190 d1f529f4 2005-10-29 devnull for(j=0; j<nbest; j++)
191 d1f529f4 2005-10-29 devnull p *= best[j].p[i];
192 d1f529f4 2005-10-29 devnull db[i].p = p;
193 d1f529f4 2005-10-29 devnull totp += p;
/* scale the class scores by their sum */
195 d1f529f4 2005-10-29 devnull for(i=0; i<ndb; i++)
196 d1f529f4 2005-10-29 devnull db[i].p /= totp;
/* find the class with the largest score; m's starting value is set on a line not shown (presumably 0) */
198 d1f529f4 2005-10-29 devnull for(i=1; i<ndb; i++)
199 d1f529f4 2005-10-29 devnull if(db[i].p > db[m].p)
202 d1f529f4 2005-10-29 devnull Binit(&bout, 1, OWRITE);
/* winner below thresh: the line not shown presumably makes m negative, selecting "inconclusive" below */
203 d1f529f4 2005-10-29 devnull if(db[m].p < thresh)
/* first output line: verdict followed by name=score for every class */
205 d1f529f4 2005-10-29 devnull if(m >= 0)
206 d1f529f4 2005-10-29 devnull Bprint(&bout, "%s", db[m].name);
208 d1f529f4 2005-10-29 devnull Bprint(&bout, "inconclusive");
209 d1f529f4 2005-10-29 devnull for(j=0; j<ndb; j++)
210 d1f529f4 2005-10-29 devnull Bprint(&bout, " %s=%g", db[j].name, db[j].p);
211 d1f529f4 2005-10-29 devnull Bprint(&bout, "\n");
/* one line per kept word: the word followed by name=value for every class */
212 d1f529f4 2005-10-29 devnull for(i=0; i<nbest; i++){
213 d1f529f4 2005-10-29 devnull Bprint(&bout, "%s", best[i].s);
214 d1f529f4 2005-10-29 devnull for(j=0; j<ndb; j++)
215 d1f529f4 2005-10-29 devnull Bprint(&bout, " %s=%g", db[j].name, best[i].p[j]);
216 d1f529f4 2005-10-29 devnull Bprint(&bout, "\n");
/*
 * NOTE(review): line 217 is not shown.  If it closes the loop above,
 * the next statement runs with i == nbest (an entry noteword did not
 * fill) and indexes p[m] even when m is negative - verify.
 */
218 d1f529f4 2005-10-29 devnull Bprint(&bout, "%s %g\n", best[i].s, best[i].p[m]);
219 d1f529f4 2005-10-29 devnull Bterm(&bout);
/*
 * when a class was chosen and add is nonzero, add each input count
 * times mul to that class's database and close it.
 * NOTE(review): mul's declaration and initial value are not shown in
 * this listing; check what it is when -m is not given.
 */
221 d1f529f4 2005-10-29 devnull if(m >= 0 && add){
222 d1f529f4 2005-10-29 devnull mdenum(indb);
223 d1f529f4 2005-10-29 devnull while(mdnext(indb, &s, &n) >= 0)
224 d1f529f4 2005-10-29 devnull mdput(db[m].db, s, mdget(db[m].db, s)+n*mul);
225 d1f529f4 2005-10-29 devnull mdclose(db[m].db);
227 d1f529f4 2005-10-29 devnull exits(nil);
/*
 * process: read token lines from b and add their counts to indb.
 *
 * Each line is split at its last space: the text before it is the key
 * and the text after it is parsed with atoi as the count n.  The key's
 * stored value in indb is then increased by n.
 *
 * The second parameter is unnamed and unused; callers pass the input's
 * name.
 *
 * NOTE(review): the declaration of n and the lines between 241 and 244
 * are not part of this listing, so the value of n for a line with no
 * space cannot be determined here.
 */
231 d1f529f4 2005-10-29 devnull process(Biobuf *b, char*)
233 d1f529f4 2005-10-29 devnull char *s;
234 d1f529f4 2005-10-29 devnull char *p;
237 d1f529f4 2005-10-29 devnull while((s = Brdline(b, '\n')) != nil){
/* overwrite the line's final byte (the '\n' delimiter) with a terminator */
238 d1f529f4 2005-10-29 devnull s[Blinelen(b)-1] = 0;
239 d1f529f4 2005-10-29 devnull if((p = strrchr(s, ' ')) != nil){
/* cut the key at the space and parse what follows as the count */
240 d1f529f4 2005-10-29 devnull *p++ = 0;
241 d1f529f4 2005-10-29 devnull n = atoi(p);
/* read-modify-write: add n to whatever indb already holds for this key */
244 d1f529f4 2005-10-29 devnull mdput(indb, s, mdget(indb, s)+n);
/* Process id assigned from fork() in lockfile(); target of killtickle. */
248 d1f529f4 2005-10-29 devnull int tpid;
/*
 * killtickle: post the note "die" to process tpid.  lockfile()
 * registers this function with atexit.
 */
250 d1f529f4 2005-10-29 devnull killtickle(void)
252 d1f529f4 2005-10-29 devnull postnote(PNPROC, tpid, "die");
/*
 * lockfile: obtain the lock file s by opening it, retrying while the
 * open fails with an error string containing "file is locked" or
 * "exclusive lock".
 *
 * Between attempts the function sleeps w milliseconds; w grows by a
 * factor of 3/2 while it is below 1000, and t is compared against
 * 120*1000 as an overall limit.  If no descriptor was obtained the
 * program dies with "could not lock".  Afterwards a process is forked
 * (its id saved in tpid) that loops forever sleeping 30 seconds and
 * calling dirfstat on the descriptor; killtickle is registered with
 * atexit so that process is sent a note when this one exits.
 *
 * NOTE(review): the statements on the lines not shown in this listing
 * (262-264, 268, 271, 273, 277-278, 284, 288, 291-292) are presumably
 * the early return for a nil s, the initial values of w and t, the
 * loop breaks, the update of t, and the "case 0:" label - confirm
 * against the full source.
 */
256 d1f529f4 2005-10-29 devnull lockfile(char *s)
258 d1f529f4 2005-10-29 devnull int fd, t, w;
259 d1f529f4 2005-10-29 devnull char err[ERRMAX];
/* no lock file requested */
261 d1f529f4 2005-10-29 devnull if(s == nil)
265 d1f529f4 2005-10-29 devnull for(;;){
266 d1f529f4 2005-10-29 devnull fd = open(s, OREAD);
267 d1f529f4 2005-10-29 devnull if(fd >= 0)
269 d1f529f4 2005-10-29 devnull rerrstr(err, sizeof err);
/*
 * any error other than the two lock-contention messages is not worth
 * retrying.  (Fixed: this condition previously ended with an
 * unbalanced extra ')' and did not compile.)
 */
270 d1f529f4 2005-10-29 devnull if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)
/* back off before the next attempt */
272 d1f529f4 2005-10-29 devnull sleep(w);
274 d1f529f4 2005-10-29 devnull if(w < 1000)
275 d1f529f4 2005-10-29 devnull w = (w*3)/2;
/* overall limit on the accumulated wait */
276 d1f529f4 2005-10-29 devnull if(t > 120*1000)
279 d1f529f4 2005-10-29 devnull if(fd < 0)
280 d1f529f4 2005-10-29 devnull sysfatal("could not lock %s", s);
/* fork a helper process; its id is kept in tpid for killtickle */
281 d1f529f4 2005-10-29 devnull switch(tpid = fork()){
282 d1f529f4 2005-10-29 devnull case -1:
283 d1f529f4 2005-10-29 devnull sysfatal("fork: %r");
/* helper loop: touch the descriptor every 30 seconds (presumably to keep the lock from going stale - confirm) */
285 d1f529f4 2005-10-29 devnull for(;;){
286 d1f529f4 2005-10-29 devnull sleep(30*1000);
287 d1f529f4 2005-10-29 devnull free(dirfstat(fd));
289 d1f529f4 2005-10-29 devnull _exits(nil);
290 d1f529f4 2005-10-29 devnull default:
/* this process drops its copy of the descriptor and arranges for the helper to be told to die at exit */
293 d1f529f4 2005-10-29 devnull close(fd);
294 d1f529f4 2005-10-29 devnull atexit(killtickle);