Blob
- Date:
- Message:
- sort: use noted(NDFLT) in note handler There are many more random notes flying around here in Unix-land than there were on Plan 9. For example, some shells implement "cat file | sort" with cat as the child of sort, so that when cat exits, sort gets a "sys: child" note. noted(NDFLT) knows which signals aren't really important and can be ignored, and which need to kill the program.
- Actions:
- History | Blame | Raw File
1 #include <u.h>2 #include <libc.h>3 #include <bio.h>5 /*6 bugs:7 00/ff for end of file can conflict with 00/ff characters8 */10 enum11 {12 Nline = 500000, /* default max number of lines saved in memory */13 Nmerge = 10, /* max number of temporary files merged */14 Nfield = 20, /* max number of argument fields */16 Bflag = 1<<0, /* flags per field */17 B1flag = 1<<1,19 Dflag = 1<<2,20 Fflag = 1<<3,21 Gflag = 1<<4,22 Iflag = 1<<5,23 Mflag = 1<<6,24 Nflag = 1<<7,25 Rflag = 1<<8,26 Wflag = 1<<9,28 NSstart = 0, /* states for number to key decoding */29 NSsign,30 NSzero,31 NSdigit,32 NSpoint,33 NSfract,34 NSzerofract,35 NSexp,36 NSexpsign,37 NSexpdigit38 };40 typedef struct Line Line;41 typedef struct Key Key;42 typedef struct Merge Merge;43 typedef struct Field Field;45 struct Line46 {47 Key* key;48 int llen; /* always >= 1 */49 uchar line[1]; /* always ends in '\n' */50 };52 struct Merge53 {54 Key* key; /* copy of line->key so (Line*) looks like (Merge*) */55 Line* line; /* line at the head of a merged temp file */56 int fd; /* file descriptor */57 Biobuf b; /* iobuf for reading a temp file */58 };60 struct Key61 {62 int klen;63 uchar key[1];64 };66 struct Field67 {68 int beg1;69 int beg2;70 int end1;71 int end2;73 long flags;74 uchar mapto[256];76 void (*dokey)(Key*, uchar*, uchar*, Field*);77 };79 struct args80 {81 char* ofile;82 char* tname;83 Rune tabchar;84 char cflag;85 char uflag;86 char vflag;87 int nfield;88 int nfile;89 Field field[Nfield];91 Line** linep;92 long nline; /* number of lines in this temp file */93 long lineno; /* overall ordinal for -s option */94 int ntemp;95 long mline; /* max lines per file */96 } args;98 extern int latinmap[];99 extern Rune* month[12];101 void buildkey(Line*);102 void doargs(int, char*[]);103 void dofield(char*, int*, int*, int, int);104 void dofile(Biobuf*);105 void dokey_(Key*, uchar*, uchar*, Field*);106 void dokey_dfi(Key*, uchar*, uchar*, Field*);107 void dokey_gn(Key*, uchar*, uchar*, Field*);108 void dokey_m(Key*, uchar*, uchar*, Field*);109 void dokey_r(Key*, uchar*, uchar*, Field*);110 void done(char*);111 int kcmp(Key*, Key*);112 void makemapd(Field*);113 void makemapm(Field*);114 void mergefiles(int, int, Biobuf*);115 void mergeout(Biobuf*);116 void newfield(void);117 Line* newline(Biobuf*);118 void nomem(void);119 void notifyf(void*, char*);120 void printargs(void);121 void printout(Biobuf*);122 void setfield(int, int);123 uchar* skip(uchar*, int, int, int, int);124 void sort4(void*, ulong);125 char* tempfile(int);126 void tempout(void);127 void lineout(Biobuf*, Line*);129 void130 main(int argc, char *argv[])131 {132 int i, f;133 char *s;134 Biobuf bbuf;136 notify(notifyf); /**/137 doargs(argc, argv);138 if(args.vflag)139 printargs();141 for(i=1; i<argc; i++) {142 s = argv[i];143 if(s == 0)144 continue;145 if(strcmp(s, "-") == 0) {146 Binit(&bbuf, 0, OREAD);147 dofile(&bbuf);148 Bterm(&bbuf);149 continue;150 }151 f = open(s, OREAD);152 if(f < 0) {153 fprint(2, "sort: open %s: %r\n", s);154 done("open");155 }156 Binit(&bbuf, f, OREAD);157 dofile(&bbuf);158 Bterm(&bbuf);159 close(f);160 }161 if(args.nfile == 0) {162 Binit(&bbuf, 0, OREAD);163 dofile(&bbuf);164 Bterm(&bbuf);165 }166 if(args.cflag)167 done(0);168 if(args.vflag)169 fprint(2, "=========\n");171 f = 1;172 if(args.ofile) {173 f = create(args.ofile, OWRITE, 0666);174 if(f < 0) {175 fprint(2, "sort: create %s: %r\n", args.ofile);176 done("create");177 }178 }180 Binit(&bbuf, f, OWRITE);181 if(args.ntemp) {182 tempout();183 mergeout(&bbuf);184 } else {185 printout(&bbuf);186 }187 Bterm(&bbuf);188 done(0);189 }191 void192 dofile(Biobuf *b)193 {194 Line *l, *ol;195 int n;197 if(args.cflag) {198 ol = newline(b);199 if(ol == 0)200 return;201 for(;;) {202 l = newline(b);203 if(l == 0)204 break;205 n = kcmp(ol->key, l->key);206 if(n > 0 || (n == 0 && args.uflag)) {207 fprint(2, "sort: -c file not in sort\n"); /**/208 done("order");209 }210 free(ol->key);211 free(ol);212 ol = l;213 }214 return;215 }217 if(args.linep == 0) {218 args.linep = malloc(args.mline * sizeof(args.linep));219 if(args.linep == 0)220 nomem();221 }222 for(;;) {223 l = newline(b);224 if(l == 0)225 break;226 if(args.nline >= args.mline)227 tempout();228 args.linep[args.nline] = l;229 args.nline++;230 args.lineno++;231 }232 }234 void235 notifyf(void *a, char *s)236 {237 USED(a);238 if(strcmp(s, "interrupt") == 0)239 done(0);240 if(strcmp(s, "hangup") == 0)241 done(0);242 if(strcmp(s, "kill") == 0)243 done(0);244 if(strncmp(s, "sys: write on closed pipe", 25) == 0)245 done(0);246 noted(NDFLT);247 }249 Line*250 newline(Biobuf *b)251 {252 Line *l;253 char *p;254 int n, c;256 p = Brdline(b, '\n');257 n = Blinelen(b);258 if(p == 0) {259 if(n == 0)260 return 0;261 l = 0;262 for(n=0;;) {263 if((n & 31) == 0) {264 l = realloc(l, sizeof(Line) +265 (n+31)*sizeof(l->line[0]));266 if(l == 0)267 nomem();268 }269 c = Bgetc(b);270 if(c < 0) {271 fprint(2, "sort: newline added\n");272 c = '\n';273 }274 l->line[n++] = c;275 if(c == '\n')276 break;277 }278 l->llen = n;279 buildkey(l);280 return l;281 }282 l = malloc(sizeof(Line) +283 (n-1)*sizeof(l->line[0]));284 if(l == 0)285 nomem();286 l->llen = n;287 memmove(l->line, p, n);288 buildkey(l);289 return l;290 }292 void293 lineout(Biobuf *b, Line *l)294 {295 int n, m;297 n = l->llen;298 m = Bwrite(b, l->line, n);299 if(n != m)300 exits("write");301 }303 void304 tempout(void)305 {306 long n;307 Line **lp, *l;308 char *tf;309 int f;310 Biobuf tb;312 sort4(args.linep, args.nline);313 tf = tempfile(args.ntemp);314 args.ntemp++;315 f = create(tf, OWRITE, 0666);316 if(f < 0) {317 fprint(2, "sort: create %s: %r\n", tf);318 done("create");319 }321 Binit(&tb, f, OWRITE);322 lp = args.linep;323 for(n=args.nline; n>0; n--) {324 l = *lp++;325 lineout(&tb, l);326 free(l->key);327 free(l);328 }329 args.nline = 0;330 Bterm(&tb);331 close(f);332 }334 void335 done(char *xs)336 {337 int i;339 for(i=0; i<args.ntemp; i++)340 remove(tempfile(i));341 exits(xs);342 }344 void345 nomem(void)346 {347 fprint(2, "sort: out of memory\n");348 done("mem");349 }351 char*352 tempfile(int n)353 {354 static char file[100];355 static uint pid;356 char *dir;358 dir = "/var/tmp";359 if(args.tname)360 dir = args.tname;361 if(strlen(dir) >= nelem(file)-20) {362 fprint(2, "temp file directory name is too long: %s\n", dir);363 done("tdir");364 }366 if(pid == 0) {367 pid = getpid();368 if(pid == 0) {369 pid = time(0);370 if(pid == 0)371 pid = 1;372 }373 }375 sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);376 return file;377 }379 void380 mergeout(Biobuf *b)381 {382 int n, i, f;383 char *tf;384 Biobuf tb;386 for(i=0; i<args.ntemp; i+=n) {387 n = args.ntemp - i;388 if(n > Nmerge) {389 tf = tempfile(args.ntemp);390 args.ntemp++;391 f = create(tf, OWRITE, 0666);392 if(f < 0) {393 fprint(2, "sort: create %s: %r\n", tf);394 done("create");395 }396 Binit(&tb, f, OWRITE);398 n = Nmerge;399 mergefiles(i, n, &tb);401 Bterm(&tb);402 close(f);403 } else404 mergefiles(i, n, b);405 }406 }408 void409 mergefiles(int t, int n, Biobuf *b)410 {411 Merge *m, *mp, **mmp;412 Key *ok;413 Line *l;414 char *tf;415 int i, f, nn;417 mmp = malloc(n*sizeof(*mmp));418 mp = malloc(n*sizeof(*mp));419 if(mmp == 0 || mp == 0)420 nomem();422 nn = 0;423 m = mp;424 for(i=0; i<n; i++,m++) {425 tf = tempfile(t+i);426 f = open(tf, OREAD);427 if(f < 0) {428 fprint(2, "sort: reopen %s: %r\n", tf);429 done("open");430 }431 m->fd = f;432 Binit(&m->b, f, OREAD);433 mmp[nn] = m;435 l = newline(&m->b);436 if(l == 0)437 continue;438 nn++;439 m->line = l;440 m->key = l->key;441 }443 ok = 0;444 for(;;) {445 sort4(mmp, nn);446 m = *mmp;447 if(nn == 0)448 break;449 for(;;) {450 l = m->line;451 if(args.uflag && ok && kcmp(ok, l->key) == 0) {452 free(l->key);453 free(l);454 } else {455 lineout(b, l);456 if(ok)457 free(ok);458 ok = l->key;459 free(l);460 }462 l = newline(&m->b);463 if(l == 0) {464 nn--;465 mmp[0] = mmp[nn];466 break;467 }468 m->line = l;469 m->key = l->key;470 if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)471 break;472 }473 }474 if(ok)475 free(ok);477 m = mp;478 for(i=0; i<n; i++,m++) {479 Bterm(&m->b);480 close(m->fd);481 }483 free(mp);484 free(mmp);485 }487 int488 kcmp(Key *ka, Key *kb)489 {490 int n, m;492 /*493 * set n to length of smaller key494 */495 n = ka->klen;496 m = kb->klen;497 if(n > m)498 n = m;499 return memcmp(ka->key, kb->key, n);500 }502 void503 printout(Biobuf *b)504 {505 long n;506 Line **lp, *l;507 Key *ok;509 sort4(args.linep, args.nline);510 lp = args.linep;511 ok = 0;512 for(n=args.nline; n>0; n--) {513 l = *lp++;514 if(args.uflag && ok && kcmp(ok, l->key) == 0)515 continue;516 lineout(b, l);517 ok = l->key;518 }519 }521 void522 setfield(int n, int c)523 {524 Field *f;526 f = &args.field[n];527 switch(c) {528 default:529 fprint(2, "sort: unknown option: field.%C\n", c);530 done("option");531 case 'b': /* skip blanks */532 f->flags |= Bflag;533 break;534 case 'd': /* directory order */535 f->flags |= Dflag;536 break;537 case 'f': /* fold case */538 f->flags |= Fflag;539 break;540 case 'g': /* floating point -n case */541 f->flags |= Gflag;542 break;543 case 'i': /* ignore non-ascii */544 f->flags |= Iflag;545 break;546 case 'M': /* month */547 f->flags |= Mflag;548 break;549 case 'n': /* numbers */550 f->flags |= Nflag;551 break;552 case 'r': /* reverse */553 f->flags |= Rflag;554 break;555 case 'w': /* ignore white */556 f->flags |= Wflag;557 break;558 }559 }561 void562 dofield(char *s, int *n1, int *n2, int off1, int off2)563 {564 int c, n;566 c = *s++;567 if(c >= '0' && c <= '9') {568 n = 0;569 while(c >= '0' && c <= '9') {570 n = n*10 + (c-'0');571 c = *s++;572 }573 n -= off1; /* posix committee: rot in hell */574 if(n < 0) {575 fprint(2, "sort: field offset must be positive\n");576 done("option");577 }578 *n1 = n;579 }580 if(c == '.') {581 c = *s++;582 if(c >= '0' && c <= '9') {583 n = 0;584 while(c >= '0' && c <= '9') {585 n = n*10 + (c-'0');586 c = *s++;587 }588 n -= off2;589 if(n < 0) {590 fprint(2, "sort: character offset must be positive\n");591 done("option");592 }593 *n2 = n;594 }595 }596 while(c != 0) {597 setfield(args.nfield, c);598 c = *s++;599 }600 }602 void603 printargs(void)604 {605 int i, n;606 Field *f;607 char *prefix;609 fprint(2, "sort");610 for(i=0; i<=args.nfield; i++) {611 f = &args.field[i];612 prefix = " -";613 if(i) {614 n = f->beg1;615 if(n >= 0)616 fprint(2, " +%d", n);617 else618 fprint(2, " +*");619 n = f->beg2;620 if(n >= 0)621 fprint(2, ".%d", n);622 else623 fprint(2, ".*");625 if(f->flags & B1flag)626 fprint(2, "b");628 n = f->end1;629 if(n >= 0)630 fprint(2, " -%d", n);631 else632 fprint(2, " -*");633 n = f->end2;634 if(n >= 0)635 fprint(2, ".%d", n);636 else637 fprint(2, ".*");638 prefix = "";639 }640 if(f->flags & Bflag)641 fprint(2, "%sb", prefix);642 if(f->flags & Dflag)643 fprint(2, "%sd", prefix);644 if(f->flags & Fflag)645 fprint(2, "%sf", prefix);646 if(f->flags & Gflag)647 fprint(2, "%sg", prefix);648 if(f->flags & Iflag)649 fprint(2, "%si", prefix);650 if(f->flags & Mflag)651 fprint(2, "%sM", prefix);652 if(f->flags & Nflag)653 fprint(2, "%sn", prefix);654 if(f->flags & Rflag)655 fprint(2, "%sr", prefix);656 if(f->flags & Wflag)657 fprint(2, "%sw", prefix);658 }659 if(args.cflag)660 fprint(2, " -c");661 if(args.uflag)662 fprint(2, " -u");663 if(args.ofile)664 fprint(2, " -o %s", args.ofile);665 if(args.mline != Nline)666 fprint(2, " -l %ld", args.mline);667 fprint(2, "\n");668 }670 void671 newfield(void)672 {673 int n;674 Field *f;676 n = args.nfield + 1;677 if(n >= Nfield) {678 fprint(2, "sort: too many fields specified\n");679 done("option");680 }681 args.nfield = n;682 f = &args.field[n];683 f->beg1 = -1;684 f->beg2 = -1;685 f->end1 = -1;686 f->end2 = -1;687 }689 void690 doargs(int argc, char *argv[])691 {692 int i, c, hadplus;693 char *s, *p, *q;694 Field *f;696 hadplus = 0;697 args.mline = Nline;698 for(i=1; i<argc; i++) {699 s = argv[i];700 c = *s++;701 if(c == '-') {702 c = *s;703 if(c == 0) /* forced end of arg marker */704 break;705 argv[i] = 0; /* clobber args processed */706 if(c == '.' || (c >= '0' && c <= '9')) {707 if(!hadplus)708 newfield();709 f = &args.field[args.nfield];710 dofield(s, &f->end1, &f->end2, 0, 0);711 hadplus = 0;712 continue;713 }715 while(c = *s++)716 switch(c) {717 case '-': /* end of options */718 i = argc;719 continue;720 case 'T': /* temp directory */721 if(*s == 0) {722 i++;723 if(i < argc) {724 args.tname = argv[i];725 argv[i] = 0;726 }727 } else728 args.tname = s;729 s = strchr(s, 0);730 break;731 case 'o': /* output file */732 if(*s == 0) {733 i++;734 if(i < argc) {735 args.ofile = argv[i];736 argv[i] = 0;737 }738 } else739 args.ofile = s;740 s = strchr(s, 0);741 break;742 case 'k': /* posix key (what were they thinking?) */743 p = 0;744 if(*s == 0) {745 i++;746 if(i < argc) {747 p = argv[i];748 argv[i] = 0;749 }750 } else751 p = s;752 s = strchr(s, 0);753 if(p == 0)754 break;756 newfield();757 q = strchr(p, ',');758 if(q)759 *q++ = 0;760 f = &args.field[args.nfield];761 dofield(p, &f->beg1, &f->beg2, 1, 1);762 if(f->flags & Bflag) {763 f->flags |= B1flag;764 f->flags &= ~Bflag;765 }766 if(q) {767 dofield(q, &f->end1, &f->end2, 1, 0);768 if(f->end2 <= 0)769 f->end1++;770 }771 hadplus = 0;772 break;773 case 't': /* tab character */774 if(*s == 0) {775 i++;776 if(i < argc) {777 chartorune(&args.tabchar, argv[i]);778 argv[i] = 0;779 }780 } else781 s += chartorune(&args.tabchar, s);782 if(args.tabchar == '\n') {783 fprint(2, "aw come on, rob\n");784 done("rob");785 }786 break;787 case 'c': /* check order */788 args.cflag = 1;789 break;790 case 'u': /* unique */791 args.uflag = 1;792 break;793 case 'v': /* debugging noise */794 args.vflag = 1;795 break;796 case 'l':797 if(*s == 0) {798 i++;799 if(i < argc) {800 args.mline = atol(argv[i]);801 argv[i] = 0;802 }803 } else804 args.mline = atol(s);805 s = strchr(s, 0);806 break;808 case 'M': /* month */809 case 'b': /* skip blanks */810 case 'd': /* directory order */811 case 'f': /* fold case */812 case 'g': /* floating numbers */813 case 'i': /* ignore non-ascii */814 case 'n': /* numbers */815 case 'r': /* reverse */816 case 'w': /* ignore white */817 if(args.nfield > 0)818 fprint(2, "sort: global field set after -k\n");819 setfield(0, c);820 break;821 case 'm':822 /* option m silently ignored but required by posix */823 break;824 default:825 fprint(2, "sort: unknown option: -%C\n", c);826 done("option");827 }828 continue;829 }830 if(c == '+') {831 argv[i] = 0; /* clobber args processed */832 c = *s;833 if(c == '.' || (c >= '0' && c <= '9')) {834 newfield();835 f = &args.field[args.nfield];836 dofield(s, &f->beg1, &f->beg2, 0, 0);837 if(f->flags & Bflag) {838 f->flags |= B1flag;839 f->flags &= ~Bflag;840 }841 hadplus = 1;842 continue;843 }844 fprint(2, "sort: unknown option: +%C\n", c);845 done("option");846 }847 args.nfile++;848 }850 for(i=0; i<=args.nfield; i++) {851 f = &args.field[i];853 /*854 * global options apply to fields that855 * specify no options856 */857 if(f->flags == 0) {858 f->flags = args.field[0].flags;859 if(args.field[0].flags & Bflag)860 f->flags |= B1flag;861 }864 /*865 * build buildkey specification866 */867 switch(f->flags & ~(Bflag|B1flag)) {868 default:869 fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);870 done("option");871 case 0:872 f->dokey = dokey_;873 break;874 case Rflag:875 f->dokey = dokey_r;876 break;877 case Gflag:878 case Nflag:879 case Gflag|Nflag:880 case Gflag|Rflag:881 case Nflag|Rflag:882 case Gflag|Nflag|Rflag:883 f->dokey = dokey_gn;884 break;885 case Mflag:886 case Mflag|Rflag:887 f->dokey = dokey_m;888 makemapm(f);889 break;890 case Dflag:891 case Dflag|Fflag:892 case Dflag|Fflag|Iflag:893 case Dflag|Fflag|Iflag|Rflag:894 case Dflag|Fflag|Iflag|Rflag|Wflag:895 case Dflag|Fflag|Iflag|Wflag:896 case Dflag|Fflag|Rflag:897 case Dflag|Fflag|Rflag|Wflag:898 case Dflag|Fflag|Wflag:899 case Dflag|Iflag:900 case Dflag|Iflag|Rflag:901 case Dflag|Iflag|Rflag|Wflag:902 case Dflag|Iflag|Wflag:903 case Dflag|Rflag:904 case Dflag|Rflag|Wflag:905 case Dflag|Wflag:906 case Fflag:907 case Fflag|Iflag:908 case Fflag|Iflag|Rflag:909 case Fflag|Iflag|Rflag|Wflag:910 case Fflag|Iflag|Wflag:911 case Fflag|Rflag:912 case Fflag|Rflag|Wflag:913 case Fflag|Wflag:914 case Iflag:915 case Iflag|Rflag:916 case Iflag|Rflag|Wflag:917 case Iflag|Wflag:918 case Wflag:919 f->dokey = dokey_dfi;920 makemapd(f);921 break;922 }923 }925 /*926 * random spot checks927 */928 if(args.nfile > 1 && args.cflag) {929 fprint(2, "sort: -c can have at most one input file\n");930 done("option");931 }932 return;933 }935 uchar*936 skip(uchar *l, int n1, int n2, int bflag, int endfield)937 {938 int i, c, tc;939 Rune r;941 if(endfield && n1 < 0)942 return 0;944 c = *l++;945 tc = args.tabchar;946 if(tc) {947 if(tc < Runeself) {948 for(i=n1; i>0; i--) {949 while(c != tc) {950 if(c == '\n')951 return 0;952 c = *l++;953 }954 if(!(endfield && i == 1))955 c = *l++;956 }957 } else {958 l--;959 l += chartorune(&r, (char*)l);960 for(i=n1; i>0; i--) {961 while(r != tc) {962 if(r == '\n')963 return 0;964 l += chartorune(&r, (char*)l);965 }966 if(!(endfield && i == 1))967 l += chartorune(&r, (char*)l);968 }969 c = r;970 }971 } else {972 for(i=n1; i>0; i--) {973 while(c == ' ' || c == '\t')974 c = *l++;975 while(c != ' ' && c != '\t') {976 if(c == '\n')977 return 0;978 c = *l++;979 }980 }981 }983 if(bflag)984 while(c == ' ' || c == '\t')985 c = *l++;987 l--;988 for(i=n2; i>0; i--) {989 c = *l;990 if(c < Runeself) {991 if(c == '\n')992 return 0;993 l++;994 continue;995 }996 l += chartorune(&r, (char*)l);997 }998 return l;999 }1001 void1002 dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)1003 {1004 uchar *kp;1005 int c, cl, dp;1006 int state, nzero, exp, expsign, rflag;1008 cl = k->klen + 3;1009 kp = k->key + cl; /* skip place for sign, exponent[2] */1011 nzero = 0; /* number of trailing zeros */1012 exp = 0; /* value of the exponent */1013 expsign = 0; /* sign of the exponent */1014 dp = 0x4040; /* location of decimal point */1015 rflag = f->flags&Rflag; /* xor of rflag and - sign */1016 state = NSstart;1018 for(;; lp++) {1019 if(lp >= lpe)1020 break;1021 c = *lp;1023 if(c == ' ' || c == '\t') {1024 switch(state) {1025 case NSstart:1026 case NSsign:1027 continue;1028 }1029 break;1030 }1031 if(c == '+' || c == '-') {1032 switch(state) {1033 case NSstart:1034 state = NSsign;1035 if(c == '-')1036 rflag = !rflag;1037 continue;1038 case NSexp:1039 state = NSexpsign;1040 if(c == '-')1041 expsign = 1;1042 continue;1043 }1044 break;1045 }1046 if(c == '0') {1047 switch(state) {1048 case NSdigit:1049 if(rflag)1050 c = ~c;1051 *kp++ = c;1052 cl++;1053 nzero++;1054 dp++;1055 state = NSdigit;1056 continue;1057 case NSfract:1058 if(rflag)1059 c = ~c;1060 *kp++ = c;1061 cl++;1062 nzero++;1063 state = NSfract;1064 continue;1065 case NSstart:1066 case NSsign:1067 case NSzero:1068 state = NSzero;1069 continue;1070 case NSzerofract:1071 case NSpoint:1072 dp--;1073 state = NSzerofract;1074 continue;1075 case NSexpsign:1076 case NSexp:1077 case NSexpdigit:1078 exp = exp*10 + (c - '0');1079 state = NSexpdigit;1080 continue;1081 }1082 break;1083 }1084 if(c >= '1' && c <= '9') {1085 switch(state) {1086 case NSzero:1087 case NSstart:1088 case NSsign:1089 case NSdigit:1090 if(rflag)1091 c = ~c;1092 *kp++ = c;1093 cl++;1094 nzero = 0;1095 dp++;1096 state = NSdigit;1097 continue;1098 case NSzerofract:1099 case NSpoint:1100 case NSfract:1101 if(rflag)1102 c = ~c;1103 *kp++ = c;1104 cl++;1105 nzero = 0;1106 state = NSfract;1107 continue;1108 case NSexpsign:1109 case NSexp:1110 case NSexpdigit:1111 exp = exp*10 + (c - '0');1112 state = NSexpdigit;1113 continue;1114 }1115 break;1116 }1117 if(c == '.') {1118 switch(state) {1119 case NSstart:1120 case NSsign:1121 state = NSpoint;1122 continue;1123 case NSzero:1124 state = NSzerofract;1125 continue;1126 case NSdigit:1127 state = NSfract;1128 continue;1129 }1130 break;1131 }1132 if((f->flags & Gflag) && (c == 'e' || c == 'E')) {1133 switch(state) {1134 case NSdigit:1135 case NSfract:1136 state = NSexp;1137 continue;1138 }1139 break;1140 }1141 break;1142 }1144 switch(state) {1145 /*1146 * result is zero1147 */1148 case NSstart:1149 case NSsign:1150 case NSzero:1151 case NSzerofract:1152 case NSpoint:1153 kp = k->key + k->klen;1154 k->klen += 2;1155 kp[0] = 0x20; /* between + and - */1156 kp[1] = 0;1157 return;1158 /*1159 * result has exponent1160 */1161 case NSexpsign:1162 case NSexp:1163 case NSexpdigit:1164 if(expsign)1165 exp = -exp;1166 dp += exp;1168 /*1169 * result is fixed point number1170 */1171 case NSdigit:1172 case NSfract:1173 kp -= nzero;1174 cl -= nzero;1175 break;1176 }1178 /*1179 * end of number1180 */1181 c = 0;1182 if(rflag)1183 c = ~c;1184 *kp = c;1186 /*1187 * sign and exponent1188 */1189 c = 0x30;1190 if(rflag) {1191 c = 0x10;1192 dp = ~dp;1193 }1194 kp = k->key + k->klen;1195 kp[0] = c;1196 kp[1] = (dp >> 8);1197 kp[2] = dp;1198 k->klen = cl+1;1199 }1201 void1202 dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f)1203 {1204 uchar *kp;1205 Rune r, place[3];1206 int c, cl, pc;1207 int rflag;1209 rflag = f->flags&Rflag;1210 pc = 0;1212 cl = k->klen;1213 kp = k->key + cl;1215 for(;;) {1216 /*1217 * get the character1218 */1219 if(lp >= lpe)1220 break;1221 c = *lp;1222 if(c >= Runeself) {1223 lp += chartorune(&r, (char*)lp);1224 c = r;1225 } else1226 lp++;1228 if(c < nelem(f->mapto)) {1229 c = f->mapto[c];1230 if(c == 0)1231 continue;1232 }1233 place[pc++] = c;1234 if(pc < 3)1235 continue;1236 for(c=11; c>=0; c--)1237 if(memcmp(month[c], place, sizeof(place)) == 0)1238 break;1239 c += 10;1240 if(rflag)1241 c = ~c;1242 *kp++ = c;1243 cl++;1244 break;1245 }1247 c = 0;1248 if(rflag)1249 c = ~c;1250 *kp = c;1251 k->klen = cl+1;1252 }1254 void1255 dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f)1256 {1257 uchar *kp;1258 Rune r;1259 int c, cl, n, rflag;1261 cl = k->klen;1262 kp = k->key + cl;1263 rflag = f->flags & Rflag;1265 for(;;) {1266 /*1267 * get the character1268 */1269 if(lp >= lpe)1270 break;1271 c = *lp;1272 if(c >= Runeself) {1273 lp += chartorune(&r, (char*)lp);1274 c = r;1275 } else1276 lp++;1278 /*1279 * do the various mappings.1280 * the common case is handled1281 * completely by the table.1282 */1283 if(c != 0 && c < Runeself) {1284 c = f->mapto[c];1285 if(c) {1286 *kp++ = c;1287 cl++;1288 }1289 continue;1290 }1292 /*1293 * for characters out of range,1294 * the table does not do Rflag.1295 * ignore is based on mapto[255]1296 */1297 if(c != 0 && c < nelem(f->mapto)) {1298 c = f->mapto[c];1299 if(c == 0)1300 continue;1301 } else1302 if(f->mapto[nelem(f->mapto)-1] == 0)1303 continue;1305 /*1306 * put it in the key1307 */1308 r = c;1309 n = runetochar((char*)kp, &r);1310 kp += n;1311 cl += n;1312 if(rflag)1313 while(n > 0) {1314 kp[-n] = ~kp[-n];1315 n--;1316 }1317 }1319 /*1320 * end of key1321 */1322 k->klen = cl+1;1323 if(rflag) {1324 *kp = ~0;1325 return;1326 }1327 *kp = 0;1328 }1330 void1331 dokey_r(Key *k, uchar *lp, uchar *lpe, Field *f)1332 {1333 int cl, n;1334 uchar *kp;1336 USED(f);1337 n = lpe - lp;1338 if(n < 0)1339 n = 0;1340 cl = k->klen;1341 kp = k->key + cl;1342 k->klen = cl+n+1;1344 lpe -= 3;1345 while(lp < lpe) {1346 kp[0] = ~lp[0];1347 kp[1] = ~lp[1];1348 kp[2] = ~lp[2];1349 kp[3] = ~lp[3];1350 kp += 4;1351 lp += 4;1352 }1354 lpe += 3;1355 while(lp < lpe)1356 *kp++ = ~*lp++;1357 *kp = ~0;1358 }1360 void1361 dokey_(Key *k, uchar *lp, uchar *lpe, Field *f)1362 {1363 int n, cl;1364 uchar *kp;1366 USED(f);1367 n = lpe - lp;1368 if(n < 0)1369 n = 0;1370 cl = k->klen;1371 kp = k->key + cl;1372 k->klen = cl+n+1;1373 memmove(kp, lp, n);1374 kp[n] = 0;1375 }1377 void1378 buildkey(Line *l)1379 {1380 Key *k;1381 uchar *lp, *lpe;1382 int ll, kl, cl, i, n;1383 Field *f;1385 ll = l->llen - 1;1386 kl = 0; /* allocated length */1387 cl = 0; /* current length */1388 k = 0;1390 for(i=1; i<=args.nfield; i++) {1391 f = &args.field[i];1392 lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);1393 if(lp == 0)1394 lp = l->line + ll;1395 lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);1396 if(lpe == 0)1397 lpe = l->line + ll;1398 n = (lpe - lp) + 1;1399 if(n <= 0)1400 n = 1;1401 if(cl+(n+4) > kl) {1402 kl = cl+(n+4);1403 k = realloc(k, sizeof(Key) +1404 (kl-1)*sizeof(k->key[0]));1405 if(k == 0)1406 nomem();1407 }1408 k->klen = cl;1409 (*f->dokey)(k, lp, lpe, f);1410 cl = k->klen;1411 }1413 /*1414 * global comparisons1415 */1416 if(!(args.uflag && cl > 0)) {1417 f = &args.field[0];1418 if(cl+(ll+4) > kl) {1419 kl = cl+(ll+4);1420 k = realloc(k, sizeof(Key) +1421 (kl-1)*sizeof(k->key[0]));1422 if(k == 0)1423 nomem();1424 }1425 k->klen = cl;1426 (*f->dokey)(k, l->line, l->line+ll, f);1427 cl = k->klen;1428 }1430 l->key = k;1431 k->klen = cl;1433 if(args.vflag) {1434 write(2, l->line, l->llen);1435 for(i=0; i<k->klen; i++) {1436 fprint(2, " %.2x", k->key[i]);1437 if(k->key[i] == 0x00 || k->key[i] == 0xff)1438 fprint(2, "\n");1439 }1440 }1441 }1443 void1444 makemapm(Field *f)1445 {1446 int i, c;1448 for(i=0; i<nelem(f->mapto); i++) {1449 c = 1;1450 if(i == ' ' || i == '\t')1451 c = 0;1452 if(i >= 'a' && i <= 'z')1453 c = i + ('A' - 'a');1454 if(i >= 'A' && i <= 'Z')1455 c = i;1456 f->mapto[i] = c;1457 if(args.vflag) {1458 if((i & 15) == 0)1459 fprint(2, " ");1460 fprint(2, " %.2x", c);1461 if((i & 15) == 15)1462 fprint(2, "\n");1463 }1464 }1465 }1467 void1468 makemapd(Field *f)1469 {1470 int i, j, c;1472 for(i=0; i<nelem(f->mapto); i++) {1473 c = i;1474 if(f->flags & Iflag)1475 if(c < 040 || c > 0176)1476 c = -1;1477 if((f->flags & Wflag) && c >= 0)1478 if(c == ' ' || c == '\t')1479 c = -1;1480 if((f->flags & Dflag) && c >= 0)1481 if(!(c == ' ' || c == '\t' ||1482 (c >= 'a' && c <= 'z') ||1483 (c >= 'A' && c <= 'Z') ||1484 (c >= '0' && c <= '9'))) {1485 for(j=0; latinmap[j]; j+=3)1486 if(c == latinmap[j+0] ||1487 c == latinmap[j+1])1488 break;1489 if(latinmap[j] == 0)1490 c = -1;1491 }1492 if((f->flags & Fflag) && c >= 0) {1493 if(c >= 'a' && c <= 'z')1494 c += 'A' - 'a';1495 for(j=0; latinmap[j]; j+=3)1496 if(c == latinmap[j+0] ||1497 c == latinmap[j+1]) {1498 c = latinmap[j+2];1499 break;1500 }1501 }1502 if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)1503 c = ~c & 0xff;1504 if(c < 0)1505 c = 0;1506 f->mapto[i] = c;1507 if(args.vflag) {1508 if((i & 15) == 0)1509 fprint(2, " ");1510 fprint(2, " %.2x", c);1511 if((i & 15) == 15)1512 fprint(2, "\n");1513 }1514 }1515 }1517 int latinmap[] =1518 {1519 /* lcase ucase fold */1520 0xe0, 0xc0, 0x41, /* L'à', L'À', L'A', */1521 0xe1, 0xc1, 0x41, /* L'á', L'Á', L'A', */1522 0xe2, 0xc2, 0x41, /* L'â', L'Â', L'A', */1523 0xe4, 0xc4, 0x41, /* L'ä', L'Ä', L'A', */1524 0xe3, 0xc3, 0x41, /* L'ã', L'Ã', L'A', */1525 0xe5, 0xc5, 0x41, /* L'å', L'Å', L'A', */1526 0xe8, 0xc8, 0x45, /* L'è', L'È', L'E', */1527 0xe9, 0xc9, 0x45, /* L'é', L'É', L'E', */1528 0xea, 0xca, 0x45, /* L'ê', L'Ê', L'E', */1529 0xeb, 0xcb, 0x45, /* L'ë', L'Ë', L'E', */1530 0xec, 0xcc, 0x49, /* L'ì', L'Ì', L'I', */1531 0xed, 0xcd, 0x49, /* L'í', L'Í', L'I', */1532 0xee, 0xce, 0x49, /* L'î', L'Î', L'I', */1533 0xef, 0xcf, 0x49, /* L'ï', L'Ï', L'I', */1534 0xf2, 0xd2, 0x4f, /* L'ò', L'Ò', L'O', */1535 0xf3, 0xd3, 0x4f, /* L'ó', L'Ó', L'O', */1536 0xf4, 0xd4, 0x4f, /* L'ô', L'Ô', L'O', */1537 0xf6, 0xd6, 0x4f, /* L'ö', L'Ö', L'O', */1538 0xf5, 0xd5, 0x4f, /* L'õ', L'Õ', L'O', */1539 0xf8, 0xd8, 0x4f, /* L'ø', L'Ø', L'O', */1540 0xf9, 0xd9, 0x55, /* L'ù', L'Ù', L'U', */1541 0xfa, 0xda, 0x55, /* L'ú', L'Ú', L'U', */1542 0xfb, 0xdb, 0x55, /* L'û', L'Û', L'U', */1543 0xfc, 0xdc, 0x55, /* L'ü', L'Ü', L'U', */1544 0xe6, 0xc6, 0x41, /* L'æ', L'Æ', L'A', */1545 0xf0, 0xd0, 0x44, /* L'ð', L'Ð', L'D', */1546 0xf1, 0xd1, 0x4e, /* L'ñ', L'Ñ', L'N', */1547 0xfd, 0xdd, 0x59, /* L'ý', L'Ý', L'Y', */1548 0xe7, 0xc7, 0x43, /* L'ç', L'Ç', L'C', */1549 0,1550 };1552 Rune LJAN[] = { 'J', 'A', 'N', 0 };1553 Rune LFEB[] = { 'F', 'E', 'B', 0 };1554 Rune LMAR[] = { 'M', 'A', 'R', 0 };1555 Rune LAPR[] = { 'A', 'P', 'R', 0 };1556 Rune LMAY[] = { 'M', 'A', 'Y', 0 };1557 Rune LJUN[] = { 'J', 'U', 'N', 0 };1558 Rune LJUL[] = { 'J', 'U', 'L', 0 };1559 Rune LAUG[] = { 'A', 'U', 'G', 0 };1560 Rune LSEP[] = { 'S', 'E', 'P', 0 };1561 Rune LOCT[] = { 'O', 'C', 'T', 0 };1562 Rune LNOV[] = { 'N', 'O', 'V', 0 };1563 Rune LDEC[] = { 'D', 'E', 'C', 0 };1565 Rune* month[12] =1566 {1567 LJAN,1568 LFEB,1569 LMAR,1570 LAPR,1571 LMAY,1572 LJUN,1573 LJUL,1574 LAUG,1575 LSEP,1576 LOCT,1577 LNOV,1578 LDEC,1579 };1581 /************** radix sort ***********/1583 enum1584 {1585 Threshold = 141586 };1588 void rsort4(Key***, ulong, int);1589 void bsort4(Key***, ulong, int);1591 void1592 sort4(void *a, ulong n)1593 {1594 if(n > Threshold)1595 rsort4((Key***)a, n, 0);1596 else1597 bsort4((Key***)a, n, 0);1598 }1600 void1601 rsort4(Key ***a, ulong n, int b)1602 {1603 Key ***ea, ***t, ***u, **t1, **u1, *k;1604 Key ***part[257];1605 static long count[257];1606 long clist[257+257], *cp, *cp1;1607 int c, lowc, higc;1609 /*1610 * pass 1 over all keys,1611 * count the number of each key[b].1612 * find low count and high count.1613 */1614 lowc = 256;1615 higc = 0;1616 ea = a+n;1617 for(t=a; t<ea; t++) {1618 k = **t;1619 n = k->klen;1620 if(b >= n) {1621 count[256]++;1622 continue;1623 }1624 c = k->key[b];1625 n = count[c]++;1626 if(n == 0) {1627 if(c < lowc)1628 lowc = c;1629 if(c > higc)1630 higc = c;1631 }1632 }1634 /*1635 * pass 2 over all counts,1636 * put partition pointers in part[c].1637 * save compacted indexes and counts1638 * in clist[].1639 */1640 t = a;1641 n = count[256];1642 clist[0] = n;1643 part[256] = t;1644 t += n;1646 cp1 = clist+1;1647 cp = count+lowc;1648 for(c=lowc; c<=higc; c++,cp++) {1649 n = *cp;1650 if(n) {1651 cp1[0] = n;1652 cp1[1] = c;1653 cp1 += 2;1654 part[c] = t;1655 t += n;1656 }1657 }1658 *cp1 = 0;1660 /*1661 * pass 3 over all counts.1662 * chase lowest pointer in each partition1663 * around a permutation until it comes1664 * back and is stored where it started.1665 * static array, count[], should be1666 * reduced to zero entries except maybe1667 * count[256].1668 */1669 for(cp1=clist+1; cp1[0]; cp1+=2) {1670 c = cp1[1];1671 cp = count+c;1672 while(*cp) {1673 t1 = *part[c];1674 for(;;) {1675 k = *t1;1676 n = 256;1677 if(b < k->klen)1678 n = k->key[b];1679 u = part[n]++;1680 count[n]--;1681 u1 = *u;1682 *u = t1;1683 if(n == c)1684 break;1685 t1 = u1;1686 }1687 }1688 }1690 /*1691 * pass 4 over all partitions.1692 * call recursively.1693 */1694 b++;1695 t = a + clist[0];1696 count[256] = 0;1697 for(cp1=clist+1; n=cp1[0]; cp1+=2) {1698 if(n > Threshold)1699 rsort4(t, n, b);1700 else1701 if(n > 1)1702 bsort4(t, n, b);1703 t += n;1704 }1705 }1707 /*1708 * bubble sort to pick up1709 * the pieces.1710 */1711 void1712 bsort4(Key ***a, ulong n, int b)1713 {1714 Key ***i, ***j, ***k, ***l, **t;1715 Key *ka, *kb;1716 int n1, n2;1718 l = a+n;1719 j = a;1721 loop:1722 i = j;1723 j++;1724 if(j >= l)1725 return;1727 ka = **i;1728 kb = **j;1729 n1 = ka->klen - b;1730 n2 = kb->klen - b;1731 if(n1 > n2)1732 n1 = n2;1733 if(n1 <= 0)1734 goto loop;1735 n2 = ka->key[b] - kb->key[b];1736 if(n2 == 0)1737 n2 = memcmp(ka->key+b, kb->key+b, n1);1738 if(n2 <= 0)1739 goto loop;1741 for(;;) {1742 k = i+1;1744 t = *k;1745 *k = *i;1746 *i = t;1748 if(i <= a)1749 goto loop;1751 i--;1752 ka = **i;1753 kb = *t;1754 n1 = ka->klen - b;1755 n2 = kb->klen - b;1756 if(n1 > n2)1757 n1 = n2;1758 if(n1 <= 0)1759 goto loop;1760 n2 = ka->key[b] - kb->key[b];1761 if(n2 == 0)1762 n2 = memcmp(ka->key+b, kb->key+b, n1);1763 if(n2 <= 0)1764 goto loop;1765 }1766 }