Blob


/*
 * Deal with duplicated (adjacent, equal) lines in a file.
 */
4 #include <u.h>
5 #include <libc.h>
6 #include <bio.h>
7 #include <ctype.h>
/* Size in bytes of each of the two line buffers allocated by main. */
#define	SIZE	8000

/* Number of leading blank-separated fields skipped before comparing (-N). */
int	fields = 0;
/* Number of further characters skipped before comparing (+N). */
int	letters = 0;
/* Count of input lines in the run currently being collected. */
int	linec = 0;
/* Option character taken from a non-numeric "-x" argument. */
char	mode;
/* Incremented by equal() on an exact match; cleared by pline(). */
int	uniq;
/* The two line buffers that main alternates between. */
char	*b1;
char	*b2;
/* Capacity of b1 and b2 in bytes. */
long	bsize;
18 Biobuf fin;
19 Biobuf fout;
21 int gline(char *buf);
22 void pline(char *buf);
23 int equal(char *b1, char *b2);
24 char* skip(char *s);
26 void
27 main(int argc, char *argv[])
28 {
29 int f;
31 bsize = SIZE;
32 b1 = malloc(bsize);
33 b2 = malloc(bsize);
34 f = 0;
35 while(argc > 1) {
36 if(*argv[1] == '-') {
37 if(isdigit((uchar)argv[1][1]))
38 fields = atoi(&argv[1][1]);
39 else
40 mode = argv[1][1];
41 argc--;
42 argv++;
43 continue;
44 }
45 if(*argv[1] == '+') {
46 letters = atoi(&argv[1][1]);
47 argc--;
48 argv++;
49 continue;
50 }
51 f = open(argv[1], 0);
52 if(f < 0) {
53 fprint(2, "cannot open %s\n", argv[1]);
54 exits("open");
55 }
56 break;
57 }
58 if(argc > 2) {
59 fprint(2, "unexpected argument %s\n", argv[2]);
60 exits("arg");
61 }
62 Binit(&fin, f, OREAD);
63 Binit(&fout, 1, OWRITE);
65 if(gline(b1))
66 exits(0);
67 for(;;) {
68 linec++;
69 if(gline(b2)) {
70 pline(b1);
71 exits(0);
72 }
73 if(!equal(b1, b2)) {
74 pline(b1);
75 linec = 0;
76 do {
77 linec++;
78 if(gline(b1)) {
79 pline(b2);
80 exits(0);
81 }
82 } while(equal(b2, b1));
83 pline(b2);
84 linec = 0;
85 }
86 }
87 }
89 int
90 gline(char *buf)
91 {
92 char *p;
94 p = Brdline(&fin, '\n');
95 if(p == 0)
96 return 1;
97 if(fin.rdline >= bsize-1) {
98 fprint(2, "line too long\n");
99 exits("too long");
101 memmove(buf, p, fin.rdline);
102 buf[fin.rdline-1] = 0;
103 return 0;
106 void
107 pline(char *buf)
110 switch(mode) {
112 case 'u':
113 if(uniq) {
114 uniq = 0;
115 return;
117 break;
119 case 'd':
120 if(uniq)
121 break;
122 return;
124 case 'c':
125 Bprint(&fout, "%4d ", linec);
127 uniq = 0;
128 Bprint(&fout, "%s\n", buf);
131 int
132 equal(char *b1, char *b2)
134 char c;
136 if(fields || letters) {
137 b1 = skip(b1);
138 b2 = skip(b2);
140 for(;;) {
141 c = *b1++;
142 if(c != *b2++) {
143 if(c == 0 && mode == 's')
144 return 1;
145 return 0;
147 if(c == 0) {
148 uniq++;
149 return 1;
154 char*
155 skip(char *s)
157 int nf, nl;
159 nf = nl = 0;
160 while(nf++ < fields) {
161 while(*s == ' ' || *s == '\t')
162 s++;
163 while(!(*s == ' ' || *s == '\t' || *s == 0) )
164 s++;
166 while(nl++ < letters && *s != 0)
167 s++;
168 return s;