Blob


1 /* join F1 F2 on stuff */
2 #include <u.h>
3 #include <libc.h>
4 #include <stdio.h>
5 #include <ctype.h>
6 #define F1 0
7 #define F2 1
8 #define F0 3
9 #define NFLD 100 /* max field per line */
10 #define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
11 FILE *f[2];
12 Rune buf[2][BUFSIZ]; /*input lines */
13 Rune *ppi[4][NFLD+1]; /* pointers to fields in lines */
14 Rune *s1,*s2;
15 #define j1 joinj1
16 #define j2 joinj2
18 int j1 = 1; /* join of this field of file 1 */
19 int j2 = 1; /* join of this field of file 2 */
20 int olist[2*NFLD]; /* output these fields */
21 int olistf[2*NFLD]; /* from these files */
22 int no; /* number of entries in olist */
23 Rune sep1 = ' '; /* default field separator */
24 Rune sep2 = '\t';
25 char *sepstr=" ";
26 int discard; /* count of truncated lines */
27 Rune null[BUFSIZ]/* = L""*/;
28 int a1;
29 int a2;
31 char *getoptarg(int*, char***);
32 void output(int, int);
33 int input(int);
34 void oparse(char*);
35 void error(char*, char*);
36 void seek1(void), seek2(void);
37 Rune *strtorune(Rune *, char *);
40 void
41 main(int argc, char **argv)
42 {
43 int i;
45 while (argc > 1 && argv[1][0] == '-') {
46 if (argv[1][1] == '\0')
47 break;
48 switch (argv[1][1]) {
49 case '-':
50 argc--;
51 argv++;
52 goto proceed;
53 case 'a':
54 switch(*getoptarg(&argc, &argv)) {
55 case '1':
56 a1++;
57 break;
58 case '2':
59 a2++;
60 break;
61 default:
62 error("incomplete option -a","");
63 }
64 break;
65 case 'e':
66 strtorune(null, getoptarg(&argc, &argv));
67 break;
68 case 't':
69 sepstr=getoptarg(&argc, &argv);
70 chartorune(&sep1, sepstr);
71 sep2 = sep1;
72 break;
73 case 'o':
74 if(argv[1][2]!=0 ||
75 argc>2 && strchr(argv[2],',')!=0)
76 oparse(getoptarg(&argc, &argv));
77 else for (no = 0; no<2*NFLD && argc>2; no++){
78 if (argv[2][0] == '1' && argv[2][1] == '.') {
79 olistf[no] = F1;
80 olist[no] = atoi(&argv[2][2]);
81 } else if (argv[2][0] == '2' && argv[2][1] == '.') {
82 olist[no] = atoi(&argv[2][2]);
83 olistf[no] = F2;
84 } else if (argv[2][0] == '0')
85 olistf[no] = F0;
86 else
87 break;
88 argc--;
89 argv++;
90 }
91 break;
92 case 'j':
93 if(argc <= 2)
94 break;
95 if (argv[1][2] == '1')
96 j1 = atoi(argv[2]);
97 else if (argv[1][2] == '2')
98 j2 = atoi(argv[2]);
99 else
100 j1 = j2 = atoi(argv[2]);
101 argc--;
102 argv++;
103 break;
104 case '1':
105 j1 = atoi(getoptarg(&argc, &argv));
106 break;
107 case '2':
108 j2 = atoi(getoptarg(&argc, &argv));
109 break;
111 argc--;
112 argv++;
114 proceed:
115 for (i = 0; i < no; i++)
116 if (olist[i]-- > NFLD) /* 0 origin */
117 error("field number too big in -o","");
118 if (argc != 3)
119 error("usage: join [-1 x -2 y] [-o list] file1 file2","");
120 j1--;
121 j2--; /* everyone else believes in 0 origin */
122 s1 = ppi[F1][j1];
123 s2 = ppi[F2][j2];
124 if (strcmp(argv[1], "-") == 0)
125 f[F1] = stdin;
126 else if ((f[F1] = fopen(argv[1], "r")) == 0)
127 error("can't open %s", argv[1]);
128 if(strcmp(argv[2], "-") == 0) {
129 f[F2] = stdin;
130 } else if ((f[F2] = fopen(argv[2], "r")) == 0)
131 error("can't open %s", argv[2]);
133 if(ftell(f[F2]) >= 0)
134 seek2();
135 else if(ftell(f[F1]) >= 0)
136 seek1();
137 else
138 error("neither file is randomly accessible","");
139 if (discard)
140 error("some input line was truncated", "");
141 exits("");
143 int runecmp(Rune *a, Rune *b){
144 while(*a==*b){
145 if(*a=='\0') return 0;
146 a++;
147 b++;
149 if(*a<*b) return -1;
150 return 1;
152 char *runetostr(char *buf, Rune *r){
153 char *s;
154 for(s=buf;*r;r++) s+=runetochar(s, r);
155 *s='\0';
156 return buf;
158 Rune *strtorune(Rune *buf, char *s){
159 Rune *r;
160 for(r=buf;*s;r++) s+=chartorune(r, s);
161 *r='\0';
162 return buf;
/*
 * lazy. there ought to be a clean way to combine seek1 & seek2
 *
 * get1()/get2() read the next line of file 1/file 2 and store the field
 * count in the caller's local n1/n2, which must be in scope where the
 * macro is used.
 */
#define get1() (n1 = input(F1))
#define get2() (n2 = input(F2))
167 void
168 seek2(void)
170 int n1, n2;
171 int top2=0;
172 int bot2 = ftell(f[F2]);
173 get1();
174 get2();
175 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
176 if(n1>0 && n2>0 && comp()>0 || n1==0) {
177 if(a2) output(0, n2);
178 bot2 = ftell(f[F2]);
179 get2();
180 } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
181 if(a1) output(n1, 0);
182 get1();
183 } else /*(n1>0 && n2>0 && comp()==0)*/ {
184 while(n2>0 && comp()==0) {
185 output(n1, n2);
186 top2 = ftell(f[F2]);
187 get2();
189 fseek(f[F2], bot2, 0);
190 get2();
191 get1();
192 for(;;) {
193 if(n1>0 && n2>0 && comp()==0) {
194 output(n1, n2);
195 get2();
196 } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
197 fseek(f[F2], bot2, 0);
198 get2();
199 get1();
200 } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
201 fseek(f[F2], top2, 0);
202 bot2 = top2;
203 get2();
204 break;
210 void
211 seek1(void)
213 int n1, n2;
214 int top1=0;
215 int bot1 = ftell(f[F1]);
216 get1();
217 get2();
218 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
219 if(n1>0 && n2>0 && comp()>0 || n1==0) {
220 if(a2) output(0, n2);
221 get2();
222 } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
223 if(a1) output(n1, 0);
224 bot1 = ftell(f[F1]);
225 get1();
226 } else /*(n1>0 && n2>0 && comp()==0)*/ {
227 while(n2>0 && comp()==0) {
228 output(n1, n2);
229 top1 = ftell(f[F1]);
230 get1();
232 fseek(f[F1], bot1, 0);
233 get2();
234 get1();
235 for(;;) {
236 if(n1>0 && n2>0 && comp()==0) {
237 output(n1, n2);
238 get1();
239 } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
240 fseek(f[F1], bot1, 0);
241 get2();
242 get1();
243 } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
244 fseek(f[F1], top1, 0);
245 bot1 = top1;
246 get1();
247 break;
254 int
255 input(int n) /* get input line and split into fields */
257 register int i, c;
258 Rune *bp;
259 Rune **pp;
260 char line[BUFSIZ];
262 bp = buf[n];
263 pp = ppi[n];
264 if (fgets(line, BUFSIZ, f[n]) == 0)
265 return(0);
266 strtorune(bp, line);
267 i = 0;
268 do {
269 i++;
270 if (sep1 == ' ') /* strip multiples */
271 while ((c = *bp) == sep1 || c == sep2)
272 bp++; /* skip blanks */
273 *pp++ = bp; /* record beginning */
274 while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
275 bp++;
276 *bp++ = '\0'; /* mark end by overwriting blank */
277 } while (c != '\n' && c != '\0' && i < NFLD-1);
278 if (c != '\n')
279 discard++;
281 *pp = 0;
282 return(i);
285 void
286 output(int on1, int on2) /* print items from olist */
288 int i;
289 Rune *temp;
290 char buf[BUFSIZ];
292 if (no <= 0) { /* default case */
293 printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
294 for (i = 0; i < on1; i++)
295 if (i != j1)
296 printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
297 for (i = 0; i < on2; i++)
298 if (i != j2)
299 printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
300 printf("\n");
301 } else {
302 for (i = 0; i < no; i++) {
303 if (olistf[i]==F0 && on1>j1)
304 temp = ppi[F1][j1];
305 else if (olistf[i]==F0 && on2>j2)
306 temp = ppi[F2][j2];
307 else {
308 temp = ppi[olistf[i]][olist[i]];
309 if(olistf[i]==F1 && on1<=olist[i] ||
310 olistf[i]==F2 && on2<=olist[i] ||
311 *temp==0)
312 temp = null;
314 printf("%s", runetostr(buf, temp));
315 if (i == no - 1)
316 printf("\n");
317 else
318 printf("%s", sepstr);
/*
 * error: print "join: ", then the printf-style format s1 applied to the
 * single string argument s2, then a newline, all on stderr, and exit
 * with s1 as the exit status string.  Does not return.
 */
void
error(char *s1, char *s2)
{
	fputs("join: ", stderr);
	fprintf(stderr, s1, s2);
	fputc('\n', stderr);
	exits(s1);
}
/*
 * getoptarg: return the argument of the option in argv[1].
 *
 * If text follows the option letter in the same word ("-ofoo") that
 * text is returned and argc/argv are left alone.  Otherwise the next
 * word is the argument: it must exist and must not begin with '-', and
 * the caller's argc/argv are advanced by one word so that the returned
 * argument sits in argv[1].  A missing argument is fatal.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av;
	char *opt;

	av = *argvp;
	opt = av[1];
	if(opt[2] != '\0')
		return opt + 2;
	if(*argcp <= 2 || av[2][0] == '-')
		error("incomplete option %s", opt);
	*argcp -= 1;
	*argvp = av + 1;
	return av[2];
}
346 void
347 oparse(char *s)
349 for (no = 0; no<2*NFLD && *s; no++, s++) {
350 switch(*s) {
351 case 0:
352 return;
353 case '0':
354 olistf[no] = F0;
355 break;
356 case '1':
357 case '2':
358 if(s[1] == '.' && isdigit((uchar)s[2])) {
359 olistf[no] = *s=='1'? F1: F2;
360 olist[no] = atoi(s += 2);
361 break;
362 } /* fall thru */
363 default:
364 error("invalid -o list", "");
366 if(s[1] == ',')
367 s++;