Blob


1 /*
2 * sed -- stream editor
3 *
4 *
5 */
6 #include <u.h>
7 #include <libc.h>
8 #include <bio.h>
9 #include <regexp.h>
/* Compile-time limits of the editor. */
enum {
	DEPTH		= 20,		/* deepest nesting of { } groups */
	MAXCMDS		= 512,		/* slots in the compiled command table */
	ADDSIZE		= 10000,	/* runes of storage for a, c, i, r text and s right-hand sides */
	MAXADDS		= 20,		/* appends and reads that may be pending at once */
	LBSIZE		= 8192,		/* runes in a line buffer */
	LABSIZE		= 50,		/* entries in the label symbol table */
	MAXSUB		= 10,		/* sub-expressions recorded per regexp match */
	MAXFILES	= 120		/* entries in the output file cache */
};
21 /* An address is a line #, a R.E., "$", a reference to the last
22 * R.E., or nothing.
23 */
24 typedef struct {
25 enum {
26 A_NONE,
27 A_DOL,
28 A_LINE,
29 A_RE,
30 A_LAST
31 }type;
32 union {
33 long line; /* Line # */
34 Reprog *rp; /* Compiled R.E. */
35 } u;
36 } Addr;
38 typedef struct SEDCOM {
39 Addr ad1; /* optional start address */
40 Addr ad2; /* optional end address */
41 union {
42 Reprog *re1; /* compiled R.E. */
43 Rune *text; /* added text or file name */
44 struct SEDCOM *lb1; /* destination command of branch */
45 } u;
46 Rune *rhs; /* Right-hand side of substitution */
47 Biobuf* fcode; /* File ID for read and write */
48 char command; /* command code -see below */
49 char gfl; /* 'Global' flag for substitutions */
50 char pfl; /* 'print' flag for substitutions */
51 char active; /* 1 => data between start and end */
52 char negfl; /* negation flag */
53 } SedCom;
/*
 * Values of SedCom.command, listed in numeric order.
 * Zero is not a command: it ends the compiled program.
 */
#define	ACOM	01
#define	CCOM	02
#define	DCOM	03
#define	ICOM	04
#define	LCOM	05
#define	RCOM	06
#define	SCOM	07
#define	PCOM	010
#define	QCOM	011
#define	NCOM	012
#define	EQCOM	013
#define	WCOM	014
#define	ECOM	015
#define	FCOM	016
#define	COCOM	017
#define	BCOM	020
#define	TCOM	021
#define	CNCOM	022
#define	CPCOM	023
#define	CWCOM	024
#define	CDCOM	025
#define	YCOM	026
#define	GCOM	027
#define	CGCOM	030
#define	HCOM	031
#define	CHCOM	032
#define	XCOM	033
85 typedef struct label { /* Label symbol table */
86 Rune asc[9]; /* Label name */
87 SedCom *chain;
88 SedCom *address; /* Command associated with label */
89 } Label;
/* Node in the list of input files still to be read. */
typedef struct FILE_CACHE {
	struct FILE_CACHE *next;	/* following file, or 0 at the end of the list */
	char	*name;			/* file name; 0 stands for standard input */
} FileCache;
96 SedCom pspace[MAXCMDS]; /* Command storage */
97 SedCom *pend = pspace+MAXCMDS; /* End of command storage */
98 SedCom *rep = pspace; /* Current fill point */
100 Reprog *lastre = 0; /* Last regular expression */
101 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
104 Rune *addend = addspace+ADDSIZE;
106 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
107 SedCom **aptr = abuf;
109 struct { /* Sed program input control block */
110 enum PTYPE /* Either on command line or in file */
111 { P_ARG,
112 P_FILE
113 } type;
114 union PCTL { /* Pointer to data */
115 Biobuf *bp;
116 char *curr;
117 } pctl;
118 } prog;
120 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
122 FileCache *fhead = 0; /* Head of File Cache Chain */
123 FileCache *ftail = 0; /* Tail of File Cache Chain */
125 Rune *loc1; /* Start of pattern match */
126 Rune *loc2; /* End of pattern match */
127 Rune seof; /* Pattern delimiter char */
129 Rune linebuf[LBSIZE+1]; /* Input data buffer */
130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */
131 Rune *spend = linebuf; /* End of input data */
132 Rune *cp; /* Current scan point in linebuf */
134 Rune holdsp[LBSIZE+1]; /* Hold buffer */
135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
136 Rune *hspend = holdsp; /* End of hold data */
138 int nflag; /* Command line flags */
139 int gflag;
140 int lflag;
142 int dolflag; /* Set when at true EOF */
143 int sflag; /* Set when substitution done */
144 int jflag; /* Set when jump required */
145 int delflag; /* Delete current line when set */
147 long lnum = 0; /* Input line count */
149 char fname[MAXFILES][40]; /* File name cache */
150 Biobuf *fcode[MAXFILES]; /* File ID cache */
151 int nfiles = 0; /* Cache fill point */
153 Biobuf fout; /* Output stream */
154 Biobuf bstdin; /* Default input */
155 Biobuf* f = 0; /* Input data */
157 Label ltab[LABSIZE]; /* Label name symbol table */
158 Label *labend = ltab+LABSIZE; /* End of label table */
159 Label *lab = ltab+1; /* Current Fill point */
161 int depth = 0; /* {} stack pointer */
163 Rune bad; /* Dummy err ptr reference */
164 Rune *badp = &bad;
167 char CGMES[] = "Command garbled: %S";
168 char TMMES[] = "Too much text: %S";
169 char LTL[] = "Label too long: %S";
170 char AD0MES[] = "No addresses allowed: %S";
171 char AD1MES[] = "Only one address allowed: %S";
173 void address(Addr *);
174 void arout(void);
175 int cmp(char *, char *);
176 int rcmp(Rune *, Rune *);
177 void command(SedCom *);
178 Reprog *compile(void);
179 Rune *compsub(Rune *, Rune *);
180 void dechain(void);
181 void dosub(Rune *);
182 int ecmp(Rune *, Rune *, int);
183 void enroll(char *);
184 void errexit(void);
185 int executable(SedCom *);
186 void execute(void);
187 void fcomp(void);
188 long getrune(void);
189 Rune *gline(Rune *);
190 int match(Reprog *, Rune *);
191 void newfile(enum PTYPE, char *);
192 int opendata(void);
193 Biobuf *open_file(char *);
194 Rune *place(Rune *, Rune *, Rune *);
195 void quit(char *, char *);
196 int rline(Rune *, Rune *);
197 Label *search(Label *);
198 int substitute(SedCom *);
199 char *text(char *);
200 Rune *stext(Rune *, Rune *);
201 int ycomp(SedCom *);
202 char * trans(int c);
203 void putline(Biobuf *bp, Rune *buf, int n);
205 void
206 main(int argc, char **argv)
208 int compfl;
210 lnum = 0;
211 Binit(&fout, 1, OWRITE);
212 fcode[nfiles++] = &fout;
213 compfl = 0;
215 if(argc == 1)
216 exits(0);
217 ARGBEGIN{
218 case 'n':
219 nflag++;
220 continue;
221 case 'f':
222 if(argc <= 1)
223 quit("no pattern-file", 0);
224 newfile(P_FILE, ARGF());
225 fcomp();
226 compfl = 1;
227 continue;
228 case 'e':
229 if (argc <= 1)
230 quit("missing pattern", 0);
231 newfile(P_ARG, ARGF());
232 fcomp();
233 compfl = 1;
234 continue;
235 case 'g':
236 gflag++;
237 continue;
238 case 'l':
239 lflag++;
240 continue;
241 default:
242 fprint(2, "sed: Unknown flag: %c\n", ARGC());
243 continue;
244 } ARGEND
246 if(compfl == 0) {
247 if (--argc < 0)
248 quit("missing pattern", 0);
249 newfile(P_ARG, *argv++);
250 fcomp();
253 if(depth)
254 quit("Too many {'s", 0);
256 ltab[0].address = rep;
258 dechain();
260 if(argc <= 0)
261 enroll(0); /* Add stdin to cache */
262 else while(--argc >= 0) {
263 enroll(*argv++);
265 execute();
266 exits(0);
268 void
269 fcomp(void)
271 Rune *tp;
272 SedCom *pt, *pt1;
273 int i;
274 Label *lpt;
276 static Rune *p = addspace;
277 static SedCom **cmpend[DEPTH]; /* stack of {} operations */
279 while (rline(linebuf, lbend) >= 0) {
280 cp = linebuf;
281 comploop:
282 while(*cp == ' ' || *cp == '\t')
283 cp++;
284 if(*cp == '\0' || *cp == '#')
285 continue;
286 if(*cp == ';') {
287 cp++;
288 goto comploop;
291 address(&rep->ad1);
292 if (rep->ad1.type != A_NONE) {
293 if (rep->ad1.type == A_LAST) {
294 if (!lastre)
295 quit("First RE may not be null", 0);
296 rep->ad1.type = A_RE;
297 rep->ad1.u.rp = lastre;
299 if(*cp == ',' || *cp == ';') {
300 cp++;
301 address(&rep->ad2);
302 if (rep->ad2.type == A_LAST) {
303 rep->ad1.type = A_RE;
304 rep->ad2.u.rp = lastre;
306 } else
307 rep->ad2.type = A_NONE;
309 while(*cp == ' ' || *cp == '\t')
310 cp++;
312 swit:
313 switch(*cp++) {
315 default:
316 quit("Unrecognized command: %S", (char *)linebuf);
318 case '!':
319 rep->negfl = 1;
320 goto swit;
322 case '{':
323 rep->command = BCOM;
324 rep->negfl = !(rep->negfl);
325 cmpend[depth++] = &rep->u.lb1;
326 if(++rep >= pend)
327 quit("Too many commands: %S", (char *) linebuf);
328 if(*cp == '\0') continue;
329 goto comploop;
331 case '}':
332 if(rep->ad1.type != A_NONE)
333 quit(AD0MES, (char *) linebuf);
334 if(--depth < 0)
335 quit("Too many }'s", 0);
336 *cmpend[depth] = rep;
337 if(*cp == 0) continue;
338 goto comploop;
340 case '=':
341 rep->command = EQCOM;
342 if(rep->ad2.type != A_NONE)
343 quit(AD1MES, (char *) linebuf);
344 break;
346 case ':':
347 if(rep->ad1.type != A_NONE)
348 quit(AD0MES, (char *) linebuf);
350 while(*cp == ' ')
351 cp++;
352 tp = lab->asc;
353 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
354 *tp++ = *cp++;
355 if(tp >= &(lab->asc[8]))
356 quit(LTL, (char *) linebuf);
358 *tp = '\0';
360 if(lpt = search(lab)) {
361 if(lpt->address)
362 quit("Duplicate labels: %S", (char *) linebuf);
363 } else {
364 lab->chain = 0;
365 lpt = lab;
366 if(++lab >= labend)
367 quit("Too many labels: %S", (char *) linebuf);
369 lpt->address = rep;
370 if (*cp == '#')
371 continue;
372 rep--; /* reuse this slot */
373 break;
375 case 'a':
376 rep->command = ACOM;
377 if(rep->ad2.type != A_NONE)
378 quit(AD1MES, (char *) linebuf);
379 if(*cp == '\\') cp++;
380 if(*cp++ != '\n')
381 quit(CGMES, (char *) linebuf);
382 rep->u.text = p;
383 p = stext(p, addend);
384 break;
385 case 'c':
386 rep->command = CCOM;
387 if(*cp == '\\') cp++;
388 if(*cp++ != '\n')
389 quit(CGMES, (char *) linebuf);
390 rep->u.text = p;
391 p = stext(p, addend);
392 break;
393 case 'i':
394 rep->command = ICOM;
395 if(rep->ad2.type != A_NONE)
396 quit(AD1MES, (char *) linebuf);
397 if(*cp == '\\') cp++;
398 if(*cp++ != '\n')
399 quit(CGMES, (char *) linebuf);
400 rep->u.text = p;
401 p = stext(p, addend);
402 break;
404 case 'g':
405 rep->command = GCOM;
406 break;
408 case 'G':
409 rep->command = CGCOM;
410 break;
412 case 'h':
413 rep->command = HCOM;
414 break;
416 case 'H':
417 rep->command = CHCOM;
418 break;
420 case 't':
421 rep->command = TCOM;
422 goto jtcommon;
424 case 'b':
425 rep->command = BCOM;
426 jtcommon:
427 while(*cp == ' ')cp++;
428 if(*cp == '\0') {
429 if(pt = ltab[0].chain) {
430 while(pt1 = pt->u.lb1)
431 pt = pt1;
432 pt->u.lb1 = rep;
433 } else
434 ltab[0].chain = rep;
435 break;
437 tp = lab->asc;
438 while((*tp++ = *cp++))
439 if(tp >= &(lab->asc[8]))
440 quit(LTL, (char *) linebuf);
441 cp--;
442 tp[-1] = '\0';
444 if(lpt = search(lab)) {
445 if(lpt->address) {
446 rep->u.lb1 = lpt->address;
447 } else {
448 pt = lpt->chain;
449 while(pt1 = pt->u.lb1)
450 pt = pt1;
451 pt->u.lb1 = rep;
453 } else {
454 lab->chain = rep;
455 lab->address = 0;
456 if(++lab >= labend)
457 quit("Too many labels: %S",
458 (char *) linebuf);
460 break;
462 case 'n':
463 rep->command = NCOM;
464 break;
466 case 'N':
467 rep->command = CNCOM;
468 break;
470 case 'p':
471 rep->command = PCOM;
472 break;
474 case 'P':
475 rep->command = CPCOM;
476 break;
478 case 'r':
479 rep->command = RCOM;
480 if(rep->ad2.type != A_NONE)
481 quit(AD1MES, (char *) linebuf);
482 if(*cp++ != ' ')
483 quit(CGMES, (char *) linebuf);
484 rep->u.text = p;
485 p = stext(p, addend);
486 break;
488 case 'd':
489 rep->command = DCOM;
490 break;
492 case 'D':
493 rep->command = CDCOM;
494 rep->u.lb1 = pspace;
495 break;
497 case 'q':
498 rep->command = QCOM;
499 if(rep->ad2.type != A_NONE)
500 quit(AD1MES, (char *) linebuf);
501 break;
503 case 'l':
504 rep->command = LCOM;
505 break;
507 case 's':
508 rep->command = SCOM;
509 seof = *cp++;
510 if ((rep->u.re1 = compile()) == 0) {
511 if(!lastre)
512 quit("First RE may not be null.", 0);
513 rep->u.re1 = lastre;
515 rep->rhs = p;
516 if((p = compsub(p, addend)) == 0)
517 quit(CGMES, (char *) linebuf);
518 if(*cp == 'g') {
519 cp++;
520 rep->gfl++;
521 } else if(gflag)
522 rep->gfl++;
524 if(*cp == 'p') {
525 cp++;
526 rep->pfl = 1;
529 if(*cp == 'P') {
530 cp++;
531 rep->pfl = 2;
534 if(*cp == 'w') {
535 cp++;
536 if(*cp++ != ' ')
537 quit(CGMES, (char *) linebuf);
538 text(fname[nfiles]);
539 for(i = nfiles - 1; i >= 0; i--)
540 if(cmp(fname[nfiles],fname[i]) == 0) {
541 rep->fcode = fcode[i];
542 goto done;
544 if(nfiles >= MAXFILES)
545 quit("Too many files in w commands 1", 0);
546 rep->fcode = open_file(fname[nfiles]);
548 break;
550 case 'w':
551 rep->command = WCOM;
552 if(*cp++ != ' ')
553 quit(CGMES, (char *) linebuf);
554 text(fname[nfiles]);
555 for(i = nfiles - 1; i >= 0; i--)
556 if(cmp(fname[nfiles], fname[i]) == 0) {
557 rep->fcode = fcode[i];
558 goto done;
560 if(nfiles >= MAXFILES){
561 fprint(2, "sed: Too many files in w commands 2 \n");
562 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
563 errexit();
565 rep->fcode = open_file(fname[nfiles]);
566 break;
568 case 'x':
569 rep->command = XCOM;
570 break;
572 case 'y':
573 rep->command = YCOM;
574 seof = *cp++;
575 if (ycomp(rep) == 0)
576 quit(CGMES, (char *) linebuf);
577 break;
580 done:
581 if(++rep >= pend)
582 quit("Too many commands, last: %S", (char *) linebuf);
584 if(*cp++ != '\0') {
585 if(cp[-1] == ';')
586 goto comploop;
587 quit(CGMES, (char *) linebuf);
593 Biobuf *
594 open_file(char *name)
596 Biobuf *bp;
597 int fd;
599 if ((bp = malloc(sizeof(Biobuf))) == 0)
600 quit("Out of memory", 0);
601 if ((fd = open(name, OWRITE)) < 0 &&
602 (fd = create(name, OWRITE, 0666)) < 0)
603 quit("Cannot create %s", name);
604 Binit(bp, fd, OWRITE);
605 Bseek(bp, 0, 2);
606 fcode[nfiles++] = bp;
607 return bp;
610 Rune *
611 compsub(Rune *rhs, Rune *end)
613 Rune r;
615 while ((r = *cp++) != '\0') {
616 if(r == '\\') {
617 if (rhs < end)
618 *rhs++ = Runemax;
619 else
620 return 0;
621 r = *cp++;
622 if(r == 'n')
623 r = '\n';
624 } else {
625 if(r == seof) {
626 if (rhs < end)
627 *rhs++ = '\0';
628 else
629 return 0;
630 return rhs;
633 if (rhs < end)
634 *rhs++ = r;
635 else
636 return 0;
639 return 0;
642 Reprog *
643 compile(void)
645 Rune c;
646 char *ep;
647 char expbuf[512];
649 if((c = *cp++) == seof) /* '//' */
650 return 0;
651 ep = expbuf;
652 do {
653 if (c == 0 || c == '\n')
654 quit(TMMES, (char *) linebuf);
655 if (c == '\\') {
656 if (ep >= expbuf+sizeof(expbuf))
657 quit(TMMES, (char *) linebuf);
658 ep += runetochar(ep, &c);
659 if ((c = *cp++) == 'n')
660 c = '\n';
662 if (ep >= expbuf+sizeof(expbuf))
663 quit(TMMES, (char *) linebuf);
664 ep += runetochar(ep, &c);
665 } while ((c = *cp++) != seof);
666 *ep = 0;
667 return lastre = regcomp(expbuf);
670 void
671 regerror(char *s)
673 USED(s);
674 quit(CGMES, (char *) linebuf);
677 void
678 newfile(enum PTYPE type, char *name)
680 if (type == P_ARG)
681 prog.pctl.curr = name;
682 else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
683 quit("Cannot open pattern-file: %s\n", name);
684 prog.type = type;
687 int
688 rline(Rune *buf, Rune *end)
690 long c;
691 Rune r;
693 while ((c = getrune()) >= 0) {
694 r = c;
695 if (r == '\\') {
696 if (buf <= end)
697 *buf++ = r;
698 if ((c = getrune()) < 0)
699 break;
700 r = c;
701 } else if (r == '\n') {
702 *buf = '\0';
703 return(1);
705 if (buf <= end)
706 *buf++ = r;
708 *buf = '\0';
709 return(-1);
712 long
713 getrune(void)
715 char *p;
716 long c;
717 Rune r;
719 if (prog.type == P_ARG) {
720 if ((p = prog.pctl.curr) != 0) {
721 if (*p) {
722 prog.pctl.curr += chartorune(&r, p);
723 c = r;
724 } else {
725 c = '\n'; /* fake an end-of-line */
726 prog.pctl.curr = 0;
728 } else
729 c = -1;
730 } else if ((c = Bgetrune(prog.pctl.bp)) < 0)
731 Bterm(prog.pctl.bp);
732 return c;
735 void
736 address(Addr *ap)
738 int c;
739 long lno;
741 if((c = *cp++) == '$')
742 ap->type = A_DOL;
743 else if(c == '/') {
744 seof = c;
745 if (ap->u.rp = compile())
746 ap->type = A_RE;
747 else
748 ap->type = A_LAST;
750 else if (c >= '0' && c <= '9') {
751 lno = c-'0';
752 while ((c = *cp) >= '0' && c <= '9')
753 lno = lno*10 + *cp++-'0';
754 if(!lno)
755 quit("line number 0 is illegal",0);
756 ap->type = A_LINE;
757 ap->u.line = lno;
759 else {
760 cp--;
761 ap->type = A_NONE;
/* Compare two NUL-terminated strings: 0 if identical, 1 otherwise. */
int
cmp(char *a, char *b)
{
	for(; *a == *b; a++, b++)
		if(*a == '\0')
			return 0;
	return 1;
}
775 int
776 rcmp(Rune *a, Rune *b) /* compare runes */
778 while(*a == *b++)
779 if (*a == '\0')
780 return(0);
781 else a++;
782 return(1);
785 char *
786 text(char *p) /* extract character string */
788 Rune r;
790 while(*cp == '\t' || *cp == ' ')
791 cp++;
792 while (*cp) {
793 if ((r = *cp++) == '\\')
794 if ((r = *cp++) == 0)
795 break;;
796 if (r == '\n')
797 while (*cp == '\t' || *cp == ' ')
798 cp++;
799 p += runetochar(p, &r);
801 *p++ = '\0';
802 return p;
805 Rune *
806 stext(Rune *p, Rune *end) /* extract rune string */
808 while(*cp == '\t' || *cp == ' ')
809 cp++;
810 while (*cp) {
811 if (*cp == '\\')
812 if (*++cp == 0)
813 break;
814 if (p >= end-1)
815 quit(TMMES, (char *) linebuf);
816 if ((*p++ = *cp++) == '\n')
817 while(*cp == '\t' || *cp == ' ')
818 cp++;
820 *p++ = 0;
821 return p;
825 Label *
826 search (Label *ptr)
828 Label *rp;
830 for (rp = ltab; rp < ptr; rp++)
831 if(rcmp(rp->asc, ptr->asc) == 0)
832 return(rp);
833 return(0);
836 void
837 dechain(void)
839 Label *lptr;
840 SedCom *rptr, *trptr;
842 for(lptr = ltab; lptr < lab; lptr++) {
844 if(lptr->address == 0)
845 quit("Undefined label: %S", (char *) lptr->asc);
847 if(lptr->chain) {
848 rptr = lptr->chain;
849 while(trptr = rptr->u.lb1) {
850 rptr->u.lb1 = lptr->address;
851 rptr = trptr;
853 rptr->u.lb1 = lptr->address;
858 int
859 ycomp(SedCom *r)
861 int i;
862 Rune *rp;
863 Rune c, *tsp, highc;
864 Rune *sp;
866 highc = 0;
867 for(tsp = cp; *tsp != seof; tsp++) {
868 if(*tsp == '\\')
869 tsp++;
870 if(*tsp == '\n' || *tsp == '\0')
871 return(0);
872 if (*tsp > highc) highc = *tsp;
874 tsp++;
875 if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
876 quit("Out of memory", 0);
877 *rp++ = highc; /* save upper bound */
878 for (i = 0; i <= highc; i++)
879 rp[i] = i;
880 sp = cp;
881 while((c = *sp++) != seof) {
882 if(c == '\\' && *sp == 'n') {
883 sp++;
884 c = '\n';
886 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
887 rp[c] = '\n';
888 tsp++;
890 if(rp[c] == seof || rp[c] == '\0') {
891 free(r->u.re1);
892 r->u.re1 = 0;
893 return(0);
896 if(*tsp != seof) {
897 free(r->u.re1);
898 r->u.re1 = 0;
899 return(0);
901 cp = tsp+1;
902 return(1);
905 void
906 execute(void)
908 SedCom *ipc;
910 while (spend = gline(linebuf)){
911 for(ipc = pspace; ipc->command; ) {
912 if (!executable(ipc)) {
913 ipc++;
914 continue;
916 command(ipc);
918 if(delflag)
919 break;
920 if(jflag) {
921 jflag = 0;
922 if((ipc = ipc->u.lb1) == 0)
923 break;
924 } else
925 ipc++;
928 if(!nflag && !delflag)
929 putline(&fout, linebuf, spend-linebuf);
930 if(aptr > abuf) {
931 arout();
933 delflag = 0;
936 /* determine if a statement should be applied to an input line */
937 int
938 executable(SedCom *ipc)
940 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
941 if (ipc->active == 1) /* Second line */
942 ipc->active = 2;
943 switch(ipc->ad2.type) {
944 case A_NONE: /* No second addr; use first */
945 ipc->active = 0;
946 break;
947 case A_DOL: /* Accept everything */
948 return !ipc->negfl;
949 case A_LINE: /* Line at end of range? */
950 if (lnum <= ipc->ad2.u.line) {
951 if (ipc->ad2.u.line == lnum)
952 ipc->active = 0;
953 return !ipc->negfl;
955 ipc->active = 0; /* out of range */
956 return ipc->negfl;
957 case A_RE: /* Check for matching R.E. */
958 if (match(ipc->ad2.u.rp, linebuf))
959 ipc->active = 0;
960 return !ipc->negfl;
961 default: /* internal error */
962 quit("Internal error", 0);
965 switch (ipc->ad1.type) { /* Check first address */
966 case A_NONE: /* Everything matches */
967 return !ipc->negfl;
968 case A_DOL: /* Only last line */
969 if (dolflag)
970 return !ipc->negfl;
971 break;
972 case A_LINE: /* Check line number */
973 if (ipc->ad1.u.line == lnum) {
974 ipc->active = 1; /* In range */
975 return !ipc->negfl;
977 break;
978 case A_RE: /* Check R.E. */
979 if (match(ipc->ad1.u.rp, linebuf)) {
980 ipc->active = 1; /* In range */
981 return !ipc->negfl;
983 break;
984 default:
985 quit("Internal error", 0);
987 return ipc->negfl;
990 int
991 match(Reprog *pattern, Rune *buf)
993 if (!pattern)
994 return 0;
995 subexp[0].s.rsp = buf;
996 subexp[0].e.rep = 0;
997 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
998 loc1 = subexp[0].s.rsp;
999 loc2 = subexp[0].e.rep;
1000 return 1;
1002 loc1 = loc2 = 0;
1003 return 0;
1006 int
1007 substitute(SedCom *ipc)
1009 int len;
1011 if(!match(ipc->u.re1, linebuf))
1012 return 0;
1015 * we have at least one match. some patterns, e.g. '$' or '^', can
1016 * produce zero-length matches, so during a global substitute we
1017 * must bump to the character after a zero-length match to keep from looping.
1019 sflag = 1;
1020 if(ipc->gfl == 0) /* single substitution */
1021 dosub(ipc->rhs);
1022 else
1023 do{ /* global substitution */
1024 len = loc2-loc1; /* length of match */
1025 dosub(ipc->rhs); /* dosub moves loc2 */
1026 if(*loc2 == 0) /* end of string */
1027 break;
1028 if(len == 0) /* zero-length R.E. match */
1029 loc2++; /* bump over zero-length match */
1030 if(*loc2 == 0) /* end of string */
1031 break;
1032 } while(match(ipc->u.re1, loc2));
1033 return 1;
1036 void
1037 dosub(Rune *rhsbuf)
1039 Rune *lp, *sp;
1040 Rune *rp;
1041 int c, n;
1043 lp = linebuf;
1044 sp = genbuf;
1045 rp = rhsbuf;
1046 while (lp < loc1)
1047 *sp++ = *lp++;
1048 while(c = *rp++) {
1049 if (c == '&') {
1050 sp = place(sp, loc1, loc2);
1051 continue;
1053 if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB+'0') {
1054 n = c-'0';
1055 if (subexp[n].s.rsp && subexp[n].e.rep) {
1056 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1057 continue;
1059 else {
1060 fprint(2, "sed: Invalid back reference \\%d\n",n);
1061 errexit();
1064 *sp++ = c;
1065 if (sp >= &genbuf[LBSIZE])
1066 fprint(2, "sed: Output line too long.\n");
1068 lp = loc2;
1069 loc2 = sp - genbuf + linebuf;
1070 while (*sp++ = *lp++)
1071 if (sp >= &genbuf[LBSIZE])
1072 fprint(2, "sed: Output line too long.\n");
1073 lp = linebuf;
1074 sp = genbuf;
1075 while (*lp++ = *sp++)
1077 spend = lp-1;
1080 Rune *
1081 place(Rune *sp, Rune *l1, Rune *l2)
1083 while (l1 < l2) {
1084 *sp++ = *l1++;
1085 if (sp >= &genbuf[LBSIZE])
1086 fprint(2, "sed: Output line too long.\n");
1088 return(sp);
/*
 * Return the printable form of rune c for the l command: a C-style
 * escape for backspace, newline, carriage return, tab and backslash,
 * otherwise \x followed by the four hex digits of the low 16 bits.
 * The hex form lives in a static buffer that the next call overwrites.
 */
char *
trans(int c)
{
	static char buf[] = "\\x0000";
	static char hex[] = "0123456789abcdef";
	int i;

	if(c == '\b')
		return "\\b";
	if(c == '\n')
		return "\\n";
	if(c == '\r')
		return "\\r";
	if(c == '\t')
		return "\\t";
	if(c == '\\')
		return "\\\\";

	for(i = 0; i < 4; i++)
		buf[2+i] = hex[(c >> (12 - 4*i)) & 0xF];
	return buf;
}
1116 void
1117 command(SedCom *ipc)
1119 int i, c;
1120 Rune *p1, *p2;
1121 char *ucp;
1122 Rune *rp;
1123 Rune *execp;
1125 switch(ipc->command) {
1127 case ACOM:
1128 *aptr++ = ipc;
1129 if(aptr >= abuf+MAXADDS) {
1130 quit("sed: Too many appends after line %ld\n",
1131 (char *) lnum);
1133 *aptr = 0;
1134 break;
1135 case CCOM:
1136 delflag = 1;
1137 if(ipc->active == 1) {
1138 for(rp = ipc->u.text; *rp; rp++)
1139 Bputrune(&fout, *rp);
1140 Bputc(&fout, '\n');
1142 break;
1143 case DCOM:
1144 delflag++;
1145 break;
1146 case CDCOM:
1147 p1 = p2 = linebuf;
1148 while(*p1 != '\n') {
1149 if(*p1++ == 0) {
1150 delflag++;
1151 return;
1154 p1++;
1155 while(*p2++ = *p1++)
1157 spend = p2-1;
1158 jflag++;
1159 break;
1160 case EQCOM:
1161 Bprint(&fout, "%ld\n", lnum);
1162 break;
1163 case GCOM:
1164 p1 = linebuf;
1165 p2 = holdsp;
1166 while(*p1++ = *p2++)
1168 spend = p1-1;
1169 break;
1170 case CGCOM:
1171 *spend++ = '\n';
1172 p1 = spend;
1173 p2 = holdsp;
1174 while(*p1++ = *p2++)
1175 if(p1 >= lbend)
1176 break;
1177 spend = p1-1;
1178 break;
1179 case HCOM:
1180 p1 = holdsp;
1181 p2 = linebuf;
1182 while(*p1++ = *p2++);
1183 hspend = p1-1;
1184 break;
1185 case CHCOM:
1186 *hspend++ = '\n';
1187 p1 = hspend;
1188 p2 = linebuf;
1189 while(*p1++ = *p2++)
1190 if(p1 >= hend)
1191 break;
1192 hspend = p1-1;
1193 break;
1194 case ICOM:
1195 for(rp = ipc->u.text; *rp; rp++)
1196 Bputrune(&fout, *rp);
1197 Bputc(&fout, '\n');
1198 break;
1199 case BCOM:
1200 jflag = 1;
1201 break;
1202 case LCOM:
1203 c = 0;
1204 for (i = 0, rp = linebuf; *rp; rp++) {
1205 c = *rp;
1206 if(c >= 0x20 && c < 0x7F && c != '\\') {
1207 Bputc(&fout, c);
1208 if(i++ > 71) {
1209 Bprint(&fout, "\\\n");
1210 i = 0;
1212 } else {
1213 for (ucp = trans(*rp); *ucp; ucp++){
1214 c = *ucp;
1215 Bputc(&fout, c);
1216 if(i++ > 71) {
1217 Bprint(&fout, "\\\n");
1218 i = 0;
1223 if(c == ' ')
1224 Bprint(&fout, "\\n");
1225 Bputc(&fout, '\n');
1226 break;
1227 case NCOM:
1228 if(!nflag)
1229 putline(&fout, linebuf, spend-linebuf);
1231 if(aptr > abuf)
1232 arout();
1233 if((execp = gline(linebuf)) == 0) {
1234 delflag = 1;
1235 break;
1237 spend = execp;
1238 break;
1239 case CNCOM:
1240 if(aptr > abuf)
1241 arout();
1242 *spend++ = '\n';
1243 if((execp = gline(spend)) == 0) {
1244 delflag = 1;
1245 break;
1247 spend = execp;
1248 break;
1249 case PCOM:
1250 putline(&fout, linebuf, spend-linebuf);
1251 break;
1252 case CPCOM:
1253 cpcom:
1254 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1255 Bputc(&fout, *rp);
1256 Bputc(&fout, '\n');
1257 break;
1258 case QCOM:
1259 if(!nflag)
1260 putline(&fout, linebuf, spend-linebuf);
1261 if(aptr > abuf)
1262 arout();
1263 exits(0);
1264 case RCOM:
1265 *aptr++ = ipc;
1266 if(aptr >= &abuf[MAXADDS])
1267 quit("sed: Too many reads after line %ld\n",
1268 (char *) lnum);
1269 *aptr = 0;
1270 break;
1271 case SCOM:
1272 i = substitute(ipc);
1273 if(i && ipc->pfl)
1274 if(ipc->pfl == 1)
1275 putline(&fout, linebuf, spend-linebuf);
1276 else
1277 goto cpcom;
1278 if(i && ipc->fcode)
1279 goto wcom;
1280 break;
1282 case TCOM:
1283 if(sflag == 0) break;
1284 sflag = 0;
1285 jflag = 1;
1286 break;
1288 wcom:
1289 case WCOM:
1290 putline(ipc->fcode,linebuf, spend-linebuf);
1291 break;
1292 case XCOM:
1293 p1 = linebuf;
1294 p2 = genbuf;
1295 while(*p2++ = *p1++);
1296 p1 = holdsp;
1297 p2 = linebuf;
1298 while(*p2++ = *p1++);
1299 spend = p2 - 1;
1300 p1 = genbuf;
1301 p2 = holdsp;
1302 while(*p2++ = *p1++);
1303 hspend = p2 - 1;
1304 break;
1305 case YCOM:
1306 p1 = linebuf;
1307 p2 = ipc->u.text;
1308 for (i = *p2++; *p1; p1++){
1309 if (*p1 <= i) *p1 = p2[*p1];
1311 break;
1316 void
1317 putline(Biobuf *bp, Rune *buf, int n)
1319 while (n--)
1320 Bputrune(bp, *buf++);
1321 Bputc(bp, '\n');
1322 if(lflag)
1323 Bflush(bp);
1326 int
1327 ecmp(Rune *a, Rune *b, int count)
1329 while(count--)
1330 if(*a++ != *b++) return(0);
1331 return(1);
1334 void
1335 arout(void)
1337 Rune *p1;
1338 Biobuf *fi;
1339 int c;
1340 char *s;
1341 char buf[128];
1343 for (aptr = abuf; *aptr; aptr++) {
1344 if((*aptr)->command == ACOM) {
1345 for(p1 = (*aptr)->u.text; *p1; p1++ )
1346 Bputrune(&fout, *p1);
1347 Bputc(&fout, '\n');
1348 } else {
1349 for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
1350 s += runetochar(s, p1);
1351 *s = '\0';
1352 if((fi = Bopen(buf, OREAD)) == 0)
1353 continue;
1354 while((c = Bgetc(fi)) >= 0)
1355 Bputc(&fout, c);
1356 Bterm(fi);
1359 aptr = abuf;
1360 *aptr = 0;
/* Terminate the program with the exit status "error". */
void
errexit(void)
{
	exits("error");
}
/*
 * Print "sed: ", then msg formatted with the single argument arg,
 * then a newline, all on standard error, and exit through errexit().
 */
void
quit(char *msg, char *arg)
{
	fprint(2, "sed: ");
	fprint(2, msg, arg);
	fprint(2, "\n");
	errexit();
}
1378 Rune *
1379 gline(Rune *addr)
1381 long c;
1382 Rune *p;
1384 static long peekc = 0;
1386 if (f == 0 && opendata() < 0)
1387 return 0;
1388 sflag = 0;
1389 lnum++;
1390 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1391 do {
1392 p = addr;
1393 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1394 if (c == '\n') {
1395 if ((peekc = Bgetrune(f)) < 0) {
1396 if (fhead == 0)
1397 dolflag = 1;
1399 *p = '\0';
1400 return p;
1402 if (c && p < lbend)
1403 *p++ = c;
1405 /* return partial final line, adding implicit newline */
1406 if(p != addr) {
1407 *p = '\0';
1408 peekc = -1;
1409 if (fhead == 0)
1410 dolflag = 1;
1411 return p;
1413 peekc = 0;
1414 Bterm(f);
1415 } while (opendata() > 0); /* Switch to next stream */
1416 f = 0;
1417 return 0;
1420 /* Data file input section - the intent is to transparently
1421 * catenate all data input streams.
1423 void
1424 enroll(char *filename) /* Add a file to the input file cache */
1426 FileCache *fp;
1428 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1429 quit("Out of memory", 0);
1430 if (ftail == 0)
1431 fhead = fp;
1432 else
1433 ftail->next = fp;
1434 ftail = fp;
1435 fp->next = 0;
1436 fp->name = filename; /* 0 => stdin */
1439 int
1440 opendata(void)
1442 if (fhead == 0)
1443 return -1;
1444 if (fhead->name) {
1445 if ((f = Bopen(fhead->name, OREAD)) == 0)
1446 quit("Can't open %s", fhead->name);
1447 } else {
1448 Binit(&bstdin, 0, OREAD);
1449 f = &bstdin;
1451 fhead = fhead->next;
1452 return 1;