Blob


/*
 * sed -- stream editor
 */
6 #include <u.h>
7 #include <libc.h>
8 #include <bio.h>
9 #include <regexp.h>
/*
 * Fixed capacities used throughout the editor.
 */
enum {
	DEPTH    = 20,		/* deepest permitted nesting of {} groups */
	MAXCMDS  = 512,		/* slots in the compiled command array */
	ADDSIZE  = 10000,	/* runes of storage for command text and s right-hand sides */
	MAXADDS  = 20,		/* appends and reads that may be queued at once */
	LBSIZE   = 8192,	/* runes in the line, hold and scratch buffers */
	LABSIZE  = 50,		/* entries in the label table */
	MAXSUB   = 10,		/* sub-expression slots passed to the matcher */
	MAXFILES = 120,		/* output file slots */
};
21 /* An address is a line #, a R.E., "$", a reference to the last
22 * R.E., or nothing.
23 */
24 typedef struct {
25 enum {
26 A_NONE,
27 A_DOL,
28 A_LINE,
29 A_RE,
30 A_LAST,
31 }type;
32 union {
33 long line; /* Line # */
34 Reprog *rp; /* Compiled R.E. */
35 } u;
36 } Addr;
38 typedef struct SEDCOM {
39 Addr ad1; /* optional start address */
40 Addr ad2; /* optional end address */
41 union {
42 Reprog *re1; /* compiled R.E. */
43 Rune *text; /* added text or file name */
44 struct SEDCOM *lb1; /* destination command of branch */
45 } u;
46 Rune *rhs; /* Right-hand side of substitution */
47 Biobuf* fcode; /* File ID for read and write */
48 char command; /* command code -see below */
49 char gfl; /* 'Global' flag for substitutions */
50 char pfl; /* 'print' flag for substitutions */
51 char active; /* 1 => data between start and end */
52 char negfl; /* negation flag */
53 } SedCom;
/*
 * Values of SedCom.command, listed in numeric order.  The letter
 * noted beside a code is the command character that selects it.
 */
#define ACOM	01	/* a */
#define CCOM	02	/* c */
#define DCOM	03	/* d */
#define ICOM	04	/* i */
#define LCOM	05	/* l */
#define RCOM	06	/* r */
#define SCOM	07	/* s */
#define PCOM	010	/* p */
#define QCOM	011	/* q */
#define NCOM	012	/* n */
#define EQCOM	013	/* = */
#define WCOM	014	/* w */
#define ECOM	015
#define FCOM	016
#define COCOM	017
#define BCOM	020	/* b and { */
#define TCOM	021	/* t */
#define CNCOM	022	/* N */
#define CPCOM	023	/* P */
#define CWCOM	024
#define CDCOM	025	/* D */
#define YCOM	026	/* y */
#define GCOM	027	/* g */
#define CGCOM	030	/* G */
#define HCOM	031	/* h */
#define CHCOM	032	/* H */
#define XCOM	033	/* x */
85 typedef struct label { /* Label symbol table */
86 Rune asc[9]; /* Label name */
87 SedCom *chain;
88 SedCom *address; /* Command associated with label */
89 } Label;
/*
 * Node in the list of input files still to be read.
 */
typedef struct FILE_CACHE {
	struct FILE_CACHE *next;	/* following file, or 0 */
	char	*name;			/* path name; 0 means standard input */
} FileCache;
96 SedCom pspace[MAXCMDS]; /* Command storage */
97 SedCom *pend = pspace+MAXCMDS; /* End of command storage */
98 SedCom *rep = pspace; /* Current fill point */
100 Reprog *lastre = 0; /* Last regular expression */
101 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
104 Rune *addend = addspace+ADDSIZE;
106 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
107 SedCom **aptr = abuf;
109 struct { /* Sed program input control block */
110 enum PTYPE /* Either on command line or in file */
111 { P_ARG,
112 P_FILE
113 } type;
114 union PCTL { /* Pointer to data */
115 Biobuf *bp;
116 char *curr;
117 } pctl;
118 } prog;
120 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
122 FileCache *fhead = 0; /* Head of File Cache Chain */
123 FileCache *ftail = 0; /* Tail of File Cache Chain */
125 Rune *loc1; /* Start of pattern match */
126 Rune *loc2; /* End of pattern match */
127 Rune seof; /* Pattern delimiter char */
129 Rune linebuf[LBSIZE+1]; /* Input data buffer */
130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */
131 Rune *spend = linebuf; /* End of input data */
132 Rune *cp; /* Current scan point in linebuf */
134 Rune holdsp[LBSIZE+1]; /* Hold buffer */
135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
136 Rune *hspend = holdsp; /* End of hold data */
138 int nflag; /* Command line flags */
139 int gflag;
141 int dolflag; /* Set when at true EOF */
142 int sflag; /* Set when substitution done */
143 int jflag; /* Set when jump required */
144 int delflag; /* Delete current line when set */
146 long lnum = 0; /* Input line count */
148 char fname[MAXFILES][40]; /* File name cache */
149 Biobuf *fcode[MAXFILES]; /* File ID cache */
150 int nfiles = 0; /* Cache fill point */
152 Biobuf fout; /* Output stream */
153 Biobuf bstdin; /* Default input */
154 Biobuf* f = 0; /* Input data */
156 Label ltab[LABSIZE]; /* Label name symbol table */
157 Label *labend = ltab+LABSIZE; /* End of label table */
158 Label *lab = ltab+1; /* Current Fill point */
160 int depth = 0; /* {} stack pointer */
162 Rune bad; /* Dummy err ptr reference */
163 Rune *badp = &bad;
166 char CGMES[] = "Command garbled: %S";
167 char TMMES[] = "Too much text: %S";
168 char LTL[] = "Label too long: %S";
169 char AD0MES[] = "No addresses allowed: %S";
170 char AD1MES[] = "Only one address allowed: %S";
172 void address(Addr *);
173 void arout(void);
174 int cmp(char *, char *);
175 int rcmp(Rune *, Rune *);
176 void command(SedCom *);
177 Reprog *compile(void);
178 Rune *compsub(Rune *, Rune *);
179 void dechain(void);
180 void dosub(Rune *);
181 int ecmp(Rune *, Rune *, int);
182 void enroll(char *);
183 void errexit(void);
184 int executable(SedCom *);
185 void execute(void);
186 void fcomp(void);
187 long getrune(void);
188 Rune *gline(Rune *);
189 int match(Reprog *, Rune *);
190 void newfile(enum PTYPE, char *);
191 int opendata(void);
192 Biobuf *open_file(char *);
193 Rune *place(Rune *, Rune *, Rune *);
194 void quit(char *, char *);
195 int rline(Rune *, Rune *);
196 Label *search(Label *);
197 int substitute(SedCom *);
198 char *text(char *);
199 Rune *stext(Rune *, Rune *);
200 int ycomp(SedCom *);
201 char * trans(int c);
202 void putline(Biobuf *bp, Rune *buf, int n);
204 void
205 main(int argc, char **argv)
207 int compfl;
209 lnum = 0;
210 Binit(&fout, 1, OWRITE);
211 fcode[nfiles++] = &fout;
212 compfl = 0;
214 if(argc == 1)
215 exits(0);
216 ARGBEGIN{
217 case 'n':
218 nflag++;
219 continue;
220 case 'f':
221 if(argc <= 1)
222 quit("no pattern-file", 0);
223 newfile(P_FILE, ARGF());
224 fcomp();
225 compfl = 1;
226 continue;
227 case 'e':
228 if (argc <= 1)
229 quit("missing pattern", 0);
230 newfile(P_ARG, ARGF());
231 fcomp();
232 compfl = 1;
233 continue;
234 case 'g':
235 gflag++;
236 continue;
237 default:
238 fprint(2, "sed: Unknown flag: %c\n", ARGC());
239 continue;
240 } ARGEND
242 if(compfl == 0) {
243 if (--argc < 0)
244 quit("missing pattern", 0);
245 newfile(P_ARG, *argv++);
246 fcomp();
249 if(depth)
250 quit("Too many {'s", 0);
252 ltab[0].address = rep;
254 dechain();
256 if(argc <= 0)
257 enroll(0); /* Add stdin to cache */
258 else while(--argc >= 0) {
259 enroll(*argv++);
261 execute();
262 exits(0);
264 void
265 fcomp(void)
267 Rune *tp;
268 SedCom *pt, *pt1;
269 int i;
270 Label *lpt;
272 static Rune *p = addspace;
273 static SedCom **cmpend[DEPTH]; /* stack of {} operations */
275 while (rline(linebuf, lbend) >= 0) {
276 cp = linebuf;
277 comploop:
278 while(*cp == ' ' || *cp == '\t')
279 cp++;
280 if(*cp == '\0' || *cp == '#')
281 continue;
282 if(*cp == ';') {
283 cp++;
284 goto comploop;
287 address(&rep->ad1);
288 if (rep->ad1.type != A_NONE) {
289 if (rep->ad1.type == A_LAST) {
290 if (!lastre)
291 quit("First RE may not be null", 0);
292 rep->ad1.type = A_RE;
293 rep->ad1.u.rp = lastre;
295 if(*cp == ',' || *cp == ';') {
296 cp++;
297 address(&rep->ad2);
298 if (rep->ad2.type == A_LAST) {
299 rep->ad1.type = A_RE;
300 rep->ad2.u.rp = lastre;
302 } else
303 rep->ad2.type = A_NONE;
305 while(*cp == ' ' || *cp == '\t')
306 cp++;
308 swit:
309 switch(*cp++) {
311 default:
312 quit("Unrecognized command: %S", (char *)linebuf);
314 case '!':
315 rep->negfl = 1;
316 goto swit;
318 case '{':
319 rep->command = BCOM;
320 rep->negfl = !(rep->negfl);
321 cmpend[depth++] = &rep->u.lb1;
322 if(++rep >= pend)
323 quit("Too many commands: %S", (char *) linebuf);
324 if(*cp == '\0') continue;
325 goto comploop;
327 case '}':
328 if(rep->ad1.type != A_NONE)
329 quit(AD0MES, (char *) linebuf);
330 if(--depth < 0)
331 quit("Too many }'s", 0);
332 *cmpend[depth] = rep;
333 if(*cp == 0) continue;
334 goto comploop;
336 case '=':
337 rep->command = EQCOM;
338 if(rep->ad2.type != A_NONE)
339 quit(AD1MES, (char *) linebuf);
340 break;
342 case ':':
343 if(rep->ad1.type != A_NONE)
344 quit(AD0MES, (char *) linebuf);
346 while(*cp == ' ')
347 cp++;
348 tp = lab->asc;
349 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
350 *tp++ = *cp++;
351 if(tp >= &(lab->asc[8]))
352 quit(LTL, (char *) linebuf);
354 *tp = '\0';
356 if(lpt = search(lab)) {
357 if(lpt->address)
358 quit("Duplicate labels: %S", (char *) linebuf);
359 } else {
360 lab->chain = 0;
361 lpt = lab;
362 if(++lab >= labend)
363 quit("Too many labels: %S", (char *) linebuf);
365 lpt->address = rep;
366 if (*cp == '#')
367 continue;
368 rep--; /* reuse this slot */
369 break;
371 case 'a':
372 rep->command = ACOM;
373 if(rep->ad2.type != A_NONE)
374 quit(AD1MES, (char *) linebuf);
375 if(*cp == '\\') cp++;
376 if(*cp++ != '\n')
377 quit(CGMES, (char *) linebuf);
378 rep->u.text = p;
379 p = stext(p, addend);
380 break;
381 case 'c':
382 rep->command = CCOM;
383 if(*cp == '\\') cp++;
384 if(*cp++ != '\n')
385 quit(CGMES, (char *) linebuf);
386 rep->u.text = p;
387 p = stext(p, addend);
388 break;
389 case 'i':
390 rep->command = ICOM;
391 if(rep->ad2.type != A_NONE)
392 quit(AD1MES, (char *) linebuf);
393 if(*cp == '\\') cp++;
394 if(*cp++ != '\n')
395 quit(CGMES, (char *) linebuf);
396 rep->u.text = p;
397 p = stext(p, addend);
398 break;
400 case 'g':
401 rep->command = GCOM;
402 break;
404 case 'G':
405 rep->command = CGCOM;
406 break;
408 case 'h':
409 rep->command = HCOM;
410 break;
412 case 'H':
413 rep->command = CHCOM;
414 break;
416 case 't':
417 rep->command = TCOM;
418 goto jtcommon;
420 case 'b':
421 rep->command = BCOM;
422 jtcommon:
423 while(*cp == ' ')cp++;
424 if(*cp == '\0') {
425 if(pt = ltab[0].chain) {
426 while(pt1 = pt->u.lb1)
427 pt = pt1;
428 pt->u.lb1 = rep;
429 } else
430 ltab[0].chain = rep;
431 break;
433 tp = lab->asc;
434 while((*tp++ = *cp++))
435 if(tp >= &(lab->asc[8]))
436 quit(LTL, (char *) linebuf);
437 cp--;
438 tp[-1] = '\0';
440 if(lpt = search(lab)) {
441 if(lpt->address) {
442 rep->u.lb1 = lpt->address;
443 } else {
444 pt = lpt->chain;
445 while(pt1 = pt->u.lb1)
446 pt = pt1;
447 pt->u.lb1 = rep;
449 } else {
450 lab->chain = rep;
451 lab->address = 0;
452 if(++lab >= labend)
453 quit("Too many labels: %S",
454 (char *) linebuf);
456 break;
458 case 'n':
459 rep->command = NCOM;
460 break;
462 case 'N':
463 rep->command = CNCOM;
464 break;
466 case 'p':
467 rep->command = PCOM;
468 break;
470 case 'P':
471 rep->command = CPCOM;
472 break;
474 case 'r':
475 rep->command = RCOM;
476 if(rep->ad2.type != A_NONE)
477 quit(AD1MES, (char *) linebuf);
478 if(*cp++ != ' ')
479 quit(CGMES, (char *) linebuf);
480 rep->u.text = p;
481 p = stext(p, addend);
482 break;
484 case 'd':
485 rep->command = DCOM;
486 break;
488 case 'D':
489 rep->command = CDCOM;
490 rep->u.lb1 = pspace;
491 break;
493 case 'q':
494 rep->command = QCOM;
495 if(rep->ad2.type != A_NONE)
496 quit(AD1MES, (char *) linebuf);
497 break;
499 case 'l':
500 rep->command = LCOM;
501 break;
503 case 's':
504 rep->command = SCOM;
505 seof = *cp++;
506 if ((rep->u.re1 = compile()) == 0) {
507 if(!lastre)
508 quit("First RE may not be null.", 0);
509 rep->u.re1 = lastre;
511 rep->rhs = p;
512 if((p = compsub(p, addend)) == 0)
513 quit(CGMES, (char *) linebuf);
514 if(*cp == 'g') {
515 cp++;
516 rep->gfl++;
517 } else if(gflag)
518 rep->gfl++;
520 if(*cp == 'p') {
521 cp++;
522 rep->pfl = 1;
525 if(*cp == 'P') {
526 cp++;
527 rep->pfl = 2;
530 if(*cp == 'w') {
531 cp++;
532 if(*cp++ != ' ')
533 quit(CGMES, (char *) linebuf);
534 text(fname[nfiles]);
535 for(i = nfiles - 1; i >= 0; i--)
536 if(cmp(fname[nfiles],fname[i]) == 0) {
537 rep->fcode = fcode[i];
538 goto done;
540 if(nfiles >= MAXFILES)
541 quit("Too many files in w commands 1", 0);
542 rep->fcode = open_file(fname[nfiles]);
544 break;
546 case 'w':
547 rep->command = WCOM;
548 if(*cp++ != ' ')
549 quit(CGMES, (char *) linebuf);
550 text(fname[nfiles]);
551 for(i = nfiles - 1; i >= 0; i--)
552 if(cmp(fname[nfiles], fname[i]) == 0) {
553 rep->fcode = fcode[i];
554 goto done;
556 if(nfiles >= MAXFILES){
557 fprint(2, "sed: Too many files in w commands 2 \n");
558 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
559 errexit();
561 rep->fcode = open_file(fname[nfiles]);
562 break;
564 case 'x':
565 rep->command = XCOM;
566 break;
568 case 'y':
569 rep->command = YCOM;
570 seof = *cp++;
571 if (ycomp(rep) == 0)
572 quit(CGMES, (char *) linebuf);
573 break;
576 done:
577 if(++rep >= pend)
578 quit("Too many commands, last: %S", (char *) linebuf);
580 if(*cp++ != '\0') {
581 if(cp[-1] == ';')
582 goto comploop;
583 quit(CGMES, (char *) linebuf);
589 Biobuf *
590 open_file(char *name)
592 Biobuf *bp;
593 int fd;
595 if ((bp = malloc(sizeof(Biobuf))) == 0)
596 quit("Out of memory", 0);
597 if ((fd = open(name, OWRITE)) < 0 &&
598 (fd = create(name, OWRITE, 0666)) < 0)
599 quit("Cannot create %s", name);
600 Binit(bp, fd, OWRITE);
601 Bseek(bp, 0, 2);
602 fcode[nfiles++] = bp;
603 return bp;
606 Rune *
607 compsub(Rune *rhs, Rune *end)
609 Rune r;
611 while ((r = *cp++) != '\0') {
612 if(r == '\\') {
613 if (rhs < end)
614 *rhs++ = 0xFFFF;
615 else
616 return 0;
617 r = *cp++;
618 if(r == 'n')
619 r = '\n';
620 } else {
621 if(r == seof) {
622 if (rhs < end)
623 *rhs++ = '\0';
624 else
625 return 0;
626 return rhs;
629 if (rhs < end)
630 *rhs++ = r;
631 else
632 return 0;
635 return 0;
638 Reprog *
639 compile(void)
641 Rune c;
642 char *ep;
643 char expbuf[512];
645 if((c = *cp++) == seof) /* '//' */
646 return 0;
647 ep = expbuf;
648 do {
649 if (c == 0 || c == '\n')
650 quit(TMMES, (char *) linebuf);
651 if (c == '\\') {
652 if (ep >= expbuf+sizeof(expbuf))
653 quit(TMMES, (char *) linebuf);
654 ep += runetochar(ep, &c);
655 if ((c = *cp++) == 'n')
656 c = '\n';
658 if (ep >= expbuf+sizeof(expbuf))
659 quit(TMMES, (char *) linebuf);
660 ep += runetochar(ep, &c);
661 } while ((c = *cp++) != seof);
662 *ep = 0;
663 return lastre = regcomp(expbuf);
666 void
667 regerror(char *s)
669 USED(s);
670 quit(CGMES, (char *) linebuf);
673 void
674 newfile(enum PTYPE type, char *name)
676 if (type == P_ARG)
677 prog.pctl.curr = name;
678 else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
679 quit("Cannot open pattern-file: %s\n", name);
680 prog.type = type;
683 int
684 rline(Rune *buf, Rune *end)
686 long c;
687 Rune r;
689 while ((c = getrune()) >= 0) {
690 r = c;
691 if (r == '\\') {
692 if (buf <= end)
693 *buf++ = r;
694 if ((c = getrune()) < 0)
695 break;
696 r = c;
697 } else if (r == '\n') {
698 *buf = '\0';
699 return(1);
701 if (buf <= end)
702 *buf++ = r;
704 *buf = '\0';
705 return(-1);
708 long
709 getrune(void)
711 char *p;
712 long c;
713 Rune r;
715 if (prog.type == P_ARG) {
716 if ((p = prog.pctl.curr) != 0) {
717 if (*p) {
718 prog.pctl.curr += chartorune(&r, p);
719 c = r;
720 } else {
721 c = '\n'; /* fake an end-of-line */
722 prog.pctl.curr = 0;
724 } else
725 c = -1;
726 } else if ((c = Bgetrune(prog.pctl.bp)) < 0)
727 Bterm(prog.pctl.bp);
728 return c;
731 void
732 address(Addr *ap)
734 int c;
735 long lno;
737 if((c = *cp++) == '$')
738 ap->type = A_DOL;
739 else if(c == '/') {
740 seof = c;
741 if (ap->u.rp = compile())
742 ap->type = A_RE;
743 else
744 ap->type = A_LAST;
746 else if (c >= '0' && c <= '9') {
747 lno = c-'0';
748 while ((c = *cp) >= '0' && c <= '9')
749 lno = lno*10 + *cp++-'0';
750 if(!lno)
751 quit("line number 0 is illegal",0);
752 ap->type = A_LINE;
753 ap->u.line = lno;
755 else {
756 cp--;
757 ap->type = A_NONE;
/*
 * Compare two NUL-terminated byte strings.
 * Returns 0 when they are equal and 1 otherwise.
 */
int
cmp(char *a, char *b)
{
	for (;; a++, b++) {
		if (*a != *b)
			return(1);
		if (*a == '\0')
			return(0);
	}
}
771 int
772 rcmp(Rune *a, Rune *b) /* compare runes */
774 while(*a == *b++)
775 if (*a == '\0')
776 return(0);
777 else a++;
778 return(1);
781 char *
782 text(char *p) /* extract character string */
784 Rune r;
786 while(*cp == '\t' || *cp == ' ')
787 cp++;
788 while (*cp) {
789 if ((r = *cp++) == '\\')
790 if ((r = *cp++) == 0)
791 break;;
792 if (r == '\n')
793 while (*cp == '\t' || *cp == ' ')
794 cp++;
795 p += runetochar(p, &r);
797 *p++ = '\0';
798 return p;
801 Rune *
802 stext(Rune *p, Rune *end) /* extract rune string */
804 while(*cp == '\t' || *cp == ' ')
805 cp++;
806 while (*cp) {
807 if (*cp == '\\')
808 if (*++cp == 0)
809 break;
810 if (p >= end-1)
811 quit(TMMES, (char *) linebuf);
812 if ((*p++ = *cp++) == '\n')
813 while(*cp == '\t' || *cp == ' ')
814 cp++;
816 *p++ = 0;
817 return p;
821 Label *
822 search (Label *ptr)
824 Label *rp;
826 for (rp = ltab; rp < ptr; rp++)
827 if(rcmp(rp->asc, ptr->asc) == 0)
828 return(rp);
829 return(0);
832 void
833 dechain(void)
835 Label *lptr;
836 SedCom *rptr, *trptr;
838 for(lptr = ltab; lptr < lab; lptr++) {
840 if(lptr->address == 0)
841 quit("Undefined label: %S", (char *) lptr->asc);
843 if(lptr->chain) {
844 rptr = lptr->chain;
845 while(trptr = rptr->u.lb1) {
846 rptr->u.lb1 = lptr->address;
847 rptr = trptr;
849 rptr->u.lb1 = lptr->address;
854 int
855 ycomp(SedCom *r)
857 int i;
858 Rune *rp;
859 Rune c, *tsp, highc;
860 Rune *sp;
862 highc = 0;
863 for(tsp = cp; *tsp != seof; tsp++) {
864 if(*tsp == '\\')
865 tsp++;
866 if(*tsp == '\n' || *tsp == '\0')
867 return(0);
868 if (*tsp > highc) highc = *tsp;
870 tsp++;
871 if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
872 quit("Out of memory", 0);
873 *rp++ = highc; /* save upper bound */
874 for (i = 0; i <= highc; i++)
875 rp[i] = i;
876 sp = cp;
877 while((c = *sp++) != seof) {
878 if(c == '\\' && *sp == 'n') {
879 sp++;
880 c = '\n';
882 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
883 rp[c] = '\n';
884 tsp++;
886 if(rp[c] == seof || rp[c] == '\0') {
887 free(r->u.re1);
888 r->u.re1 = 0;
889 return(0);
892 if(*tsp != seof) {
893 free(r->u.re1);
894 r->u.re1 = 0;
895 return(0);
897 cp = tsp+1;
898 return(1);
901 void
902 execute(void)
904 SedCom *ipc;
906 while (spend = gline(linebuf)){
907 for(ipc = pspace; ipc->command; ) {
908 if (!executable(ipc)) {
909 ipc++;
910 continue;
912 command(ipc);
914 if(delflag)
915 break;
916 if(jflag) {
917 jflag = 0;
918 if((ipc = ipc->u.lb1) == 0)
919 break;
920 } else
921 ipc++;
924 if(!nflag && !delflag)
925 putline(&fout, linebuf, spend-linebuf);
926 if(aptr > abuf) {
927 arout();
929 delflag = 0;
932 /* determine if a statement should be applied to an input line */
933 int
934 executable(SedCom *ipc)
936 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
937 if (ipc->active == 1) /* Second line */
938 ipc->active = 2;
939 switch(ipc->ad2.type) {
940 case A_NONE: /* No second addr; use first */
941 ipc->active = 0;
942 break;
943 case A_DOL: /* Accept everything */
944 return !ipc->negfl;
945 case A_LINE: /* Line at end of range? */
946 if (lnum <= ipc->ad2.u.line) {
947 if (ipc->ad2.u.line == lnum)
948 ipc->active = 0;
949 return !ipc->negfl;
951 ipc->active = 0; /* out of range */
952 return ipc->negfl;
953 case A_RE: /* Check for matching R.E. */
954 if (match(ipc->ad2.u.rp, linebuf))
955 ipc->active = 0;
956 return !ipc->negfl;
957 default: /* internal error */
958 quit("Internal error", 0);
961 switch (ipc->ad1.type) { /* Check first address */
962 case A_NONE: /* Everything matches */
963 return !ipc->negfl;
964 case A_DOL: /* Only last line */
965 if (dolflag)
966 return !ipc->negfl;
967 break;
968 case A_LINE: /* Check line number */
969 if (ipc->ad1.u.line == lnum) {
970 ipc->active = 1; /* In range */
971 return !ipc->negfl;
973 break;
974 case A_RE: /* Check R.E. */
975 if (match(ipc->ad1.u.rp, linebuf)) {
976 ipc->active = 1; /* In range */
977 return !ipc->negfl;
979 break;
980 default:
981 quit("Internal error", 0);
983 return ipc->negfl;
986 int
987 match(Reprog *pattern, Rune *buf)
989 if (!pattern)
990 return 0;
991 subexp[0].s.rsp = buf;
992 subexp[0].e.rep = 0;
993 if (rregexec(pattern, linebuf, subexp, MAXSUB)) {
994 loc1 = subexp[0].s.rsp;
995 loc2 = subexp[0].e.rep;
996 return 1;
998 loc1 = loc2 = 0;
999 return 0;
1002 int
1003 substitute(SedCom *ipc)
1005 int len;
1007 if(!match(ipc->u.re1, linebuf))
1008 return 0;
1011 * we have at least one match. some patterns, e.g. '$' or '^', can
1012 * produce zero-length matches, so during a global substitute we
1013 * must bump to the character after a zero-length match to keep from looping.
1015 sflag = 1;
1016 if(ipc->gfl == 0) /* single substitution */
1017 dosub(ipc->rhs);
1018 else
1019 do{ /* global substitution */
1020 len = loc2-loc1; /* length of match */
1021 dosub(ipc->rhs); /* dosub moves loc2 */
1022 if(*loc2 == 0) /* end of string */
1023 break;
1024 if(len == 0) /* zero-length R.E. match */
1025 loc2++; /* bump over zero-length match */
1026 if(*loc2 == 0) /* end of string */
1027 break;
1028 } while(match(ipc->u.re1, loc2));
1029 return 1;
1032 void
1033 dosub(Rune *rhsbuf)
1035 Rune *lp, *sp;
1036 Rune *rp;
1037 int c, n;
1039 lp = linebuf;
1040 sp = genbuf;
1041 rp = rhsbuf;
1042 while (lp < loc1)
1043 *sp++ = *lp++;
1044 while(c = *rp++) {
1045 if (c == '&') {
1046 sp = place(sp, loc1, loc2);
1047 continue;
1049 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') {
1050 n = c-'0';
1051 if (subexp[n].s.rsp && subexp[n].e.rep) {
1052 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1053 continue;
1055 else {
1056 fprint(2, "sed: Invalid back reference \\%d\n",n);
1057 errexit();
1060 *sp++ = c;
1061 if (sp >= &genbuf[LBSIZE])
1062 fprint(2, "sed: Output line too long.\n");
1064 lp = loc2;
1065 loc2 = sp - genbuf + linebuf;
1066 while (*sp++ = *lp++)
1067 if (sp >= &genbuf[LBSIZE])
1068 fprint(2, "sed: Output line too long.\n");
1069 lp = linebuf;
1070 sp = genbuf;
1071 while (*lp++ = *sp++)
1073 spend = lp-1;
1076 Rune *
1077 place(Rune *sp, Rune *l1, Rune *l2)
1079 while (l1 < l2) {
1080 *sp++ = *l1++;
1081 if (sp >= &genbuf[LBSIZE])
1082 fprint(2, "sed: Output line too long.\n");
1084 return(sp);
/*
 * Return the printable escape used for rune c: a two-character escape
 * for backspace, newline, carriage return, tab and backslash, and
 * \x followed by four hexadecimal digits for anything else.
 *
 * The hexadecimal form is built in a static buffer that the next call
 * overwrites.
 */
char *
trans(int c)
{
	static char buf[] = "\\x0000";
	static char hex[] = "0123456789abcdef";
	int i;

	if (c == '\b')
		return "\\b";
	if (c == '\n')
		return "\\n";
	if (c == '\r')
		return "\\r";
	if (c == '\t')
		return "\\t";
	if (c == '\\')
		return "\\\\";

	/* fill in the four digits, least significant last */
	for (i = 0; i < 4; i++)
		buf[5 - i] = hex[(c >> (4*i)) & 0xF];
	return buf;
}
1112 void
1113 command(SedCom *ipc)
1115 int i, c;
1116 Rune *p1, *p2;
1117 char *ucp;
1118 Rune *rp;
1119 Rune *execp;
1121 switch(ipc->command) {
1123 case ACOM:
1124 *aptr++ = ipc;
1125 if(aptr >= abuf+MAXADDS) {
1126 quit("sed: Too many appends after line %ld\n",
1127 (char *) lnum);
1129 *aptr = 0;
1130 break;
1131 case CCOM:
1132 delflag = 1;
1133 if(ipc->active == 1) {
1134 for(rp = ipc->u.text; *rp; rp++)
1135 Bputrune(&fout, *rp);
1136 Bputc(&fout, '\n');
1138 break;
1139 case DCOM:
1140 delflag++;
1141 break;
1142 case CDCOM:
1143 p1 = p2 = linebuf;
1144 while(*p1 != '\n') {
1145 if(*p1++ == 0) {
1146 delflag++;
1147 return;
1150 p1++;
1151 while(*p2++ = *p1++)
1153 spend = p2-1;
1154 jflag++;
1155 break;
1156 case EQCOM:
1157 Bprint(&fout, "%ld\n", lnum);
1158 break;
1159 case GCOM:
1160 p1 = linebuf;
1161 p2 = holdsp;
1162 while(*p1++ = *p2++)
1164 spend = p1-1;
1165 break;
1166 case CGCOM:
1167 *spend++ = '\n';
1168 p1 = spend;
1169 p2 = holdsp;
1170 while(*p1++ = *p2++)
1171 if(p1 >= lbend)
1172 break;
1173 spend = p1-1;
1174 break;
1175 case HCOM:
1176 p1 = holdsp;
1177 p2 = linebuf;
1178 while(*p1++ = *p2++);
1179 hspend = p1-1;
1180 break;
1181 case CHCOM:
1182 *hspend++ = '\n';
1183 p1 = hspend;
1184 p2 = linebuf;
1185 while(*p1++ = *p2++)
1186 if(p1 >= hend)
1187 break;
1188 hspend = p1-1;
1189 break;
1190 case ICOM:
1191 for(rp = ipc->u.text; *rp; rp++)
1192 Bputrune(&fout, *rp);
1193 Bputc(&fout, '\n');
1194 break;
1195 case BCOM:
1196 jflag = 1;
1197 break;
1198 case LCOM:
1199 c = 0;
1200 for (i = 0, rp = linebuf; *rp; rp++) {
1201 c = *rp;
1202 if(c >= 0x20 && c < 0x7F && c != '\\') {
1203 Bputc(&fout, c);
1204 if(i++ > 71) {
1205 Bprint(&fout, "\\\n");
1206 i = 0;
1208 } else {
1209 for (ucp = trans(*rp); *ucp; ucp++){
1210 c = *ucp;
1211 Bputc(&fout, c);
1212 if(i++ > 71) {
1213 Bprint(&fout, "\\\n");
1214 i = 0;
1219 if(c == ' ')
1220 Bprint(&fout, "\\n");
1221 Bputc(&fout, '\n');
1222 break;
1223 case NCOM:
1224 if(!nflag)
1225 putline(&fout, linebuf, spend-linebuf);
1227 if(aptr > abuf)
1228 arout();
1229 if((execp = gline(linebuf)) == 0) {
1230 delflag = 1;
1231 break;
1233 spend = execp;
1234 break;
1235 case CNCOM:
1236 if(aptr > abuf)
1237 arout();
1238 *spend++ = '\n';
1239 if((execp = gline(spend)) == 0) {
1240 delflag = 1;
1241 break;
1243 spend = execp;
1244 break;
1245 case PCOM:
1246 putline(&fout, linebuf, spend-linebuf);
1247 break;
1248 case CPCOM:
1249 cpcom:
1250 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1251 Bputc(&fout, *rp);
1252 Bputc(&fout, '\n');
1253 break;
1254 case QCOM:
1255 if(!nflag)
1256 putline(&fout, linebuf, spend-linebuf);
1257 if(aptr > abuf)
1258 arout();
1259 exits(0);
1260 case RCOM:
1261 *aptr++ = ipc;
1262 if(aptr >= &abuf[MAXADDS])
1263 quit("sed: Too many reads after line %ld\n",
1264 (char *) lnum);
1265 *aptr = 0;
1266 break;
1267 case SCOM:
1268 i = substitute(ipc);
1269 if(i && ipc->pfl)
1270 if(ipc->pfl == 1)
1271 putline(&fout, linebuf, spend-linebuf);
1272 else
1273 goto cpcom;
1274 if(i && ipc->fcode)
1275 goto wcom;
1276 break;
1278 case TCOM:
1279 if(sflag == 0) break;
1280 sflag = 0;
1281 jflag = 1;
1282 break;
1284 wcom:
1285 case WCOM:
1286 putline(ipc->fcode,linebuf, spend-linebuf);
1287 break;
1288 case XCOM:
1289 p1 = linebuf;
1290 p2 = genbuf;
1291 while(*p2++ = *p1++);
1292 p1 = holdsp;
1293 p2 = linebuf;
1294 while(*p2++ = *p1++);
1295 spend = p2 - 1;
1296 p1 = genbuf;
1297 p2 = holdsp;
1298 while(*p2++ = *p1++);
1299 hspend = p2 - 1;
1300 break;
1301 case YCOM:
1302 p1 = linebuf;
1303 p2 = ipc->u.text;
1304 for (i = *p2++; *p1; p1++){
1305 if (*p1 <= i) *p1 = p2[*p1];
1307 break;
1312 void
1313 putline(Biobuf *bp, Rune *buf, int n)
1315 while (n--)
1316 Bputrune(bp, *buf++);
1317 Bputc(bp, '\n');
1320 int
1321 ecmp(Rune *a, Rune *b, int count)
1323 while(count--)
1324 if(*a++ != *b++) return(0);
1325 return(1);
1328 void
1329 arout(void)
1331 Rune *p1;
1332 Biobuf *fi;
1333 int c;
1334 char *s;
1335 char buf[128];
1337 for (aptr = abuf; *aptr; aptr++) {
1338 if((*aptr)->command == ACOM) {
1339 for(p1 = (*aptr)->u.text; *p1; p1++ )
1340 Bputrune(&fout, *p1);
1341 Bputc(&fout, '\n');
1342 } else {
1343 for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
1344 s += runetochar(s, p1);
1345 *s = '\0';
1346 if((fi = Bopen(buf, OREAD)) == 0)
1347 continue;
1348 while((c = Bgetc(fi)) >= 0)
1349 Bputc(&fout, c);
1350 Bterm(fi);
1353 aptr = abuf;
1354 *aptr = 0;
/*
 * Terminate the program with the exit status "error".
 */
void
errexit(void)
{
	exits("error");
}
/*
 * Print "sed: ", then msg formatted with the single argument arg, then
 * a newline, all on standard error, and terminate the program.
 */
void
quit (char *msg, char *arg)
{
	fprint(2, "sed: ");
	fprint(2, msg, arg);
	fprint(2, "\n");
	errexit();
}
1372 Rune *
1373 gline(Rune *addr)
1375 long c;
1376 Rune *p;
1378 static long peekc = 0;
1380 if (f == 0 && opendata() < 0)
1381 return 0;
1382 sflag = 0;
1383 lnum++;
1384 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1385 do {
1386 p = addr;
1387 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1388 if (c == '\n') {
1389 if ((peekc = Bgetrune(f)) < 0) {
1390 if (fhead == 0)
1391 dolflag = 1;
1393 *p = '\0';
1394 return p;
1396 if (c && p < lbend)
1397 *p++ = c;
1399 /* return partial final line, adding implicit newline */
1400 if(p != addr) {
1401 *p = '\0';
1402 peekc = -1;
1403 if (fhead == 0)
1404 dolflag = 1;
1405 return p;
1407 peekc = 0;
1408 Bterm(f);
1409 } while (opendata() > 0); /* Switch to next stream */
1410 f = 0;
1411 return 0;
1414 /* Data file input section - the intent is to transparently
1415 * catenate all data input streams.
1417 void
1418 enroll(char *filename) /* Add a file to the input file cache */
1420 FileCache *fp;
1422 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1423 quit("Out of memory", 0);
1424 if (ftail == 0)
1425 fhead = fp;
1426 else
1427 ftail->next = fp;
1428 ftail = fp;
1429 fp->next = 0;
1430 fp->name = filename; /* 0 => stdin */
1433 int
1434 opendata(void)
1436 if (fhead == 0)
1437 return -1;
1438 if (fhead->name) {
1439 if ((f = Bopen(fhead->name, OREAD)) == 0)
1440 quit("Can't open %s", fhead->name);
1441 } else {
1442 Binit(&bstdin, 0, OREAD);
1443 f = &bstdin;
1445 fhead = fhead->next;
1446 return 1;