12 DEPTH = 20, /* max nesting depth of {} */
13 MAXCMDS = 512, /* max sed commands */
14 ADDSIZE = 10000, /* size of add & read buffer, in Runes */
15 MAXADDS = 20, /* max pending adds and reads */
16 LBSIZE = 8192, /* input line size, in Runes */
17 LABSIZE = 50, /* max number of labels (entries in ltab[]); a label name itself is limited by Label.asc[] */
18 MAXSUB = 10, /* max number of sub reg exp */
19 MAXFILES = 120 /* max output files */
21 /* An address is a line #, a R.E., "$", a reference to the last
33 long line; /* Line # */
34 Reprog *rp; /* Compiled R.E. */
38 typedef struct SEDCOM {
39 Addr ad1; /* optional start address */
40 Addr ad2; /* optional end address */
42 Reprog *re1; /* compiled R.E. */
43 Rune *text; /* added text or file name */
44 struct SEDCOM *lb1; /* destination command of branch */
46 Rune *rhs; /* Right-hand side of substitution */
47 Biobuf* fcode; /* File ID for read and write */
48 char command; /* command code -see below */
49 char gfl; /* 'Global' flag for substitutions */
50 char pfl; /* 'print' flag for substitutions */
51 char active; /* 1 => data between start and end */
52 char negfl; /* negation flag */
55 /* Command Codes for field SedCom.command */
85 typedef struct label { /* Label symbol table */
86 Rune asc[9]; /* Label name */
88 SedCom *address; /* Command associated with label */
91 typedef struct FILE_CACHE { /* Data file control block */
92 struct FILE_CACHE *next; /* Forward Link */
93 char *name; /* Name of file */
96 SedCom pspace[MAXCMDS]; /* Command storage */
97 SedCom *pend = pspace+MAXCMDS; /* End of command storage */
98 SedCom *rep = pspace; /* Current fill point */
100 Reprog *lastre = 0; /* Last regular expression */
101 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
104 Rune *addend = addspace+ADDSIZE;
106 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
107 SedCom **aptr = abuf;
109 struct { /* Sed program input control block */
110 enum PTYPE /* Either on command line or in file */
114 union PCTL { /* Pointer to data */
120 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
122 FileCache *fhead = 0; /* Head of File Cache Chain */
123 FileCache *ftail = 0; /* Tail of File Cache Chain */
125 Rune *loc1; /* Start of pattern match */
126 Rune *loc2; /* End of pattern match */
127 Rune seof; /* Pattern delimiter char */
129 Rune linebuf[LBSIZE+1]; /* Input data buffer */
130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */
131 Rune *spend = linebuf; /* End of input data */
132 Rune *cp; /* Current scan point in linebuf */
134 Rune holdsp[LBSIZE+1]; /* Hold buffer */
135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
136 Rune *hspend = holdsp; /* End of hold data */
138 int nflag; /* Command line flags */
142 int dolflag; /* Set when at true EOF */
143 int sflag; /* Set when substitution done */
144 int jflag; /* Set when jump required */
145 int delflag; /* Delete current line when set */
147 long lnum = 0; /* Input line count */
149 char fname[MAXFILES][40]; /* File name cache */
150 Biobuf *fcode[MAXFILES]; /* File ID cache */
151 int nfiles = 0; /* Cache fill point */
153 Biobuf fout; /* Output stream */
154 Biobuf bstdin; /* Default input */
155 Biobuf* f = 0; /* Input data */
157 Label ltab[LABSIZE]; /* Label name symbol table */
158 Label *labend = ltab+LABSIZE; /* End of label table */
159 Label *lab = ltab+1; /* Current Fill point */
161 int depth = 0; /* {} stack pointer */
163 Rune bad; /* Dummy err ptr reference */
167 char CGMES[] = "Command garbled: %S";
168 char TMMES[] = "Too much text: %S";
169 char LTL[] = "Label too long: %S";
170 char AD0MES[] = "No addresses allowed: %S";
171 char AD1MES[] = "Only one address allowed: %S";
173 void address(Addr *);
175 int cmp(char *, char *);
176 int rcmp(Rune *, Rune *);
177 void command(SedCom *);
178 Reprog *compile(void);
179 Rune *compsub(Rune *, Rune *);
182 int ecmp(Rune *, Rune *, int);
185 int executable(SedCom *);
190 int match(Reprog *, Rune *);
191 void newfile(enum PTYPE, char *);
193 Biobuf *open_file(char *);
194 Rune *place(Rune *, Rune *, Rune *);
195 void quit(char *, char *);
196 int rline(Rune *, Rune *);
197 Label *search(Label *);
198 int substitute(SedCom *);
200 Rune *stext(Rune *, Rune *);
203 void putline(Biobuf *bp, Rune *buf, int n);
206 main(int argc, char **argv)
211 Binit(&fout, 1, OWRITE);
212 fcode[nfiles++] = &fout;
223 quit("no pattern-file", 0);
224 newfile(P_FILE, ARGF());
230 quit("missing pattern", 0);
231 newfile(P_ARG, ARGF());
242 fprint(2, "sed: Unknown flag: %c\n", ARGC());
248 quit("missing pattern", 0);
249 newfile(P_ARG, *argv++);
254 quit("Too many {'s", 0);
256 ltab[0].address = rep;
261 enroll(0); /* Add stdin to cache */
262 else while(--argc >= 0) {
276 static Rune *p = addspace;
277 static SedCom **cmpend[DEPTH]; /* stack of {} operations */
279 while (rline(linebuf, lbend) >= 0) {
282 while(*cp == ' ' || *cp == '\t')
284 if(*cp == '\0' || *cp == '#')
292 if (rep->ad1.type != A_NONE) {
293 if (rep->ad1.type == A_LAST) {
295 quit("First RE may not be null", 0);
296 rep->ad1.type = A_RE;
297 rep->ad1.u.rp = lastre;
299 if(*cp == ',' || *cp == ';') {
302 if (rep->ad2.type == A_LAST) { /* second address refers to the last R.E. */
303 rep->ad2.type = A_RE; /* resolve ad2 itself (was ad1), as done for ad1 above; executable() has no A_LAST case */
304 rep->ad2.u.rp = lastre;
307 rep->ad2.type = A_NONE;
309 while(*cp == ' ' || *cp == '\t')
316 quit("Unrecognized command: %S", (char *)linebuf);
324 rep->negfl = !(rep->negfl);
325 cmpend[depth++] = &rep->u.lb1;
327 quit("Too many commands: %S", (char *) linebuf);
328 if(*cp == '\0') continue;
332 if(rep->ad1.type != A_NONE)
333 quit(AD0MES, (char *) linebuf);
335 quit("Too many }'s", 0);
336 *cmpend[depth] = rep;
337 if(*cp == 0) continue;
341 rep->command = EQCOM;
342 if(rep->ad2.type != A_NONE)
343 quit(AD1MES, (char *) linebuf);
347 if(rep->ad1.type != A_NONE)
348 quit(AD0MES, (char *) linebuf);
353 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
355 if(tp >= &(lab->asc[8]))
356 quit(LTL, (char *) linebuf);
360 if(lpt = search(lab)) {
362 quit("Duplicate labels: %S", (char *) linebuf);
367 quit("Too many labels: %S", (char *) linebuf);
372 rep--; /* reuse this slot */
377 if(rep->ad2.type != A_NONE)
378 quit(AD1MES, (char *) linebuf);
379 if(*cp == '\\') cp++;
381 quit(CGMES, (char *) linebuf);
383 p = stext(p, addend);
387 if(*cp == '\\') cp++;
389 quit(CGMES, (char *) linebuf);
391 p = stext(p, addend);
395 if(rep->ad2.type != A_NONE)
396 quit(AD1MES, (char *) linebuf);
397 if(*cp == '\\') cp++;
399 quit(CGMES, (char *) linebuf);
401 p = stext(p, addend);
409 rep->command = CGCOM;
417 rep->command = CHCOM;
427 while(*cp == ' ')cp++;
429 if(pt = ltab[0].chain) {
430 while(pt1 = pt->u.lb1)
438 while((*tp++ = *cp++))
439 if(tp >= &(lab->asc[8]))
440 quit(LTL, (char *) linebuf);
444 if(lpt = search(lab)) {
446 rep->u.lb1 = lpt->address;
449 while(pt1 = pt->u.lb1)
457 quit("Too many labels: %S",
467 rep->command = CNCOM;
475 rep->command = CPCOM;
480 if(rep->ad2.type != A_NONE)
481 quit(AD1MES, (char *) linebuf);
483 quit(CGMES, (char *) linebuf);
485 p = stext(p, addend);
493 rep->command = CDCOM;
499 if(rep->ad2.type != A_NONE)
500 quit(AD1MES, (char *) linebuf);
510 if ((rep->u.re1 = compile()) == 0) {
512 quit("First RE may not be null.", 0);
516 if((p = compsub(p, addend)) == 0)
517 quit(CGMES, (char *) linebuf);
537 quit(CGMES, (char *) linebuf);
539 for(i = nfiles - 1; i >= 0; i--)
540 if(cmp(fname[nfiles],fname[i]) == 0) {
541 rep->fcode = fcode[i];
544 if(nfiles >= MAXFILES)
545 quit("Too many files in w commands 1", 0);
546 rep->fcode = open_file(fname[nfiles]);
553 quit(CGMES, (char *) linebuf);
555 for(i = nfiles - 1; i >= 0; i--)
556 if(cmp(fname[nfiles], fname[i]) == 0) {
557 rep->fcode = fcode[i];
560 if(nfiles >= MAXFILES){
561 fprint(2, "sed: Too many files in w commands 2 \n");
562 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
565 rep->fcode = open_file(fname[nfiles]);
576 quit(CGMES, (char *) linebuf);
582 quit("Too many commands, last: %S", (char *) linebuf);
587 quit(CGMES, (char *) linebuf);
594 open_file(char *name)
599 if ((bp = malloc(sizeof(Biobuf))) == 0)
600 quit("Out of memory", 0);
601 if ((fd = open(name, OWRITE)) < 0 &&
602 (fd = create(name, OWRITE, 0666)) < 0)
603 quit("Cannot create %s", name);
604 Binit(bp, fd, OWRITE);
606 fcode[nfiles++] = bp;
611 compsub(Rune *rhs, Rune *end)
615 while ((r = *cp++) != '\0') {
649 if((c = *cp++) == seof) /* '//' */
653 if (c == 0 || c == '\n')
654 quit(TMMES, (char *) linebuf);
656 if (ep >= expbuf+sizeof(expbuf))
657 quit(TMMES, (char *) linebuf);
658 ep += runetochar(ep, &c);
659 if ((c = *cp++) == 'n')
662 if (ep >= expbuf+sizeof(expbuf))
663 quit(TMMES, (char *) linebuf);
664 ep += runetochar(ep, &c);
665 } while ((c = *cp++) != seof);
667 return lastre = regcomp(expbuf);
674 quit(CGMES, (char *) linebuf);
678 newfile(enum PTYPE type, char *name)
681 prog.pctl.curr = name;
682 else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
683 quit("Cannot open pattern-file: %s\n", name);
688 rline(Rune *buf, Rune *end)
693 while ((c = getrune()) >= 0) {
698 if ((c = getrune()) < 0)
701 } else if (r == '\n') {
719 if (prog.type == P_ARG) {
720 if ((p = prog.pctl.curr) != 0) {
722 prog.pctl.curr += chartorune(&r, p);
725 c = '\n'; /* fake an end-of-line */
730 } else if ((c = Bgetrune(prog.pctl.bp)) < 0)
741 if((c = *cp++) == '$')
745 if (ap->u.rp = compile())
750 else if (c >= '0' && c <= '9') {
752 while ((c = *cp) >= '0' && c <= '9')
753 lno = lno*10 + *cp++-'0';
755 quit("line number 0 is illegal",0);
766 cmp(char *a, char *b) /* compare characters */
776 rcmp(Rune *a, Rune *b) /* compare runes */
786 text(char *p) /* extract character string */
790 while(*cp == '\t' || *cp == ' ')
793 if ((r = *cp++) == '\\')
794 if ((r = *cp++) == 0)
797 while (*cp == '\t' || *cp == ' ')
799 p += runetochar(p, &r);
806 stext(Rune *p, Rune *end) /* extract rune string */
808 while(*cp == '\t' || *cp == ' ')
815 quit(TMMES, (char *) linebuf);
816 if ((*p++ = *cp++) == '\n')
817 while(*cp == '\t' || *cp == ' ')
830 for (rp = ltab; rp < ptr; rp++)
831 if(rcmp(rp->asc, ptr->asc) == 0)
840 SedCom *rptr, *trptr;
842 for(lptr = ltab; lptr < lab; lptr++) {
844 if(lptr->address == 0)
845 quit("Undefined label: %S", (char *) lptr->asc);
849 while(trptr = rptr->u.lb1) {
850 rptr->u.lb1 = lptr->address;
853 rptr->u.lb1 = lptr->address;
867 for(tsp = cp; *tsp != seof; tsp++) {
870 if(*tsp == '\n' || *tsp == '\0')
872 if (*tsp > highc) highc = *tsp;
875 if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
876 quit("Out of memory", 0);
877 *rp++ = highc; /* save upper bound */
878 for (i = 0; i <= highc; i++)
881 while((c = *sp++) != seof) {
882 if(c == '\\' && *sp == 'n') {
886 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
890 if(rp[c] == seof || rp[c] == '\0') {
910 while (spend = gline(linebuf)){
911 for(ipc = pspace; ipc->command; ) {
912 if (!executable(ipc)) {
922 if((ipc = ipc->u.lb1) == 0)
928 if(!nflag && !delflag)
929 putline(&fout, linebuf, spend-linebuf);
936 /* determine if a statement should be applied to an input line */
938 executable(SedCom *ipc)
940 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
941 if (ipc->active == 1) /* Second line */
943 switch(ipc->ad2.type) {
944 case A_NONE: /* No second addr; use first */
947 case A_DOL: /* Accept everything */
949 case A_LINE: /* Line at end of range? */
950 if (lnum <= ipc->ad2.u.line) {
951 if (ipc->ad2.u.line == lnum)
955 ipc->active = 0; /* out of range */
957 case A_RE: /* Check for matching R.E. */
958 if (match(ipc->ad2.u.rp, linebuf))
961 default: /* internal error */
962 quit("Internal error", 0);
965 switch (ipc->ad1.type) { /* Check first address */
966 case A_NONE: /* Everything matches */
968 case A_DOL: /* Only last line */
972 case A_LINE: /* Check line number */
973 if (ipc->ad1.u.line == lnum) {
974 ipc->active = 1; /* In range */
978 case A_RE: /* Check R.E. */
979 if (match(ipc->ad1.u.rp, linebuf)) {
980 ipc->active = 1; /* In range */
985 quit("Internal error", 0);
991 match(Reprog *pattern, Rune *buf)
995 subexp[0].s.rsp = buf; /* buf is recorded as the start pointer of sub-match 0 before the call */
997 if (rregexec(pattern, linebuf, subexp, MAXSUB)) { /* NOTE(review): the string passed is always linebuf, not buf; presumably rregexec starts at subexp[0].s.rsp when it is set, so callers such as substitute() can resume at loc2 -- confirm against regexp(2) */
998 loc1 = subexp[0].s.rsp; /* start of the matched text */
999 loc2 = subexp[0].e.rep; /* end of the matched text */
1007 substitute(SedCom *ipc)
1011 if(!match(ipc->u.re1, linebuf))
1015 * we have at least one match. some patterns, e.g. '$' or '^', can
1016 * produce zero-length matches, so during a global substitute we
1017 * must bump to the character after a zero-length match to keep from looping.
1020 if(ipc->gfl == 0) /* single substitution */
1023 do{ /* global substitution */
1024 len = loc2-loc1; /* length of match */
1025 dosub(ipc->rhs); /* dosub moves loc2 */
1026 if(*loc2 == 0) /* end of string */
1028 if(len == 0) /* zero-length R.E. match */
1029 loc2++; /* bump over zero-length match */
1030 if(*loc2 == 0) /* end of string */
1032 } while(match(ipc->u.re1, loc2));
1050 sp = place(sp, loc1, loc2);
1053 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') {
1055 if (subexp[n].s.rsp && subexp[n].e.rep) {
1056 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1060 fprint(2, "sed: Invalid back reference \\%d\n",n);
1065 if (sp >= &genbuf[LBSIZE])
1066 fprint(2, "sed: Output line too long.\n");
1069 loc2 = sp - genbuf + linebuf;
1070 while (*sp++ = *lp++)
1071 if (sp >= &genbuf[LBSIZE])
1072 fprint(2, "sed: Output line too long.\n");
1075 while (*lp++ = *sp++)
1081 place(Rune *sp, Rune *l1, Rune *l2)
1085 if (sp >= &genbuf[LBSIZE])
1086 fprint(2, "sed: Output line too long.\n");
1094 static char buf[] = "\\x0000";
1095 static char hex[] = "0123456789abcdef";
1109 buf[2] = hex[(c>>12)&0xF];
1110 buf[3] = hex[(c>>8)&0xF];
1111 buf[4] = hex[(c>>4)&0xF];
1112 buf[5] = hex[c&0xF];
1117 command(SedCom *ipc)
1125 switch(ipc->command) {
1129 if(aptr >= abuf+MAXADDS) {
1130 quit("sed: Too many appends after line %ld\n",
1137 if(ipc->active == 1) {
1138 for(rp = ipc->u.text; *rp; rp++)
1139 Bputrune(&fout, *rp);
1148 while(*p1 != '\n') {
1155 while(*p2++ = *p1++)
1161 Bprint(&fout, "%ld\n", lnum);
1166 while(*p1++ = *p2++)
1174 while(*p1++ = *p2++)
1182 while(*p1++ = *p2++);
1189 while(*p1++ = *p2++)
1195 for(rp = ipc->u.text; *rp; rp++)
1196 Bputrune(&fout, *rp);
1204 for (i = 0, rp = linebuf; *rp; rp++) {
1206 if(c >= 0x20 && c < 0x7F && c != '\\') {
1209 Bprint(&fout, "\\\n");
1213 for (ucp = trans(*rp); *ucp; ucp++){
1217 Bprint(&fout, "\\\n");
1224 Bprint(&fout, "\\n");
1229 putline(&fout, linebuf, spend-linebuf);
1233 if((execp = gline(linebuf)) == 0) {
1243 if((execp = gline(spend)) == 0) {
1250 putline(&fout, linebuf, spend-linebuf);
1254 for(rp = linebuf; *rp && *rp != '\n'; rp++)
1260 putline(&fout, linebuf, spend-linebuf);
1266 if(aptr >= &abuf[MAXADDS])
1267 quit("sed: Too many reads after line %ld\n",
1272 i = substitute(ipc);
1275 putline(&fout, linebuf, spend-linebuf);
1283 if(sflag == 0) break;
1290 putline(ipc->fcode,linebuf, spend-linebuf);
1295 while(*p2++ = *p1++);
1298 while(*p2++ = *p1++);
1302 while(*p2++ = *p1++);
1308 for (i = *p2++; *p1; p1++){
1309 if (*p1 <= i) *p1 = p2[*p1];
1317 putline(Biobuf *bp, Rune *buf, int n)
1320 Bputrune(bp, *buf++);
1327 ecmp(Rune *a, Rune *b, int count)
1330 if(*a++ != *b++) return(0);
1343 for (aptr = abuf; *aptr; aptr++) {
1344 if((*aptr)->command == ACOM) {
1345 for(p1 = (*aptr)->u.text; *p1; p1++ )
1346 Bputrune(&fout, *p1);
1349 for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
1350 s += runetochar(s, p1);
1352 if((fi = Bopen(buf, OREAD)) == 0)
1354 while((c = Bgetc(fi)) >= 0)
1370 quit (char *msg, char *arg)
1373 fprint(2, msg, arg);
1384 static long peekc = 0;
1386 if (f == 0 && opendata() < 0)
1390 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1393 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1395 if ((peekc = Bgetrune(f)) < 0) {
1405 /* return partial final line, adding implicit newline */
1415 } while (opendata() > 0); /* Switch to next stream */
1420 /* Data file input section - the intent is to transparently
1421 * catenate all data input streams.
1424 enroll(char *filename) /* Add a file to the input file cache */
1428 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1429 quit("Out of memory", 0);
1436 fp->name = filename; /* 0 => stdin */
1445 if ((f = Bopen(fhead->name, OREAD)) == 0)
1446 quit("Can't open %s", fhead->name);
1448 Binit(&bstdin, 0, OREAD);
1451 fhead = fhead->next;