12
DEPTH = 20, /* max nesting depth of {} */
13
MAXCMDS = 512, /* max sed commands */
14
ADDSIZE = 10000, /* size (in Runes) of add & read buffer */
15
MAXADDS = 20, /* max pending adds and reads */
16
LBSIZE = 8192, /* input line size, in Runes */
17
LABSIZE = 50, /* max number of labels (entries in ltab); name length is bounded by Label.asc */
18
MAXSUB = 10, /* max number of sub reg exp */
19
MAXFILES = 120, /* max output files */
21
/* An address is a line #, a R.E., "$", a reference to the last
33
long line; /* Line # */
34
Reprog *rp; /* Compiled R.E. */
38
typedef struct SEDCOM {
39
Addr ad1; /* optional start address */
40
Addr ad2; /* optional end address */
42
Reprog *re1; /* compiled R.E. */
43
Rune *text; /* added text or file name */
44
struct SEDCOM *lb1; /* destination command of branch */
46
Rune *rhs; /* Right-hand side of substitution */
47
Biobuf* fcode; /* File ID for read and write */
48
char command; /* command code -see below */
49
char gfl; /* 'Global' flag for substitutions */
50
char pfl; /* 'print' flag for substitutions */
51
char active; /* 1 => data between start and end */
52
char negfl; /* negation flag */
55
/* Command Codes for field SedCom.command */
85
typedef struct label { /* Label symbol table */
86
Rune asc[9]; /* Label name */
88
SedCom *address; /* Command associated with label */
91
typedef struct FILE_CACHE { /* Data file control block */
92
struct FILE_CACHE *next; /* Forward Link */
93
char *name; /* Name of file */
96
SedCom pspace[MAXCMDS]; /* Command storage */
97
SedCom *pend = pspace+MAXCMDS; /* End of command storage */
98
SedCom *rep = pspace; /* Current fill point */
100
Reprog *lastre = 0; /* Last compiled regular expression; reused when an address R.E. is empty */
101
Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/
103
Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */
104
Rune *addend = addspace+ADDSIZE; /* One past the end of addspace */
106
SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */
107
SedCom **aptr = abuf; /* Cursor into abuf; starts at the first slot */
109
struct { /* Sed program input control block */
110
enum PTYPE /* Either on command line or in file */
114
union PCTL { /* Pointer to data */
120
Rune genbuf[LBSIZE]; /* Miscellaneous buffer */
122
FileCache *fhead = 0; /* Head of File Cache Chain */
123
FileCache *ftail = 0; /* Tail of File Cache Chain */
125
Rune *loc1; /* Start of pattern match */
126
Rune *loc2; /* End of pattern match */
127
Rune seof; /* Pattern delimiter char */
129
Rune linebuf[LBSIZE+1]; /* Input data buffer */
130
Rune *lbend = linebuf+LBSIZE; /* End of buffer */
131
Rune *spend = linebuf; /* End of input data */
132
Rune *cp; /* Current scan point in linebuf */
134
Rune holdsp[LBSIZE+1]; /* Hold buffer */
135
Rune *hend = holdsp+LBSIZE; /* End of hold buffer */
136
Rune *hspend = holdsp; /* End of hold data */
138
int nflag; /* Command line flags */
141
int dolflag; /* Set when at true EOF */
142
int sflag; /* Set when substitution done */
143
int jflag; /* Set when jump required */
144
int delflag; /* Delete current line when set */
146
long lnum = 0; /* Input line count */
148
char fname[MAXFILES][40]; /* File name cache */
149
Biobuf *fcode[MAXFILES]; /* File ID cache */
150
int nfiles = 0; /* Cache fill point */
152
Biobuf fout; /* Output stream */
153
Biobuf bstdin; /* Default input */
154
Biobuf* f = 0; /* Input data */
156
Label ltab[LABSIZE]; /* Label name symbol table */
157
Label *labend = ltab+LABSIZE; /* End of label table */
158
Label *lab = ltab+1; /* Current Fill point */
160
int depth = 0; /* {} stack pointer */
162
Rune bad; /* Dummy err ptr reference */
166
char CGMES[] = "Command garbled: %S";
167
char TMMES[] = "Too much text: %S";
168
char LTL[] = "Label too long: %S";
169
char AD0MES[] = "No addresses allowed: %S";
170
char AD1MES[] = "Only one address allowed: %S";
172
void address(Addr *);
174
int cmp(char *, char *);
175
int rcmp(Rune *, Rune *);
176
void command(SedCom *);
177
Reprog *compile(void);
178
Rune *compsub(Rune *, Rune *);
181
int ecmp(Rune *, Rune *, int);
184
int executable(SedCom *);
189
int match(Reprog *, Rune *);
190
void newfile(enum PTYPE, char *);
192
Biobuf *open_file(char *);
193
Rune *place(Rune *, Rune *, Rune *);
194
void quit(char *, char *);
195
int rline(Rune *, Rune *);
196
Label *search(Label *);
197
int substitute(SedCom *);
199
Rune *stext(Rune *, Rune *);
202
void putline(Biobuf *bp, Rune *buf, int n);
205
main(int argc, char **argv)
210
Binit(&fout, 1, OWRITE);
211
fcode[nfiles++] = &fout;
222
quit("no pattern-file", 0);
223
newfile(P_FILE, ARGF());
229
quit("missing pattern", 0);
230
newfile(P_ARG, ARGF());
238
fprint(2, "sed: Unknown flag: %c\n", ARGC());
244
quit("missing pattern", 0);
245
newfile(P_ARG, *argv++);
250
quit("Too many {'s", 0);
252
ltab[0].address = rep;
257
enroll(0); /* Add stdin to cache */
258
else while(--argc >= 0) {
272
static Rune *p = addspace;
273
static SedCom **cmpend[DEPTH]; /* stack of {} operations */
275
while (rline(linebuf, lbend) >= 0) {
278
while(*cp == ' ' || *cp == '\t')
280
if(*cp == '\0' || *cp == '#')
288
if (rep->ad1.type != A_NONE) {
289
if (rep->ad1.type == A_LAST) {
291
quit("First RE may not be null", 0);
292
rep->ad1.type = A_RE;
293
rep->ad1.u.rp = lastre;
295
if(*cp == ',' || *cp == ';') {
298
if (rep->ad2.type == A_LAST) {
299
rep->ad2.type = A_RE; /* empty second R.E.: resolve ad2 itself (not ad1) to the last compiled R.E. */
300
rep->ad2.u.rp = lastre;
303
rep->ad2.type = A_NONE;
305
while(*cp == ' ' || *cp == '\t')
312
quit("Unrecognized command: %S", (char *)linebuf);
320
rep->negfl = !(rep->negfl);
321
cmpend[depth++] = &rep->u.lb1;
323
quit("Too many commands: %S", (char *) linebuf);
324
if(*cp == '\0') continue;
328
if(rep->ad1.type != A_NONE)
329
quit(AD0MES, (char *) linebuf);
331
quit("Too many }'s", 0);
332
*cmpend[depth] = rep;
333
if(*cp == 0) continue;
337
rep->command = EQCOM;
338
if(rep->ad2.type != A_NONE)
339
quit(AD1MES, (char *) linebuf);
343
if(rep->ad1.type != A_NONE)
344
quit(AD0MES, (char *) linebuf);
349
while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
351
if(tp >= &(lab->asc[8]))
352
quit(LTL, (char *) linebuf);
356
if(lpt = search(lab)) {
358
quit("Duplicate labels: %S", (char *) linebuf);
363
quit("Too many labels: %S", (char *) linebuf);
368
rep--; /* reuse this slot */
373
if(rep->ad2.type != A_NONE)
374
quit(AD1MES, (char *) linebuf);
375
if(*cp == '\\') cp++;
377
quit(CGMES, (char *) linebuf);
379
p = stext(p, addend);
383
if(*cp == '\\') cp++;
385
quit(CGMES, (char *) linebuf);
387
p = stext(p, addend);
391
if(rep->ad2.type != A_NONE)
392
quit(AD1MES, (char *) linebuf);
393
if(*cp == '\\') cp++;
395
quit(CGMES, (char *) linebuf);
397
p = stext(p, addend);
405
rep->command = CGCOM;
413
rep->command = CHCOM;
423
while(*cp == ' ')cp++;
425
if(pt = ltab[0].chain) {
426
while(pt1 = pt->u.lb1)
434
while((*tp++ = *cp++))
435
if(tp >= &(lab->asc[8]))
436
quit(LTL, (char *) linebuf);
440
if(lpt = search(lab)) {
442
rep->u.lb1 = lpt->address;
445
while(pt1 = pt->u.lb1)
453
quit("Too many labels: %S",
463
rep->command = CNCOM;
471
rep->command = CPCOM;
476
if(rep->ad2.type != A_NONE)
477
quit(AD1MES, (char *) linebuf);
479
quit(CGMES, (char *) linebuf);
481
p = stext(p, addend);
489
rep->command = CDCOM;
495
if(rep->ad2.type != A_NONE)
496
quit(AD1MES, (char *) linebuf);
506
if ((rep->u.re1 = compile()) == 0) {
508
quit("First RE may not be null.", 0);
512
if((p = compsub(p, addend)) == 0)
513
quit(CGMES, (char *) linebuf);
533
quit(CGMES, (char *) linebuf);
535
for(i = nfiles - 1; i >= 0; i--)
536
if(cmp(fname[nfiles],fname[i]) == 0) {
537
rep->fcode = fcode[i];
540
if(nfiles >= MAXFILES)
541
quit("Too many files in w commands 1", 0);
542
rep->fcode = open_file(fname[nfiles]);
549
quit(CGMES, (char *) linebuf);
551
for(i = nfiles - 1; i >= 0; i--)
552
if(cmp(fname[nfiles], fname[i]) == 0) {
553
rep->fcode = fcode[i];
556
if(nfiles >= MAXFILES){
557
fprint(2, "sed: Too many files in w commands 2 \n");
558
fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
561
rep->fcode = open_file(fname[nfiles]);
572
quit(CGMES, (char *) linebuf);
578
quit("Too many commands, last: %S", (char *) linebuf);
583
quit(CGMES, (char *) linebuf);
590
open_file(char *name)
595
if ((bp = malloc(sizeof(Biobuf))) == 0)
596
quit("Out of memory", 0);
597
if ((fd = open(name, OWRITE)) < 0 &&
598
(fd = create(name, OWRITE, 0666)) < 0)
599
quit("Cannot create %s", name);
600
Binit(bp, fd, OWRITE);
602
fcode[nfiles++] = bp;
607
compsub(Rune *rhs, Rune *end)
611
while ((r = *cp++) != '\0') {
645
if((c = *cp++) == seof) /* '//' */
649
if (c == 0 || c == '\n')
650
quit(TMMES, (char *) linebuf);
652
if (ep >= expbuf+sizeof(expbuf))
653
quit(TMMES, (char *) linebuf);
654
ep += runetochar(ep, &c);
655
if ((c = *cp++) == 'n')
658
if (ep >= expbuf+sizeof(expbuf))
659
quit(TMMES, (char *) linebuf);
660
ep += runetochar(ep, &c);
661
} while ((c = *cp++) != seof);
663
return lastre = regcomp(expbuf);
670
quit(CGMES, (char *) linebuf);
674
newfile(enum PTYPE type, char *name)
677
prog.pctl.curr = name;
678
else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
679
quit("Cannot open pattern-file: %s\n", name);
684
rline(Rune *buf, Rune *end)
689
while ((c = getrune()) >= 0) {
694
if ((c = getrune()) < 0)
697
} else if (r == '\n') {
715
if (prog.type == P_ARG) {
716
if ((p = prog.pctl.curr) != 0) {
718
prog.pctl.curr += chartorune(&r, p);
721
c = '\n'; /* fake an end-of-line */
726
} else if ((c = Bgetrune(prog.pctl.bp)) < 0)
737
if((c = *cp++) == '$')
741
if (ap->u.rp = compile())
746
else if (c >= '0' && c <= '9') {
748
while ((c = *cp) >= '0' && c <= '9')
749
lno = lno*10 + *cp++-'0';
751
quit("line number 0 is illegal",0);
762
cmp(char *a, char *b) /* compare characters */
772
rcmp(Rune *a, Rune *b) /* compare runes */
782
text(char *p) /* extract character string */
786
while(*cp == '\t' || *cp == ' ')
789
if ((r = *cp++) == '\\')
790
if ((r = *cp++) == 0)
793
while (*cp == '\t' || *cp == ' ')
795
p += runetochar(p, &r);
802
stext(Rune *p, Rune *end) /* extract rune string */
804
while(*cp == '\t' || *cp == ' ')
811
quit(TMMES, (char *) linebuf);
812
if ((*p++ = *cp++) == '\n')
813
while(*cp == '\t' || *cp == ' ')
826
for (rp = ltab; rp < ptr; rp++)
827
if(rcmp(rp->asc, ptr->asc) == 0)
836
SedCom *rptr, *trptr;
838
for(lptr = ltab; lptr < lab; lptr++) {
840
if(lptr->address == 0)
841
quit("Undefined label: %S", (char *) lptr->asc);
845
while(trptr = rptr->u.lb1) {
846
rptr->u.lb1 = lptr->address;
849
rptr->u.lb1 = lptr->address;
863
for(tsp = cp; *tsp != seof; tsp++) {
866
if(*tsp == '\n' || *tsp == '\0')
868
if (*tsp > highc) highc = *tsp;
871
if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
872
quit("Out of memory", 0);
873
*rp++ = highc; /* save upper bound */
874
for (i = 0; i <= highc; i++)
877
while((c = *sp++) != seof) {
878
if(c == '\\' && *sp == 'n') {
882
if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
886
if(rp[c] == seof || rp[c] == '\0') {
906
while (spend = gline(linebuf)){
907
for(ipc = pspace; ipc->command; ) {
908
if (!executable(ipc)) {
918
if((ipc = ipc->u.lb1) == 0)
924
if(!nflag && !delflag)
925
putline(&fout, linebuf, spend-linebuf);
932
/* determine if a statement should be applied to an input line */
934
executable(SedCom *ipc)
936
if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */
937
if (ipc->active == 1) /* Second line */
939
switch(ipc->ad2.type) {
940
case A_NONE: /* No second addr; use first */
943
case A_DOL: /* Accept everything */
945
case A_LINE: /* Line at end of range? */
946
if (lnum <= ipc->ad2.u.line) {
947
if (ipc->ad2.u.line == lnum)
951
ipc->active = 0; /* out of range */
953
case A_RE: /* Check for matching R.E. */
954
if (match(ipc->ad2.u.rp, linebuf))
957
default: /* internal error */
958
quit("Internal error", 0);
961
switch (ipc->ad1.type) { /* Check first address */
962
case A_NONE: /* Everything matches */
964
case A_DOL: /* Only last line */
968
case A_LINE: /* Check line number */
969
if (ipc->ad1.u.line == lnum) {
970
ipc->active = 1; /* In range */
974
case A_RE: /* Check R.E. */
975
if (match(ipc->ad1.u.rp, linebuf)) {
976
ipc->active = 1; /* In range */
981
quit("Internal error", 0);
987
match(Reprog *pattern, Rune *buf)
991
subexp[0].s.rsp = buf;
993
if (rregexec(pattern, linebuf, subexp, MAXSUB)) {
994
loc1 = subexp[0].s.rsp;
995
loc2 = subexp[0].e.rep;
1003
substitute(SedCom *ipc)
1007
if(!match(ipc->u.re1, linebuf))
1011
* we have at least one match. some patterns, e.g. '$' or '^', can
1012
* produce zero-length matches, so during a global substitute we
1013
* must bump to the character after a zero-length match to keep from looping.
1016
if(ipc->gfl == 0) /* single substitution */
1019
do{ /* global substitution */
1020
len = loc2-loc1; /* length of match */
1021
dosub(ipc->rhs); /* dosub moves loc2 */
1022
if(*loc2 == 0) /* end of string */
1024
if(len == 0) /* zero-length R.E. match */
1025
loc2++; /* bump over zero-length match */
1026
if(*loc2 == 0) /* end of string */
1028
} while(match(ipc->u.re1, loc2));
1046
sp = place(sp, loc1, loc2);
1049
if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') {
1051
if (subexp[n].s.rsp && subexp[n].e.rep) {
1052
sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
1056
fprint(2, "sed: Invalid back reference \\%d\n",n);
1061
if (sp >= &genbuf[LBSIZE])
1062
fprint(2, "sed: Output line too long.\n");
1065
loc2 = sp - genbuf + linebuf;
1066
while (*sp++ = *lp++)
1067
if (sp >= &genbuf[LBSIZE])
1068
fprint(2, "sed: Output line too long.\n");
1071
while (*lp++ = *sp++)
1077
place(Rune *sp, Rune *l1, Rune *l2)
1081
if (sp >= &genbuf[LBSIZE])
1082
fprint(2, "sed: Output line too long.\n");
1090
static char buf[] = "\\x0000";
1091
static char hex[] = "0123456789abcdef";
1105
buf[2] = hex[(c>>12)&0xF];
1106
buf[3] = hex[(c>>8)&0xF];
1107
buf[4] = hex[(c>>4)&0xF];
1108
buf[5] = hex[c&0xF];
1113
command(SedCom *ipc)
1121
switch(ipc->command) {
1125
if(aptr >= abuf+MAXADDS) {
1126
quit("sed: Too many appends after line %ld\n",
1133
if(ipc->active == 1) {
1134
for(rp = ipc->u.text; *rp; rp++)
1135
Bputrune(&fout, *rp);
1144
while(*p1 != '\n') {
1151
while(*p2++ = *p1++)
1157
Bprint(&fout, "%ld\n", lnum);
1162
while(*p1++ = *p2++)
1170
while(*p1++ = *p2++)
1178
while(*p1++ = *p2++);
1185
while(*p1++ = *p2++)
1191
for(rp = ipc->u.text; *rp; rp++)
1192
Bputrune(&fout, *rp);
1200
for (i = 0, rp = linebuf; *rp; rp++) {
1202
if(c >= 0x20 && c < 0x7F && c != '\\') {
1205
Bprint(&fout, "\\\n");
1209
for (ucp = trans(*rp); *ucp; ucp++){
1213
Bprint(&fout, "\\\n");
1220
Bprint(&fout, "\\n");
1225
putline(&fout, linebuf, spend-linebuf);
1229
if((execp = gline(linebuf)) == 0) {
1239
if((execp = gline(spend)) == 0) {
1246
putline(&fout, linebuf, spend-linebuf);
1250
for(rp = linebuf; *rp && *rp != '\n'; rp++)
1256
putline(&fout, linebuf, spend-linebuf);
1262
if(aptr >= &abuf[MAXADDS])
1263
quit("sed: Too many reads after line %ld\n",
1268
i = substitute(ipc);
1271
putline(&fout, linebuf, spend-linebuf);
1279
if(sflag == 0) break;
1286
putline(ipc->fcode,linebuf, spend-linebuf);
1291
while(*p2++ = *p1++);
1294
while(*p2++ = *p1++);
1298
while(*p2++ = *p1++);
1304
for (i = *p2++; *p1; p1++){
1305
if (*p1 <= i) *p1 = p2[*p1];
1313
putline(Biobuf *bp, Rune *buf, int n)
1316
Bputrune(bp, *buf++);
1321
ecmp(Rune *a, Rune *b, int count)
1324
if(*a++ != *b++) return(0);
1337
for (aptr = abuf; *aptr; aptr++) {
1338
if((*aptr)->command == ACOM) {
1339
for(p1 = (*aptr)->u.text; *p1; p1++ )
1340
Bputrune(&fout, *p1);
1343
for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
1344
s += runetochar(s, p1);
1346
if((fi = Bopen(buf, OREAD)) == 0)
1348
while((c = Bgetc(fi)) >= 0)
1364
quit (char *msg, char *arg)
1367
fprint(2, msg, arg);
1378
static long peekc = 0;
1380
if (f == 0 && opendata() < 0)
1384
/* Bflush(&fout);********* dumped 4/30/92 - bobf****/
1387
for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
1389
if ((peekc = Bgetrune(f)) < 0) {
1399
/* return partial final line, adding implicit newline */
1409
} while (opendata() > 0); /* Switch to next stream */
1414
/* Data file input section - the intent is to transparently
1415
* catenate all data input streams.
1418
enroll(char *filename) /* Add a file to the input file cache */
1422
if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
1423
quit("Out of memory", 0);
1430
fp->name = filename; /* 0 => stdin */
1439
if ((f = Bopen(fhead->name, OREAD)) == 0)
1440
quit("Can't open %s", fhead->name);
1442
Binit(&bstdin, 0, OREAD);
1445
fhead = fhead->next;