1
/* Copyright William F. Schelter, University of Texas 1987 */
3
/* This file may be copied by anyone, but changes may not
4
be made without permission of the author. The author hopes it will be
5
useful but cannot assume any responsibility for its use or problems
6
caused by its use. The program is provided as is. */
8
/* The program takes two files file1 = orig and file2 = orig.V
10
tutorial% merge orig orig.V > foo
12
and copies orig according to the recipe in orig.V. The advantage of
13
this program is that it does this according to the context of orig.
14
Thus even though orig might change slightly (e.g. someone added an
15
extra line to the copyright notice), the same change file will
16
probably still be valid.
18
If the first argument is - then the orig is standard input.
19
If a third argument is supplied, it is the name of a file to use
20
instead of standard output.
23
tutorial% merge orig orig.V | merge - change2 final
25
would take the result of merge of orig and orig.V and use it to merge
26
with change2 to produce the file final.
29
The format of a change (.V) file is very simple: There is only ONE
30
type of command in a change file. REPLACE X by Y. Here X represents
31
a chunk of text in the orig file, and Y the substitution which you
32
wish to make for this occurrence. The Y appears explicitly in the
33
change file, while the text X may be specified fully and explicitly,
34
OR by giving sufficient context from the beginning and end of X. Thus
35
in general it takes three things to specify a change. The beginning
36
of X (Xbegin), the end of X (Xend), and all of Y. These three pieces
37
of text are separated by four delimiters. The delimiters are not
38
single characters, but rather sequences of four characters. This is
39
done so as to avoid having to quote the delimiter (see QUOTING below).
40
The delimiters are "\n@s[" "\n@s," "\n@s|" and "\n@s]".
41
NOTE: The \n (Newline) Character IS PART OF THE DELIMITER in ALL CASES.
47
Thus in the above case the X text is only "X" it does not have any
48
newlines in it! They belong to the delimiters. For "X\n" we would see
57
The general case where X is a very long chunk of text, or perhaps something
58
sensitive to copyright, so that you cannot include several pages, you
59
could make Xbegin be the first few lines, and Xend the last few lines.
60
All intervening lines (including the Xbegin and the Xend) would be ripped
61
out, and replaced by Y.
69
One cycle of the merge may be thought of as:
70
The merge program looks in the change (.V) file for the next \n@s[,
71
in order to determine the next values for Xbegin,Xend,and Y.
72
Having determined these, its position in the (.V) file will have
73
advanced to after the \n@s].
75
The merge program then starts at its current position in the original
76
file and searches for the next occurrence of Xbegin, marking its
77
beginning, then for the end of Xend. The inclusive interval so
78
marked, is deleted and Y is substituted. The current position in the
79
original file is now at the end of the Xend text. The next Xbegin
80
text must occur after that point. Only one pass is made through the
83
It is an error if the start of Xend does not follow
84
the end of Xbegin. Thus Xbegin and Xend may NOT overlap. A common
85
case will be that Xbegin is the entire interval and Xend = ""
86
In this case the merge program, if it finds \n@s| before \n@s,
87
will assume you want Xend="".
96
would delete the string "Hi bill" replacing it with "new body"
100
Equivalently since the E interval is empty, we could have just
108
Example of change file with two changes:
111
@s[(defmacro lcase (item &body body)
112
@s, (setq v (car rest))
113
@s|(defmacro lcase (items &body body)
117
Comments are allowed in change files. In fact anything not between
118
matching "\n@s[" and "\n@s]" is a comment.
130
The first change would replace the interval of the original file
131
"(defmacro.... (setq v (car rest))"
133
"(defmacro lcase (items &body body)
134
(setq v (cadr rest))"
136
If the program could not find the interval "(defmacro.... (setq v
137
(car rest))" in the original file it would warn you.
139
The intervals in the change file, must occur in the same order as in
140
the original file. There is an emacs program merge.el which can
141
mechanically produce a changes file from an original and an edited
144
Note: For convenience we pretend that the change file starts with
145
a new line, even if it does not. Thus if @s[ are the first three
146
characters of the file and CHSTART1 = \n@s[, we count this as a
147
CHSTART1. Since it is in the first column, it "appears" to have
153
In order to have a change which involves one of the four letter
154
delimiters given above, we use the convention that "\n@@" in the first
155
column translates to "\n@". You need not perform this quoting of @
156
unless the merger would be confused. For example \n@(defun .. would
157
be ok, since this can't be mistaken for one of our delimiters.
158
Nonetheless \n@@(defun or \n@@s[ would translate to have one @ sign,
159
in the merge output. The reason for not doubling all @ signs, is that
160
it is very easy to scan (visually) a change you are constructing, to
161
see that there are no @ signs in the first column, or at least none
162
which could be confused for the four letter change delimiters
163
"\n@s[","\n@s," ... A poor human constructing a change (.V) file
164
should not have to sort through the X or Y text adding quoting
167
Note on length: Y may be any length, but Xbegin or Xend, may only be
181
/****************** THE CODE ********************/
187
#define CONTEXT_LIMIT 3000 /* size of the longest delimiter or replacement */
191
char ssearch_for_string();
193
#define NULL_OUT (FILE *)0
195
#define CHSTART1 "\n@s["
196
#define CHSTART2 "\n@s,"
197
#define CHSTART3 "\n@s|"
198
#define CHSTART4 "\n@s]"
200
#define NOACCEPT (char *) 0
204
/* True when ch, narrowed to unsigned char, equals (unsigned char) EOF.
   An ordinary data byte with that same value also satisfies the test,
   which is why the callers in this file combine it with feof().
   The argument is parenthesized so that an expression argument is
   narrowed as a whole rather than only its first operand. */
#define eofch(ch) ((unsigned char)(ch) == (unsigned char) EOF)
208
/* Report a fatal error: print the printf-style format `string` with its
   single argument `arg` on stderr, then terminate the program with
   exit(-1).  The expansion is a brace-enclosed block, not an expression. */
#define myerror(string,arg) {(void)fprintf(stderr,string,arg); exit(-1);}
213
{FILE *orig,*changes,*out;
214
char *context,*endcontext;
215
char *origname,*altername,*outname;
217
context=malloc(CONTEXT_LIMIT+2);
218
endcontext=malloc(CONTEXT_LIMIT+2);
224
origname=filenames; altername=filenames+200; outname=filenames+400;
225
/* get names from stdin */
226
/* Each name lives in its own 200-byte slot of filenames, so every field
   is capped at 199 characters plus the terminator.  All three names must
   be read: a short read or EOF falls through to the error branch. */
if ((tem=scanf("%199s %199s %199s",origname,altername,outname)) == 3);
227
else myerror("Three args weren't supplied: scanf returned %d\n",tem);
229
else{ if (!((argc==3) || (argc==4)))
230
{ myerror("Usage: merge file-orig file-changes [out-file]\n %d args given",argc-1);}
234
if (argc >= 4) outname=argv[3];}};
237
/* now we have the names either from command or stdin, so open files */
239
/* a lone "-" as the original's name; compare against the character
   terminator, not the pointer constant NULL */
if(origname[0]=='-' && origname[1]=='\0')
242
orig=fopen(origname,"r");
243
if (!orig) {perror(origname); exit(-1);};
246
changes=fopen(altername,"r");
247
/* the file that failed to open here is the changes file, so name it */
if (!changes) {perror(altername); exit(-1);};
249
{out=fopen(outname,"w");
250
if (out); else {perror(outname); exit(-1);}}
253
/* check if the file starts with chstart1 - newline. to avoid
254
people thinking that starting file with @s[ is ok. */
255
{char *str = CHSTART1;
257
while(*(++str)) /* skip the newline start */
258
{ (ch=getc(changes));
261
{ ungetc(ch,changes); goto not_found;}
266
{while(search_for_string(changes,CHSTART1,NULL_OUT,FALSE) > 0)
269
ssearch_for_string(changes,CHSTART2,context,CONTEXT_LIMIT,TRUE,
272
{myerror("\nNo end for start change context in change file:\n`%s'\n",context);};
273
if (found==ACCEPT[1])
277
if /* there is probably a non null endcontext */
278
(ssearch_for_string(changes,CHSTART3,endcontext,
279
CONTEXT_LIMIT,TRUE,NOACCEPT));
281
{myerror("No %s at beginning of line to denote end of change context",
283
/* skip in orig down to the end of the context,copying thru begin context */
284
if (search_for_string(orig,context,out,FALSE)>0);
285
else{myerror("\nCould not find the change start in original:\n`%s'\n"
287
if /* copy out the changed version */
288
(search_for_string(changes,CHSTART4,out,TRUE)>0);
290
{myerror("No %s at beginning of line to denote end of change context",
293
/*finish skipping over the region to be deleted in orig */
294
{if( search_for_string(orig,endcontext,NULL_OUT,FALSE) > 0);
296
{myerror("\nCould not find the end of the change in original:\n`%s'\n",
304
string_match(sta,stb)
307
{if (*(sta++) != *(stb++)) return 0;}
308
if (*stb==0) return 1; else return 0;
318
if (eofch(ch) && feof(file)) break;
321
/* advance file to end of first occurrence of string, copying to out
322
until the beginning of string */
324
#define USE_UNQUOTE 1
326
search_for_string(file,string,out,unquoting)
331
result=search_for_string1(file,string,out,USE_UNQUOTE && unquoting);
335
char *nxt,*lim,*ungetlim,*bp;
336
char buffer[CONTEXT_LIMIT];
347
{char x=((bp==buffer)? getc(file) : *--bp);
352
/* Fetch the next character: when nothing is pending (bp == buffer) read
   from `file` with getc(); otherwise pop the most recently stored
   character off the top of buffer. */
#define mygetc(file) ((bp==buffer)? getc(file) : *--bp)
353
/* Store ch at *bp and advance bp, so that the next mygetc() returns it.
   No bounds check is made against the size of buffer.
   Fully parenthesized so the macro is safe inside a larger expression. */
#define myungetc(ch) (*bp++ = (ch))
355
search_for_string1(file,string,out,unquoting)
359
{ /* char *nxt,*lim; */
364
if (*string==NUL) return 1; unquoting;
368
if ((eofch(ch)) &&(feof(file))) return 0;
370
{ /* loop for checking */
374
if (eofch(ch) && feof(file))
375
{char *cp=string;while (cp++<=s)
376
{putc(*cp,file) ; return 0;}};
378
{ if (out) putc(*string,out);
380
if (!(unquoting && ch==string[1] && (s-string ==2)))
382
while (--cp > string)
387
/* printf("<found one>"); */
389
else if (out) putc(ch,out);};
394
#define PUTC(ch,out) {if(ind++ < outlim) ((*(out++))=(ch));\
398
ssearch_for_string1(file,string,out,outlim,unquoting,accept)
399
int outlim,unquoting;
402
char *string,*accept;
405
if (*string==NUL) return 'a';
409
if (feof(file)) return (char) 0;
413
{if ((*s==(ch=getc(file)))
414
|| (accept && *s==*accept && ch == *(accept+1)));
417
{char *cp; cp=string;
418
if (unquoting && ch==string[1] && (s-string ==2)) s--;
420
{PUTC(*cp,out);cp++;}
424
{PUTC(((char) 0),out);
428
else if (out) PUTC(ch,out);
433
ssearch_for_string(file,string,out1,outlim,unquoting,accept)
437
char *string,*accept;
440
result=ssearch_for_string1(file,string,out1,outlim,unquoting,accept);
446
* 1)The buffering for mygetc could be more efficient (in local variable).
447
* 2)Eliminate the double function calls used during debugging.
448
* 3)Improve error message, for help in finding context if a change