1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
86 #include "got_error.h"
87 #include "got_object.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 #define diff_output printf
187 static FILE *opentemp(const char *);
188 static void output(char *, FILE *, char *, FILE *, int);
189 static void check(FILE *, FILE *, int);
190 static void range(int, int, char *);
191 static void uni_range(int, int);
192 static void dump_context_vec(FILE *, FILE *, int);
193 static void dump_unified_vec(FILE *, FILE *, int);
194 static void prepare(int, FILE *, off_t, int);
195 static void prune(void);
196 static void equiv(struct line *, int, struct line *, int, int *);
197 static void unravel(int);
198 static void unsort(struct line *, int, int *);
199 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
200 static void sort(struct line *, int);
201 static void print_header(const char *, const char *);
202 static int ignoreline(char *);
203 static int asciifile(FILE *);
204 static int fetch(long *, int, int, FILE *, int, int, int);
205 static int newcand(int, int, int);
206 static int search(int *, int, int);
207 static int skipline(FILE *);
208 static int isqrt(int);
209 static int stone(int *, int, int *, int *, int);
210 static int readhash(FILE *, int);
211 static int files_differ(FILE *, FILE *, int);
212 static char *match_function(const long *, int, FILE *);
213 static char *preadline(int, size_t, off_t);
216 int *J; /* will be overlaid on class */
217 int *class; /* will be overlaid on file[0] */
218 int *klist; /* will be overlaid on file[0] after class */
219 int *member; /* will be overlaid on file[1] */
221 int inifdef; /* whether or not we are in a #ifdef block */
223 int pref, suff; /* length of prefix and suffix */
226 long *ixnew; /* will be overlaid on file[1] */
227 long *ixold; /* will be overlaid on klist */
228 struct cand *clist; /* merely a free storage pot for candidates */
229 int clistlen; /* the length of clist */
230 struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
231 u_char *chrtran; /* translation table for case-folding */
232 struct context_vec *context_vec_start;
233 struct context_vec *context_vec_end;
234 struct context_vec *context_vec_ptr;
237 #define FUNCTION_CONTEXT_SIZE 55
238 static char lastbuf[FUNCTION_CONTEXT_SIZE];
240 static int lastmatchline;
244 int diff_format, diff_context, status;
245 char *ifdefname, *diffargs, *label[2], *ignore_pats;
246 struct stat stb1, stb2;
250 * chrtran points to one of 2 translation tables: cup2low if folding upper to
251 * lower case clow2low if not folding case
253 u_char clow2low[256] = {
254 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
255 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
256 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
257 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
258 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
259 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
260 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
261 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
262 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
263 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
264 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
265 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
266 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
267 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
268 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
269 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
270 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
271 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
272 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
273 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
274 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
275 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
276 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
280 u_char cup2low[256] = {
281 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
282 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
283 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
284 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
285 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
286 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
287 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
288 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
289 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
290 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
291 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
292 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
293 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
294 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
295 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
296 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
297 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
298 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
299 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
300 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
301 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
302 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
303 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
307 const struct got_error *
308 got_diffreg(int *rval, char *file1, char *file2, int flags)
310 const struct got_error *err = NULL;
314 diff_args.diff_format = D_UNIFIED;
316 if (strcmp(file1, "-") == 0)
317 fstat(STDIN_FILENO, &diff_args.stb1);
318 else if (stat(file1, &diff_args.stb1) != 0)
319 return got_error(GOT_ERR_BAD_PATH);
321 if (strcmp(file2, "-") == 0)
322 fstat(STDIN_FILENO, &diff_args.stb2);
323 else if (stat(file2, &diff_args.stb2) != 0)
324 return got_error(GOT_ERR_BAD_PATH);
331 ds.context_vec_ptr = ds.context_vec_start - 1;
332 if (flags & D_IGNORECASE)
333 ds.chrtran = cup2low;
335 ds.chrtran = clow2low;
336 if (S_ISDIR(diff_args.stb1.st_mode) != S_ISDIR(diff_args.stb2.st_mode)) {
337 *rval = (S_ISDIR(diff_args.stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
340 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
343 if (flags & D_EMPTY1)
344 f1 = fopen(_PATH_DEVNULL, "r");
346 if (!S_ISREG(diff_args.stb1.st_mode)) {
347 if ((f1 = opentemp(file1)) == NULL ||
348 fstat(fileno(f1), &diff_args.stb1) < 0) {
349 diff_args.status |= 2;
352 } else if (strcmp(file1, "-") == 0)
355 f1 = fopen(file1, "r");
358 diff_args.status |= 2;
362 if (flags & D_EMPTY2)
363 f2 = fopen(_PATH_DEVNULL, "r");
365 if (!S_ISREG(diff_args.stb2.st_mode)) {
366 if ((f2 = opentemp(file2)) == NULL ||
367 fstat(fileno(f2), &diff_args.stb2) < 0) {
368 diff_args.status |= 2;
371 } else if (strcmp(file2, "-") == 0)
374 f2 = fopen(file2, "r");
377 diff_args.status |= 2;
381 switch (files_differ(f1, f2, flags)) {
388 diff_args.status |= 2;
392 if ((flags & D_FORCEASCII) == 0 &&
393 (!asciifile(f1) || !asciifile(f2))) {
395 diff_args.status |= 1;
398 prepare(0, f1, diff_args.stb1.st_size, flags);
399 prepare(1, f2, diff_args.stb2.st_size, flags);
402 sort(ds.sfile[0], ds.slen[0]);
403 sort(ds.sfile[1], ds.slen[1]);
405 ds.member = (int *)file[1];
406 equiv(ds.sfile[0], ds.slen[0], ds.sfile[1], ds.slen[1], ds.member);
407 ds.member = reallocarray(ds.member, ds.slen[1] + 2, sizeof(*ds.member));
408 if (ds.member == NULL) {
409 err = got_error(GOT_ERR_NO_MEM);
413 ds.class = (int *)file[0];
414 unsort(ds.sfile[0], ds.slen[0], ds.class);
415 ds.class = reallocarray(ds.class, ds.slen[0] + 2, sizeof(*ds.class));
416 if (ds.class == NULL) {
417 err = got_error(GOT_ERR_NO_MEM);
421 ds.klist = calloc(ds.slen[0] + 2, sizeof(*ds.klist));
422 if (ds.klist == NULL) {
423 err = got_error(GOT_ERR_NO_MEM);
428 ds.clist = calloc(ds.clistlen, sizeof(*ds.clist));
429 if (ds.clist == NULL) {
430 err = got_error(GOT_ERR_NO_MEM);
433 i = stone(ds.class, ds.slen[0], ds.member, ds.klist, flags);
437 ds.J = reallocarray(ds.J, ds.len[0] + 2, sizeof(*ds.J));
439 err = got_error(GOT_ERR_NO_MEM);
442 unravel(ds.klist[i]);
448 ds.ixold = reallocarray(ds.ixold, ds.len[0] + 2, sizeof(*ds.ixold));
449 if (ds.ixold == NULL) {
450 err = got_error(GOT_ERR_NO_MEM);
453 ds.ixnew = reallocarray(ds.ixnew, ds.len[1] + 2, sizeof(*ds.ixnew));
454 if (ds.ixnew == NULL) {
455 err = got_error(GOT_ERR_NO_MEM);
458 check(f1, f2, flags);
459 output(file1, f1, file2, f2, flags);
462 diff_args.status |= 1;
475 * Check to see if the given files differ.
476 * Returns 0 if they are the same, 1 if different, and -1 on error.
477 * XXX - could use code from cmp(1) [faster]
480 files_differ(FILE *f1, FILE *f2, int flags)
482 char buf1[BUFSIZ], buf2[BUFSIZ];
485 if ((flags & (D_EMPTY1|D_EMPTY2)) || diff_args.stb1.st_size != diff_args.stb2.st_size ||
486 (diff_args.stb1.st_mode & S_IFMT) != (diff_args.stb2.st_mode & S_IFMT))
489 i = fread(buf1, 1, sizeof(buf1), f1);
490 j = fread(buf2, 1, sizeof(buf2), f2);
491 if ((!i && ferror(f1)) || (!j && ferror(f2)))
497 if (memcmp(buf1, buf2, i) != 0)
503 opentemp(const char *file)
505 char buf[BUFSIZ], tempfile[PATH_MAX];
509 if (strcmp(file, "-") == 0)
511 else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
514 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
516 if ((ofd = mkstemp(tempfile)) < 0) {
521 while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
522 if (write(ofd, buf, nread) != nread) {
529 lseek(ofd, (off_t)0, SEEK_SET);
530 return (fdopen(ofd, "r"));
534 splice(char *dir, char *file)
539 dirlen = strlen(dir);
540 while (dirlen != 0 && dir[dirlen - 1] == '/')
542 if ((tail = strrchr(file, '/')) == NULL)
546 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
551 prepare(int i, FILE *fd, off_t filesize, int flags)
559 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
563 p = xcalloc(sz + 3, sizeof(*p));
564 for (j = 0; (h = readhash(fd, flags));) {
567 p = xreallocarray(p, sz + 3, sizeof(*p));
580 for (ds.pref = 0; ds.pref < ds.len[0] && ds.pref < ds.len[1] &&
581 file[0][ds.pref + 1].value == file[1][ds.pref + 1].value;
584 for (ds.suff = 0; ds.suff < ds.len[0] - ds.pref && ds.suff < ds.len[1] - ds.pref &&
585 file[0][ds.len[0] - ds.suff].value == file[1][ds.len[1] - ds.suff].value;
588 for (j = 0; j < 2; j++) {
589 ds.sfile[j] = file[j] + ds.pref;
590 ds.slen[j] = ds.len[j] - ds.pref - ds.suff;
591 for (i = 0; i <= ds.slen[j]; i++)
592 ds.sfile[j][i].serial = i;
597 equiv(struct line *a, int n, struct line *b, int m, int *c)
602 while (i <= n && j <= m) {
603 if (a[i].value < b[j].value)
605 else if (a[i].value == b[j].value)
616 while (b[j + 1].value == b[j].value) {
624 /* Code taken from ping.c */
633 do { /* newton was a stinker */
638 } while ((x - y) > 1 || (x - y) < -1);
644 stone(int *a, int n, int *b, int *c, int flags)
647 int oldc, tc, oldl, sq;
648 u_int numtries, bound;
650 if (flags & D_MINIMAL)
654 bound = MAXIMUM(256, sq);
658 c[0] = newcand(0, 0, 0);
659 for (i = 1; i <= n; i++) {
668 if (y <= ds.clist[oldc].y)
674 if (ds.clist[c[l]].y <= y)
677 c[l] = newcand(i, y, oldc);
682 c[l] = newcand(i, y, oldc);
686 } while ((y = b[++j]) > 0 && numtries < bound);
692 newcand(int x, int y, int pred)
696 if (ds.clen == ds.clistlen) {
697 ds.clistlen = ds.clistlen * 11 / 10;
698 ds.clist = xreallocarray(ds.clist, ds.clistlen, sizeof(*ds.clist));
700 q = ds.clist + ds.clen;
708 search(int *c, int k, int y)
712 if (ds.clist[c[k]].y < y) /* quick look for typical case */
720 t = ds.clist[c[l]].y;
737 for (i = 0; i <= ds.len[0]; i++)
738 ds.J[i] = i <= ds.pref ? i :
739 i > ds.len[0] - ds.suff ? i + ds.len[1] - ds.len[0] : 0;
740 for (q = ds.clist + p; q->y != 0; q = ds.clist + q->pred)
741 ds.J[q->x + ds.pref] = q->y + ds.pref;
745 * Check does double duty:
746 * 1. ferret out any fortuitous correspondences due
747 * to confounding by hashing (which result in "jackpot")
748 * 2. collect random access indexes to the two files
751 check(FILE *f1, FILE *f2, int flags)
753 int i, j, jackpot, c, d;
759 ds.ixold[0] = ds.ixnew[0] = 0;
762 for (i = 1; i <= ds.len[0]; i++) {
764 ds.ixold[i] = ctold += skipline(f1);
767 while (j < ds.J[i]) {
768 ds.ixnew[j] = ctnew += skipline(f2);
771 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
776 * GNU diff ignores a missing newline
777 * in one file for -b or -w.
779 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
780 if (c == EOF && d == '\n') {
783 } else if (c == '\n' && d == EOF) {
790 if ((flags & D_FOLDBLANKS) && isspace(c) &&
796 } while (isspace(c = getc(f1)));
801 } while (isspace(d = getc(f2)));
802 } else if ((flags & D_IGNOREBLANKS)) {
803 while (isspace(c) && c != '\n') {
807 while (isspace(d) && d != '\n') {
812 if (ds.chrtran[c] != ds.chrtran[d]) {
815 if (c != '\n' && c != EOF)
816 ctold += skipline(f1);
817 if (d != '\n' && c != EOF)
818 ctnew += skipline(f2);
821 if (c == '\n' || c == EOF)
828 if ((c = getc(f1)) != (d = getc(f2))) {
831 if (c != '\n' && c != EOF)
832 ctold += skipline(f1);
833 if (d != '\n' && c != EOF)
834 ctnew += skipline(f2);
837 if (c == '\n' || c == EOF)
845 for (; j <= ds.len[1]; j++)
846 ds.ixnew[j] = ctnew += skipline(f2);
849 * fprintf(stderr, "jackpot\n");
853 /* shellsort CACM #201 */
855 sort(struct line *a, int n)
857 struct line *ai, *aim, w;
862 for (j = 1; j <= n; j *= 2)
864 for (m /= 2; m != 0; m /= 2) {
866 for (j = 1; j <= k; j++) {
867 for (ai = &a[j]; ai > a; ai -= m) {
870 break; /* wraparound */
871 if (aim->value > ai[0].value ||
872 (aim->value == ai[0].value &&
873 aim->serial > ai[0].serial))
875 w.value = ai[0].value;
876 ai[0].value = aim->value;
877 aim->value = w.value;
878 w.serial = ai[0].serial;
879 ai[0].serial = aim->serial;
880 aim->serial = w.serial;
887 unsort(struct line *f, int l, int *b)
891 a = xcalloc(l + 1, sizeof(*a));
892 for (i = 1; i <= l; i++)
893 a[f[i].serial] = f[i].value;
894 for (i = 1; i <= l; i++)
904 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
910 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
912 int m, i0, i1, j0, j1;
918 ds.J[m + 1] = ds.len[1] + 1;
919 if (diff_args.diff_format != D_EDIT) {
920 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
921 while (i0 <= m && ds.J[i0] == ds.J[i0 - 1] + 1)
923 j0 = ds.J[i0 - 1] + 1;
925 while (i1 < m && ds.J[i1 + 1] == 0)
927 j1 = ds.J[i1 + 1] - 1;
929 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
932 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
933 while (i0 >= 1 && ds.J[i0] == ds.J[i0 + 1] - 1 && ds.J[i0] != 0)
935 j0 = ds.J[i0 + 1] - 1;
937 while (i1 > 1 && ds.J[i1 - 1] == 0)
939 j1 = ds.J[i1 - 1] + 1;
941 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
945 change(file1, f1, file2, f2, 1, 0, 1, ds.len[1], &flags);
946 if (diff_args.diff_format == D_IFDEF) {
949 if ((c = getc(f1)) == EOF)
951 diff_output("%c", c);
955 if (ds.anychange != 0) {
956 if (diff_args.diff_format == D_CONTEXT)
957 dump_context_vec(f1, f2, flags);
958 else if (diff_args.diff_format == D_UNIFIED)
959 dump_unified_vec(f1, f2, flags);
964 range(int a, int b, char *separator)
966 diff_output("%d", a > b ? b : a);
968 diff_output("%s%d", separator, b);
972 uni_range(int a, int b)
975 diff_output("%d,%d", a, b - a + 1);
977 diff_output("%d", b);
979 diff_output("%d,0", b);
983 preadline(int fd, size_t rlen, off_t off)
988 line = xmalloc(rlen + 1);
989 if ((nr = pread(fd, line, rlen, off)) < 0)
991 if (nr > 0 && line[nr-1] == '\n')
998 ignoreline(char *line)
1000 return 0; /* do not ignore any lines */
1004 * Indicate that there is a difference between lines a and b of the from file
1005 * to get to lines c to d of the to file. If a is greater then b then there
1006 * are no lines in the from file involved and this means that there were
1007 * lines appended (beginning at b). If c is greater than d then there are
1008 * lines missing from the to file.
1011 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1014 static size_t max_context = 64;
1018 if (diff_args.diff_format != D_IFDEF && a > b && c > d)
1020 if (diff_args.ignore_pats != NULL) {
1023 * All lines in the change, insert, or delete must
1024 * match an ignore pattern for the change to be
1027 if (a <= b) { /* Changes and deletes. */
1028 for (i = a; i <= b; i++) {
1029 line = preadline(fileno(f1),
1030 ds.ixold[i] - ds.ixold[i - 1], ds.ixold[i - 1]);
1031 if (!ignoreline(line))
1035 if (a > b || c <= d) { /* Changes and inserts. */
1036 for (i = c; i <= d; i++) {
1037 line = preadline(fileno(f2),
1038 ds.ixnew[i] - ds.ixnew[i - 1], ds.ixnew[i - 1]);
1039 if (!ignoreline(line))
1046 if (*pflags & D_HEADER) {
1047 diff_output("%s %s %s\n", diff_args.diffargs, file1, file2);
1048 *pflags &= ~D_HEADER;
1050 if (diff_args.diff_format == D_CONTEXT || diff_args.diff_format == D_UNIFIED) {
1052 * Allocate change records as needed.
1054 if (ds.context_vec_ptr == ds.context_vec_end - 1) {
1055 ptrdiff_t offset = ds.context_vec_ptr - ds.context_vec_start;
1057 ds.context_vec_start = xreallocarray(ds.context_vec_start,
1058 max_context, sizeof(*ds.context_vec_start));
1059 ds.context_vec_end = ds.context_vec_start + max_context;
1060 ds.context_vec_ptr = ds.context_vec_start + offset;
1062 if (ds.anychange == 0) {
1064 * Print the context/unidiff header first time through.
1066 print_header(file1, file2);
1068 } else if (a > ds.context_vec_ptr->b + (2 * diff_args.diff_context) + 1 &&
1069 c > ds.context_vec_ptr->d + (2 * diff_args.diff_context) + 1) {
1071 * If this change is more than 'diff_context' lines from the
1072 * previous change, dump the record and reset it.
1074 if (diff_args.diff_format == D_CONTEXT)
1075 dump_context_vec(f1, f2, *pflags);
1077 dump_unified_vec(f1, f2, *pflags);
1079 ds.context_vec_ptr++;
1080 ds.context_vec_ptr->a = a;
1081 ds.context_vec_ptr->b = b;
1082 ds.context_vec_ptr->c = c;
1083 ds.context_vec_ptr->d = d;
1086 if (ds.anychange == 0)
1088 switch (diff_args.diff_format) {
1094 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1095 if (diff_args.diff_format == D_NORMAL)
1100 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1106 diff_output("a%d %d\n", b, d - c + 1);
1108 diff_output("d%d %d\n", a, b - a + 1);
1110 /* add changed lines */
1111 diff_output("a%d %d\n", b, d - c + 1);
1115 if (diff_args.diff_format == D_NORMAL || diff_args.diff_format == D_IFDEF) {
1116 fetch(ds.ixold, a, b, f1, '<', 1, *pflags);
1117 if (a <= b && c <= d && diff_args.diff_format == D_NORMAL)
1118 diff_output("---\n");
1120 i = fetch(ds.ixnew, c, d, f2, diff_args.diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1121 if (i != 0 && diff_args.diff_format == D_EDIT) {
1123 * A non-zero return value for D_EDIT indicates that the
1124 * last line printed was a bare dot (".") that has been
1125 * escaped as ".." to prevent ed(1) from misinterpreting
1126 * it. We have to add a substitute command to change this
1127 * back and restart where we left off.
1130 diff_output("%ds/.//\n", a + i - 1);
1136 if ((diff_args.diff_format == D_EDIT || diff_args.diff_format == D_REVERSE) && c <= d)
1139 diff_output("#endif /* %s */\n", diff_args.ifdefname);
1145 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1147 int i, j, c, lastc, col, nc;
1150 * When doing #ifdef's, copy down to current line
1151 * if this is the first file, so that stuff makes it to output.
1153 if (diff_args.diff_format == D_IFDEF && oldfile) {
1154 long curpos = ftell(lb);
1155 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1156 nc = f[a > b ? b : a - 1] - curpos;
1157 for (i = 0; i < nc; i++)
1158 diff_output("%c", getc(lb));
1162 if (diff_args.diff_format == D_IFDEF) {
1164 diff_output("#else /* %s%s */\n",
1165 oldfile == 1 ? "!" : "", diff_args.ifdefname);
1168 diff_output("#ifndef %s\n", diff_args.ifdefname);
1170 diff_output("#ifdef %s\n", diff_args.ifdefname);
1172 ds.inifdef = 1 + oldfile;
1174 for (i = a; i <= b; i++) {
1175 fseek(lb, f[i - 1], SEEK_SET);
1176 nc = f[i] - f[i - 1];
1177 if (diff_args.diff_format != D_IFDEF && ch != '\0') {
1178 diff_output("%c", ch);
1179 if (diff_args.Tflag && (diff_args.diff_format == D_NORMAL || diff_args.diff_format == D_CONTEXT
1180 || diff_args.diff_format == D_UNIFIED))
1182 else if (diff_args.diff_format != D_UNIFIED)
1186 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1187 if ((c = getc(lb)) == EOF) {
1188 if (diff_args.diff_format == D_EDIT || diff_args.diff_format == D_REVERSE ||
1189 diff_args.diff_format == D_NREVERSE)
1190 warnx("No newline at end of file");
1192 diff_output("\n\\ No newline at end of "
1196 if (c == '\t' && (flags & D_EXPANDTABS)) {
1199 } while (++col & 7);
1201 if (diff_args.diff_format == D_EDIT && j == 1 && c == '\n'
1204 * Don't print a bare "." line
1205 * since that will confuse ed(1).
1206 * Print ".." instead and return,
1207 * giving the caller an offset
1208 * from which to restart.
1213 diff_output("%c", c);
1222 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1225 readhash(FILE *f, int flags)
1232 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1233 if (flags & D_IGNORECASE)
1234 for (i = 0; (t = getc(f)) != '\n'; i++) {
1240 sum = sum * 127 + ds.chrtran[t];
1243 for (i = 0; (t = getc(f)) != '\n'; i++) {
1249 sum = sum * 127 + t;
1253 switch (t = getc(f)) {
1262 if (space && (flags & D_IGNOREBLANKS) == 0) {
1266 sum = sum * 127 + ds.chrtran[t];
1280 * There is a remote possibility that we end up with a zero sum.
1281 * Zero is used as an EOF marker, so return 1 instead.
1283 return (sum == 0 ? 1 : sum);
1289 unsigned char buf[BUFSIZ];
1296 cnt = fread(buf, 1, sizeof(buf), f);
1297 return (memchr(buf, '\0', cnt) == NULL);
1300 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1303 match_function(const long *f, int pos, FILE *fp)
1305 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1307 int last = lastline;
1311 while (pos > last) {
1312 fseek(fp, f[pos - 1], SEEK_SET);
1313 nc = f[pos] - f[pos - 1];
1314 if (nc >= sizeof(buf))
1315 nc = sizeof(buf) - 1;
1316 nc = fread(buf, 1, nc, fp);
1319 buf[strcspn(buf, "\n")] = '\0';
1320 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1321 if (begins_with(buf, "private:")) {
1323 state = " (private)";
1324 } else if (begins_with(buf, "protected:")) {
1326 state = " (protected)";
1327 } else if (begins_with(buf, "public:")) {
1329 state = " (public)";
1331 strlcpy(lastbuf, buf, sizeof lastbuf);
1333 strlcat(lastbuf, state,
1335 lastmatchline = pos;
1342 return lastmatchline > 0 ? lastbuf : NULL;
1345 /* dump accumulated "context" diff changes */
1347 dump_context_vec(FILE *f1, FILE *f2, int flags)
1349 struct context_vec *cvp = ds.context_vec_start;
1350 int lowa, upb, lowc, upd, do_output;
1354 if (ds.context_vec_start > ds.context_vec_ptr)
1357 b = d = 0; /* gcc */
1358 lowa = MAXIMUM(1, cvp->a - diff_args.diff_context);
1359 upb = MINIMUM(ds.len[0], ds.context_vec_ptr->b + diff_args.diff_context);
1360 lowc = MAXIMUM(1, cvp->c - diff_args.diff_context);
1361 upd = MINIMUM(ds.len[1], ds.context_vec_ptr->d + diff_args.diff_context);
1363 diff_output("***************");
1364 if ((flags & D_PROTOTYPE)) {
1365 f = match_function(ds.ixold, lowa-1, f1);
1367 diff_output(" %s", f);
1369 diff_output("\n*** ");
1370 range(lowa, upb, ",");
1371 diff_output(" ****\n");
1374 * Output changes to the "old" file. The first loop suppresses
1375 * output if there were no changes to the "old" file (we'll see
1376 * the "old" lines as context in the "new" list).
1379 for (; cvp <= ds.context_vec_ptr; cvp++)
1380 if (cvp->a <= cvp->b) {
1381 cvp = ds.context_vec_start;
1386 while (cvp <= ds.context_vec_ptr) {
1392 if (a <= b && c <= d)
1395 ch = (a <= b) ? 'd' : 'a';
1398 fetch(ds.ixold, lowa, b, f1, ' ', 0, flags);
1400 fetch(ds.ixold, lowa, a - 1, f1, ' ', 0, flags);
1401 fetch(ds.ixold, a, b, f1,
1402 ch == 'c' ? '!' : '-', 0, flags);
1407 fetch(ds.ixold, b + 1, upb, f1, ' ', 0, flags);
1409 /* output changes to the "new" file */
1410 diff_output("--- ");
1411 range(lowc, upd, ",");
1412 diff_output(" ----\n");
1415 for (cvp = ds.context_vec_start; cvp <= ds.context_vec_ptr; cvp++)
1416 if (cvp->c <= cvp->d) {
1417 cvp = ds.context_vec_start;
1422 while (cvp <= ds.context_vec_ptr) {
1428 if (a <= b && c <= d)
1431 ch = (a <= b) ? 'd' : 'a';
1434 fetch(ds.ixnew, lowc, d, f2, ' ', 0, flags);
1436 fetch(ds.ixnew, lowc, c - 1, f2, ' ', 0, flags);
1437 fetch(ds.ixnew, c, d, f2,
1438 ch == 'c' ? '!' : '+', 0, flags);
1443 fetch(ds.ixnew, d + 1, upd, f2, ' ', 0, flags);
1445 ds.context_vec_ptr = ds.context_vec_start - 1;
1448 /* dump accumulated "unified" diff changes */
1450 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1452 struct context_vec *cvp = ds.context_vec_start;
1453 int lowa, upb, lowc, upd;
1457 if (ds.context_vec_start > ds.context_vec_ptr)
1460 b = d = 0; /* gcc */
1461 lowa = MAXIMUM(1, cvp->a - diff_args.diff_context);
1462 upb = MINIMUM(ds.len[0], ds.context_vec_ptr->b + diff_args.diff_context);
1463 lowc = MAXIMUM(1, cvp->c - diff_args.diff_context);
1464 upd = MINIMUM(ds.len[1], ds.context_vec_ptr->d + diff_args.diff_context);
1466 diff_output("@@ -");
1467 uni_range(lowa, upb);
1469 uni_range(lowc, upd);
1471 if ((flags & D_PROTOTYPE)) {
1472 f = match_function(ds.ixold, lowa-1, f1);
1474 diff_output(" %s", f);
1479 * Output changes in "unified" diff format--the old and new lines
1480 * are printed together.
1482 for (; cvp <= ds.context_vec_ptr; cvp++) {
1489 * c: both new and old changes
1490 * d: only changes in the old file
1491 * a: only changes in the new file
1493 if (a <= b && c <= d)
1496 ch = (a <= b) ? 'd' : 'a';
1500 fetch(ds.ixold, lowa, a - 1, f1, ' ', 0, flags);
1501 fetch(ds.ixold, a, b, f1, '-', 0, flags);
1502 fetch(ds.ixnew, c, d, f2, '+', 0, flags);
1505 fetch(ds.ixold, lowa, a - 1, f1, ' ', 0, flags);
1506 fetch(ds.ixold, a, b, f1, '-', 0, flags);
1509 fetch(ds.ixnew, lowc, c - 1, f2, ' ', 0, flags);
1510 fetch(ds.ixnew, c, d, f2, '+', 0, flags);
1516 fetch(ds.ixnew, d + 1, upd, f2, ' ', 0, flags);
1518 ds.context_vec_ptr = ds.context_vec_start - 1;
1522 print_header(const char *file1, const char *file2)
1524 if (diff_args.label[0] != NULL)
1525 diff_output("%s %s\n", diff_args.diff_format == D_CONTEXT ? "***" : "---",
1526 diff_args.label[0]);
1528 diff_output("%s %s\t%s", diff_args.diff_format == D_CONTEXT ? "***" : "---",
1529 file1, ctime(&diff_args.stb1.st_mtime));
1530 if (diff_args.label[1] != NULL)
1531 diff_output("%s %s\n", diff_args.diff_format == D_CONTEXT ? "---" : "+++",
1532 diff_args.label[1]);
1534 diff_output("%s %s\t%s", diff_args.diff_format == D_CONTEXT ? "---" : "+++",
1535 file2, ctime(&diff_args.stb2.st_mtime));