1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
69 #include <sys/queue.h>
86 #include "got_error.h"
87 #include "got_object.h"
91 #include "xmalloc.h" /* XXX should return GOT_ERR_NO_MEM instead of exiting */
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
97 * diff - compare two files.
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
175 * The following struct is used to record change information when
176 * doing a "context" or "unified" diff. (see routine "change" to
177 * understand the highly mnemonic field names)
180 int a; /* start line in old file */
181 int b; /* end line in old file */
182 int c; /* start line in new file */
183 int d; /* end line in new file */
186 #define diff_output printf
187 static FILE *opentemp(const char *);
188 static void output(struct got_diff_state *, struct got_diff_args *, char *, FILE *, char *, FILE *, int);
189 static void check(struct got_diff_state *, FILE *, FILE *, int);
190 static void range(int, int, char *);
191 static void uni_range(int, int);
192 static void dump_context_vec(struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static void dump_unified_vec(struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
194 static void prepare(struct got_diff_state *, int, FILE *, off_t, int);
195 static void prune(struct got_diff_state *);
196 static void equiv(struct line *, int, struct line *, int, int *);
197 static void unravel(struct got_diff_state *, int);
198 static void unsort(struct line *, int, int *);
199 static void change(struct got_diff_state *, struct got_diff_args *, char *, FILE *, char *, FILE *, int, int, int, int, int *);
200 static void sort(struct line *, int);
201 static void print_header(struct got_diff_state *, struct got_diff_args *, const char *, const char *);
202 static int ignoreline(char *);
203 static int asciifile(FILE *);
204 static int fetch(struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
205 static int newcand(struct got_diff_state *, int, int, int);
206 static int search(struct got_diff_state *, int *, int, int);
207 static int skipline(FILE *);
208 static int isqrt(int);
209 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
210 static int readhash(struct got_diff_state *, FILE *, int);
211 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
212 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
213 static char *preadline(int, size_t, off_t);
216 * chrtran points to one of 2 translation tables: cup2low if folding upper to
217 * lower case clow2low if not folding case
219 u_char clow2low[256] = {
220 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
221 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
222 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
223 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
224 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
225 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
226 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
227 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
228 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
229 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
230 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
231 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
232 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
233 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
234 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
235 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
236 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
237 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
238 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
239 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
240 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
241 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
242 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
246 u_char cup2low[256] = {
247 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
248 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
249 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
250 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
251 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
252 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
253 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
254 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
255 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
256 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
257 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
258 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
259 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
260 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
261 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
262 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
263 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
264 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
265 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
266 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
267 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
268 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
269 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
273 const struct got_error *
274 got_diffreg(int *rval, char *file1, char *file2, int flags,
275 struct got_diff_args *args, struct got_diff_state *ds)
277 const struct got_error *err = NULL;
281 if (strcmp(file1, "-") == 0)
282 fstat(STDIN_FILENO, &ds->stb1);
283 else if (stat(file1, &ds->stb1) != 0)
284 return got_error(GOT_ERR_BAD_PATH);
286 if (strcmp(file2, "-") == 0)
287 fstat(STDIN_FILENO, &ds->stb2);
288 else if (stat(file2, &ds->stb2) != 0)
289 return got_error(GOT_ERR_BAD_PATH);
295 ds->lastmatchline = 0;
296 ds->context_vec_ptr = ds->context_vec_start - 1;
297 if (flags & D_IGNORECASE)
298 ds->chrtran = cup2low;
300 ds->chrtran = clow2low;
301 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
302 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
305 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 if (flags & D_EMPTY1)
309 f1 = fopen(_PATH_DEVNULL, "r");
311 if (!S_ISREG(ds->stb1.st_mode)) {
312 if ((f1 = opentemp(file1)) == NULL ||
313 fstat(fileno(f1), &ds->stb1) < 0) {
317 } else if (strcmp(file1, "-") == 0)
320 f1 = fopen(file1, "r");
327 if (flags & D_EMPTY2)
328 f2 = fopen(_PATH_DEVNULL, "r");
330 if (!S_ISREG(ds->stb2.st_mode)) {
331 if ((f2 = opentemp(file2)) == NULL ||
332 fstat(fileno(f2), &ds->stb2) < 0) {
336 } else if (strcmp(file2, "-") == 0)
339 f2 = fopen(file2, "r");
346 switch (files_differ(ds, f1, f2, flags)) {
357 if ((flags & D_FORCEASCII) == 0 &&
358 (!asciifile(f1) || !asciifile(f2))) {
363 prepare(ds, 0, f1, ds->stb1.st_size, flags);
364 prepare(ds, 1, f2, ds->stb2.st_size, flags);
367 sort(ds->sfile[0], ds->slen[0]);
368 sort(ds->sfile[1], ds->slen[1]);
370 ds->member = (int *)ds->file[1];
371 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
372 ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
373 if (ds->member == NULL) {
374 err = got_error(GOT_ERR_NO_MEM);
378 ds->class = (int *)ds->file[0];
379 unsort(ds->sfile[0], ds->slen[0], ds->class);
380 ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
381 if (ds->class == NULL) {
382 err = got_error(GOT_ERR_NO_MEM);
386 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
387 if (ds->klist == NULL) {
388 err = got_error(GOT_ERR_NO_MEM);
393 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
394 if (ds->clist == NULL) {
395 err = got_error(GOT_ERR_NO_MEM);
398 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
402 ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
404 err = got_error(GOT_ERR_NO_MEM);
407 unravel(ds, ds->klist[i]);
413 ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
414 if (ds->ixold == NULL) {
415 err = got_error(GOT_ERR_NO_MEM);
418 ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
419 if (ds->ixnew == NULL) {
420 err = got_error(GOT_ERR_NO_MEM);
423 check(ds, f1, f2, flags);
424 output(ds, args, file1, f1, file2, f2, flags);
440 * Check to see if the given files differ.
441 * Returns 0 if they are the same, 1 if different, and -1 on error.
442 * XXX - could use code from cmp(1) [faster]
445 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
447 char buf1[BUFSIZ], buf2[BUFSIZ];
450 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
451 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
454 i = fread(buf1, 1, sizeof(buf1), f1);
455 j = fread(buf2, 1, sizeof(buf2), f2);
456 if ((!i && ferror(f1)) || (!j && ferror(f2)))
462 if (memcmp(buf1, buf2, i) != 0)
468 opentemp(const char *file)
470 char buf[BUFSIZ], tempfile[PATH_MAX];
474 if (strcmp(file, "-") == 0)
476 else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
479 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
481 if ((ofd = mkstemp(tempfile)) < 0) {
486 while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
487 if (write(ofd, buf, nread) != nread) {
494 lseek(ofd, (off_t)0, SEEK_SET);
495 return (fdopen(ofd, "r"));
499 splice(char *dir, char *file)
504 dirlen = strlen(dir);
505 while (dirlen != 0 && dir[dirlen - 1] == '/')
507 if ((tail = strrchr(file, '/')) == NULL)
511 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
516 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
524 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
528 p = xcalloc(sz + 3, sizeof(*p));
529 for (j = 0; (h = readhash(ds, fd, flags));) {
532 p = xreallocarray(p, sz + 3, sizeof(*p));
541 prune(struct got_diff_state *ds)
545 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
546 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
549 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
550 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
553 for (j = 0; j < 2; j++) {
554 ds->sfile[j] = ds->file[j] + ds->pref;
555 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
556 for (i = 0; i <= ds->slen[j]; i++)
557 ds->sfile[j][i].serial = i;
562 equiv(struct line *a, int n, struct line *b, int m, int *c)
567 while (i <= n && j <= m) {
568 if (a[i].value < b[j].value)
570 else if (a[i].value == b[j].value)
581 while (b[j + 1].value == b[j].value) {
589 /* Code taken from ping.c */
598 do { /* newton was a stinker */
603 } while ((x - y) > 1 || (x - y) < -1);
609 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
612 int oldc, tc, oldl, sq;
613 u_int numtries, bound;
615 if (flags & D_MINIMAL)
619 bound = MAXIMUM(256, sq);
623 c[0] = newcand(ds, 0, 0, 0);
624 for (i = 1; i <= n; i++) {
633 if (y <= ds->clist[oldc].y)
635 l = search(ds, c, k, y);
639 if (ds->clist[c[l]].y <= y)
642 c[l] = newcand(ds, i, y, oldc);
647 c[l] = newcand(ds, i, y, oldc);
651 } while ((y = b[++j]) > 0 && numtries < bound);
657 newcand(struct got_diff_state *ds, int x, int y, int pred)
661 if (ds->clen == ds->clistlen) {
662 ds->clistlen = ds->clistlen * 11 / 10;
663 ds->clist = xreallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
665 q = ds->clist + ds->clen;
673 search(struct got_diff_state *ds, int *c, int k, int y)
677 if (ds->clist[c[k]].y < y) /* quick look for typical case */
685 t = ds->clist[c[l]].y;
697 unravel(struct got_diff_state *ds, int p)
702 for (i = 0; i <= ds->len[0]; i++)
703 ds->J[i] = i <= ds->pref ? i :
704 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
705 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
706 ds->J[q->x + ds->pref] = q->y + ds->pref;
710 * Check does double duty:
711 * 1. ferret out any fortuitous correspondences due
712 * to confounding by hashing (which result in "jackpot")
713 * 2. collect random access indexes to the two files
716 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
718 int i, j, jackpot, c, d;
724 ds->ixold[0] = ds->ixnew[0] = 0;
727 for (i = 1; i <= ds->len[0]; i++) {
729 ds->ixold[i] = ctold += skipline(f1);
732 while (j < ds->J[i]) {
733 ds->ixnew[j] = ctnew += skipline(f2);
736 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
741 * GNU diff ignores a missing newline
742 * in one file for -b or -w.
744 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
745 if (c == EOF && d == '\n') {
748 } else if (c == '\n' && d == EOF) {
755 if ((flags & D_FOLDBLANKS) && isspace(c) &&
761 } while (isspace(c = getc(f1)));
766 } while (isspace(d = getc(f2)));
767 } else if ((flags & D_IGNOREBLANKS)) {
768 while (isspace(c) && c != '\n') {
772 while (isspace(d) && d != '\n') {
777 if (ds->chrtran[c] != ds->chrtran[d]) {
780 if (c != '\n' && c != EOF)
781 ctold += skipline(f1);
782 if (d != '\n' && c != EOF)
783 ctnew += skipline(f2);
786 if (c == '\n' || c == EOF)
793 if ((c = getc(f1)) != (d = getc(f2))) {
796 if (c != '\n' && c != EOF)
797 ctold += skipline(f1);
798 if (d != '\n' && c != EOF)
799 ctnew += skipline(f2);
802 if (c == '\n' || c == EOF)
806 ds->ixold[i] = ctold;
807 ds->ixnew[j] = ctnew;
810 for (; j <= ds->len[1]; j++)
811 ds->ixnew[j] = ctnew += skipline(f2);
814 * fprintf(stderr, "jackpot\n");
818 /* shellsort CACM #201 */
820 sort(struct line *a, int n)
822 struct line *ai, *aim, w;
827 for (j = 1; j <= n; j *= 2)
829 for (m /= 2; m != 0; m /= 2) {
831 for (j = 1; j <= k; j++) {
832 for (ai = &a[j]; ai > a; ai -= m) {
835 break; /* wraparound */
836 if (aim->value > ai[0].value ||
837 (aim->value == ai[0].value &&
838 aim->serial > ai[0].serial))
840 w.value = ai[0].value;
841 ai[0].value = aim->value;
842 aim->value = w.value;
843 w.serial = ai[0].serial;
844 ai[0].serial = aim->serial;
845 aim->serial = w.serial;
852 unsort(struct line *f, int l, int *b)
856 a = xcalloc(l + 1, sizeof(*a));
857 for (i = 1; i <= l; i++)
858 a[f[i].serial] = f[i].value;
859 for (i = 1; i <= l; i++)
869 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
875 output(struct got_diff_state *ds, struct got_diff_args *args,
876 char *file1, FILE *f1, char *file2, FILE *f2, int flags)
878 int m, i0, i1, j0, j1;
884 ds->J[m + 1] = ds->len[1] + 1;
885 if (args->diff_format != D_EDIT) {
886 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
887 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
889 j0 = ds->J[i0 - 1] + 1;
891 while (i1 < m && ds->J[i1 + 1] == 0)
893 j1 = ds->J[i1 + 1] - 1;
895 change(ds, args, file1, f1, file2, f2, i0, i1, j0, j1, &flags);
898 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
899 while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
901 j0 = ds->J[i0 + 1] - 1;
903 while (i1 > 1 && ds->J[i1 - 1] == 0)
905 j1 = ds->J[i1 - 1] + 1;
907 change(ds, args, file1, f1, file2, f2, i1, i0, j1, j0, &flags);
911 change(ds, args, file1, f1, file2, f2, 1, 0, 1, ds->len[1], &flags);
912 if (args->diff_format == D_IFDEF) {
915 if ((c = getc(f1)) == EOF)
917 diff_output("%c", c);
921 if (ds->anychange != 0) {
922 if (args->diff_format == D_CONTEXT)
923 dump_context_vec(ds, args, f1, f2, flags);
924 else if (args->diff_format == D_UNIFIED)
925 dump_unified_vec(ds, args, f1, f2, flags);
930 range(int a, int b, char *separator)
932 diff_output("%d", a > b ? b : a);
934 diff_output("%s%d", separator, b);
938 uni_range(int a, int b)
941 diff_output("%d,%d", a, b - a + 1);
943 diff_output("%d", b);
945 diff_output("%d,0", b);
949 preadline(int fd, size_t rlen, off_t off)
954 line = xmalloc(rlen + 1);
955 if ((nr = pread(fd, line, rlen, off)) < 0)
957 if (nr > 0 && line[nr-1] == '\n')
964 ignoreline(char *line)
966 return 0; /* do not ignore any lines */
970 * Indicate that there is a difference between lines a and b of the from file
971 * to get to lines c to d of the to file. If a is greater then b then there
972 * are no lines in the from file involved and this means that there were
973 * lines appended (beginning at b). If c is greater than d then there are
974 * lines missing from the to file.
977 change(struct got_diff_state *ds, struct got_diff_args *args,
978 char *file1, FILE *f1, char *file2, FILE *f2,
979 int a, int b, int c, int d, int *pflags)
981 static size_t max_context = 64;
985 if (args->diff_format != D_IFDEF && a > b && c > d)
987 if (args->ignore_pats != NULL) {
990 * All lines in the change, insert, or delete must
991 * match an ignore pattern for the change to be
994 if (a <= b) { /* Changes and deletes. */
995 for (i = a; i <= b; i++) {
996 line = preadline(fileno(f1),
997 ds->ixold[i] - ds->ixold[i - 1], ds->ixold[i - 1]);
998 if (!ignoreline(line))
1002 if (a > b || c <= d) { /* Changes and inserts. */
1003 for (i = c; i <= d; i++) {
1004 line = preadline(fileno(f2),
1005 ds->ixnew[i] - ds->ixnew[i - 1], ds->ixnew[i - 1]);
1006 if (!ignoreline(line))
1013 if (*pflags & D_HEADER) {
1014 diff_output("%s %s %s\n", args->diffargs, file1, file2);
1015 *pflags &= ~D_HEADER;
1017 if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
1019 * Allocate change records as needed.
1021 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
1022 ptrdiff_t offset = ds->context_vec_ptr - ds->context_vec_start;
1024 ds->context_vec_start = xreallocarray(ds->context_vec_start,
1025 max_context, sizeof(*ds->context_vec_start));
1026 ds->context_vec_end = ds->context_vec_start + max_context;
1027 ds->context_vec_ptr = ds->context_vec_start + offset;
1029 if (ds->anychange == 0) {
1031 * Print the context/unidiff header first time through.
1033 print_header(ds, args, file1, file2);
1035 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
1036 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
1038 * If this change is more than 'diff_context' lines from the
1039 * previous change, dump the record and reset it.
1041 if (args->diff_format == D_CONTEXT)
1042 dump_context_vec(ds, args, f1, f2, *pflags);
1044 dump_unified_vec(ds, args, f1, f2, *pflags);
1046 ds->context_vec_ptr++;
1047 ds->context_vec_ptr->a = a;
1048 ds->context_vec_ptr->b = b;
1049 ds->context_vec_ptr->c = c;
1050 ds->context_vec_ptr->d = d;
1053 if (ds->anychange == 0)
1055 switch (args->diff_format) {
1061 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1062 if (args->diff_format == D_NORMAL)
1067 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1073 diff_output("a%d %d\n", b, d - c + 1);
1075 diff_output("d%d %d\n", a, b - a + 1);
1077 /* add changed lines */
1078 diff_output("a%d %d\n", b, d - c + 1);
1082 if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1083 fetch(ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1084 if (a <= b && c <= d && args->diff_format == D_NORMAL)
1085 diff_output("---\n");
1087 i = fetch(ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1088 if (i != 0 && args->diff_format == D_EDIT) {
1090 * A non-zero return value for D_EDIT indicates that the
1091 * last line printed was a bare dot (".") that has been
1092 * escaped as ".." to prevent ed(1) from misinterpreting
1093 * it. We have to add a substitute command to change this
1094 * back and restart where we left off.
1097 diff_output("%ds/.//\n", a + i - 1);
1103 if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1106 diff_output("#endif /* %s */\n", args->ifdefname);
1112 fetch(struct got_diff_state *ds, struct got_diff_args *args,
1113 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1115 int i, j, c, lastc, col, nc;
1118 * When doing #ifdef's, copy down to current line
1119 * if this is the first file, so that stuff makes it to output.
1121 if (args->diff_format == D_IFDEF && oldfile) {
1122 long curpos = ftell(lb);
1123 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1124 nc = f[a > b ? b : a - 1] - curpos;
1125 for (i = 0; i < nc; i++)
1126 diff_output("%c", getc(lb));
1130 if (args->diff_format == D_IFDEF) {
1132 diff_output("#else /* %s%s */\n",
1133 oldfile == 1 ? "!" : "", args->ifdefname);
1136 diff_output("#ifndef %s\n", args->ifdefname);
1138 diff_output("#ifdef %s\n", args->ifdefname);
1140 ds->inifdef = 1 + oldfile;
1142 for (i = a; i <= b; i++) {
1143 fseek(lb, f[i - 1], SEEK_SET);
1144 nc = f[i] - f[i - 1];
1145 if (args->diff_format != D_IFDEF && ch != '\0') {
1146 diff_output("%c", ch);
1147 if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1148 || args->diff_format == D_UNIFIED))
1150 else if (args->diff_format != D_UNIFIED)
1154 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1155 if ((c = getc(lb)) == EOF) {
1156 if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1157 args->diff_format == D_NREVERSE)
1158 warnx("No newline at end of file");
1160 diff_output("\n\\ No newline at end of "
1164 if (c == '\t' && (flags & D_EXPANDTABS)) {
1167 } while (++col & 7);
1169 if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1172 * Don't print a bare "." line
1173 * since that will confuse ed(1).
1174 * Print ".." instead and return,
1175 * giving the caller an offset
1176 * from which to restart.
1181 diff_output("%c", c);
1190 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1193 readhash(struct got_diff_state *ds, FILE *f, int flags)
1200 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1201 if (flags & D_IGNORECASE)
1202 for (i = 0; (t = getc(f)) != '\n'; i++) {
1208 sum = sum * 127 + ds->chrtran[t];
1211 for (i = 0; (t = getc(f)) != '\n'; i++) {
1217 sum = sum * 127 + t;
1221 switch (t = getc(f)) {
1230 if (space && (flags & D_IGNOREBLANKS) == 0) {
1234 sum = sum * 127 + ds->chrtran[t];
1248 * There is a remote possibility that we end up with a zero sum.
1249 * Zero is used as an EOF marker, so return 1 instead.
1251 return (sum == 0 ? 1 : sum);
1257 unsigned char buf[BUFSIZ];
1264 cnt = fread(buf, 1, sizeof(buf), f);
1265 return (memchr(buf, '\0', cnt) == NULL);
1268 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1271 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1273 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1275 int last = ds->lastline;
1279 while (pos > last) {
1280 fseek(fp, f[pos - 1], SEEK_SET);
1281 nc = f[pos] - f[pos - 1];
1282 if (nc >= sizeof(buf))
1283 nc = sizeof(buf) - 1;
1284 nc = fread(buf, 1, nc, fp);
1287 buf[strcspn(buf, "\n")] = '\0';
1288 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1289 if (begins_with(buf, "private:")) {
1291 state = " (private)";
1292 } else if (begins_with(buf, "protected:")) {
1294 state = " (protected)";
1295 } else if (begins_with(buf, "public:")) {
1297 state = " (public)";
1299 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1301 strlcat(ds->lastbuf, state,
1302 sizeof ds->lastbuf);
1303 ds->lastmatchline = pos;
1310 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1313 /* dump accumulated "context" diff changes */
1315 dump_context_vec(struct got_diff_state *ds, struct got_diff_args *args,
1316 FILE *f1, FILE *f2, int flags)
1318 struct context_vec *cvp = ds->context_vec_start;
1319 int lowa, upb, lowc, upd, do_output;
1323 if (ds->context_vec_start > ds->context_vec_ptr)
1326 b = d = 0; /* gcc */
1327 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1328 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1329 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1330 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1332 diff_output("***************");
1333 if ((flags & D_PROTOTYPE)) {
1334 f = match_function(ds, ds->ixold, lowa-1, f1);
1336 diff_output(" %s", f);
1338 diff_output("\n*** ");
1339 range(lowa, upb, ",");
1340 diff_output(" ****\n");
1343 * Output changes to the "old" file. The first loop suppresses
1344 * output if there were no changes to the "old" file (we'll see
1345 * the "old" lines as context in the "new" list).
1348 for (; cvp <= ds->context_vec_ptr; cvp++)
1349 if (cvp->a <= cvp->b) {
1350 cvp = ds->context_vec_start;
1355 while (cvp <= ds->context_vec_ptr) {
1361 if (a <= b && c <= d)
1364 ch = (a <= b) ? 'd' : 'a';
1367 fetch(ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1369 fetch(ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1370 fetch(ds, args, ds->ixold, a, b, f1,
1371 ch == 'c' ? '!' : '-', 0, flags);
1376 fetch(ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1378 /* output changes to the "new" file */
1379 diff_output("--- ");
1380 range(lowc, upd, ",");
1381 diff_output(" ----\n");
1384 for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1385 if (cvp->c <= cvp->d) {
1386 cvp = ds->context_vec_start;
1391 while (cvp <= ds->context_vec_ptr) {
1397 if (a <= b && c <= d)
1400 ch = (a <= b) ? 'd' : 'a';
1403 fetch(ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1405 fetch(ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1406 fetch(ds, args, ds->ixnew, c, d, f2,
1407 ch == 'c' ? '!' : '+', 0, flags);
1412 fetch(ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1414 ds->context_vec_ptr = ds->context_vec_start - 1;
1417 /* dump accumulated "unified" diff changes */
1419 dump_unified_vec(struct got_diff_state *ds, struct got_diff_args *args,
1420 FILE *f1, FILE *f2, int flags)
1422 struct context_vec *cvp = ds->context_vec_start;
1423 int lowa, upb, lowc, upd;
1427 if (ds->context_vec_start > ds->context_vec_ptr)
1430 b = d = 0; /* gcc */
1431 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1432 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1433 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1434 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1436 diff_output("@@ -");
1437 uni_range(lowa, upb);
1439 uni_range(lowc, upd);
1441 if ((flags & D_PROTOTYPE)) {
1442 f = match_function(ds, ds->ixold, lowa-1, f1);
1444 diff_output(" %s", f);
1449 * Output changes in "unified" diff format--the old and new lines
1450 * are printed together.
1452 for (; cvp <= ds->context_vec_ptr; cvp++) {
1459 * c: both new and old changes
1460 * d: only changes in the old file
1461 * a: only changes in the new file
1463 if (a <= b && c <= d)
1466 ch = (a <= b) ? 'd' : 'a';
1470 fetch(ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1471 fetch(ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1472 fetch(ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1475 fetch(ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1476 fetch(ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1479 fetch(ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1480 fetch(ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1486 fetch(ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1488 ds->context_vec_ptr = ds->context_vec_start - 1;
1492 print_header(struct got_diff_state *ds, struct got_diff_args *args,
1493 const char *file1, const char *file2)
1495 if (args->label[0] != NULL)
1496 diff_output("%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1499 diff_output("%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1500 file1, ctime(&ds->stb1.st_mtime));
1501 if (args->label[1] != NULL)
1502 diff_output("%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1505 diff_output("%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1506 file2, ctime(&ds->stb2.st_mtime));