Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
199 /*
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
202 */
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
227 0xfd, 0xfe, 0xff
228 };
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
254 0xfd, 0xfe, 0xff
255 };
257 static void
258 diff_output(FILE *outfile, const char *fmt, ...)
260 va_list ap;
262 if (outfile == NULL)
263 return;
265 va_start(ap, fmt);
266 vfprintf(outfile, fmt, ap);
267 va_end(ap);
270 const struct got_error *
271 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
272 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
273 struct got_diff_changes *changes)
275 const struct got_error *err = NULL;
276 int i, *p;
277 long *lp;
279 *rval = D_SAME;
280 ds->anychange = 0;
281 ds->lastline = 0;
282 ds->lastmatchline = 0;
283 ds->context_vec_ptr = ds->context_vec_start - 1;
284 ds->max_context = GOT_DIFF_MAX_CONTEXT;
285 if (flags & D_IGNORECASE)
286 ds->chrtran = cup2low;
287 else
288 ds->chrtran = clow2low;
289 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
290 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
291 return NULL;
293 if (flags & D_EMPTY1) {
294 f1 = fopen(_PATH_DEVNULL, "r");
295 if (f1 == NULL) {
296 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
297 goto closem;
300 else if (f1 == NULL) {
301 args->status |= 2;
302 goto closem;
305 if (flags & D_EMPTY2) {
306 f2 = fopen(_PATH_DEVNULL, "r");
307 if (f2 == NULL) {
308 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
309 goto closem;
311 } else if (f2 == NULL) {
312 args->status |= 2;
313 goto closem;
316 switch (files_differ(ds, f1, f2, flags)) {
317 case 0:
318 goto closem;
319 case 1:
320 break;
321 default:
322 /* error */
323 args->status |= 2;
324 goto closem;
327 if ((flags & D_FORCEASCII) == 0 &&
328 (!asciifile(f1) || !asciifile(f2))) {
329 *rval = D_BINARY;
330 args->status |= 1;
331 goto closem;
333 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 err = got_error_from_errno("prepare");
335 goto closem;
337 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 err = got_error_from_errno("prepare");
339 goto closem;
342 prune(ds);
343 sort(ds->sfile[0], ds->slen[0]);
344 sort(ds->sfile[1], ds->slen[1]);
346 ds->member = (int *)ds->file[1];
347 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
349 if (p == NULL) {
350 err = got_error_from_errno("reallocarray");
351 goto closem;
353 ds->member = p;
355 ds->class = (int *)ds->file[0];
356 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
357 err = got_error_from_errno("unsort");
358 goto closem;
360 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
361 if (p == NULL) {
362 err = got_error_from_errno("reallocarray");
363 goto closem;
365 ds->class = p;
367 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
368 if (ds->klist == NULL) {
369 err = got_error_from_errno("calloc");
370 goto closem;
372 ds->clen = 0;
373 ds->clistlen = 100;
374 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
375 if (ds->clist == NULL) {
376 err = got_error_from_errno("calloc");
377 goto closem;
379 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
380 if (i < 0) {
381 err = got_error_from_errno("stone");
382 goto closem;
385 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
386 if (p == NULL) {
387 err = got_error_from_errno("reallocarray");
388 goto closem;
390 ds->J = p;
391 unravel(ds, ds->klist[i]);
393 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
394 if (lp == NULL) {
395 err = got_error_from_errno("reallocarray");
396 goto closem;
398 ds->ixold = lp;
399 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
400 if (lp == NULL) {
401 err = got_error_from_errno("reallocarray");
402 goto closem;
404 ds->ixnew = lp;
405 check(ds, f1, f2, flags);
406 if (output(outfile, changes, ds, args, args->label[0], f1,
407 args->label[1], f2, flags))
408 err = got_error_from_errno("output");
409 closem:
410 free(ds->J);
411 free(ds->member);
412 free(ds->class);
413 free(ds->clist);
414 free(ds->klist);
415 free(ds->ixold);
416 free(ds->ixnew);
417 if (ds->anychange) {
418 args->status |= 1;
419 if (*rval == D_SAME)
420 *rval = D_DIFFER;
422 if ((flags & D_EMPTY1) && f1) {
423 if (fclose(f1) != 0 && err == NULL)
424 err = got_error_from_errno("fclose");
426 if ((flags & D_EMPTY2) && f2) {
427 if (fclose(f2) != 0 && err == NULL)
428 err = got_error_from_errno("fclose");
430 return (err);
433 /*
434 * Check to see if the given files differ.
435 * Returns 0 if they are the same, 1 if different, and -1 on error.
436 * XXX - could use code from cmp(1) [faster]
437 */
438 static int
439 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
441 char buf1[BUFSIZ], buf2[BUFSIZ];
442 size_t i, j;
444 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
445 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
446 return (1);
447 for (;;) {
448 i = fread(buf1, 1, sizeof(buf1), f1);
449 j = fread(buf2, 1, sizeof(buf2), f2);
450 if ((!i && ferror(f1)) || (!j && ferror(f2)))
451 return (-1);
452 if (i != j)
453 return (1);
454 if (i == 0)
455 return (0);
456 if (memcmp(buf1, buf2, i) != 0)
457 return (1);
461 static int
462 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
464 struct line *p, *q;
465 int j, h;
466 size_t sz;
468 rewind(fd);
470 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
471 if (sz < 100)
472 sz = 100;
474 p = calloc(sz + 3, sizeof(*p));
475 if (p == NULL)
476 return (-1);
477 for (j = 0; (h = readhash(ds, fd, flags));) {
478 if (j == sz) {
479 sz = sz * 3 / 2;
480 q = reallocarray(p, sz + 3, sizeof(*p));
481 if (q == NULL) {
482 free(p);
483 return (-1);
485 p = q;
487 p[++j].value = h;
489 ds->len[i] = j;
490 ds->file[i] = p;
492 return (0);
495 static void
496 prune(struct got_diff_state *ds)
498 int i, j;
500 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
501 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
502 ds->pref++)
504 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
505 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
506 ds->suff++)
508 for (j = 0; j < 2; j++) {
509 ds->sfile[j] = ds->file[j] + ds->pref;
510 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
511 for (i = 0; i <= ds->slen[j]; i++)
512 ds->sfile[j][i].serial = i;
516 static void
517 equiv(struct line *a, int n, struct line *b, int m, int *c)
519 int i, j;
521 i = j = 1;
522 while (i <= n && j <= m) {
523 if (a[i].value < b[j].value)
524 a[i++].value = 0;
525 else if (a[i].value == b[j].value)
526 a[i++].value = j;
527 else
528 j++;
530 while (i <= n)
531 a[i++].value = 0;
532 b[m + 1].value = 0;
533 j = 0;
534 while (++j <= m) {
535 c[j] = -b[j].serial;
536 while (b[j + 1].value == b[j].value) {
537 j++;
538 c[j] = b[j].serial;
541 c[j] = -1;
544 /* Code taken from ping.c */
545 static int
546 isqrt(int n)
548 int y, x = 1;
550 if (n == 0)
551 return (0);
553 do { /* newton was a stinker */
554 y = x;
555 x = n / x;
556 x += y;
557 x /= 2;
558 } while ((x - y) > 1 || (x - y) < -1);
560 return (x);
563 static int
564 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
566 int i, k, y, j, l;
567 int oldc, tc, oldl, sq;
568 u_int numtries, bound;
569 int error;
571 if (flags & D_MINIMAL)
572 bound = UINT_MAX;
573 else {
574 sq = isqrt(n);
575 bound = MAXIMUM(256, sq);
578 k = 0;
579 c[0] = newcand(ds, 0, 0, 0, &error);
580 if (error)
581 return -1;
582 for (i = 1; i <= n; i++) {
583 j = a[i];
584 if (j == 0)
585 continue;
586 y = -b[j];
587 oldl = 0;
588 oldc = c[0];
589 numtries = 0;
590 do {
591 if (y <= ds->clist[oldc].y)
592 continue;
593 l = search(ds, c, k, y);
594 if (l != oldl + 1)
595 oldc = c[l - 1];
596 if (l <= k) {
597 if (ds->clist[c[l]].y <= y)
598 continue;
599 tc = c[l];
600 c[l] = newcand(ds, i, y, oldc, &error);
601 if (error)
602 return -1;
603 oldc = tc;
604 oldl = l;
605 numtries++;
606 } else {
607 c[l] = newcand(ds, i, y, oldc, &error);
608 if (error)
609 return -1;
610 k++;
611 break;
613 } while ((y = b[++j]) > 0 && numtries < bound);
615 return (k);
618 static int
619 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
621 struct cand *q;
623 if (ds->clen == ds->clistlen) {
624 ds->clistlen = ds->clistlen * 11 / 10;
625 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
626 if (q == NULL) {
627 *errorp = -1;
628 free(ds->clist);
629 ds->clist = NULL;
630 return 0;
632 ds->clist = q;
634 q = ds->clist + ds->clen;
635 q->x = x;
636 q->y = y;
637 q->pred = pred;
638 *errorp = 0;
639 return (ds->clen++);
642 static int
643 search(struct got_diff_state *ds, int *c, int k, int y)
645 int i, j, l, t;
647 if (ds->clist[c[k]].y < y) /* quick look for typical case */
648 return (k + 1);
649 i = 0;
650 j = k + 1;
651 for (;;) {
652 l = (i + j) / 2;
653 if (l <= i)
654 break;
655 t = ds->clist[c[l]].y;
656 if (t > y)
657 j = l;
658 else if (t < y)
659 i = l;
660 else
661 return (l);
663 return (l + 1);
666 static void
667 unravel(struct got_diff_state *ds, int p)
669 struct cand *q;
670 int i;
672 for (i = 0; i <= ds->len[0]; i++)
673 ds->J[i] = i <= ds->pref ? i :
674 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
675 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
676 ds->J[q->x + ds->pref] = q->y + ds->pref;
679 /*
680 * Check does double duty:
681 * 1. ferret out any fortuitous correspondences due
682 * to confounding by hashing (which result in "jackpot")
683 * 2. collect random access indexes to the two files
684 */
685 static void
686 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
688 int i, j, jackpot, c, d;
689 long ctold, ctnew;
691 rewind(f1);
692 rewind(f2);
693 j = 1;
694 ds->ixold[0] = ds->ixnew[0] = 0;
695 jackpot = 0;
696 ctold = ctnew = 0;
697 for (i = 1; i <= ds->len[0]; i++) {
698 if (ds->J[i] == 0) {
699 ds->ixold[i] = ctold += skipline(f1);
700 continue;
702 while (j < ds->J[i]) {
703 ds->ixnew[j] = ctnew += skipline(f2);
704 j++;
706 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
707 for (;;) {
708 c = getc(f1);
709 d = getc(f2);
710 /*
711 * GNU diff ignores a missing newline
712 * in one file for -b or -w.
713 */
714 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
715 if (c == EOF && d == '\n') {
716 ctnew++;
717 break;
718 } else if (c == '\n' && d == EOF) {
719 ctold++;
720 break;
723 ctold++;
724 ctnew++;
725 if ((flags & D_FOLDBLANKS) && isspace(c) &&
726 isspace(d)) {
727 do {
728 if (c == '\n')
729 break;
730 ctold++;
731 } while (isspace(c = getc(f1)));
732 do {
733 if (d == '\n')
734 break;
735 ctnew++;
736 } while (isspace(d = getc(f2)));
737 } else if ((flags & D_IGNOREBLANKS)) {
738 while (isspace(c) && c != '\n') {
739 c = getc(f1);
740 ctold++;
742 while (isspace(d) && d != '\n') {
743 d = getc(f2);
744 ctnew++;
747 if (ds->chrtran[c] != ds->chrtran[d]) {
748 jackpot++;
749 ds->J[i] = 0;
750 if (c != '\n' && c != EOF)
751 ctold += skipline(f1);
752 if (d != '\n' && c != EOF)
753 ctnew += skipline(f2);
754 break;
756 if (c == '\n' || c == EOF)
757 break;
759 } else {
760 for (;;) {
761 ctold++;
762 ctnew++;
763 if ((c = getc(f1)) != (d = getc(f2))) {
764 /* jackpot++; */
765 ds->J[i] = 0;
766 if (c != '\n' && c != EOF)
767 ctold += skipline(f1);
768 if (d != '\n' && c != EOF)
769 ctnew += skipline(f2);
770 break;
772 if (c == '\n' || c == EOF)
773 break;
776 ds->ixold[i] = ctold;
777 ds->ixnew[j] = ctnew;
778 j++;
780 for (; j <= ds->len[1]; j++)
781 ds->ixnew[j] = ctnew += skipline(f2);
782 /*
783 * if (jackpot)
784 * fprintf(stderr, "jackpot\n");
785 */
788 /* shellsort CACM #201 */
789 static void
790 sort(struct line *a, int n)
792 struct line *ai, *aim, w;
793 int j, m = 0, k;
795 if (n == 0)
796 return;
797 for (j = 1; j <= n; j *= 2)
798 m = 2 * j - 1;
799 for (m /= 2; m != 0; m /= 2) {
800 k = n - m;
801 for (j = 1; j <= k; j++) {
802 for (ai = &a[j]; ai > a; ai -= m) {
803 aim = &ai[m];
804 if (aim < ai)
805 break; /* wraparound */
806 if (aim->value > ai[0].value ||
807 (aim->value == ai[0].value &&
808 aim->serial > ai[0].serial))
809 break;
810 w.value = ai[0].value;
811 ai[0].value = aim->value;
812 aim->value = w.value;
813 w.serial = ai[0].serial;
814 ai[0].serial = aim->serial;
815 aim->serial = w.serial;
821 static int
822 unsort(struct line *f, int l, int *b)
824 int *a, i;
826 a = calloc(l + 1, sizeof(*a));
827 if (a == NULL)
828 return (-1);
829 for (i = 1; i <= l; i++)
830 a[f[i].serial] = f[i].value;
831 for (i = 1; i <= l; i++)
832 b[i] = a[i];
833 free(a);
835 return (0);
838 static int
839 skipline(FILE *f)
841 int i, c;
843 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
844 continue;
845 return (i);
848 static int
849 output(FILE *outfile, struct got_diff_changes *changes,
850 struct got_diff_state *ds, struct got_diff_args *args,
851 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
853 int m, i0, i1, j0, j1;
854 int error = 0;
856 rewind(f1);
857 rewind(f2);
858 m = ds->len[0];
859 ds->J[0] = 0;
860 ds->J[m + 1] = ds->len[1] + 1;
861 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
862 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
863 i0++;
864 j0 = ds->J[i0 - 1] + 1;
865 i1 = i0 - 1;
866 while (i1 < m && ds->J[i1 + 1] == 0)
867 i1++;
868 j1 = ds->J[i1 + 1] - 1;
869 ds->J[i1] = j1;
870 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
871 i0, i1, j0, j1, &flags);
872 if (error)
873 return (error);
875 if (m == 0) {
876 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
877 1, 0, 1, ds->len[1], &flags);
878 if (error)
879 return (error);
881 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
882 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
884 return (0);
887 static void
888 range(FILE *outfile, int a, int b, char *separator)
890 diff_output(outfile, "%d", a > b ? b : a);
891 if (a < b)
892 diff_output(outfile, "%s%d", separator, b);
895 static void
896 uni_range(FILE *outfile, int a, int b)
898 if (a < b)
899 diff_output(outfile, "%d,%d", a, b - a + 1);
900 else if (a == b)
901 diff_output(outfile, "%d", b);
902 else
903 diff_output(outfile, "%d,0", b);
906 /*
907 * Indicate that there is a difference between lines a and b of the from file
908 * to get to lines c to d of the to file. If a is greater then b then there
909 * are no lines in the from file involved and this means that there were
910 * lines appended (beginning at b). If c is greater than d then there are
911 * lines missing from the to file.
912 */
913 static int
914 change(FILE *outfile, struct got_diff_changes *changes,
915 struct got_diff_state *ds, struct got_diff_args *args,
916 const char *file1, FILE *f1, const char *file2, FILE *f2,
917 int a, int b, int c, int d, int *pflags)
919 int i;
921 if (a > b && c > d)
922 return (0);
924 if (*pflags & D_HEADER) {
925 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
926 *pflags &= ~D_HEADER;
928 if (args->diff_format == D_UNIFIED) {
929 /*
930 * Allocate change records as needed.
931 */
932 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
933 struct context_vec *cvp;
934 ptrdiff_t offset;
935 offset = ds->context_vec_ptr - ds->context_vec_start;
936 ds->max_context <<= 1;
937 ds->context_vec_start =
938 cvp = reallocarray(ds->context_vec_start,
939 ds->max_context, sizeof(*ds->context_vec_start));
940 if (cvp == NULL) {
941 free(ds->context_vec_start);
942 return (-1);
944 ds->context_vec_start = cvp;
945 ds->context_vec_end = ds->context_vec_start +
946 ds->max_context;
947 ds->context_vec_ptr = ds->context_vec_start + offset;
949 if (ds->anychange == 0) {
950 /*
951 * Print the context/unidiff header first time through.
952 */
953 print_header(outfile, ds, args, file1, file2);
954 ds->anychange = 1;
955 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
956 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
957 /*
958 * If this change is more than 'diff_context' lines from the
959 * previous change, dump the record and reset it.
960 */
961 dump_unified_vec(outfile, changes, ds, args, f1, f2,
962 *pflags);
964 ds->context_vec_ptr++;
965 ds->context_vec_ptr->a = a;
966 ds->context_vec_ptr->b = b;
967 ds->context_vec_ptr->c = c;
968 ds->context_vec_ptr->d = d;
969 return (0);
971 if (ds->anychange == 0)
972 ds->anychange = 1;
973 if (args->diff_format == D_BRIEF)
974 return (0);
975 if (args->diff_format == D_NORMAL) {
976 range(outfile, a, b, ",");
977 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
978 range(outfile, c, d, ",");
979 diff_output(outfile, "\n");
980 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
981 if (a <= b && c <= d)
982 diff_output(outfile, "---\n");
984 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
985 args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
986 return (0);
989 static int
990 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
991 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
993 int i, j, c, lastc, col, nc;
995 if (a > b)
996 return (0);
997 for (i = a; i <= b; i++) {
998 fseek(lb, f[i - 1], SEEK_SET);
999 nc = f[i] - f[i - 1];
1000 if (ch != '\0') {
1001 diff_output(outfile, "%c", ch);
1002 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1003 args->diff_format == D_NORMAL))
1004 diff_output(outfile, "\t");
1005 else if (args->diff_format != D_UNIFIED)
1006 diff_output(outfile, " ");
1008 col = 0;
1009 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1010 if ((c = getc(lb)) == EOF) {
1011 diff_output(outfile, "\n\\ No newline at end of "
1012 "file\n");
1013 return (0);
1015 if (c == '\t' && (flags & D_EXPANDTABS)) {
1016 do {
1017 diff_output(outfile, " ");
1018 } while (++col & 7);
1019 } else {
1020 diff_output(outfile, "%c", c);
1021 col++;
1025 return (0);
1029 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1031 static int
1032 readhash(struct got_diff_state *ds, FILE *f, int flags)
1034 int i, t, space;
1035 int sum;
1037 sum = 1;
1038 space = 0;
1039 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1040 if (flags & D_IGNORECASE)
1041 for (i = 0; (t = getc(f)) != '\n'; i++) {
1042 if (t == EOF) {
1043 if (i == 0)
1044 return (0);
1045 break;
1047 sum = sum * 127 + ds->chrtran[t];
1049 else
1050 for (i = 0; (t = getc(f)) != '\n'; i++) {
1051 if (t == EOF) {
1052 if (i == 0)
1053 return (0);
1054 break;
1056 sum = sum * 127 + t;
1058 } else {
1059 for (i = 0;;) {
1060 switch (t = getc(f)) {
1061 case '\t':
1062 case '\r':
1063 case '\v':
1064 case '\f':
1065 case ' ':
1066 space++;
1067 continue;
1068 default:
1069 if (space && (flags & D_IGNOREBLANKS) == 0) {
1070 i++;
1071 space = 0;
1073 sum = sum * 127 + ds->chrtran[t];
1074 i++;
1075 continue;
1076 case EOF:
1077 if (i == 0)
1078 return (0);
1079 /* FALLTHROUGH */
1080 case '\n':
1081 break;
1083 break;
1087 * There is a remote possibility that we end up with a zero sum.
1088 * Zero is used as an EOF marker, so return 1 instead.
1090 return (sum == 0 ? 1 : sum);
1093 static int
1094 asciifile(FILE *f)
1096 unsigned char buf[BUFSIZ];
1097 size_t cnt;
1099 if (f == NULL)
1100 return (1);
1102 rewind(f);
1103 cnt = fread(buf, 1, sizeof(buf), f);
1104 return (memchr(buf, '\0', cnt) == NULL);
1107 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1109 static char *
1110 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1112 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1113 size_t nc;
1114 int last = ds->lastline;
1115 char *state = NULL;
1117 ds->lastline = pos;
1118 while (pos > last) {
1119 fseek(fp, f[pos - 1], SEEK_SET);
1120 nc = f[pos] - f[pos - 1];
1121 if (nc >= sizeof(buf))
1122 nc = sizeof(buf) - 1;
1123 nc = fread(buf, 1, nc, fp);
1124 if (nc > 0) {
1125 buf[nc] = '\0';
1126 buf[strcspn(buf, "\n")] = '\0';
1127 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1128 if (begins_with(buf, "private:")) {
1129 if (!state)
1130 state = " (private)";
1131 } else if (begins_with(buf, "protected:")) {
1132 if (!state)
1133 state = " (protected)";
1134 } else if (begins_with(buf, "public:")) {
1135 if (!state)
1136 state = " (public)";
1137 } else {
1138 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1139 if (state)
1140 strlcat(ds->lastbuf, state,
1141 sizeof ds->lastbuf);
1142 ds->lastmatchline = pos;
1143 return ds->lastbuf;
1147 pos--;
1149 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1152 /* dump accumulated "unified" diff changes */
1153 static void
1154 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1155 struct got_diff_state *ds, struct got_diff_args *args,
1156 FILE *f1, FILE *f2, int flags)
1158 struct context_vec *cvp = ds->context_vec_start;
1159 int lowa, upb, lowc, upd;
1160 int a, b, c, d;
1161 char ch, *f;
1163 if (ds->context_vec_start > ds->context_vec_ptr)
1164 return;
1166 b = d = 0; /* gcc */
1167 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1168 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1169 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1170 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1172 diff_output(outfile, "@@ -");
1173 uni_range(outfile, lowa, upb);
1174 diff_output(outfile, " +");
1175 uni_range(outfile, lowc, upd);
1176 diff_output(outfile, " @@");
1177 if ((flags & D_PROTOTYPE)) {
1178 f = match_function(ds, ds->ixold, lowa-1, f1);
1179 if (f != NULL)
1180 diff_output(outfile, " %s", f);
1182 diff_output(outfile, "\n");
1185 * Output changes in "unified" diff format--the old and new lines
1186 * are printed together.
1188 for (; cvp <= ds->context_vec_ptr; cvp++) {
1189 if (changes) {
1190 struct got_diff_change *change;
1191 change = calloc(1, sizeof(*change));
1192 if (change) {
1193 memcpy(&change->cv, cvp, sizeof(change->cv));
1194 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1195 entry);
1196 changes->nchanges++;
1200 a = cvp->a;
1201 b = cvp->b;
1202 c = cvp->c;
1203 d = cvp->d;
1206 * c: both new and old changes
1207 * d: only changes in the old file
1208 * a: only changes in the new file
1210 if (a <= b && c <= d)
1211 ch = 'c';
1212 else
1213 ch = (a <= b) ? 'd' : 'a';
1215 switch (ch) {
1216 case 'c':
1217 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1218 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1219 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1220 break;
1221 case 'd':
1222 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1223 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1224 break;
1225 case 'a':
1226 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1227 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1228 break;
1230 lowa = b + 1;
1231 lowc = d + 1;
1233 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1235 ds->context_vec_ptr = ds->context_vec_start - 1;
1238 static void
1239 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1240 const char *file1, const char *file2)
1242 if (args->label[0] != NULL)
1243 diff_output(outfile, "--- %s\n", args->label[0]);
1244 else
1245 diff_output(outfile, "--- %s\t%s", file1,
1246 ctime(&ds->stb1.st_mtime));
1247 if (args->label[1] != NULL)
1248 diff_output(outfile, "+++ %s\n", args->label[1]);
1249 else
1250 diff_output(outfile, "+++ %s\t%s", file2,
1251 ctime(&ds->stb2.st_mtime));