Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
199 /*
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
202 */
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
227 0xfd, 0xfe, 0xff
228 };
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
254 0xfd, 0xfe, 0xff
255 };
257 static void
258 diff_output(FILE *outfile, const char *fmt, ...)
260 va_list ap;
262 if (outfile == NULL)
263 return;
265 va_start(ap, fmt);
266 vfprintf(outfile, fmt, ap);
267 va_end(ap);
270 const struct got_error *
271 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
272 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
273 struct got_diff_changes *changes)
275 const struct got_error *err = NULL;
276 int i, *p;
277 long *lp;
279 *rval = D_SAME;
280 ds->anychange = 0;
281 ds->lastline = 0;
282 ds->lastmatchline = 0;
283 ds->context_vec_ptr = ds->context_vec_start - 1;
284 ds->max_context = GOT_DIFF_MAX_CONTEXT;
285 if (flags & D_IGNORECASE)
286 ds->chrtran = cup2low;
287 else
288 ds->chrtran = clow2low;
289 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
290 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
291 return NULL;
293 if (flags & D_EMPTY1) {
294 f1 = fopen(_PATH_DEVNULL, "r");
295 if (f1 == NULL) {
296 err = got_error_from_errno();
297 goto closem;
300 else if (f1 == NULL) {
301 args->status |= 2;
302 goto closem;
305 if (flags & D_EMPTY2) {
306 f2 = fopen(_PATH_DEVNULL, "r");
307 if (f2 == NULL) {
308 err = got_error_from_errno();
309 goto closem;
311 } else if (f2 == NULL) {
312 args->status |= 2;
313 goto closem;
316 switch (files_differ(ds, f1, f2, flags)) {
317 case 0:
318 goto closem;
319 case 1:
320 break;
321 default:
322 /* error */
323 args->status |= 2;
324 goto closem;
327 if ((flags & D_FORCEASCII) == 0 &&
328 (!asciifile(f1) || !asciifile(f2))) {
329 *rval = D_BINARY;
330 args->status |= 1;
331 goto closem;
333 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 err = got_error_from_errno();
335 goto closem;
337 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 err = got_error_from_errno();
339 goto closem;
342 prune(ds);
343 sort(ds->sfile[0], ds->slen[0]);
344 sort(ds->sfile[1], ds->slen[1]);
346 ds->member = (int *)ds->file[1];
347 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
349 if (p == NULL) {
350 err = got_error_from_errno();
351 goto closem;
353 ds->member = p;
355 ds->class = (int *)ds->file[0];
356 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
357 err = got_error_from_errno();
358 goto closem;
360 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
361 if (p == NULL) {
362 err = got_error_from_errno();
363 goto closem;
365 ds->class = p;
367 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
368 if (ds->klist == NULL) {
369 err = got_error_from_errno();
370 goto closem;
372 ds->clen = 0;
373 ds->clistlen = 100;
374 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
375 if (ds->clist == NULL) {
376 err = got_error_from_errno();
377 goto closem;
379 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
380 if (i < 0) {
381 err = got_error_from_errno();
382 goto closem;
385 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
386 if (p == NULL) {
387 err = got_error_from_errno();
388 goto closem;
390 ds->J = p;
391 unravel(ds, ds->klist[i]);
393 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
394 if (lp == NULL) {
395 err = got_error_from_errno();
396 goto closem;
398 ds->ixold = lp;
399 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
400 if (lp == NULL) {
401 err = got_error_from_errno();
402 goto closem;
404 ds->ixnew = lp;
405 check(ds, f1, f2, flags);
406 if (output(outfile, changes, ds, args, args->label[0], f1,
407 args->label[1], f2, flags))
408 err = got_error_from_errno();
409 closem:
410 free(ds->J);
411 free(ds->member);
412 free(ds->class);
413 free(ds->clist);
414 free(ds->klist);
415 free(ds->ixold);
416 free(ds->ixnew);
417 if (ds->anychange) {
418 args->status |= 1;
419 if (*rval == D_SAME)
420 *rval = D_DIFFER;
422 if (f1 != NULL)
423 fclose(f1);
424 if (f2 != NULL)
425 fclose(f2);
427 return (err);
430 /*
431 * Check to see if the given files differ.
432 * Returns 0 if they are the same, 1 if different, and -1 on error.
433 * XXX - could use code from cmp(1) [faster]
434 */
435 static int
436 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
438 char buf1[BUFSIZ], buf2[BUFSIZ];
439 size_t i, j;
441 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
442 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
443 return (1);
444 for (;;) {
445 i = fread(buf1, 1, sizeof(buf1), f1);
446 j = fread(buf2, 1, sizeof(buf2), f2);
447 if ((!i && ferror(f1)) || (!j && ferror(f2)))
448 return (-1);
449 if (i != j)
450 return (1);
451 if (i == 0)
452 return (0);
453 if (memcmp(buf1, buf2, i) != 0)
454 return (1);
458 static int
459 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
461 struct line *p, *q;
462 int j, h;
463 size_t sz;
465 rewind(fd);
467 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
468 if (sz < 100)
469 sz = 100;
471 p = calloc(sz + 3, sizeof(*p));
472 if (p == NULL)
473 return (-1);
474 for (j = 0; (h = readhash(ds, fd, flags));) {
475 if (j == sz) {
476 sz = sz * 3 / 2;
477 q = reallocarray(p, sz + 3, sizeof(*p));
478 if (q == NULL) {
479 free(p);
480 return (-1);
482 p = q;
484 p[++j].value = h;
486 ds->len[i] = j;
487 ds->file[i] = p;
489 return (0);
492 static void
493 prune(struct got_diff_state *ds)
495 int i, j;
497 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
498 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
499 ds->pref++)
501 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
502 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
503 ds->suff++)
505 for (j = 0; j < 2; j++) {
506 ds->sfile[j] = ds->file[j] + ds->pref;
507 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
508 for (i = 0; i <= ds->slen[j]; i++)
509 ds->sfile[j][i].serial = i;
513 static void
514 equiv(struct line *a, int n, struct line *b, int m, int *c)
516 int i, j;
518 i = j = 1;
519 while (i <= n && j <= m) {
520 if (a[i].value < b[j].value)
521 a[i++].value = 0;
522 else if (a[i].value == b[j].value)
523 a[i++].value = j;
524 else
525 j++;
527 while (i <= n)
528 a[i++].value = 0;
529 b[m + 1].value = 0;
530 j = 0;
531 while (++j <= m) {
532 c[j] = -b[j].serial;
533 while (b[j + 1].value == b[j].value) {
534 j++;
535 c[j] = b[j].serial;
538 c[j] = -1;
541 /* Code taken from ping.c */
542 static int
543 isqrt(int n)
545 int y, x = 1;
547 if (n == 0)
548 return (0);
550 do { /* newton was a stinker */
551 y = x;
552 x = n / x;
553 x += y;
554 x /= 2;
555 } while ((x - y) > 1 || (x - y) < -1);
557 return (x);
560 static int
561 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
563 int i, k, y, j, l;
564 int oldc, tc, oldl, sq;
565 u_int numtries, bound;
566 int error;
568 if (flags & D_MINIMAL)
569 bound = UINT_MAX;
570 else {
571 sq = isqrt(n);
572 bound = MAXIMUM(256, sq);
575 k = 0;
576 c[0] = newcand(ds, 0, 0, 0, &error);
577 if (error)
578 return -1;
579 for (i = 1; i <= n; i++) {
580 j = a[i];
581 if (j == 0)
582 continue;
583 y = -b[j];
584 oldl = 0;
585 oldc = c[0];
586 numtries = 0;
587 do {
588 if (y <= ds->clist[oldc].y)
589 continue;
590 l = search(ds, c, k, y);
591 if (l != oldl + 1)
592 oldc = c[l - 1];
593 if (l <= k) {
594 if (ds->clist[c[l]].y <= y)
595 continue;
596 tc = c[l];
597 c[l] = newcand(ds, i, y, oldc, &error);
598 if (error)
599 return -1;
600 oldc = tc;
601 oldl = l;
602 numtries++;
603 } else {
604 c[l] = newcand(ds, i, y, oldc, &error);
605 if (error)
606 return -1;
607 k++;
608 break;
610 } while ((y = b[++j]) > 0 && numtries < bound);
612 return (k);
615 static int
616 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
618 struct cand *q;
620 if (ds->clen == ds->clistlen) {
621 ds->clistlen = ds->clistlen * 11 / 10;
622 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
623 if (q == NULL) {
624 *errorp = -1;
625 free(ds->clist);
626 ds->clist = NULL;
627 return 0;
629 ds->clist = q;
631 q = ds->clist + ds->clen;
632 q->x = x;
633 q->y = y;
634 q->pred = pred;
635 *errorp = 0;
636 return (ds->clen++);
639 static int
640 search(struct got_diff_state *ds, int *c, int k, int y)
642 int i, j, l, t;
644 if (ds->clist[c[k]].y < y) /* quick look for typical case */
645 return (k + 1);
646 i = 0;
647 j = k + 1;
648 for (;;) {
649 l = (i + j) / 2;
650 if (l <= i)
651 break;
652 t = ds->clist[c[l]].y;
653 if (t > y)
654 j = l;
655 else if (t < y)
656 i = l;
657 else
658 return (l);
660 return (l + 1);
663 static void
664 unravel(struct got_diff_state *ds, int p)
666 struct cand *q;
667 int i;
669 for (i = 0; i <= ds->len[0]; i++)
670 ds->J[i] = i <= ds->pref ? i :
671 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
672 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
673 ds->J[q->x + ds->pref] = q->y + ds->pref;
676 /*
677 * Check does double duty:
678 * 1. ferret out any fortuitous correspondences due
679 * to confounding by hashing (which result in "jackpot")
680 * 2. collect random access indexes to the two files
681 */
682 static void
683 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
685 int i, j, jackpot, c, d;
686 long ctold, ctnew;
688 rewind(f1);
689 rewind(f2);
690 j = 1;
691 ds->ixold[0] = ds->ixnew[0] = 0;
692 jackpot = 0;
693 ctold = ctnew = 0;
694 for (i = 1; i <= ds->len[0]; i++) {
695 if (ds->J[i] == 0) {
696 ds->ixold[i] = ctold += skipline(f1);
697 continue;
699 while (j < ds->J[i]) {
700 ds->ixnew[j] = ctnew += skipline(f2);
701 j++;
703 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
704 for (;;) {
705 c = getc(f1);
706 d = getc(f2);
707 /*
708 * GNU diff ignores a missing newline
709 * in one file for -b or -w.
710 */
711 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
712 if (c == EOF && d == '\n') {
713 ctnew++;
714 break;
715 } else if (c == '\n' && d == EOF) {
716 ctold++;
717 break;
720 ctold++;
721 ctnew++;
722 if ((flags & D_FOLDBLANKS) && isspace(c) &&
723 isspace(d)) {
724 do {
725 if (c == '\n')
726 break;
727 ctold++;
728 } while (isspace(c = getc(f1)));
729 do {
730 if (d == '\n')
731 break;
732 ctnew++;
733 } while (isspace(d = getc(f2)));
734 } else if ((flags & D_IGNOREBLANKS)) {
735 while (isspace(c) && c != '\n') {
736 c = getc(f1);
737 ctold++;
739 while (isspace(d) && d != '\n') {
740 d = getc(f2);
741 ctnew++;
744 if (ds->chrtran[c] != ds->chrtran[d]) {
745 jackpot++;
746 ds->J[i] = 0;
747 if (c != '\n' && c != EOF)
748 ctold += skipline(f1);
749 if (d != '\n' && c != EOF)
750 ctnew += skipline(f2);
751 break;
753 if (c == '\n' || c == EOF)
754 break;
756 } else {
757 for (;;) {
758 ctold++;
759 ctnew++;
760 if ((c = getc(f1)) != (d = getc(f2))) {
761 /* jackpot++; */
762 ds->J[i] = 0;
763 if (c != '\n' && c != EOF)
764 ctold += skipline(f1);
765 if (d != '\n' && c != EOF)
766 ctnew += skipline(f2);
767 break;
769 if (c == '\n' || c == EOF)
770 break;
773 ds->ixold[i] = ctold;
774 ds->ixnew[j] = ctnew;
775 j++;
777 for (; j <= ds->len[1]; j++)
778 ds->ixnew[j] = ctnew += skipline(f2);
779 /*
780 * if (jackpot)
781 * fprintf(stderr, "jackpot\n");
782 */
785 /* shellsort CACM #201 */
786 static void
787 sort(struct line *a, int n)
789 struct line *ai, *aim, w;
790 int j, m = 0, k;
792 if (n == 0)
793 return;
794 for (j = 1; j <= n; j *= 2)
795 m = 2 * j - 1;
796 for (m /= 2; m != 0; m /= 2) {
797 k = n - m;
798 for (j = 1; j <= k; j++) {
799 for (ai = &a[j]; ai > a; ai -= m) {
800 aim = &ai[m];
801 if (aim < ai)
802 break; /* wraparound */
803 if (aim->value > ai[0].value ||
804 (aim->value == ai[0].value &&
805 aim->serial > ai[0].serial))
806 break;
807 w.value = ai[0].value;
808 ai[0].value = aim->value;
809 aim->value = w.value;
810 w.serial = ai[0].serial;
811 ai[0].serial = aim->serial;
812 aim->serial = w.serial;
818 static int
819 unsort(struct line *f, int l, int *b)
821 int *a, i;
823 a = calloc(l + 1, sizeof(*a));
824 if (a == NULL)
825 return (-1);
826 for (i = 1; i <= l; i++)
827 a[f[i].serial] = f[i].value;
828 for (i = 1; i <= l; i++)
829 b[i] = a[i];
830 free(a);
832 return (0);
835 static int
836 skipline(FILE *f)
838 int i, c;
840 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
841 continue;
842 return (i);
845 static int
846 output(FILE *outfile, struct got_diff_changes *changes,
847 struct got_diff_state *ds, struct got_diff_args *args,
848 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
850 int m, i0, i1, j0, j1;
851 int error = 0;
853 rewind(f1);
854 rewind(f2);
855 m = ds->len[0];
856 ds->J[0] = 0;
857 ds->J[m + 1] = ds->len[1] + 1;
858 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
859 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
860 i0++;
861 j0 = ds->J[i0 - 1] + 1;
862 i1 = i0 - 1;
863 while (i1 < m && ds->J[i1 + 1] == 0)
864 i1++;
865 j1 = ds->J[i1 + 1] - 1;
866 ds->J[i1] = j1;
867 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
868 i0, i1, j0, j1, &flags);
869 if (error)
870 return (error);
872 if (m == 0) {
873 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
874 1, 0, 1, ds->len[1], &flags);
875 if (error)
876 return (error);
878 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
879 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
881 return (0);
884 static void
885 range(FILE *outfile, int a, int b, char *separator)
887 diff_output(outfile, "%d", a > b ? b : a);
888 if (a < b)
889 diff_output(outfile, "%s%d", separator, b);
892 static void
893 uni_range(FILE *outfile, int a, int b)
895 if (a < b)
896 diff_output(outfile, "%d,%d", a, b - a + 1);
897 else if (a == b)
898 diff_output(outfile, "%d", b);
899 else
900 diff_output(outfile, "%d,0", b);
903 /*
904 * Indicate that there is a difference between lines a and b of the from file
905 * to get to lines c to d of the to file. If a is greater then b then there
906 * are no lines in the from file involved and this means that there were
907 * lines appended (beginning at b). If c is greater than d then there are
908 * lines missing from the to file.
909 */
910 static int
911 change(FILE *outfile, struct got_diff_changes *changes,
912 struct got_diff_state *ds, struct got_diff_args *args,
913 const char *file1, FILE *f1, const char *file2, FILE *f2,
914 int a, int b, int c, int d, int *pflags)
916 int i;
918 if (a > b && c > d)
919 return (0);
921 if (*pflags & D_HEADER) {
922 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
923 *pflags &= ~D_HEADER;
925 if (args->diff_format == D_UNIFIED) {
926 /*
927 * Allocate change records as needed.
928 */
929 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
930 struct context_vec *cvp;
931 ptrdiff_t offset;
932 offset = ds->context_vec_ptr - ds->context_vec_start;
933 ds->max_context <<= 1;
934 ds->context_vec_start =
935 cvp = reallocarray(ds->context_vec_start,
936 ds->max_context, sizeof(*ds->context_vec_start));
937 if (cvp == NULL) {
938 free(ds->context_vec_start);
939 return (-1);
941 ds->context_vec_start = cvp;
942 ds->context_vec_end = ds->context_vec_start +
943 ds->max_context;
944 ds->context_vec_ptr = ds->context_vec_start + offset;
946 if (ds->anychange == 0) {
947 /*
948 * Print the context/unidiff header first time through.
949 */
950 print_header(outfile, ds, args, file1, file2);
951 ds->anychange = 1;
952 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
953 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
954 /*
955 * If this change is more than 'diff_context' lines from the
956 * previous change, dump the record and reset it.
957 */
958 dump_unified_vec(outfile, changes, ds, args, f1, f2,
959 *pflags);
961 ds->context_vec_ptr++;
962 ds->context_vec_ptr->a = a;
963 ds->context_vec_ptr->b = b;
964 ds->context_vec_ptr->c = c;
965 ds->context_vec_ptr->d = d;
966 return (0);
968 if (ds->anychange == 0)
969 ds->anychange = 1;
970 if (args->diff_format == D_BRIEF)
971 return (0);
972 if (args->diff_format == D_NORMAL) {
973 range(outfile, a, b, ",");
974 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
975 range(outfile, c, d, ",");
976 diff_output(outfile, "\n");
977 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
978 if (a <= b && c <= d)
979 diff_output(outfile, "---\n");
981 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
982 args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
983 return (0);
986 static int
987 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
988 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
990 int i, j, c, lastc, col, nc;
992 if (a > b)
993 return (0);
994 for (i = a; i <= b; i++) {
995 fseek(lb, f[i - 1], SEEK_SET);
996 nc = f[i] - f[i - 1];
997 if (ch != '\0') {
998 diff_output(outfile, "%c", ch);
999 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1000 args->diff_format == D_NORMAL))
1001 diff_output(outfile, "\t");
1002 else if (args->diff_format != D_UNIFIED)
1003 diff_output(outfile, " ");
1005 col = 0;
1006 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1007 if ((c = getc(lb)) == EOF) {
1008 diff_output(outfile, "\n\\ No newline at end of "
1009 "file\n");
1010 return (0);
1012 if (c == '\t' && (flags & D_EXPANDTABS)) {
1013 do {
1014 diff_output(outfile, " ");
1015 } while (++col & 7);
1016 } else {
1017 diff_output(outfile, "%c", c);
1018 col++;
1022 return (0);
1026 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1028 static int
1029 readhash(struct got_diff_state *ds, FILE *f, int flags)
1031 int i, t, space;
1032 int sum;
1034 sum = 1;
1035 space = 0;
1036 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1037 if (flags & D_IGNORECASE)
1038 for (i = 0; (t = getc(f)) != '\n'; i++) {
1039 if (t == EOF) {
1040 if (i == 0)
1041 return (0);
1042 break;
1044 sum = sum * 127 + ds->chrtran[t];
1046 else
1047 for (i = 0; (t = getc(f)) != '\n'; i++) {
1048 if (t == EOF) {
1049 if (i == 0)
1050 return (0);
1051 break;
1053 sum = sum * 127 + t;
1055 } else {
1056 for (i = 0;;) {
1057 switch (t = getc(f)) {
1058 case '\t':
1059 case '\r':
1060 case '\v':
1061 case '\f':
1062 case ' ':
1063 space++;
1064 continue;
1065 default:
1066 if (space && (flags & D_IGNOREBLANKS) == 0) {
1067 i++;
1068 space = 0;
1070 sum = sum * 127 + ds->chrtran[t];
1071 i++;
1072 continue;
1073 case EOF:
1074 if (i == 0)
1075 return (0);
1076 /* FALLTHROUGH */
1077 case '\n':
1078 break;
1080 break;
1084 * There is a remote possibility that we end up with a zero sum.
1085 * Zero is used as an EOF marker, so return 1 instead.
1087 return (sum == 0 ? 1 : sum);
1090 static int
1091 asciifile(FILE *f)
1093 unsigned char buf[BUFSIZ];
1094 size_t cnt;
1096 if (f == NULL)
1097 return (1);
1099 rewind(f);
1100 cnt = fread(buf, 1, sizeof(buf), f);
1101 return (memchr(buf, '\0', cnt) == NULL);
1104 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1106 static char *
1107 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1109 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1110 size_t nc;
1111 int last = ds->lastline;
1112 char *state = NULL;
1114 ds->lastline = pos;
1115 while (pos > last) {
1116 fseek(fp, f[pos - 1], SEEK_SET);
1117 nc = f[pos] - f[pos - 1];
1118 if (nc >= sizeof(buf))
1119 nc = sizeof(buf) - 1;
1120 nc = fread(buf, 1, nc, fp);
1121 if (nc > 0) {
1122 buf[nc] = '\0';
1123 buf[strcspn(buf, "\n")] = '\0';
1124 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1125 if (begins_with(buf, "private:")) {
1126 if (!state)
1127 state = " (private)";
1128 } else if (begins_with(buf, "protected:")) {
1129 if (!state)
1130 state = " (protected)";
1131 } else if (begins_with(buf, "public:")) {
1132 if (!state)
1133 state = " (public)";
1134 } else {
1135 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1136 if (state)
1137 strlcat(ds->lastbuf, state,
1138 sizeof ds->lastbuf);
1139 ds->lastmatchline = pos;
1140 return ds->lastbuf;
1144 pos--;
1146 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1149 /* dump accumulated "unified" diff changes */
1150 static void
1151 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1152 struct got_diff_state *ds, struct got_diff_args *args,
1153 FILE *f1, FILE *f2, int flags)
1155 struct context_vec *cvp = ds->context_vec_start;
1156 int lowa, upb, lowc, upd;
1157 int a, b, c, d;
1158 char ch, *f;
1160 if (ds->context_vec_start > ds->context_vec_ptr)
1161 return;
1163 b = d = 0; /* gcc */
1164 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1165 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1166 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1167 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1169 diff_output(outfile, "@@ -");
1170 uni_range(outfile, lowa, upb);
1171 diff_output(outfile, " +");
1172 uni_range(outfile, lowc, upd);
1173 diff_output(outfile, " @@");
1174 if ((flags & D_PROTOTYPE)) {
1175 f = match_function(ds, ds->ixold, lowa-1, f1);
1176 if (f != NULL)
1177 diff_output(outfile, " %s", f);
1179 diff_output(outfile, "\n");
1182 * Output changes in "unified" diff format--the old and new lines
1183 * are printed together.
1185 for (; cvp <= ds->context_vec_ptr; cvp++) {
1186 if (changes) {
1187 struct got_diff_change *change;
1188 change = calloc(1, sizeof(*change));
1189 if (change) {
1190 memcpy(&change->cv, cvp, sizeof(change->cv));
1191 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1192 entry);
1193 changes->nchanges++;
1197 a = cvp->a;
1198 b = cvp->b;
1199 c = cvp->c;
1200 d = cvp->d;
1203 * c: both new and old changes
1204 * d: only changes in the old file
1205 * a: only changes in the new file
1207 if (a <= b && c <= d)
1208 ch = 'c';
1209 else
1210 ch = (a <= b) ? 'd' : 'a';
1212 switch (ch) {
1213 case 'c':
1214 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1215 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1216 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1217 break;
1218 case 'd':
1219 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1220 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1221 break;
1222 case 'a':
1223 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1224 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1225 break;
1227 lowa = b + 1;
1228 lowc = d + 1;
1230 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1232 ds->context_vec_ptr = ds->context_vec_start - 1;
1235 static void
1236 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1237 const char *file1, const char *file2)
1239 if (args->label[0] != NULL)
1240 diff_output(outfile, "--- %s\n", args->label[0]);
1241 else
1242 diff_output(outfile, "--- %s\t%s", file1,
1243 ctime(&ds->stb1.st_mtime));
1244 if (args->label[1] != NULL)
1245 diff_output(outfile, "+++ %s\n", args->label[1]);
1246 else
1247 diff_output(outfile, "+++ %s\t%s", file2,
1248 ctime(&ds->stb2.st_mtime));