Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/types.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 #include <sys/queue.h>
72 #include <ctype.h>
73 #include <err.h>
74 #include <errno.h>
75 #include <fcntl.h>
76 #include <paths.h>
77 #include <stdarg.h>
78 #include <stddef.h>
79 #include <stdint.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <time.h>
84 #include <unistd.h>
85 #include <limits.h>
86 #include <sha1.h>
87 #include <zlib.h>
89 #include "got_error.h"
90 #include "got_object.h"
91 #include "got_diff.h"
93 #include "got_lib_diff.h"
95 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
96 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
98 /*
99 * diff - compare two files.
100 */
102 /*
103 * Uses an algorithm due to Harold Stone, which finds
104 * a pair of longest identical subsequences in the two
105 * files.
107 * The major goal is to generate the match vector J.
108 * J[i] is the index of the line in file1 corresponding
109 * to line i file0. J[i] = 0 if there is no
110 * such line in file1.
112 * Lines are hashed so as to work in core. All potential
113 * matches are located by sorting the lines of each file
114 * on the hash (called ``value''). In particular, this
115 * collects the equivalence classes in file1 together.
116 * Subroutine equiv replaces the value of each line in
117 * file0 by the index of the first element of its
118 * matching equivalence in (the reordered) file1.
119 * To save space equiv squeezes file1 into a single
120 * array member in which the equivalence classes
121 * are simply concatenated, except that their first
122 * members are flagged by changing sign.
124 * Next the indices that point into member are unsorted into
125 * array class according to the original order of file0.
127 * The cleverness lies in routine stone. This marches
128 * through the lines of file0, developing a vector klist
129 * of "k-candidates". At step i a k-candidate is a matched
130 * pair of lines x,y (x in file0 y in file1) such that
131 * there is a common subsequence of length k
132 * between the first i lines of file0 and the first y
133 * lines of file1, but there is no such subsequence for
134 * any smaller y. x is the earliest possible mate to y
135 * that occurs in such a subsequence.
137 * Whenever any of the members of the equivalence class of
138 * lines in file1 matable to a line in file0 has serial number
139 * less than the y of some k-candidate, that k-candidate
140 * with the smallest such y is replaced. The new
141 * k-candidate is chained (via pred) to the current
142 * k-1 candidate so that the actual subsequence can
143 * be recovered. When a member has serial number greater
144 * that the y of all k-candidates, the klist is extended.
145 * At the end, the longest subsequence is pulled out
146 * and placed in the array J by unravel
148 * With J in hand, the matches there recorded are
149 * check'ed against reality to assure that no spurious
150 * matches have crept in due to hashing. If they have,
151 * they are broken, and "jackpot" is recorded--a harmless
152 * matter except that a true match for a spuriously
153 * mated line may now be unnecessarily reported as a change.
155 * Much of the complexity of the program comes simply
156 * from trying to minimize core utilization and
157 * maximize the range of doable problems by dynamically
158 * allocating what is needed and reusing what is not.
159 * The core requirements for problems larger than somewhat
160 * are (in words) 2*length(file0) + length(file1) +
161 * 3*(number of k-candidates installed), typically about
162 * 6n words for files of length n.
163 */
165 struct cand {
166 int x;
167 int y;
168 int pred;
169 };
171 struct line {
172 int serial;
173 int value;
174 };
176 static void diff_output(FILE *, const char *, ...);
177 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
178 static void check(struct got_diff_state *, FILE *, FILE *, int);
179 static void range(FILE *, int, int, char *);
180 static void uni_range(FILE *, int, int);
181 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
182 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
183 static void prune(struct got_diff_state *);
184 static void equiv(struct line *, int, struct line *, int, int *);
185 static void unravel(struct got_diff_state *, int);
186 static int unsort(struct line *, int, int *);
187 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
188 static void sort(struct line *, int);
189 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
190 static int asciifile(FILE *);
191 static void fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int);
192 static int newcand(struct got_diff_state *, int, int, int, int *);
193 static int search(struct got_diff_state *, int *, int, int);
194 static int skipline(FILE *);
195 static int isqrt(int);
196 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
197 static int readhash(struct got_diff_state *, FILE *, int);
198 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
199 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
201 /*
202 * chrtran points to one of 2 translation tables: cup2low if folding upper to
203 * lower case clow2low if not folding case
204 */
205 u_char clow2low[256] = {
206 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
207 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
208 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
209 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
210 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
211 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
212 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
213 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
214 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
215 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
216 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
217 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
218 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
219 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
220 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
221 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
222 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
223 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
224 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
225 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
226 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
227 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
228 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
229 0xfd, 0xfe, 0xff
230 };
232 u_char cup2low[256] = {
233 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
234 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
235 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
236 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
237 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
238 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
239 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
240 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
241 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
242 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
243 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
244 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
245 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
246 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
247 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
248 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
249 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
250 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
251 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
252 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
253 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
254 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
255 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
256 0xfd, 0xfe, 0xff
257 };
259 static void
260 diff_output(FILE *outfile, const char *fmt, ...)
262 va_list ap;
264 if (outfile == NULL)
265 return;
267 va_start(ap, fmt);
268 vfprintf(outfile, fmt, ap);
269 va_end(ap);
272 void
273 got_diff_state_free(struct got_diff_state *ds)
275 free(ds->J);
276 free(ds->member);
277 free(ds->class);
278 free(ds->clist);
279 free(ds->klist);
280 free(ds->ixold);
281 free(ds->ixnew);
284 const struct got_error *
285 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
286 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
287 struct got_diff_changes *changes)
289 const struct got_error *err = NULL;
290 int i, *p;
291 long *lp;
293 *rval = D_SAME;
294 ds->anychange = 0;
295 ds->lastline = 0;
296 ds->lastmatchline = 0;
297 ds->context_vec_ptr = ds->context_vec_start - 1;
298 ds->max_context = GOT_DIFF_MAX_CONTEXT;
299 if (flags & D_IGNORECASE)
300 ds->chrtran = cup2low;
301 else
302 ds->chrtran = clow2low;
303 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
304 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
305 return NULL;
307 if (flags & D_EMPTY1) {
308 f1 = fopen(_PATH_DEVNULL, "r");
309 if (f1 == NULL) {
310 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
311 goto closem;
314 else if (f1 == NULL) {
315 args->status |= 2;
316 goto closem;
319 if (flags & D_EMPTY2) {
320 f2 = fopen(_PATH_DEVNULL, "r");
321 if (f2 == NULL) {
322 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
323 goto closem;
325 } else if (f2 == NULL) {
326 args->status |= 2;
327 goto closem;
330 switch (files_differ(ds, f1, f2, flags)) {
331 case 0:
332 goto closem;
333 case 1:
334 break;
335 default:
336 /* error */
337 args->status |= 2;
338 goto closem;
341 if ((flags & D_FORCEASCII) == 0 &&
342 (!asciifile(f1) || !asciifile(f2))) {
343 *rval = D_BINARY;
344 args->status |= 1;
345 goto closem;
347 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
348 err = got_error_from_errno("prepare");
349 goto closem;
351 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
352 err = got_error_from_errno("prepare");
353 goto closem;
356 prune(ds);
357 sort(ds->sfile[0], ds->slen[0]);
358 sort(ds->sfile[1], ds->slen[1]);
360 ds->member = (int *)ds->file[1];
361 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
362 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
363 if (p == NULL) {
364 err = got_error_from_errno("reallocarray");
365 goto closem;
367 ds->member = p;
369 ds->class = (int *)ds->file[0];
370 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
371 err = got_error_from_errno("unsort");
372 goto closem;
374 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
375 if (p == NULL) {
376 err = got_error_from_errno("reallocarray");
377 goto closem;
379 ds->class = p;
381 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
382 if (ds->klist == NULL) {
383 err = got_error_from_errno("calloc");
384 goto closem;
386 ds->clen = 0;
387 ds->clistlen = 100;
388 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
389 if (ds->clist == NULL) {
390 err = got_error_from_errno("calloc");
391 goto closem;
393 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
394 if (i < 0) {
395 err = got_error_from_errno("stone");
396 goto closem;
399 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
400 if (p == NULL) {
401 err = got_error_from_errno("reallocarray");
402 goto closem;
404 ds->J = p;
405 unravel(ds, ds->klist[i]);
407 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
408 if (lp == NULL) {
409 err = got_error_from_errno("reallocarray");
410 goto closem;
412 ds->ixold = lp;
413 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
414 if (lp == NULL) {
415 err = got_error_from_errno("reallocarray");
416 goto closem;
418 ds->ixnew = lp;
419 check(ds, f1, f2, flags);
420 if (output(outfile, changes, ds, args, args->label[0], f1,
421 args->label[1], f2, flags))
422 err = got_error_from_errno("output");
423 closem:
424 if (ds->anychange) {
425 args->status |= 1;
426 if (*rval == D_SAME)
427 *rval = D_DIFFER;
429 if ((flags & D_EMPTY1) && f1) {
430 if (fclose(f1) != 0 && err == NULL)
431 err = got_error_from_errno("fclose");
433 if ((flags & D_EMPTY2) && f2) {
434 if (fclose(f2) != 0 && err == NULL)
435 err = got_error_from_errno("fclose");
437 return (err);
440 /*
441 * Check to see if the given files differ.
442 * Returns 0 if they are the same, 1 if different, and -1 on error.
443 * XXX - could use code from cmp(1) [faster]
444 */
445 static int
446 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
448 char buf1[BUFSIZ], buf2[BUFSIZ];
449 size_t i, j;
451 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
452 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
453 return (1);
454 for (;;) {
455 i = fread(buf1, 1, sizeof(buf1), f1);
456 j = fread(buf2, 1, sizeof(buf2), f2);
457 if ((!i && ferror(f1)) || (!j && ferror(f2)))
458 return (-1);
459 if (i != j)
460 return (1);
461 if (i == 0)
462 return (0);
463 if (memcmp(buf1, buf2, i) != 0)
464 return (1);
468 static int
469 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
471 struct line *p, *q;
472 int j, h;
473 size_t sz;
475 rewind(fd);
477 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
478 if (sz < 100)
479 sz = 100;
481 p = calloc(sz + 3, sizeof(*p));
482 if (p == NULL)
483 return (-1);
484 for (j = 0; (h = readhash(ds, fd, flags));) {
485 if (j == sz) {
486 sz = sz * 3 / 2;
487 q = reallocarray(p, sz + 3, sizeof(*p));
488 if (q == NULL) {
489 free(p);
490 return (-1);
492 p = q;
494 p[++j].value = h;
496 ds->len[i] = j;
497 ds->file[i] = p;
499 return (0);
502 static void
503 prune(struct got_diff_state *ds)
505 int i, j;
507 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
508 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
509 ds->pref++)
511 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
512 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
513 ds->suff++)
515 for (j = 0; j < 2; j++) {
516 ds->sfile[j] = ds->file[j] + ds->pref;
517 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
518 for (i = 0; i <= ds->slen[j]; i++)
519 ds->sfile[j][i].serial = i;
523 static void
524 equiv(struct line *a, int n, struct line *b, int m, int *c)
526 int i, j;
528 i = j = 1;
529 while (i <= n && j <= m) {
530 if (a[i].value < b[j].value)
531 a[i++].value = 0;
532 else if (a[i].value == b[j].value)
533 a[i++].value = j;
534 else
535 j++;
537 while (i <= n)
538 a[i++].value = 0;
539 b[m + 1].value = 0;
540 j = 0;
541 while (++j <= m) {
542 c[j] = -b[j].serial;
543 while (b[j + 1].value == b[j].value) {
544 j++;
545 c[j] = b[j].serial;
548 c[j] = -1;
551 /* Code taken from ping.c */
552 static int
553 isqrt(int n)
555 int y, x = 1;
557 if (n == 0)
558 return (0);
560 do { /* newton was a stinker */
561 y = x;
562 x = n / x;
563 x += y;
564 x /= 2;
565 } while ((x - y) > 1 || (x - y) < -1);
567 return (x);
570 static int
571 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
573 int i, k, y, j, l;
574 int oldc, tc, oldl, sq;
575 u_int numtries, bound;
576 int error;
578 if (flags & D_MINIMAL)
579 bound = UINT_MAX;
580 else {
581 sq = isqrt(n);
582 bound = MAXIMUM(256, sq);
585 k = 0;
586 c[0] = newcand(ds, 0, 0, 0, &error);
587 if (error)
588 return -1;
589 for (i = 1; i <= n; i++) {
590 j = a[i];
591 if (j == 0)
592 continue;
593 y = -b[j];
594 oldl = 0;
595 oldc = c[0];
596 numtries = 0;
597 do {
598 if (y <= ds->clist[oldc].y)
599 continue;
600 l = search(ds, c, k, y);
601 if (l != oldl + 1)
602 oldc = c[l - 1];
603 if (l <= k) {
604 if (ds->clist[c[l]].y <= y)
605 continue;
606 tc = c[l];
607 c[l] = newcand(ds, i, y, oldc, &error);
608 if (error)
609 return -1;
610 oldc = tc;
611 oldl = l;
612 numtries++;
613 } else {
614 c[l] = newcand(ds, i, y, oldc, &error);
615 if (error)
616 return -1;
617 k++;
618 break;
620 } while ((y = b[++j]) > 0 && numtries < bound);
622 return (k);
625 static int
626 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
628 struct cand *q;
630 if (ds->clen == ds->clistlen) {
631 ds->clistlen = ds->clistlen * 11 / 10;
632 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
633 if (q == NULL) {
634 *errorp = -1;
635 free(ds->clist);
636 ds->clist = NULL;
637 return 0;
639 ds->clist = q;
641 q = ds->clist + ds->clen;
642 q->x = x;
643 q->y = y;
644 q->pred = pred;
645 *errorp = 0;
646 return (ds->clen++);
649 static int
650 search(struct got_diff_state *ds, int *c, int k, int y)
652 int i, j, l, t;
654 if (ds->clist[c[k]].y < y) /* quick look for typical case */
655 return (k + 1);
656 i = 0;
657 j = k + 1;
658 for (;;) {
659 l = (i + j) / 2;
660 if (l <= i)
661 break;
662 t = ds->clist[c[l]].y;
663 if (t > y)
664 j = l;
665 else if (t < y)
666 i = l;
667 else
668 return (l);
670 return (l + 1);
673 static void
674 unravel(struct got_diff_state *ds, int p)
676 struct cand *q;
677 int i;
679 for (i = 0; i <= ds->len[0]; i++)
680 ds->J[i] = i <= ds->pref ? i :
681 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
682 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
683 ds->J[q->x + ds->pref] = q->y + ds->pref;
686 /*
687 * Check does double duty:
688 * 1. ferret out any fortuitous correspondences due
689 * to confounding by hashing (which result in "jackpot")
690 * 2. collect random access indexes to the two files
691 */
692 static void
693 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
695 int i, j, jackpot, c, d;
696 long ctold, ctnew;
698 rewind(f1);
699 rewind(f2);
700 j = 1;
701 ds->ixold[0] = ds->ixnew[0] = 0;
702 jackpot = 0;
703 ctold = ctnew = 0;
704 for (i = 1; i <= ds->len[0]; i++) {
705 if (ds->J[i] == 0) {
706 ds->ixold[i] = ctold += skipline(f1);
707 continue;
709 while (j < ds->J[i]) {
710 ds->ixnew[j] = ctnew += skipline(f2);
711 j++;
713 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
714 for (;;) {
715 c = getc(f1);
716 d = getc(f2);
717 /*
718 * GNU diff ignores a missing newline
719 * in one file for -b or -w.
720 */
721 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
722 if (c == EOF && d == '\n') {
723 ctnew++;
724 break;
725 } else if (c == '\n' && d == EOF) {
726 ctold++;
727 break;
730 ctold++;
731 ctnew++;
732 if ((flags & D_FOLDBLANKS) && isspace(c) &&
733 isspace(d)) {
734 do {
735 if (c == '\n')
736 break;
737 ctold++;
738 } while (isspace(c = getc(f1)));
739 do {
740 if (d == '\n')
741 break;
742 ctnew++;
743 } while (isspace(d = getc(f2)));
744 } else if ((flags & D_IGNOREBLANKS)) {
745 while (isspace(c) && c != '\n') {
746 c = getc(f1);
747 ctold++;
749 while (isspace(d) && d != '\n') {
750 d = getc(f2);
751 ctnew++;
754 if (ds->chrtran[c] != ds->chrtran[d]) {
755 jackpot++;
756 ds->J[i] = 0;
757 if (c != '\n' && c != EOF)
758 ctold += skipline(f1);
759 if (d != '\n' && c != EOF)
760 ctnew += skipline(f2);
761 break;
763 if (c == '\n' || c == EOF)
764 break;
766 } else {
767 for (;;) {
768 ctold++;
769 ctnew++;
770 if ((c = getc(f1)) != (d = getc(f2))) {
771 /* jackpot++; */
772 ds->J[i] = 0;
773 if (c != '\n' && c != EOF)
774 ctold += skipline(f1);
775 if (d != '\n' && c != EOF)
776 ctnew += skipline(f2);
777 break;
779 if (c == '\n' || c == EOF)
780 break;
783 ds->ixold[i] = ctold;
784 ds->ixnew[j] = ctnew;
785 j++;
787 for (; j <= ds->len[1]; j++)
788 ds->ixnew[j] = ctnew += skipline(f2);
789 /*
790 * if (jackpot)
791 * fprintf(stderr, "jackpot\n");
792 */
795 /* shellsort CACM #201 */
796 static void
797 sort(struct line *a, int n)
799 struct line *ai, *aim, w;
800 int j, m = 0, k;
802 if (n == 0)
803 return;
804 for (j = 1; j <= n; j *= 2)
805 m = 2 * j - 1;
806 for (m /= 2; m != 0; m /= 2) {
807 k = n - m;
808 for (j = 1; j <= k; j++) {
809 for (ai = &a[j]; ai > a; ai -= m) {
810 aim = &ai[m];
811 if (aim < ai)
812 break; /* wraparound */
813 if (aim->value > ai[0].value ||
814 (aim->value == ai[0].value &&
815 aim->serial > ai[0].serial))
816 break;
817 w.value = ai[0].value;
818 ai[0].value = aim->value;
819 aim->value = w.value;
820 w.serial = ai[0].serial;
821 ai[0].serial = aim->serial;
822 aim->serial = w.serial;
828 static int
829 unsort(struct line *f, int l, int *b)
831 int *a, i;
833 a = calloc(l + 1, sizeof(*a));
834 if (a == NULL)
835 return (-1);
836 for (i = 1; i <= l; i++)
837 a[f[i].serial] = f[i].value;
838 for (i = 1; i <= l; i++)
839 b[i] = a[i];
840 free(a);
842 return (0);
845 static int
846 skipline(FILE *f)
848 int i, c;
850 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
851 continue;
852 return (i);
855 static int
856 output(FILE *outfile, struct got_diff_changes *changes,
857 struct got_diff_state *ds, struct got_diff_args *args,
858 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
860 int m, i0, i1, j0, j1;
861 int error = 0;
863 rewind(f1);
864 rewind(f2);
865 m = ds->len[0];
866 ds->J[0] = 0;
867 ds->J[m + 1] = ds->len[1] + 1;
868 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
869 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
870 i0++;
871 j0 = ds->J[i0 - 1] + 1;
872 i1 = i0 - 1;
873 while (i1 < m && ds->J[i1 + 1] == 0)
874 i1++;
875 j1 = ds->J[i1 + 1] - 1;
876 ds->J[i1] = j1;
877 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
878 i0, i1, j0, j1, &flags);
879 if (error)
880 return (error);
882 if (m == 0) {
883 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
884 1, 0, 1, ds->len[1], &flags);
885 if (error)
886 return (error);
888 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
889 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
891 return (0);
894 static void
895 range(FILE *outfile, int a, int b, char *separator)
897 diff_output(outfile, "%d", a > b ? b : a);
898 if (a < b)
899 diff_output(outfile, "%s%d", separator, b);
902 static void
903 uni_range(FILE *outfile, int a, int b)
905 if (a < b)
906 diff_output(outfile, "%d,%d", a, b - a + 1);
907 else if (a == b)
908 diff_output(outfile, "%d", b);
909 else
910 diff_output(outfile, "%d,0", b);
913 /*
914 * Indicate that there is a difference between lines a and b of the from file
915 * to get to lines c to d of the to file. If a is greater then b then there
916 * are no lines in the from file involved and this means that there were
917 * lines appended (beginning at b). If c is greater than d then there are
918 * lines missing from the to file.
919 */
920 static int
921 change(FILE *outfile, struct got_diff_changes *changes,
922 struct got_diff_state *ds, struct got_diff_args *args,
923 const char *file1, FILE *f1, const char *file2, FILE *f2,
924 int a, int b, int c, int d, int *pflags)
926 if (a > b && c > d)
927 return (0);
929 if (*pflags & D_HEADER) {
930 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
931 *pflags &= ~D_HEADER;
933 if (args->diff_format == D_UNIFIED) {
934 /*
935 * Allocate change records as needed.
936 */
937 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
938 struct context_vec *cvp;
939 ptrdiff_t offset;
940 offset = ds->context_vec_ptr - ds->context_vec_start;
941 ds->max_context <<= 1;
942 cvp = reallocarray(ds->context_vec_start,
943 ds->max_context, sizeof(*ds->context_vec_start));
944 if (cvp == NULL) {
945 free(ds->context_vec_start);
946 return (-1);
948 ds->context_vec_start = cvp;
949 ds->context_vec_end = ds->context_vec_start +
950 ds->max_context;
951 ds->context_vec_ptr = ds->context_vec_start + offset;
953 if (ds->anychange == 0) {
954 /*
955 * Print the context/unidiff header first time through.
956 */
957 print_header(outfile, ds, args, file1, file2);
958 ds->anychange = 1;
959 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
960 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
961 /*
962 * If this change is more than 'diff_context' lines from the
963 * previous change, dump the record and reset it.
964 */
965 dump_unified_vec(outfile, changes, ds, args, f1, f2,
966 *pflags);
968 ds->context_vec_ptr++;
969 ds->context_vec_ptr->a = a;
970 ds->context_vec_ptr->b = b;
971 ds->context_vec_ptr->c = c;
972 ds->context_vec_ptr->d = d;
973 return (0);
975 if (ds->anychange == 0)
976 ds->anychange = 1;
977 if (args->diff_format == D_BRIEF)
978 return (0);
979 if (args->diff_format == D_NORMAL) {
980 range(outfile, a, b, ",");
981 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
982 range(outfile, c, d, ",");
983 diff_output(outfile, "\n");
984 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', *pflags);
985 if (a <= b && c <= d)
986 diff_output(outfile, "---\n");
988 fetch(outfile, ds, args, ds->ixnew, c, d, f2,
989 args->diff_format == D_NORMAL ? '>' : '\0', *pflags);
990 return (0);
993 static void
994 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
995 long *f, int a, int b, FILE *lb, int ch, int flags)
997 int i, j, c, col, nc;
999 if (a > b)
1000 return;
1001 for (i = a; i <= b; i++) {
1002 fseek(lb, f[i - 1], SEEK_SET);
1003 nc = f[i] - f[i - 1];
1004 if (ch != '\0') {
1005 diff_output(outfile, "%c", ch);
1006 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1007 args->diff_format == D_NORMAL))
1008 diff_output(outfile, "\t");
1009 else if (args->diff_format != D_UNIFIED)
1010 diff_output(outfile, " ");
1012 col = 0;
1013 for (j = 0; j < nc; j++) {
1014 if ((c = getc(lb)) == EOF) {
1015 diff_output(outfile, "\n\\ No newline at end of "
1016 "file\n");
1017 return;
1019 if (c == '\t' && (flags & D_EXPANDTABS)) {
1020 do {
1021 diff_output(outfile, " ");
1022 } while (++col & 7);
1023 } else {
1024 diff_output(outfile, "%c", c);
1025 col++;
1032 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1034 static int
1035 readhash(struct got_diff_state *ds, FILE *f, int flags)
1037 int i, t, space;
1038 int sum;
1040 sum = 1;
1041 space = 0;
1042 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1043 if (flags & D_IGNORECASE)
1044 for (i = 0; (t = getc(f)) != '\n'; i++) {
1045 if (t == EOF) {
1046 if (i == 0)
1047 return (0);
1048 break;
1050 sum = sum * 127 + ds->chrtran[t];
1052 else
1053 for (i = 0; (t = getc(f)) != '\n'; i++) {
1054 if (t == EOF) {
1055 if (i == 0)
1056 return (0);
1057 break;
1059 sum = sum * 127 + t;
1061 } else {
1062 for (i = 0;;) {
1063 switch (t = getc(f)) {
1064 case '\t':
1065 case '\r':
1066 case '\v':
1067 case '\f':
1068 case ' ':
1069 space++;
1070 continue;
1071 default:
1072 if (space && (flags & D_IGNOREBLANKS) == 0) {
1073 i++;
1074 space = 0;
1076 sum = sum * 127 + ds->chrtran[t];
1077 i++;
1078 continue;
1079 case EOF:
1080 if (i == 0)
1081 return (0);
1082 /* FALLTHROUGH */
1083 case '\n':
1084 break;
1086 break;
1090 * There is a remote possibility that we end up with a zero sum.
1091 * Zero is used as an EOF marker, so return 1 instead.
1093 return (sum == 0 ? 1 : sum);
1096 static int
1097 asciifile(FILE *f)
1099 unsigned char buf[BUFSIZ];
1100 size_t cnt;
1102 if (f == NULL)
1103 return (1);
1105 rewind(f);
1106 cnt = fread(buf, 1, sizeof(buf), f);
1107 return (memchr(buf, '\0', cnt) == NULL);
1110 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1112 static char *
1113 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1115 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1116 size_t nc;
1117 int last = ds->lastline;
1118 char *state = NULL;
1120 ds->lastline = pos;
1121 while (pos > last) {
1122 fseek(fp, f[pos - 1], SEEK_SET);
1123 nc = f[pos] - f[pos - 1];
1124 if (nc >= sizeof(buf))
1125 nc = sizeof(buf) - 1;
1126 nc = fread(buf, 1, nc, fp);
1127 if (nc > 0) {
1128 buf[nc] = '\0';
1129 buf[strcspn(buf, "\n")] = '\0';
1130 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1131 if (begins_with(buf, "private:")) {
1132 if (!state)
1133 state = " (private)";
1134 } else if (begins_with(buf, "protected:")) {
1135 if (!state)
1136 state = " (protected)";
1137 } else if (begins_with(buf, "public:")) {
1138 if (!state)
1139 state = " (public)";
1140 } else {
1141 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1142 if (state)
1143 strlcat(ds->lastbuf, state,
1144 sizeof ds->lastbuf);
1145 ds->lastmatchline = pos;
1146 return ds->lastbuf;
1150 pos--;
1152 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1155 /* dump accumulated "unified" diff changes */
1156 static void
1157 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1158 struct got_diff_state *ds, struct got_diff_args *args,
1159 FILE *f1, FILE *f2, int flags)
1161 struct context_vec *cvp = ds->context_vec_start;
1162 int lowa, upb, lowc, upd;
1163 int a, b, c, d;
1164 char ch, *f;
1166 if (ds->context_vec_start > ds->context_vec_ptr)
1167 return;
1169 b = d = 0; /* gcc */
1170 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1171 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1172 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1173 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1175 diff_output(outfile, "@@ -");
1176 uni_range(outfile, lowa, upb);
1177 diff_output(outfile, " +");
1178 uni_range(outfile, lowc, upd);
1179 diff_output(outfile, " @@");
1180 if ((flags & D_PROTOTYPE)) {
1181 f = match_function(ds, ds->ixold, lowa-1, f1);
1182 if (f != NULL)
1183 diff_output(outfile, " %s", f);
1185 diff_output(outfile, "\n");
1188 * Output changes in "unified" diff format--the old and new lines
1189 * are printed together.
1191 for (; cvp <= ds->context_vec_ptr; cvp++) {
1192 if (changes) {
1193 struct got_diff_change *change;
1194 change = calloc(1, sizeof(*change));
1195 if (change) {
1196 memcpy(&change->cv, cvp, sizeof(change->cv));
1197 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1198 entry);
1199 changes->nchanges++;
1203 a = cvp->a;
1204 b = cvp->b;
1205 c = cvp->c;
1206 d = cvp->d;
1209 * c: both new and old changes
1210 * d: only changes in the old file
1211 * a: only changes in the new file
1213 if (a <= b && c <= d)
1214 ch = 'c';
1215 else
1216 ch = (a <= b) ? 'd' : 'a';
1218 switch (ch) {
1219 case 'c':
1220 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1221 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1222 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1223 break;
1224 case 'd':
1225 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1226 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1227 break;
1228 case 'a':
1229 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', flags);
1230 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1231 break;
1233 lowa = b + 1;
1234 lowc = d + 1;
1236 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', flags);
1238 ds->context_vec_ptr = ds->context_vec_start - 1;
1241 void
1242 got_diff_dump_change(FILE *outfile, struct got_diff_change *change,
1243 struct got_diff_state *ds, struct got_diff_args *args,
1244 FILE *f1, FILE *f2, int diff_flags)
1246 ds->context_vec_ptr = &change->cv;
1247 ds->context_vec_start = &change->cv;
1248 ds->context_vec_end = &change->cv;
1250 /* XXX TODO needs error checking */
1251 dump_unified_vec(outfile, NULL, ds, args, f1, f2, diff_flags);
1254 static void
1255 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1256 const char *file1, const char *file2)
1258 if (args->label[0] != NULL)
1259 diff_output(outfile, "--- %s\n", args->label[0]);
1260 else
1261 diff_output(outfile, "--- %s\t%s", file1,
1262 ctime(&ds->stb1.st_mtime));
1263 if (args->label[1] != NULL)
1264 diff_output(outfile, "+++ %s\n", args->label[1]);
1265 else
1266 diff_output(outfile, "+++ %s\t%s", file2,
1267 ctime(&ds->stb2.st_mtime));