Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void uni_range(FILE *, int, int);
178 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
179 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
180 static void prune(struct got_diff_state *);
181 static void equiv(struct line *, int, struct line *, int, int *);
182 static void unravel(struct got_diff_state *, int);
183 static int unsort(struct line *, int, int *);
184 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
185 static void sort(struct line *, int);
186 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
187 static int asciifile(FILE *);
188 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
189 static int newcand(struct got_diff_state *, int, int, int, int *);
190 static int search(struct got_diff_state *, int *, int, int);
191 static int skipline(FILE *);
192 static int isqrt(int);
193 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
194 static int readhash(struct got_diff_state *, FILE *, int);
195 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
196 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
198 /*
199 * chrtran points to one of 2 translation tables: cup2low if folding upper to
200 * lower case clow2low if not folding case
201 */
202 u_char clow2low[256] = {
203 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
204 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
205 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
206 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
207 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
208 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
209 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
210 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
211 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
212 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
213 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
214 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
215 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
216 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
217 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
218 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
219 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
220 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
221 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
222 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
223 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
224 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
225 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
226 0xfd, 0xfe, 0xff
227 };
229 u_char cup2low[256] = {
230 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
231 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
232 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
233 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
234 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
235 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
236 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
237 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
238 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
239 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
240 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
241 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
242 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
243 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
244 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
245 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
246 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
247 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
248 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
249 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
250 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
251 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
252 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
253 0xfd, 0xfe, 0xff
254 };
256 static void
257 diff_output(FILE *outfile, const char *fmt, ...)
259 va_list ap;
261 if (outfile == NULL)
262 return;
264 va_start(ap, fmt);
265 vfprintf(outfile, fmt, ap);
266 va_end(ap);
269 const struct got_error *
270 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
271 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
272 struct got_diff_changes *changes)
274 const struct got_error *err = NULL;
275 int i, *p;
276 long *lp;
278 *rval = D_SAME;
279 ds->anychange = 0;
280 ds->lastline = 0;
281 ds->lastmatchline = 0;
282 ds->context_vec_ptr = ds->context_vec_start - 1;
283 ds->max_context = 64;
284 if (flags & D_IGNORECASE)
285 ds->chrtran = cup2low;
286 else
287 ds->chrtran = clow2low;
288 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
289 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
290 return NULL;
292 if (flags & D_EMPTY1) {
293 f1 = fopen(_PATH_DEVNULL, "r");
294 if (f1 == NULL) {
295 err = got_error_from_errno();
296 goto closem;
299 else if (f1 == NULL) {
300 args->status |= 2;
301 goto closem;
304 if (flags & D_EMPTY2) {
305 f2 = fopen(_PATH_DEVNULL, "r");
306 if (f2 == NULL) {
307 err = got_error_from_errno();
308 goto closem;
310 } else if (f2 == NULL) {
311 args->status |= 2;
312 goto closem;
315 switch (files_differ(ds, f1, f2, flags)) {
316 case 0:
317 goto closem;
318 case 1:
319 break;
320 default:
321 /* error */
322 args->status |= 2;
323 goto closem;
326 if ((flags & D_FORCEASCII) == 0 &&
327 (!asciifile(f1) || !asciifile(f2))) {
328 *rval = D_BINARY;
329 args->status |= 1;
330 goto closem;
332 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
333 err = got_error_from_errno();
334 goto closem;
336 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
337 err = got_error_from_errno();
338 goto closem;
341 prune(ds);
342 sort(ds->sfile[0], ds->slen[0]);
343 sort(ds->sfile[1], ds->slen[1]);
345 ds->member = (int *)ds->file[1];
346 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
347 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
348 if (p == NULL) {
349 err = got_error_from_errno();
350 goto closem;
352 ds->member = p;
354 ds->class = (int *)ds->file[0];
355 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
356 err = got_error_from_errno();
357 goto closem;
359 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
360 if (p == NULL) {
361 err = got_error_from_errno();
362 goto closem;
364 ds->class = p;
366 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
367 if (ds->klist == NULL) {
368 err = got_error_from_errno();
369 goto closem;
371 ds->clen = 0;
372 ds->clistlen = 100;
373 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
374 if (ds->clist == NULL) {
375 err = got_error_from_errno();
376 goto closem;
378 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
379 if (i < 0) {
380 err = got_error_from_errno();
381 goto closem;
384 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
385 if (p == NULL) {
386 err = got_error_from_errno();
387 goto closem;
389 ds->J = p;
390 unravel(ds, ds->klist[i]);
392 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
393 if (lp == NULL) {
394 err = got_error_from_errno();
395 goto closem;
397 ds->ixold = lp;
398 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
399 if (lp == NULL) {
400 err = got_error_from_errno();
401 goto closem;
403 ds->ixnew = lp;
404 check(ds, f1, f2, flags);
405 if (output(outfile, changes, ds, args, args->label[0], f1,
406 args->label[1], f2, flags))
407 err = got_error_from_errno();
408 closem:
409 free(ds->J);
410 free(ds->member);
411 free(ds->class);
412 free(ds->clist);
413 free(ds->klist);
414 free(ds->ixold);
415 free(ds->ixnew);
416 if (ds->anychange) {
417 args->status |= 1;
418 if (*rval == D_SAME)
419 *rval = D_DIFFER;
421 if (f1 != NULL)
422 fclose(f1);
423 if (f2 != NULL)
424 fclose(f2);
426 return (err);
429 /*
430 * Check to see if the given files differ.
431 * Returns 0 if they are the same, 1 if different, and -1 on error.
432 * XXX - could use code from cmp(1) [faster]
433 */
434 static int
435 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
437 char buf1[BUFSIZ], buf2[BUFSIZ];
438 size_t i, j;
440 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
441 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
442 return (1);
443 for (;;) {
444 i = fread(buf1, 1, sizeof(buf1), f1);
445 j = fread(buf2, 1, sizeof(buf2), f2);
446 if ((!i && ferror(f1)) || (!j && ferror(f2)))
447 return (-1);
448 if (i != j)
449 return (1);
450 if (i == 0)
451 return (0);
452 if (memcmp(buf1, buf2, i) != 0)
453 return (1);
457 static int
458 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
460 struct line *p, *q;
461 int j, h;
462 size_t sz;
464 rewind(fd);
466 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
467 if (sz < 100)
468 sz = 100;
470 p = calloc(sz + 3, sizeof(*p));
471 if (p == NULL)
472 return (-1);
473 for (j = 0; (h = readhash(ds, fd, flags));) {
474 if (j == sz) {
475 sz = sz * 3 / 2;
476 q = reallocarray(p, sz + 3, sizeof(*p));
477 if (q == NULL) {
478 free(p);
479 return (-1);
481 p = q;
483 p[++j].value = h;
485 ds->len[i] = j;
486 ds->file[i] = p;
488 return (0);
491 static void
492 prune(struct got_diff_state *ds)
494 int i, j;
496 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
497 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
498 ds->pref++)
500 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
501 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
502 ds->suff++)
504 for (j = 0; j < 2; j++) {
505 ds->sfile[j] = ds->file[j] + ds->pref;
506 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
507 for (i = 0; i <= ds->slen[j]; i++)
508 ds->sfile[j][i].serial = i;
512 static void
513 equiv(struct line *a, int n, struct line *b, int m, int *c)
515 int i, j;
517 i = j = 1;
518 while (i <= n && j <= m) {
519 if (a[i].value < b[j].value)
520 a[i++].value = 0;
521 else if (a[i].value == b[j].value)
522 a[i++].value = j;
523 else
524 j++;
526 while (i <= n)
527 a[i++].value = 0;
528 b[m + 1].value = 0;
529 j = 0;
530 while (++j <= m) {
531 c[j] = -b[j].serial;
532 while (b[j + 1].value == b[j].value) {
533 j++;
534 c[j] = b[j].serial;
537 c[j] = -1;
540 /* Code taken from ping.c */
541 static int
542 isqrt(int n)
544 int y, x = 1;
546 if (n == 0)
547 return (0);
549 do { /* newton was a stinker */
550 y = x;
551 x = n / x;
552 x += y;
553 x /= 2;
554 } while ((x - y) > 1 || (x - y) < -1);
556 return (x);
559 static int
560 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
562 int i, k, y, j, l;
563 int oldc, tc, oldl, sq;
564 u_int numtries, bound;
565 int error;
567 if (flags & D_MINIMAL)
568 bound = UINT_MAX;
569 else {
570 sq = isqrt(n);
571 bound = MAXIMUM(256, sq);
574 k = 0;
575 c[0] = newcand(ds, 0, 0, 0, &error);
576 if (error)
577 return -1;
578 for (i = 1; i <= n; i++) {
579 j = a[i];
580 if (j == 0)
581 continue;
582 y = -b[j];
583 oldl = 0;
584 oldc = c[0];
585 numtries = 0;
586 do {
587 if (y <= ds->clist[oldc].y)
588 continue;
589 l = search(ds, c, k, y);
590 if (l != oldl + 1)
591 oldc = c[l - 1];
592 if (l <= k) {
593 if (ds->clist[c[l]].y <= y)
594 continue;
595 tc = c[l];
596 c[l] = newcand(ds, i, y, oldc, &error);
597 if (error)
598 return -1;
599 oldc = tc;
600 oldl = l;
601 numtries++;
602 } else {
603 c[l] = newcand(ds, i, y, oldc, &error);
604 if (error)
605 return -1;
606 k++;
607 break;
609 } while ((y = b[++j]) > 0 && numtries < bound);
611 return (k);
614 static int
615 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
617 struct cand *q;
619 if (ds->clen == ds->clistlen) {
620 ds->clistlen = ds->clistlen * 11 / 10;
621 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
622 if (q == NULL) {
623 *errorp = -1;
624 free(ds->clist);
625 ds->clist = NULL;
626 return 0;
628 ds->clist = q;
630 q = ds->clist + ds->clen;
631 q->x = x;
632 q->y = y;
633 q->pred = pred;
634 *errorp = 0;
635 return (ds->clen++);
638 static int
639 search(struct got_diff_state *ds, int *c, int k, int y)
641 int i, j, l, t;
643 if (ds->clist[c[k]].y < y) /* quick look for typical case */
644 return (k + 1);
645 i = 0;
646 j = k + 1;
647 for (;;) {
648 l = (i + j) / 2;
649 if (l <= i)
650 break;
651 t = ds->clist[c[l]].y;
652 if (t > y)
653 j = l;
654 else if (t < y)
655 i = l;
656 else
657 return (l);
659 return (l + 1);
662 static void
663 unravel(struct got_diff_state *ds, int p)
665 struct cand *q;
666 int i;
668 for (i = 0; i <= ds->len[0]; i++)
669 ds->J[i] = i <= ds->pref ? i :
670 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
671 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
672 ds->J[q->x + ds->pref] = q->y + ds->pref;
675 /*
676 * Check does double duty:
677 * 1. ferret out any fortuitous correspondences due
678 * to confounding by hashing (which result in "jackpot")
679 * 2. collect random access indexes to the two files
680 */
681 static void
682 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
684 int i, j, jackpot, c, d;
685 long ctold, ctnew;
687 rewind(f1);
688 rewind(f2);
689 j = 1;
690 ds->ixold[0] = ds->ixnew[0] = 0;
691 jackpot = 0;
692 ctold = ctnew = 0;
693 for (i = 1; i <= ds->len[0]; i++) {
694 if (ds->J[i] == 0) {
695 ds->ixold[i] = ctold += skipline(f1);
696 continue;
698 while (j < ds->J[i]) {
699 ds->ixnew[j] = ctnew += skipline(f2);
700 j++;
702 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
703 for (;;) {
704 c = getc(f1);
705 d = getc(f2);
706 /*
707 * GNU diff ignores a missing newline
708 * in one file for -b or -w.
709 */
710 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
711 if (c == EOF && d == '\n') {
712 ctnew++;
713 break;
714 } else if (c == '\n' && d == EOF) {
715 ctold++;
716 break;
719 ctold++;
720 ctnew++;
721 if ((flags & D_FOLDBLANKS) && isspace(c) &&
722 isspace(d)) {
723 do {
724 if (c == '\n')
725 break;
726 ctold++;
727 } while (isspace(c = getc(f1)));
728 do {
729 if (d == '\n')
730 break;
731 ctnew++;
732 } while (isspace(d = getc(f2)));
733 } else if ((flags & D_IGNOREBLANKS)) {
734 while (isspace(c) && c != '\n') {
735 c = getc(f1);
736 ctold++;
738 while (isspace(d) && d != '\n') {
739 d = getc(f2);
740 ctnew++;
743 if (ds->chrtran[c] != ds->chrtran[d]) {
744 jackpot++;
745 ds->J[i] = 0;
746 if (c != '\n' && c != EOF)
747 ctold += skipline(f1);
748 if (d != '\n' && c != EOF)
749 ctnew += skipline(f2);
750 break;
752 if (c == '\n' || c == EOF)
753 break;
755 } else {
756 for (;;) {
757 ctold++;
758 ctnew++;
759 if ((c = getc(f1)) != (d = getc(f2))) {
760 /* jackpot++; */
761 ds->J[i] = 0;
762 if (c != '\n' && c != EOF)
763 ctold += skipline(f1);
764 if (d != '\n' && c != EOF)
765 ctnew += skipline(f2);
766 break;
768 if (c == '\n' || c == EOF)
769 break;
772 ds->ixold[i] = ctold;
773 ds->ixnew[j] = ctnew;
774 j++;
776 for (; j <= ds->len[1]; j++)
777 ds->ixnew[j] = ctnew += skipline(f2);
778 /*
779 * if (jackpot)
780 * fprintf(stderr, "jackpot\n");
781 */
784 /* shellsort CACM #201 */
785 static void
786 sort(struct line *a, int n)
788 struct line *ai, *aim, w;
789 int j, m = 0, k;
791 if (n == 0)
792 return;
793 for (j = 1; j <= n; j *= 2)
794 m = 2 * j - 1;
795 for (m /= 2; m != 0; m /= 2) {
796 k = n - m;
797 for (j = 1; j <= k; j++) {
798 for (ai = &a[j]; ai > a; ai -= m) {
799 aim = &ai[m];
800 if (aim < ai)
801 break; /* wraparound */
802 if (aim->value > ai[0].value ||
803 (aim->value == ai[0].value &&
804 aim->serial > ai[0].serial))
805 break;
806 w.value = ai[0].value;
807 ai[0].value = aim->value;
808 aim->value = w.value;
809 w.serial = ai[0].serial;
810 ai[0].serial = aim->serial;
811 aim->serial = w.serial;
817 static int
818 unsort(struct line *f, int l, int *b)
820 int *a, i;
822 a = calloc(l + 1, sizeof(*a));
823 if (a == NULL)
824 return (-1);
825 for (i = 1; i <= l; i++)
826 a[f[i].serial] = f[i].value;
827 for (i = 1; i <= l; i++)
828 b[i] = a[i];
829 free(a);
831 return (0);
834 static int
835 skipline(FILE *f)
837 int i, c;
839 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
840 continue;
841 return (i);
844 static int
845 output(FILE *outfile, struct got_diff_changes *changes,
846 struct got_diff_state *ds, struct got_diff_args *args,
847 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
849 int m, i0, i1, j0, j1;
850 int error = 0;
852 rewind(f1);
853 rewind(f2);
854 m = ds->len[0];
855 ds->J[0] = 0;
856 ds->J[m + 1] = ds->len[1] + 1;
857 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
858 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
859 i0++;
860 j0 = ds->J[i0 - 1] + 1;
861 i1 = i0 - 1;
862 while (i1 < m && ds->J[i1 + 1] == 0)
863 i1++;
864 j1 = ds->J[i1 + 1] - 1;
865 ds->J[i1] = j1;
866 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
867 i0, i1, j0, j1, &flags);
868 if (error)
869 return (error);
871 if (m == 0) {
872 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
873 1, 0, 1, ds->len[1], &flags);
874 if (error)
875 return (error);
877 if (ds->anychange != 0)
878 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
880 return (0);
883 static void
884 uni_range(FILE *outfile, int a, int b)
886 if (a < b)
887 diff_output(outfile, "%d,%d", a, b - a + 1);
888 else if (a == b)
889 diff_output(outfile, "%d", b);
890 else
891 diff_output(outfile, "%d,0", b);
894 /*
895 * Indicate that there is a difference between lines a and b of the from file
896 * to get to lines c to d of the to file. If a is greater then b then there
897 * are no lines in the from file involved and this means that there were
898 * lines appended (beginning at b). If c is greater than d then there are
899 * lines missing from the to file.
900 */
901 static int
902 change(FILE *outfile, struct got_diff_changes *changes,
903 struct got_diff_state *ds, struct got_diff_args *args,
904 const char *file1, FILE *f1, const char *file2, FILE *f2,
905 int a, int b, int c, int d, int *pflags)
907 int i;
909 if (a > b && c > d)
910 return (0);
912 if (*pflags & D_HEADER) {
913 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
914 *pflags &= ~D_HEADER;
916 if (args->diff_format == D_UNIFIED) {
917 /*
918 * Allocate change records as needed.
919 */
920 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
921 struct context_vec *cvp;
922 ptrdiff_t offset;
923 offset = ds->context_vec_ptr - ds->context_vec_start;
924 ds->max_context <<= 1;
925 ds->context_vec_start =
926 cvp = reallocarray(ds->context_vec_start,
927 ds->max_context, sizeof(*ds->context_vec_start));
928 if (cvp == NULL) {
929 free(ds->context_vec_start);
930 return (-1);
932 ds->context_vec_start = cvp;
933 ds->context_vec_end = ds->context_vec_start +
934 ds->max_context;
935 ds->context_vec_ptr = ds->context_vec_start + offset;
937 if (ds->anychange == 0) {
938 /*
939 * Print the context/unidiff header first time through.
940 */
941 print_header(outfile, ds, args, file1, file2);
942 ds->anychange = 1;
943 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
944 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
945 /*
946 * If this change is more than 'diff_context' lines from the
947 * previous change, dump the record and reset it.
948 */
949 dump_unified_vec(outfile, changes, ds, args, f1, f2,
950 *pflags);
952 ds->context_vec_ptr++;
953 ds->context_vec_ptr->a = a;
954 ds->context_vec_ptr->b = b;
955 ds->context_vec_ptr->c = c;
956 ds->context_vec_ptr->d = d;
957 return (0);
959 if (ds->anychange == 0)
960 ds->anychange = 1;
961 if (args->diff_format == D_BRIEF)
962 return (0);
963 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, '\0', 0, *pflags);
964 return (0);
967 static int
968 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
969 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
971 int i, j, c, lastc, col, nc;
973 if (a > b)
974 return (0);
975 for (i = a; i <= b; i++) {
976 fseek(lb, f[i - 1], SEEK_SET);
977 nc = f[i] - f[i - 1];
978 if (ch != '\0') {
979 diff_output(outfile, "%c", ch);
980 if (args->Tflag && args->diff_format == D_UNIFIED)
981 diff_output(outfile, "\t");
982 else if (args->diff_format != D_UNIFIED)
983 diff_output(outfile, " ");
985 col = 0;
986 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
987 if ((c = getc(lb)) == EOF) {
988 diff_output(outfile, "\n\\ No newline at end of "
989 "file\n");
990 return (0);
992 if (c == '\t' && (flags & D_EXPANDTABS)) {
993 do {
994 diff_output(outfile, " ");
995 } while (++col & 7);
996 } else {
997 diff_output(outfile, "%c", c);
998 col++;
1002 return (0);
1006 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1008 static int
1009 readhash(struct got_diff_state *ds, FILE *f, int flags)
1011 int i, t, space;
1012 int sum;
1014 sum = 1;
1015 space = 0;
1016 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1017 if (flags & D_IGNORECASE)
1018 for (i = 0; (t = getc(f)) != '\n'; i++) {
1019 if (t == EOF) {
1020 if (i == 0)
1021 return (0);
1022 break;
1024 sum = sum * 127 + ds->chrtran[t];
1026 else
1027 for (i = 0; (t = getc(f)) != '\n'; i++) {
1028 if (t == EOF) {
1029 if (i == 0)
1030 return (0);
1031 break;
1033 sum = sum * 127 + t;
1035 } else {
1036 for (i = 0;;) {
1037 switch (t = getc(f)) {
1038 case '\t':
1039 case '\r':
1040 case '\v':
1041 case '\f':
1042 case ' ':
1043 space++;
1044 continue;
1045 default:
1046 if (space && (flags & D_IGNOREBLANKS) == 0) {
1047 i++;
1048 space = 0;
1050 sum = sum * 127 + ds->chrtran[t];
1051 i++;
1052 continue;
1053 case EOF:
1054 if (i == 0)
1055 return (0);
1056 /* FALLTHROUGH */
1057 case '\n':
1058 break;
1060 break;
1064 * There is a remote possibility that we end up with a zero sum.
1065 * Zero is used as an EOF marker, so return 1 instead.
1067 return (sum == 0 ? 1 : sum);
1070 static int
1071 asciifile(FILE *f)
1073 unsigned char buf[BUFSIZ];
1074 size_t cnt;
1076 if (f == NULL)
1077 return (1);
1079 rewind(f);
1080 cnt = fread(buf, 1, sizeof(buf), f);
1081 return (memchr(buf, '\0', cnt) == NULL);
1084 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1086 static char *
1087 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1089 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1090 size_t nc;
1091 int last = ds->lastline;
1092 char *state = NULL;
1094 ds->lastline = pos;
1095 while (pos > last) {
1096 fseek(fp, f[pos - 1], SEEK_SET);
1097 nc = f[pos] - f[pos - 1];
1098 if (nc >= sizeof(buf))
1099 nc = sizeof(buf) - 1;
1100 nc = fread(buf, 1, nc, fp);
1101 if (nc > 0) {
1102 buf[nc] = '\0';
1103 buf[strcspn(buf, "\n")] = '\0';
1104 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1105 if (begins_with(buf, "private:")) {
1106 if (!state)
1107 state = " (private)";
1108 } else if (begins_with(buf, "protected:")) {
1109 if (!state)
1110 state = " (protected)";
1111 } else if (begins_with(buf, "public:")) {
1112 if (!state)
1113 state = " (public)";
1114 } else {
1115 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1116 if (state)
1117 strlcat(ds->lastbuf, state,
1118 sizeof ds->lastbuf);
1119 ds->lastmatchline = pos;
1120 return ds->lastbuf;
1124 pos--;
1126 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1129 /* dump accumulated "unified" diff changes */
1130 static void
1131 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1132 struct got_diff_state *ds, struct got_diff_args *args,
1133 FILE *f1, FILE *f2, int flags)
1135 struct context_vec *cvp = ds->context_vec_start;
1136 int lowa, upb, lowc, upd;
1137 int a, b, c, d;
1138 char ch, *f;
1140 if (ds->context_vec_start > ds->context_vec_ptr)
1141 return;
1143 b = d = 0; /* gcc */
1144 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1145 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1146 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1147 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1149 diff_output(outfile, "@@ -");
1150 uni_range(outfile, lowa, upb);
1151 diff_output(outfile, " +");
1152 uni_range(outfile, lowc, upd);
1153 diff_output(outfile, " @@");
1154 if ((flags & D_PROTOTYPE)) {
1155 f = match_function(ds, ds->ixold, lowa-1, f1);
1156 if (f != NULL)
1157 diff_output(outfile, " %s", f);
1159 diff_output(outfile, "\n");
1162 * Output changes in "unified" diff format--the old and new lines
1163 * are printed together.
1165 for (; cvp <= ds->context_vec_ptr; cvp++) {
1166 if (changes) {
1167 struct got_diff_change *change;
1168 change = calloc(1, sizeof(*change));
1169 if (change) {
1170 memcpy(&change->cv, cvp, sizeof(change->cv));
1171 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1172 entry);
1173 changes->nchanges++;
1177 a = cvp->a;
1178 b = cvp->b;
1179 c = cvp->c;
1180 d = cvp->d;
1183 * c: both new and old changes
1184 * d: only changes in the old file
1185 * a: only changes in the new file
1187 if (a <= b && c <= d)
1188 ch = 'c';
1189 else
1190 ch = (a <= b) ? 'd' : 'a';
1192 switch (ch) {
1193 case 'c':
1194 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1195 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1196 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1197 break;
1198 case 'd':
1199 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1200 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1201 break;
1202 case 'a':
1203 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1204 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1205 break;
1207 lowa = b + 1;
1208 lowc = d + 1;
1210 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1212 ds->context_vec_ptr = ds->context_vec_start - 1;
1215 static void
1216 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1217 const char *file1, const char *file2)
1219 if (args->label[0] != NULL)
1220 diff_output(outfile, "--- %s\n", args->label[0]);
1221 else
1222 diff_output(outfile, "--- %s\t%s", file1,
1223 ctime(&ds->stb1.st_mtime));
1224 if (args->label[1] != NULL)
1225 diff_output(outfile, "+++ %s\n", args->label[1]);
1226 else
1227 diff_output(outfile, "+++ %s\t%s", file2,
1228 ctime(&ds->stb2.st_mtime));