Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
199 /*
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
202 */
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
227 0xfd, 0xfe, 0xff
228 };
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
254 0xfd, 0xfe, 0xff
255 };
257 static void
258 diff_output(FILE *outfile, const char *fmt, ...)
260 va_list ap;
262 if (outfile == NULL)
263 return;
265 va_start(ap, fmt);
266 vfprintf(outfile, fmt, ap);
267 va_end(ap);
270 const struct got_error *
271 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
272 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
273 struct got_diff_changes *changes)
275 const struct got_error *err = NULL;
276 int i, *p;
277 long *lp;
279 *rval = D_SAME;
280 ds->anychange = 0;
281 ds->lastline = 0;
282 ds->lastmatchline = 0;
283 ds->context_vec_ptr = ds->context_vec_start - 1;
284 ds->max_context = GOT_DIFF_MAX_CONTEXT;
285 if (flags & D_IGNORECASE)
286 ds->chrtran = cup2low;
287 else
288 ds->chrtran = clow2low;
289 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
290 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
291 return NULL;
293 if (flags & D_EMPTY1) {
294 f1 = fopen(_PATH_DEVNULL, "r");
295 if (f1 == NULL) {
296 err = got_error_from_errno();
297 goto closem;
300 else if (f1 == NULL) {
301 args->status |= 2;
302 goto closem;
305 if (flags & D_EMPTY2) {
306 f2 = fopen(_PATH_DEVNULL, "r");
307 if (f2 == NULL) {
308 err = got_error_from_errno();
309 goto closem;
311 } else if (f2 == NULL) {
312 args->status |= 2;
313 goto closem;
316 switch (files_differ(ds, f1, f2, flags)) {
317 case 0:
318 goto closem;
319 case 1:
320 break;
321 default:
322 /* error */
323 args->status |= 2;
324 goto closem;
327 if ((flags & D_FORCEASCII) == 0 &&
328 (!asciifile(f1) || !asciifile(f2))) {
329 *rval = D_BINARY;
330 args->status |= 1;
331 goto closem;
333 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
334 err = got_error_from_errno();
335 goto closem;
337 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
338 err = got_error_from_errno();
339 goto closem;
342 prune(ds);
343 sort(ds->sfile[0], ds->slen[0]);
344 sort(ds->sfile[1], ds->slen[1]);
346 ds->member = (int *)ds->file[1];
347 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
348 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
349 if (p == NULL) {
350 err = got_error_from_errno();
351 goto closem;
353 ds->member = p;
355 ds->class = (int *)ds->file[0];
356 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
357 err = got_error_from_errno();
358 goto closem;
360 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
361 if (p == NULL) {
362 err = got_error_from_errno();
363 goto closem;
365 ds->class = p;
367 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
368 if (ds->klist == NULL) {
369 err = got_error_from_errno();
370 goto closem;
372 ds->clen = 0;
373 ds->clistlen = 100;
374 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
375 if (ds->clist == NULL) {
376 err = got_error_from_errno();
377 goto closem;
379 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
380 if (i < 0) {
381 err = got_error_from_errno();
382 goto closem;
385 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
386 if (p == NULL) {
387 err = got_error_from_errno();
388 goto closem;
390 ds->J = p;
391 unravel(ds, ds->klist[i]);
393 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
394 if (lp == NULL) {
395 err = got_error_from_errno();
396 goto closem;
398 ds->ixold = lp;
399 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
400 if (lp == NULL) {
401 err = got_error_from_errno();
402 goto closem;
404 ds->ixnew = lp;
405 check(ds, f1, f2, flags);
406 if (output(outfile, changes, ds, args, args->label[0], f1,
407 args->label[1], f2, flags))
408 err = got_error_from_errno();
409 closem:
410 free(ds->J);
411 free(ds->member);
412 free(ds->class);
413 free(ds->clist);
414 free(ds->klist);
415 free(ds->ixold);
416 free(ds->ixnew);
417 if (ds->anychange) {
418 args->status |= 1;
419 if (*rval == D_SAME)
420 *rval = D_DIFFER;
423 return (err);
426 /*
427 * Check to see if the given files differ.
428 * Returns 0 if they are the same, 1 if different, and -1 on error.
429 * XXX - could use code from cmp(1) [faster]
430 */
431 static int
432 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
434 char buf1[BUFSIZ], buf2[BUFSIZ];
435 size_t i, j;
437 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
438 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
439 return (1);
440 for (;;) {
441 i = fread(buf1, 1, sizeof(buf1), f1);
442 j = fread(buf2, 1, sizeof(buf2), f2);
443 if ((!i && ferror(f1)) || (!j && ferror(f2)))
444 return (-1);
445 if (i != j)
446 return (1);
447 if (i == 0)
448 return (0);
449 if (memcmp(buf1, buf2, i) != 0)
450 return (1);
454 static int
455 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
457 struct line *p, *q;
458 int j, h;
459 size_t sz;
461 rewind(fd);
463 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
464 if (sz < 100)
465 sz = 100;
467 p = calloc(sz + 3, sizeof(*p));
468 if (p == NULL)
469 return (-1);
470 for (j = 0; (h = readhash(ds, fd, flags));) {
471 if (j == sz) {
472 sz = sz * 3 / 2;
473 q = reallocarray(p, sz + 3, sizeof(*p));
474 if (q == NULL) {
475 free(p);
476 return (-1);
478 p = q;
480 p[++j].value = h;
482 ds->len[i] = j;
483 ds->file[i] = p;
485 return (0);
488 static void
489 prune(struct got_diff_state *ds)
491 int i, j;
493 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
494 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
495 ds->pref++)
497 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
498 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
499 ds->suff++)
501 for (j = 0; j < 2; j++) {
502 ds->sfile[j] = ds->file[j] + ds->pref;
503 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
504 for (i = 0; i <= ds->slen[j]; i++)
505 ds->sfile[j][i].serial = i;
509 static void
510 equiv(struct line *a, int n, struct line *b, int m, int *c)
512 int i, j;
514 i = j = 1;
515 while (i <= n && j <= m) {
516 if (a[i].value < b[j].value)
517 a[i++].value = 0;
518 else if (a[i].value == b[j].value)
519 a[i++].value = j;
520 else
521 j++;
523 while (i <= n)
524 a[i++].value = 0;
525 b[m + 1].value = 0;
526 j = 0;
527 while (++j <= m) {
528 c[j] = -b[j].serial;
529 while (b[j + 1].value == b[j].value) {
530 j++;
531 c[j] = b[j].serial;
534 c[j] = -1;
537 /* Code taken from ping.c */
538 static int
539 isqrt(int n)
541 int y, x = 1;
543 if (n == 0)
544 return (0);
546 do { /* newton was a stinker */
547 y = x;
548 x = n / x;
549 x += y;
550 x /= 2;
551 } while ((x - y) > 1 || (x - y) < -1);
553 return (x);
556 static int
557 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
559 int i, k, y, j, l;
560 int oldc, tc, oldl, sq;
561 u_int numtries, bound;
562 int error;
564 if (flags & D_MINIMAL)
565 bound = UINT_MAX;
566 else {
567 sq = isqrt(n);
568 bound = MAXIMUM(256, sq);
571 k = 0;
572 c[0] = newcand(ds, 0, 0, 0, &error);
573 if (error)
574 return -1;
575 for (i = 1; i <= n; i++) {
576 j = a[i];
577 if (j == 0)
578 continue;
579 y = -b[j];
580 oldl = 0;
581 oldc = c[0];
582 numtries = 0;
583 do {
584 if (y <= ds->clist[oldc].y)
585 continue;
586 l = search(ds, c, k, y);
587 if (l != oldl + 1)
588 oldc = c[l - 1];
589 if (l <= k) {
590 if (ds->clist[c[l]].y <= y)
591 continue;
592 tc = c[l];
593 c[l] = newcand(ds, i, y, oldc, &error);
594 if (error)
595 return -1;
596 oldc = tc;
597 oldl = l;
598 numtries++;
599 } else {
600 c[l] = newcand(ds, i, y, oldc, &error);
601 if (error)
602 return -1;
603 k++;
604 break;
606 } while ((y = b[++j]) > 0 && numtries < bound);
608 return (k);
611 static int
612 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
614 struct cand *q;
616 if (ds->clen == ds->clistlen) {
617 ds->clistlen = ds->clistlen * 11 / 10;
618 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
619 if (q == NULL) {
620 *errorp = -1;
621 free(ds->clist);
622 ds->clist = NULL;
623 return 0;
625 ds->clist = q;
627 q = ds->clist + ds->clen;
628 q->x = x;
629 q->y = y;
630 q->pred = pred;
631 *errorp = 0;
632 return (ds->clen++);
635 static int
636 search(struct got_diff_state *ds, int *c, int k, int y)
638 int i, j, l, t;
640 if (ds->clist[c[k]].y < y) /* quick look for typical case */
641 return (k + 1);
642 i = 0;
643 j = k + 1;
644 for (;;) {
645 l = (i + j) / 2;
646 if (l <= i)
647 break;
648 t = ds->clist[c[l]].y;
649 if (t > y)
650 j = l;
651 else if (t < y)
652 i = l;
653 else
654 return (l);
656 return (l + 1);
659 static void
660 unravel(struct got_diff_state *ds, int p)
662 struct cand *q;
663 int i;
665 for (i = 0; i <= ds->len[0]; i++)
666 ds->J[i] = i <= ds->pref ? i :
667 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
668 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
669 ds->J[q->x + ds->pref] = q->y + ds->pref;
672 /*
673 * Check does double duty:
674 * 1. ferret out any fortuitous correspondences due
675 * to confounding by hashing (which result in "jackpot")
676 * 2. collect random access indexes to the two files
677 */
678 static void
679 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
681 int i, j, jackpot, c, d;
682 long ctold, ctnew;
684 rewind(f1);
685 rewind(f2);
686 j = 1;
687 ds->ixold[0] = ds->ixnew[0] = 0;
688 jackpot = 0;
689 ctold = ctnew = 0;
690 for (i = 1; i <= ds->len[0]; i++) {
691 if (ds->J[i] == 0) {
692 ds->ixold[i] = ctold += skipline(f1);
693 continue;
695 while (j < ds->J[i]) {
696 ds->ixnew[j] = ctnew += skipline(f2);
697 j++;
699 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
700 for (;;) {
701 c = getc(f1);
702 d = getc(f2);
703 /*
704 * GNU diff ignores a missing newline
705 * in one file for -b or -w.
706 */
707 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
708 if (c == EOF && d == '\n') {
709 ctnew++;
710 break;
711 } else if (c == '\n' && d == EOF) {
712 ctold++;
713 break;
716 ctold++;
717 ctnew++;
718 if ((flags & D_FOLDBLANKS) && isspace(c) &&
719 isspace(d)) {
720 do {
721 if (c == '\n')
722 break;
723 ctold++;
724 } while (isspace(c = getc(f1)));
725 do {
726 if (d == '\n')
727 break;
728 ctnew++;
729 } while (isspace(d = getc(f2)));
730 } else if ((flags & D_IGNOREBLANKS)) {
731 while (isspace(c) && c != '\n') {
732 c = getc(f1);
733 ctold++;
735 while (isspace(d) && d != '\n') {
736 d = getc(f2);
737 ctnew++;
740 if (ds->chrtran[c] != ds->chrtran[d]) {
741 jackpot++;
742 ds->J[i] = 0;
743 if (c != '\n' && c != EOF)
744 ctold += skipline(f1);
745 if (d != '\n' && c != EOF)
746 ctnew += skipline(f2);
747 break;
749 if (c == '\n' || c == EOF)
750 break;
752 } else {
753 for (;;) {
754 ctold++;
755 ctnew++;
756 if ((c = getc(f1)) != (d = getc(f2))) {
757 /* jackpot++; */
758 ds->J[i] = 0;
759 if (c != '\n' && c != EOF)
760 ctold += skipline(f1);
761 if (d != '\n' && c != EOF)
762 ctnew += skipline(f2);
763 break;
765 if (c == '\n' || c == EOF)
766 break;
769 ds->ixold[i] = ctold;
770 ds->ixnew[j] = ctnew;
771 j++;
773 for (; j <= ds->len[1]; j++)
774 ds->ixnew[j] = ctnew += skipline(f2);
775 /*
776 * if (jackpot)
777 * fprintf(stderr, "jackpot\n");
778 */
781 /* shellsort CACM #201 */
782 static void
783 sort(struct line *a, int n)
785 struct line *ai, *aim, w;
786 int j, m = 0, k;
788 if (n == 0)
789 return;
790 for (j = 1; j <= n; j *= 2)
791 m = 2 * j - 1;
792 for (m /= 2; m != 0; m /= 2) {
793 k = n - m;
794 for (j = 1; j <= k; j++) {
795 for (ai = &a[j]; ai > a; ai -= m) {
796 aim = &ai[m];
797 if (aim < ai)
798 break; /* wraparound */
799 if (aim->value > ai[0].value ||
800 (aim->value == ai[0].value &&
801 aim->serial > ai[0].serial))
802 break;
803 w.value = ai[0].value;
804 ai[0].value = aim->value;
805 aim->value = w.value;
806 w.serial = ai[0].serial;
807 ai[0].serial = aim->serial;
808 aim->serial = w.serial;
814 static int
815 unsort(struct line *f, int l, int *b)
817 int *a, i;
819 a = calloc(l + 1, sizeof(*a));
820 if (a == NULL)
821 return (-1);
822 for (i = 1; i <= l; i++)
823 a[f[i].serial] = f[i].value;
824 for (i = 1; i <= l; i++)
825 b[i] = a[i];
826 free(a);
828 return (0);
831 static int
832 skipline(FILE *f)
834 int i, c;
836 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
837 continue;
838 return (i);
841 static int
842 output(FILE *outfile, struct got_diff_changes *changes,
843 struct got_diff_state *ds, struct got_diff_args *args,
844 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
846 int m, i0, i1, j0, j1;
847 int error = 0;
849 rewind(f1);
850 rewind(f2);
851 m = ds->len[0];
852 ds->J[0] = 0;
853 ds->J[m + 1] = ds->len[1] + 1;
854 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
855 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
856 i0++;
857 j0 = ds->J[i0 - 1] + 1;
858 i1 = i0 - 1;
859 while (i1 < m && ds->J[i1 + 1] == 0)
860 i1++;
861 j1 = ds->J[i1 + 1] - 1;
862 ds->J[i1] = j1;
863 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
864 i0, i1, j0, j1, &flags);
865 if (error)
866 return (error);
868 if (m == 0) {
869 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
870 1, 0, 1, ds->len[1], &flags);
871 if (error)
872 return (error);
874 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
875 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
877 return (0);
880 static void
881 range(FILE *outfile, int a, int b, char *separator)
883 diff_output(outfile, "%d", a > b ? b : a);
884 if (a < b)
885 diff_output(outfile, "%s%d", separator, b);
888 static void
889 uni_range(FILE *outfile, int a, int b)
891 if (a < b)
892 diff_output(outfile, "%d,%d", a, b - a + 1);
893 else if (a == b)
894 diff_output(outfile, "%d", b);
895 else
896 diff_output(outfile, "%d,0", b);
899 /*
900 * Indicate that there is a difference between lines a and b of the from file
901 * to get to lines c to d of the to file. If a is greater then b then there
902 * are no lines in the from file involved and this means that there were
903 * lines appended (beginning at b). If c is greater than d then there are
904 * lines missing from the to file.
905 */
906 static int
907 change(FILE *outfile, struct got_diff_changes *changes,
908 struct got_diff_state *ds, struct got_diff_args *args,
909 const char *file1, FILE *f1, const char *file2, FILE *f2,
910 int a, int b, int c, int d, int *pflags)
912 int i;
914 if (a > b && c > d)
915 return (0);
917 if (*pflags & D_HEADER) {
918 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
919 *pflags &= ~D_HEADER;
921 if (args->diff_format == D_UNIFIED) {
922 /*
923 * Allocate change records as needed.
924 */
925 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
926 struct context_vec *cvp;
927 ptrdiff_t offset;
928 offset = ds->context_vec_ptr - ds->context_vec_start;
929 ds->max_context <<= 1;
930 ds->context_vec_start =
931 cvp = reallocarray(ds->context_vec_start,
932 ds->max_context, sizeof(*ds->context_vec_start));
933 if (cvp == NULL) {
934 free(ds->context_vec_start);
935 return (-1);
937 ds->context_vec_start = cvp;
938 ds->context_vec_end = ds->context_vec_start +
939 ds->max_context;
940 ds->context_vec_ptr = ds->context_vec_start + offset;
942 if (ds->anychange == 0) {
943 /*
944 * Print the context/unidiff header first time through.
945 */
946 print_header(outfile, ds, args, file1, file2);
947 ds->anychange = 1;
948 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
949 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
950 /*
951 * If this change is more than 'diff_context' lines from the
952 * previous change, dump the record and reset it.
953 */
954 dump_unified_vec(outfile, changes, ds, args, f1, f2,
955 *pflags);
957 ds->context_vec_ptr++;
958 ds->context_vec_ptr->a = a;
959 ds->context_vec_ptr->b = b;
960 ds->context_vec_ptr->c = c;
961 ds->context_vec_ptr->d = d;
962 return (0);
964 if (ds->anychange == 0)
965 ds->anychange = 1;
966 if (args->diff_format == D_BRIEF)
967 return (0);
968 if (args->diff_format == D_NORMAL) {
969 range(outfile, a, b, ",");
970 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
971 range(outfile, c, d, ",");
972 diff_output(outfile, "\n");
973 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
974 if (a <= b && c <= d)
975 diff_output(outfile, "---\n");
977 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
978 args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
979 return (0);
982 static int
983 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
984 long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
986 int i, j, c, lastc, col, nc;
988 if (a > b)
989 return (0);
990 for (i = a; i <= b; i++) {
991 fseek(lb, f[i - 1], SEEK_SET);
992 nc = f[i] - f[i - 1];
993 if (ch != '\0') {
994 diff_output(outfile, "%c", ch);
995 if (args->Tflag && (args->diff_format == D_UNIFIED ||
996 args->diff_format == D_NORMAL))
997 diff_output(outfile, "\t");
998 else if (args->diff_format != D_UNIFIED)
999 diff_output(outfile, " ");
1001 col = 0;
1002 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1003 if ((c = getc(lb)) == EOF) {
1004 diff_output(outfile, "\n\\ No newline at end of "
1005 "file\n");
1006 return (0);
1008 if (c == '\t' && (flags & D_EXPANDTABS)) {
1009 do {
1010 diff_output(outfile, " ");
1011 } while (++col & 7);
1012 } else {
1013 diff_output(outfile, "%c", c);
1014 col++;
1018 return (0);
1022 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1024 static int
1025 readhash(struct got_diff_state *ds, FILE *f, int flags)
1027 int i, t, space;
1028 int sum;
1030 sum = 1;
1031 space = 0;
1032 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1033 if (flags & D_IGNORECASE)
1034 for (i = 0; (t = getc(f)) != '\n'; i++) {
1035 if (t == EOF) {
1036 if (i == 0)
1037 return (0);
1038 break;
1040 sum = sum * 127 + ds->chrtran[t];
1042 else
1043 for (i = 0; (t = getc(f)) != '\n'; i++) {
1044 if (t == EOF) {
1045 if (i == 0)
1046 return (0);
1047 break;
1049 sum = sum * 127 + t;
1051 } else {
1052 for (i = 0;;) {
1053 switch (t = getc(f)) {
1054 case '\t':
1055 case '\r':
1056 case '\v':
1057 case '\f':
1058 case ' ':
1059 space++;
1060 continue;
1061 default:
1062 if (space && (flags & D_IGNOREBLANKS) == 0) {
1063 i++;
1064 space = 0;
1066 sum = sum * 127 + ds->chrtran[t];
1067 i++;
1068 continue;
1069 case EOF:
1070 if (i == 0)
1071 return (0);
1072 /* FALLTHROUGH */
1073 case '\n':
1074 break;
1076 break;
1080 * There is a remote possibility that we end up with a zero sum.
1081 * Zero is used as an EOF marker, so return 1 instead.
1083 return (sum == 0 ? 1 : sum);
1086 static int
1087 asciifile(FILE *f)
1089 unsigned char buf[BUFSIZ];
1090 size_t cnt;
1092 if (f == NULL)
1093 return (1);
1095 rewind(f);
1096 cnt = fread(buf, 1, sizeof(buf), f);
1097 return (memchr(buf, '\0', cnt) == NULL);
1100 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1102 static char *
1103 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1105 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1106 size_t nc;
1107 int last = ds->lastline;
1108 char *state = NULL;
1110 ds->lastline = pos;
1111 while (pos > last) {
1112 fseek(fp, f[pos - 1], SEEK_SET);
1113 nc = f[pos] - f[pos - 1];
1114 if (nc >= sizeof(buf))
1115 nc = sizeof(buf) - 1;
1116 nc = fread(buf, 1, nc, fp);
1117 if (nc > 0) {
1118 buf[nc] = '\0';
1119 buf[strcspn(buf, "\n")] = '\0';
1120 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1121 if (begins_with(buf, "private:")) {
1122 if (!state)
1123 state = " (private)";
1124 } else if (begins_with(buf, "protected:")) {
1125 if (!state)
1126 state = " (protected)";
1127 } else if (begins_with(buf, "public:")) {
1128 if (!state)
1129 state = " (public)";
1130 } else {
1131 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1132 if (state)
1133 strlcat(ds->lastbuf, state,
1134 sizeof ds->lastbuf);
1135 ds->lastmatchline = pos;
1136 return ds->lastbuf;
1140 pos--;
1142 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1145 /* dump accumulated "unified" diff changes */
1146 static void
1147 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1148 struct got_diff_state *ds, struct got_diff_args *args,
1149 FILE *f1, FILE *f2, int flags)
1151 struct context_vec *cvp = ds->context_vec_start;
1152 int lowa, upb, lowc, upd;
1153 int a, b, c, d;
1154 char ch, *f;
1156 if (ds->context_vec_start > ds->context_vec_ptr)
1157 return;
1159 b = d = 0; /* gcc */
1160 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1161 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1162 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1163 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1165 diff_output(outfile, "@@ -");
1166 uni_range(outfile, lowa, upb);
1167 diff_output(outfile, " +");
1168 uni_range(outfile, lowc, upd);
1169 diff_output(outfile, " @@");
1170 if ((flags & D_PROTOTYPE)) {
1171 f = match_function(ds, ds->ixold, lowa-1, f1);
1172 if (f != NULL)
1173 diff_output(outfile, " %s", f);
1175 diff_output(outfile, "\n");
1178 * Output changes in "unified" diff format--the old and new lines
1179 * are printed together.
1181 for (; cvp <= ds->context_vec_ptr; cvp++) {
1182 if (changes) {
1183 struct got_diff_change *change;
1184 change = calloc(1, sizeof(*change));
1185 if (change) {
1186 memcpy(&change->cv, cvp, sizeof(change->cv));
1187 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1188 entry);
1189 changes->nchanges++;
1193 a = cvp->a;
1194 b = cvp->b;
1195 c = cvp->c;
1196 d = cvp->d;
1199 * c: both new and old changes
1200 * d: only changes in the old file
1201 * a: only changes in the new file
1203 if (a <= b && c <= d)
1204 ch = 'c';
1205 else
1206 ch = (a <= b) ? 'd' : 'a';
1208 switch (ch) {
1209 case 'c':
1210 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1211 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1212 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1213 break;
1214 case 'd':
1215 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1216 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1217 break;
1218 case 'a':
1219 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1220 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1221 break;
1223 lowa = b + 1;
1224 lowc = d + 1;
1226 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1228 ds->context_vec_ptr = ds->context_vec_start - 1;
1231 static void
1232 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1233 const char *file1, const char *file2)
1235 if (args->label[0] != NULL)
1236 diff_output(outfile, "--- %s\n", args->label[0]);
1237 else
1238 diff_output(outfile, "--- %s\t%s", file1,
1239 ctime(&ds->stb1.st_mtime));
1240 if (args->label[1] != NULL)
1241 diff_output(outfile, "+++ %s\n", args->label[1]);
1242 else
1243 diff_output(outfile, "+++ %s\t%s", file2,
1244 ctime(&ds->stb2.st_mtime));