Blob


1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
91 #include "got_lib_diff.h"
93 #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
96 /*
97 * diff - compare two files.
98 */
100 /*
101 * Uses an algorithm due to Harold Stone, which finds
102 * a pair of longest identical subsequences in the two
103 * files.
105 * The major goal is to generate the match vector J.
106 * J[i] is the index of the line in file1 corresponding
107 * to line i file0. J[i] = 0 if there is no
108 * such line in file1.
110 * Lines are hashed so as to work in core. All potential
111 * matches are located by sorting the lines of each file
112 * on the hash (called ``value''). In particular, this
113 * collects the equivalence classes in file1 together.
114 * Subroutine equiv replaces the value of each line in
115 * file0 by the index of the first element of its
116 * matching equivalence in (the reordered) file1.
117 * To save space equiv squeezes file1 into a single
118 * array member in which the equivalence classes
119 * are simply concatenated, except that their first
120 * members are flagged by changing sign.
122 * Next the indices that point into member are unsorted into
123 * array class according to the original order of file0.
125 * The cleverness lies in routine stone. This marches
126 * through the lines of file0, developing a vector klist
127 * of "k-candidates". At step i a k-candidate is a matched
128 * pair of lines x,y (x in file0 y in file1) such that
129 * there is a common subsequence of length k
130 * between the first i lines of file0 and the first y
131 * lines of file1, but there is no such subsequence for
132 * any smaller y. x is the earliest possible mate to y
133 * that occurs in such a subsequence.
135 * Whenever any of the members of the equivalence class of
136 * lines in file1 matable to a line in file0 has serial number
137 * less than the y of some k-candidate, that k-candidate
138 * with the smallest such y is replaced. The new
139 * k-candidate is chained (via pred) to the current
140 * k-1 candidate so that the actual subsequence can
141 * be recovered. When a member has serial number greater
142 * that the y of all k-candidates, the klist is extended.
143 * At the end, the longest subsequence is pulled out
144 * and placed in the array J by unravel
146 * With J in hand, the matches there recorded are
147 * check'ed against reality to assure that no spurious
148 * matches have crept in due to hashing. If they have,
149 * they are broken, and "jackpot" is recorded--a harmless
150 * matter except that a true match for a spuriously
151 * mated line may now be unnecessarily reported as a change.
153 * Much of the complexity of the program comes simply
154 * from trying to minimize core utilization and
155 * maximize the range of doable problems by dynamically
156 * allocating what is needed and reusing what is not.
157 * The core requirements for problems larger than somewhat
158 * are (in words) 2*length(file0) + length(file1) +
159 * 3*(number of k-candidates installed), typically about
160 * 6n words for files of length n.
161 */
163 struct cand {
164 int x;
165 int y;
166 int pred;
167 };
169 struct line {
170 int serial;
171 int value;
172 };
174 static void diff_output(FILE *, const char *, ...);
175 static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
176 static void check(struct got_diff_state *, FILE *, FILE *, int);
177 static void range(FILE *, int, int, char *);
178 static void uni_range(FILE *, int, int);
179 static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
180 static int prepare(struct got_diff_state *, int, FILE *, off_t, int);
181 static void prune(struct got_diff_state *);
182 static void equiv(struct line *, int, struct line *, int, int *);
183 static void unravel(struct got_diff_state *, int);
184 static int unsort(struct line *, int, int *);
185 static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
186 static void sort(struct line *, int);
187 static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
188 static int asciifile(FILE *);
189 static int fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int);
190 static int newcand(struct got_diff_state *, int, int, int, int *);
191 static int search(struct got_diff_state *, int *, int, int);
192 static int skipline(FILE *);
193 static int isqrt(int);
194 static int stone(struct got_diff_state *, int *, int, int *, int *, int);
195 static int readhash(struct got_diff_state *, FILE *, int);
196 static int files_differ(struct got_diff_state *, FILE *, FILE *, int);
197 static char *match_function(struct got_diff_state *, const long *, int, FILE *);
199 /*
200 * chrtran points to one of 2 translation tables: cup2low if folding upper to
201 * lower case clow2low if not folding case
202 */
203 u_char clow2low[256] = {
204 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
205 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
206 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
207 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
208 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
209 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
210 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
211 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
212 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
213 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
214 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
215 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
216 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
217 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
218 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
219 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
220 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
221 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
222 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
223 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
224 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
225 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
226 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
227 0xfd, 0xfe, 0xff
228 };
230 u_char cup2low[256] = {
231 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
232 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
233 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
234 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
235 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
236 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
237 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
238 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
239 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
240 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
241 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
242 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
243 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
244 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
245 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
246 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
247 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
248 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
249 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
250 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
251 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
252 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
253 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
254 0xfd, 0xfe, 0xff
255 };
257 static void
258 diff_output(FILE *outfile, const char *fmt, ...)
260 va_list ap;
262 if (outfile == NULL)
263 return;
265 va_start(ap, fmt);
266 vfprintf(outfile, fmt, ap);
267 va_end(ap);
270 void
271 got_diff_state_free(struct got_diff_state *ds)
273 free(ds->J);
274 free(ds->member);
275 free(ds->class);
276 free(ds->clist);
277 free(ds->klist);
278 free(ds->ixold);
279 free(ds->ixnew);
282 const struct got_error *
283 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
284 struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile,
285 struct got_diff_changes *changes)
287 const struct got_error *err = NULL;
288 int i, *p;
289 long *lp;
291 *rval = D_SAME;
292 ds->anychange = 0;
293 ds->lastline = 0;
294 ds->lastmatchline = 0;
295 ds->context_vec_ptr = ds->context_vec_start - 1;
296 ds->max_context = GOT_DIFF_MAX_CONTEXT;
297 if (flags & D_IGNORECASE)
298 ds->chrtran = cup2low;
299 else
300 ds->chrtran = clow2low;
301 if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
302 *rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
303 return NULL;
305 if (flags & D_EMPTY1) {
306 f1 = fopen(_PATH_DEVNULL, "r");
307 if (f1 == NULL) {
308 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
309 goto closem;
312 else if (f1 == NULL) {
313 args->status |= 2;
314 goto closem;
317 if (flags & D_EMPTY2) {
318 f2 = fopen(_PATH_DEVNULL, "r");
319 if (f2 == NULL) {
320 err = got_error_from_errno2("fopen", _PATH_DEVNULL);
321 goto closem;
323 } else if (f2 == NULL) {
324 args->status |= 2;
325 goto closem;
328 switch (files_differ(ds, f1, f2, flags)) {
329 case 0:
330 goto closem;
331 case 1:
332 break;
333 default:
334 /* error */
335 args->status |= 2;
336 goto closem;
339 if ((flags & D_FORCEASCII) == 0 &&
340 (!asciifile(f1) || !asciifile(f2))) {
341 *rval = D_BINARY;
342 args->status |= 1;
343 goto closem;
345 if (prepare(ds, 0, f1, ds->stb1.st_size, flags)) {
346 err = got_error_from_errno("prepare");
347 goto closem;
349 if (prepare(ds, 1, f2, ds->stb2.st_size, flags)) {
350 err = got_error_from_errno("prepare");
351 goto closem;
354 prune(ds);
355 sort(ds->sfile[0], ds->slen[0]);
356 sort(ds->sfile[1], ds->slen[1]);
358 ds->member = (int *)ds->file[1];
359 equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
360 p = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
361 if (p == NULL) {
362 err = got_error_from_errno("reallocarray");
363 goto closem;
365 ds->member = p;
367 ds->class = (int *)ds->file[0];
368 if (unsort(ds->sfile[0], ds->slen[0], ds->class)) {
369 err = got_error_from_errno("unsort");
370 goto closem;
372 p = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
373 if (p == NULL) {
374 err = got_error_from_errno("reallocarray");
375 goto closem;
377 ds->class = p;
379 ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
380 if (ds->klist == NULL) {
381 err = got_error_from_errno("calloc");
382 goto closem;
384 ds->clen = 0;
385 ds->clistlen = 100;
386 ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
387 if (ds->clist == NULL) {
388 err = got_error_from_errno("calloc");
389 goto closem;
391 i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
392 if (i < 0) {
393 err = got_error_from_errno("stone");
394 goto closem;
397 p = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
398 if (p == NULL) {
399 err = got_error_from_errno("reallocarray");
400 goto closem;
402 ds->J = p;
403 unravel(ds, ds->klist[i]);
405 lp = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
406 if (lp == NULL) {
407 err = got_error_from_errno("reallocarray");
408 goto closem;
410 ds->ixold = lp;
411 lp = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
412 if (lp == NULL) {
413 err = got_error_from_errno("reallocarray");
414 goto closem;
416 ds->ixnew = lp;
417 check(ds, f1, f2, flags);
418 if (output(outfile, changes, ds, args, args->label[0], f1,
419 args->label[1], f2, flags))
420 err = got_error_from_errno("output");
421 closem:
422 if (ds->anychange) {
423 args->status |= 1;
424 if (*rval == D_SAME)
425 *rval = D_DIFFER;
427 if ((flags & D_EMPTY1) && f1) {
428 if (fclose(f1) != 0 && err == NULL)
429 err = got_error_from_errno("fclose");
431 if ((flags & D_EMPTY2) && f2) {
432 if (fclose(f2) != 0 && err == NULL)
433 err = got_error_from_errno("fclose");
435 return (err);
438 /*
439 * Check to see if the given files differ.
440 * Returns 0 if they are the same, 1 if different, and -1 on error.
441 * XXX - could use code from cmp(1) [faster]
442 */
443 static int
444 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
446 char buf1[BUFSIZ], buf2[BUFSIZ];
447 size_t i, j;
449 if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
450 (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
451 return (1);
452 for (;;) {
453 i = fread(buf1, 1, sizeof(buf1), f1);
454 j = fread(buf2, 1, sizeof(buf2), f2);
455 if ((!i && ferror(f1)) || (!j && ferror(f2)))
456 return (-1);
457 if (i != j)
458 return (1);
459 if (i == 0)
460 return (0);
461 if (memcmp(buf1, buf2, i) != 0)
462 return (1);
466 static int
467 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
469 struct line *p, *q;
470 int j, h;
471 size_t sz;
473 rewind(fd);
475 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
476 if (sz < 100)
477 sz = 100;
479 p = calloc(sz + 3, sizeof(*p));
480 if (p == NULL)
481 return (-1);
482 for (j = 0; (h = readhash(ds, fd, flags));) {
483 if (j == sz) {
484 sz = sz * 3 / 2;
485 q = reallocarray(p, sz + 3, sizeof(*p));
486 if (q == NULL) {
487 free(p);
488 return (-1);
490 p = q;
492 p[++j].value = h;
494 ds->len[i] = j;
495 ds->file[i] = p;
497 return (0);
500 static void
501 prune(struct got_diff_state *ds)
503 int i, j;
505 for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
506 ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
507 ds->pref++)
509 for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
510 ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
511 ds->suff++)
513 for (j = 0; j < 2; j++) {
514 ds->sfile[j] = ds->file[j] + ds->pref;
515 ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
516 for (i = 0; i <= ds->slen[j]; i++)
517 ds->sfile[j][i].serial = i;
521 static void
522 equiv(struct line *a, int n, struct line *b, int m, int *c)
524 int i, j;
526 i = j = 1;
527 while (i <= n && j <= m) {
528 if (a[i].value < b[j].value)
529 a[i++].value = 0;
530 else if (a[i].value == b[j].value)
531 a[i++].value = j;
532 else
533 j++;
535 while (i <= n)
536 a[i++].value = 0;
537 b[m + 1].value = 0;
538 j = 0;
539 while (++j <= m) {
540 c[j] = -b[j].serial;
541 while (b[j + 1].value == b[j].value) {
542 j++;
543 c[j] = b[j].serial;
546 c[j] = -1;
549 /* Code taken from ping.c */
550 static int
551 isqrt(int n)
553 int y, x = 1;
555 if (n == 0)
556 return (0);
558 do { /* newton was a stinker */
559 y = x;
560 x = n / x;
561 x += y;
562 x /= 2;
563 } while ((x - y) > 1 || (x - y) < -1);
565 return (x);
568 static int
569 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
571 int i, k, y, j, l;
572 int oldc, tc, oldl, sq;
573 u_int numtries, bound;
574 int error;
576 if (flags & D_MINIMAL)
577 bound = UINT_MAX;
578 else {
579 sq = isqrt(n);
580 bound = MAXIMUM(256, sq);
583 k = 0;
584 c[0] = newcand(ds, 0, 0, 0, &error);
585 if (error)
586 return -1;
587 for (i = 1; i <= n; i++) {
588 j = a[i];
589 if (j == 0)
590 continue;
591 y = -b[j];
592 oldl = 0;
593 oldc = c[0];
594 numtries = 0;
595 do {
596 if (y <= ds->clist[oldc].y)
597 continue;
598 l = search(ds, c, k, y);
599 if (l != oldl + 1)
600 oldc = c[l - 1];
601 if (l <= k) {
602 if (ds->clist[c[l]].y <= y)
603 continue;
604 tc = c[l];
605 c[l] = newcand(ds, i, y, oldc, &error);
606 if (error)
607 return -1;
608 oldc = tc;
609 oldl = l;
610 numtries++;
611 } else {
612 c[l] = newcand(ds, i, y, oldc, &error);
613 if (error)
614 return -1;
615 k++;
616 break;
618 } while ((y = b[++j]) > 0 && numtries < bound);
620 return (k);
623 static int
624 newcand(struct got_diff_state *ds, int x, int y, int pred, int *errorp)
626 struct cand *q;
628 if (ds->clen == ds->clistlen) {
629 ds->clistlen = ds->clistlen * 11 / 10;
630 q = reallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
631 if (q == NULL) {
632 *errorp = -1;
633 free(ds->clist);
634 ds->clist = NULL;
635 return 0;
637 ds->clist = q;
639 q = ds->clist + ds->clen;
640 q->x = x;
641 q->y = y;
642 q->pred = pred;
643 *errorp = 0;
644 return (ds->clen++);
647 static int
648 search(struct got_diff_state *ds, int *c, int k, int y)
650 int i, j, l, t;
652 if (ds->clist[c[k]].y < y) /* quick look for typical case */
653 return (k + 1);
654 i = 0;
655 j = k + 1;
656 for (;;) {
657 l = (i + j) / 2;
658 if (l <= i)
659 break;
660 t = ds->clist[c[l]].y;
661 if (t > y)
662 j = l;
663 else if (t < y)
664 i = l;
665 else
666 return (l);
668 return (l + 1);
671 static void
672 unravel(struct got_diff_state *ds, int p)
674 struct cand *q;
675 int i;
677 for (i = 0; i <= ds->len[0]; i++)
678 ds->J[i] = i <= ds->pref ? i :
679 i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
680 for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
681 ds->J[q->x + ds->pref] = q->y + ds->pref;
684 /*
685 * Check does double duty:
686 * 1. ferret out any fortuitous correspondences due
687 * to confounding by hashing (which result in "jackpot")
688 * 2. collect random access indexes to the two files
689 */
690 static void
691 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
693 int i, j, jackpot, c, d;
694 long ctold, ctnew;
696 rewind(f1);
697 rewind(f2);
698 j = 1;
699 ds->ixold[0] = ds->ixnew[0] = 0;
700 jackpot = 0;
701 ctold = ctnew = 0;
702 for (i = 1; i <= ds->len[0]; i++) {
703 if (ds->J[i] == 0) {
704 ds->ixold[i] = ctold += skipline(f1);
705 continue;
707 while (j < ds->J[i]) {
708 ds->ixnew[j] = ctnew += skipline(f2);
709 j++;
711 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
712 for (;;) {
713 c = getc(f1);
714 d = getc(f2);
715 /*
716 * GNU diff ignores a missing newline
717 * in one file for -b or -w.
718 */
719 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
720 if (c == EOF && d == '\n') {
721 ctnew++;
722 break;
723 } else if (c == '\n' && d == EOF) {
724 ctold++;
725 break;
728 ctold++;
729 ctnew++;
730 if ((flags & D_FOLDBLANKS) && isspace(c) &&
731 isspace(d)) {
732 do {
733 if (c == '\n')
734 break;
735 ctold++;
736 } while (isspace(c = getc(f1)));
737 do {
738 if (d == '\n')
739 break;
740 ctnew++;
741 } while (isspace(d = getc(f2)));
742 } else if ((flags & D_IGNOREBLANKS)) {
743 while (isspace(c) && c != '\n') {
744 c = getc(f1);
745 ctold++;
747 while (isspace(d) && d != '\n') {
748 d = getc(f2);
749 ctnew++;
752 if (ds->chrtran[c] != ds->chrtran[d]) {
753 jackpot++;
754 ds->J[i] = 0;
755 if (c != '\n' && c != EOF)
756 ctold += skipline(f1);
757 if (d != '\n' && c != EOF)
758 ctnew += skipline(f2);
759 break;
761 if (c == '\n' || c == EOF)
762 break;
764 } else {
765 for (;;) {
766 ctold++;
767 ctnew++;
768 if ((c = getc(f1)) != (d = getc(f2))) {
769 /* jackpot++; */
770 ds->J[i] = 0;
771 if (c != '\n' && c != EOF)
772 ctold += skipline(f1);
773 if (d != '\n' && c != EOF)
774 ctnew += skipline(f2);
775 break;
777 if (c == '\n' || c == EOF)
778 break;
781 ds->ixold[i] = ctold;
782 ds->ixnew[j] = ctnew;
783 j++;
785 for (; j <= ds->len[1]; j++)
786 ds->ixnew[j] = ctnew += skipline(f2);
787 /*
788 * if (jackpot)
789 * fprintf(stderr, "jackpot\n");
790 */
793 /* shellsort CACM #201 */
794 static void
795 sort(struct line *a, int n)
797 struct line *ai, *aim, w;
798 int j, m = 0, k;
800 if (n == 0)
801 return;
802 for (j = 1; j <= n; j *= 2)
803 m = 2 * j - 1;
804 for (m /= 2; m != 0; m /= 2) {
805 k = n - m;
806 for (j = 1; j <= k; j++) {
807 for (ai = &a[j]; ai > a; ai -= m) {
808 aim = &ai[m];
809 if (aim < ai)
810 break; /* wraparound */
811 if (aim->value > ai[0].value ||
812 (aim->value == ai[0].value &&
813 aim->serial > ai[0].serial))
814 break;
815 w.value = ai[0].value;
816 ai[0].value = aim->value;
817 aim->value = w.value;
818 w.serial = ai[0].serial;
819 ai[0].serial = aim->serial;
820 aim->serial = w.serial;
826 static int
827 unsort(struct line *f, int l, int *b)
829 int *a, i;
831 a = calloc(l + 1, sizeof(*a));
832 if (a == NULL)
833 return (-1);
834 for (i = 1; i <= l; i++)
835 a[f[i].serial] = f[i].value;
836 for (i = 1; i <= l; i++)
837 b[i] = a[i];
838 free(a);
840 return (0);
843 static int
844 skipline(FILE *f)
846 int i, c;
848 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849 continue;
850 return (i);
853 static int
854 output(FILE *outfile, struct got_diff_changes *changes,
855 struct got_diff_state *ds, struct got_diff_args *args,
856 const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
858 int m, i0, i1, j0, j1;
859 int error = 0;
861 rewind(f1);
862 rewind(f2);
863 m = ds->len[0];
864 ds->J[0] = 0;
865 ds->J[m + 1] = ds->len[1] + 1;
866 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
867 while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
868 i0++;
869 j0 = ds->J[i0 - 1] + 1;
870 i1 = i0 - 1;
871 while (i1 < m && ds->J[i1 + 1] == 0)
872 i1++;
873 j1 = ds->J[i1 + 1] - 1;
874 ds->J[i1] = j1;
875 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
876 i0, i1, j0, j1, &flags);
877 if (error)
878 return (error);
880 if (m == 0) {
881 error = change(outfile, changes, ds, args, file1, f1, file2, f2,
882 1, 0, 1, ds->len[1], &flags);
883 if (error)
884 return (error);
886 if (ds->anychange != 0 && args->diff_format == D_UNIFIED)
887 dump_unified_vec(outfile, changes, ds, args, f1, f2, flags);
889 return (0);
892 static void
893 range(FILE *outfile, int a, int b, char *separator)
895 diff_output(outfile, "%d", a > b ? b : a);
896 if (a < b)
897 diff_output(outfile, "%s%d", separator, b);
900 static void
901 uni_range(FILE *outfile, int a, int b)
903 if (a < b)
904 diff_output(outfile, "%d,%d", a, b - a + 1);
905 else if (a == b)
906 diff_output(outfile, "%d", b);
907 else
908 diff_output(outfile, "%d,0", b);
911 /*
912 * Indicate that there is a difference between lines a and b of the from file
913 * to get to lines c to d of the to file. If a is greater then b then there
914 * are no lines in the from file involved and this means that there were
915 * lines appended (beginning at b). If c is greater than d then there are
916 * lines missing from the to file.
917 */
918 static int
919 change(FILE *outfile, struct got_diff_changes *changes,
920 struct got_diff_state *ds, struct got_diff_args *args,
921 const char *file1, FILE *f1, const char *file2, FILE *f2,
922 int a, int b, int c, int d, int *pflags)
924 int i;
926 if (a > b && c > d)
927 return (0);
929 if (*pflags & D_HEADER) {
930 diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
931 *pflags &= ~D_HEADER;
933 if (args->diff_format == D_UNIFIED) {
934 /*
935 * Allocate change records as needed.
936 */
937 if (ds->context_vec_ptr == ds->context_vec_end - 1) {
938 struct context_vec *cvp;
939 ptrdiff_t offset;
940 offset = ds->context_vec_ptr - ds->context_vec_start;
941 ds->max_context <<= 1;
942 cvp = reallocarray(ds->context_vec_start,
943 ds->max_context, sizeof(*ds->context_vec_start));
944 if (cvp == NULL) {
945 free(ds->context_vec_start);
946 return (-1);
948 ds->context_vec_start = cvp;
949 ds->context_vec_end = ds->context_vec_start +
950 ds->max_context;
951 ds->context_vec_ptr = ds->context_vec_start + offset;
953 if (ds->anychange == 0) {
954 /*
955 * Print the context/unidiff header first time through.
956 */
957 print_header(outfile, ds, args, file1, file2);
958 ds->anychange = 1;
959 } else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
960 c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
961 /*
962 * If this change is more than 'diff_context' lines from the
963 * previous change, dump the record and reset it.
964 */
965 dump_unified_vec(outfile, changes, ds, args, f1, f2,
966 *pflags);
968 ds->context_vec_ptr++;
969 ds->context_vec_ptr->a = a;
970 ds->context_vec_ptr->b = b;
971 ds->context_vec_ptr->c = c;
972 ds->context_vec_ptr->d = d;
973 return (0);
975 if (ds->anychange == 0)
976 ds->anychange = 1;
977 if (args->diff_format == D_BRIEF)
978 return (0);
979 if (args->diff_format == D_NORMAL) {
980 range(outfile, a, b, ",");
981 diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
982 range(outfile, c, d, ",");
983 diff_output(outfile, "\n");
984 fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', *pflags);
985 if (a <= b && c <= d)
986 diff_output(outfile, "---\n");
988 i = fetch(outfile, ds, args, ds->ixnew, c, d, f2,
989 args->diff_format == D_NORMAL ? '>' : '\0', *pflags);
990 return (0);
993 static int
994 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
995 long *f, int a, int b, FILE *lb, int ch, int flags)
997 int i, j, c, lastc, col, nc;
999 if (a > b)
1000 return (0);
1001 for (i = a; i <= b; i++) {
1002 fseek(lb, f[i - 1], SEEK_SET);
1003 nc = f[i] - f[i - 1];
1004 if (ch != '\0') {
1005 diff_output(outfile, "%c", ch);
1006 if (args->Tflag && (args->diff_format == D_UNIFIED ||
1007 args->diff_format == D_NORMAL))
1008 diff_output(outfile, "\t");
1009 else if (args->diff_format != D_UNIFIED)
1010 diff_output(outfile, " ");
1012 col = 0;
1013 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1014 if ((c = getc(lb)) == EOF) {
1015 diff_output(outfile, "\n\\ No newline at end of "
1016 "file\n");
1017 return (0);
1019 if (c == '\t' && (flags & D_EXPANDTABS)) {
1020 do {
1021 diff_output(outfile, " ");
1022 } while (++col & 7);
1023 } else {
1024 diff_output(outfile, "%c", c);
1025 col++;
1029 return (0);
1033 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1035 static int
1036 readhash(struct got_diff_state *ds, FILE *f, int flags)
1038 int i, t, space;
1039 int sum;
1041 sum = 1;
1042 space = 0;
1043 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1044 if (flags & D_IGNORECASE)
1045 for (i = 0; (t = getc(f)) != '\n'; i++) {
1046 if (t == EOF) {
1047 if (i == 0)
1048 return (0);
1049 break;
1051 sum = sum * 127 + ds->chrtran[t];
1053 else
1054 for (i = 0; (t = getc(f)) != '\n'; i++) {
1055 if (t == EOF) {
1056 if (i == 0)
1057 return (0);
1058 break;
1060 sum = sum * 127 + t;
1062 } else {
1063 for (i = 0;;) {
1064 switch (t = getc(f)) {
1065 case '\t':
1066 case '\r':
1067 case '\v':
1068 case '\f':
1069 case ' ':
1070 space++;
1071 continue;
1072 default:
1073 if (space && (flags & D_IGNOREBLANKS) == 0) {
1074 i++;
1075 space = 0;
1077 sum = sum * 127 + ds->chrtran[t];
1078 i++;
1079 continue;
1080 case EOF:
1081 if (i == 0)
1082 return (0);
1083 /* FALLTHROUGH */
1084 case '\n':
1085 break;
1087 break;
1091 * There is a remote possibility that we end up with a zero sum.
1092 * Zero is used as an EOF marker, so return 1 instead.
1094 return (sum == 0 ? 1 : sum);
1097 static int
1098 asciifile(FILE *f)
1100 unsigned char buf[BUFSIZ];
1101 size_t cnt;
1103 if (f == NULL)
1104 return (1);
1106 rewind(f);
1107 cnt = fread(buf, 1, sizeof(buf), f);
1108 return (memchr(buf, '\0', cnt) == NULL);
1111 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1113 static char *
1114 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1116 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1117 size_t nc;
1118 int last = ds->lastline;
1119 char *state = NULL;
1121 ds->lastline = pos;
1122 while (pos > last) {
1123 fseek(fp, f[pos - 1], SEEK_SET);
1124 nc = f[pos] - f[pos - 1];
1125 if (nc >= sizeof(buf))
1126 nc = sizeof(buf) - 1;
1127 nc = fread(buf, 1, nc, fp);
1128 if (nc > 0) {
1129 buf[nc] = '\0';
1130 buf[strcspn(buf, "\n")] = '\0';
1131 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1132 if (begins_with(buf, "private:")) {
1133 if (!state)
1134 state = " (private)";
1135 } else if (begins_with(buf, "protected:")) {
1136 if (!state)
1137 state = " (protected)";
1138 } else if (begins_with(buf, "public:")) {
1139 if (!state)
1140 state = " (public)";
1141 } else {
1142 strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1143 if (state)
1144 strlcat(ds->lastbuf, state,
1145 sizeof ds->lastbuf);
1146 ds->lastmatchline = pos;
1147 return ds->lastbuf;
1151 pos--;
1153 return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1156 /* dump accumulated "unified" diff changes */
1157 static void
1158 dump_unified_vec(FILE *outfile, struct got_diff_changes *changes,
1159 struct got_diff_state *ds, struct got_diff_args *args,
1160 FILE *f1, FILE *f2, int flags)
1162 struct context_vec *cvp = ds->context_vec_start;
1163 int lowa, upb, lowc, upd;
1164 int a, b, c, d;
1165 char ch, *f;
1167 if (ds->context_vec_start > ds->context_vec_ptr)
1168 return;
1170 b = d = 0; /* gcc */
1171 lowa = MAXIMUM(1, cvp->a - args->diff_context);
1172 upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1173 lowc = MAXIMUM(1, cvp->c - args->diff_context);
1174 upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1176 diff_output(outfile, "@@ -");
1177 uni_range(outfile, lowa, upb);
1178 diff_output(outfile, " +");
1179 uni_range(outfile, lowc, upd);
1180 diff_output(outfile, " @@");
1181 if ((flags & D_PROTOTYPE)) {
1182 f = match_function(ds, ds->ixold, lowa-1, f1);
1183 if (f != NULL)
1184 diff_output(outfile, " %s", f);
1186 diff_output(outfile, "\n");
1189 * Output changes in "unified" diff format--the old and new lines
1190 * are printed together.
1192 for (; cvp <= ds->context_vec_ptr; cvp++) {
1193 if (changes) {
1194 struct got_diff_change *change;
1195 change = calloc(1, sizeof(*change));
1196 if (change) {
1197 memcpy(&change->cv, cvp, sizeof(change->cv));
1198 SIMPLEQ_INSERT_TAIL(&changes->entries, change,
1199 entry);
1200 changes->nchanges++;
1204 a = cvp->a;
1205 b = cvp->b;
1206 c = cvp->c;
1207 d = cvp->d;
1210 * c: both new and old changes
1211 * d: only changes in the old file
1212 * a: only changes in the new file
1214 if (a <= b && c <= d)
1215 ch = 'c';
1216 else
1217 ch = (a <= b) ? 'd' : 'a';
1219 switch (ch) {
1220 case 'c':
1221 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1222 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1223 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1224 break;
1225 case 'd':
1226 fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', flags);
1227 fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', flags);
1228 break;
1229 case 'a':
1230 fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', flags);
1231 fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', flags);
1232 break;
1234 lowa = b + 1;
1235 lowc = d + 1;
1237 fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', flags);
1239 ds->context_vec_ptr = ds->context_vec_start - 1;
1242 void
1243 got_diff_dump_change(FILE *outfile, struct got_diff_change *change,
1244 struct got_diff_state *ds, struct got_diff_args *args,
1245 FILE *f1, FILE *f2, int diff_flags)
1247 ds->context_vec_ptr = &change->cv;
1248 ds->context_vec_start = &change->cv;
1249 ds->context_vec_end = &change->cv;
1251 /* XXX TODO needs error checking */
1252 dump_unified_vec(outfile, NULL, ds, args, f1, f2, diff_flags);
1255 static void
1256 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1257 const char *file1, const char *file2)
1259 if (args->label[0] != NULL)
1260 diff_output(outfile, "--- %s\n", args->label[0]);
1261 else
1262 diff_output(outfile, "--- %s\t%s", file1,
1263 ctime(&ds->stb1.st_mtime));
1264 if (args->label[1] != NULL)
1265 diff_output(outfile, "+++ %s\n", args->label[1]);
1266 else
1267 diff_output(outfile, "+++ %s\t%s", file2,
1268 ctime(&ds->stb2.st_mtime));