op public repos

Blob

Date:: Fri Dec 1 21:09:59 2017 UTC
Message:: make tree diffing work
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stdarg.h>
77 #include <stddef.h>
78 #include <stdint.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <unistd.h>
83 #include <limits.h>
84 #include <sha1.h>
85 #include <zlib.h>
86 
87 #include "got_error.h"
88 #include "got_object.h"
89 #include "got_diff.h"
90 
91 #include "diff.h"
92 #include "xmalloc.h" /* XXX should return GOT_ERR_NO_MEM instead of exiting */
93 
94 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
95 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
96 
97 /*
98  * diff - compare two files.
99  */
100 
101 /*
102  *	Uses an algorithm due to Harold Stone, which finds
103  *	a pair of longest identical subsequences in the two
104  *	files.
105  *
106  *	The major goal is to generate the match vector J.
107  *	J[i] is the index of the line in file1 corresponding
108  *	to line i file0. J[i] = 0 if there is no
109  *	such line in file1.
110  *
111  *	Lines are hashed so as to work in core. All potential
112  *	matches are located by sorting the lines of each file
113  *	on the hash (called ``value''). In particular, this
114  *	collects the equivalence classes in file1 together.
115  *	Subroutine equiv replaces the value of each line in
116  *	file0 by the index of the first element of its
117  *	matching equivalence in (the reordered) file1.
118  *	To save space equiv squeezes file1 into a single
119  *	array member in which the equivalence classes
120  *	are simply concatenated, except that their first
121  *	members are flagged by changing sign.
122  *
123  *	Next the indices that point into member are unsorted into
124  *	array class according to the original order of file0.
125  *
126  *	The cleverness lies in routine stone. This marches
127  *	through the lines of file0, developing a vector klist
128  *	of "k-candidates". At step i a k-candidate is a matched
129  *	pair of lines x,y (x in file0 y in file1) such that
130  *	there is a common subsequence of length k
131  *	between the first i lines of file0 and the first y
132  *	lines of file1, but there is no such subsequence for
133  *	any smaller y. x is the earliest possible mate to y
134  *	that occurs in such a subsequence.
135  *
136  *	Whenever any of the members of the equivalence class of
137  *	lines in file1 matable to a line in file0 has serial number
138  *	less than the y of some k-candidate, that k-candidate
139  *	with the smallest such y is replaced. The new
140  *	k-candidate is chained (via pred) to the current
141  *	k-1 candidate so that the actual subsequence can
142  *	be recovered. When a member has serial number greater
143  *	that the y of all k-candidates, the klist is extended.
144  *	At the end, the longest subsequence is pulled out
145  *	and placed in the array J by unravel
146  *
147  *	With J in hand, the matches there recorded are
148  *	check'ed against reality to assure that no spurious
149  *	matches have crept in due to hashing. If they have,
150  *	they are broken, and "jackpot" is recorded--a harmless
151  *	matter except that a true match for a spuriously
152  *	mated line may now be unnecessarily reported as a change.
153  *
154  *	Much of the complexity of the program comes simply
155  *	from trying to minimize core utilization and
156  *	maximize the range of doable problems by dynamically
157  *	allocating what is needed and reusing what is not.
158  *	The core requirements for problems larger than somewhat
159  *	are (in words) 2*length(file0) + length(file1) +
160  *	3*(number of k-candidates installed),  typically about
161  *	6n words for files of length n.
162  */
163 
164 struct cand {
165 	int	x;
166 	int	y;
167 	int	pred;
168 };
169 
170 struct line {
171 	int	serial;
172 	int	value;
173 };
174 
175 /*
176  * The following struct is used to record change information when
177  * doing a "context" or "unified" diff.  (see routine "change" to
178  * understand the highly mnemonic field names)
179  */
180 struct context_vec {
181 	int	a;		/* start line in old file */
182 	int	b;		/* end line in old file */
183 	int	c;		/* start line in new file */
184 	int	d;		/* end line in new file */
185 };
186 
187 static void	 diff_output(FILE *, const char *, ...);
188 static void	 output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int);
189 static void	 check(struct got_diff_state *, FILE *, FILE *, int);
190 static void	 range(FILE *, int, int, char *);
191 static void	 uni_range(FILE *, int, int);
192 static void	 dump_context_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
193 static void	 dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int);
194 static void	 prepare(struct got_diff_state *, int, FILE *, off_t, int);
195 static void	 prune(struct got_diff_state *);
196 static void	 equiv(struct line *, int, struct line *, int, int *);
197 static void	 unravel(struct got_diff_state *, int);
198 static void	 unsort(struct line *, int, int *);
199 static void	 change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *);
200 static void	 sort(struct line *, int);
201 static void	 print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *);
202 static int	 ignoreline(char *);
203 static int	 asciifile(FILE *);
204 static int	 fetch(FILE *, struct got_diff_state *, struct got_diff_args *, long *, int, int, FILE *, int, int, int);
205 static int	 newcand(struct got_diff_state *, int, int, int);
206 static int	 search(struct got_diff_state *, int *, int, int);
207 static int	 skipline(FILE *);
208 static int	 isqrt(int);
209 static int	 stone(struct got_diff_state *, int *, int, int *, int *, int);
210 static int	 readhash(struct got_diff_state *, FILE *, int);
211 static int	 files_differ(struct got_diff_state *, FILE *, FILE *, int);
212 static char	*match_function(struct got_diff_state *, const long *, int, FILE *);
213 static char	*preadline(int, size_t, off_t);
214 
215 /*
216  * chrtran points to one of 2 translation tables: cup2low if folding upper to
217  * lower case clow2low if not folding case
218  */
219 u_char clow2low[256] = {
220 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
221 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
222 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
223 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
224 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
225 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
226 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
227 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
228 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
229 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
230 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
231 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
232 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
233 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
234 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
235 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
236 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
237 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
238 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
239 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
240 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
241 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
242 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
243 	0xfd, 0xfe, 0xff
244 };
245 
246 u_char cup2low[256] = {
247 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
248 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
249 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
250 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
251 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
252 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
253 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
254 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
255 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
256 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
257 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
258 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
259 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
260 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
261 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
262 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
263 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
264 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
265 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
266 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
267 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
268 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
269 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
270 	0xfd, 0xfe, 0xff
271 };
272 
273 static void
274 diff_output(FILE *outfile, const char *fmt, ...)
275 {
276 	va_list ap;
277 
278 	va_start(ap, fmt);
279 	vfprintf(outfile, fmt, ap);
280 	va_end(ap);
281 }
282 
283 const struct got_error *
284 got_diffreg(int *rval, FILE *f1, FILE *f2, int flags,
285     struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile)
286 {
287 	const struct got_error *err = NULL;
288 	int i;
289 
290 	*rval = D_SAME;
291 	ds->anychange = 0;
292 	ds->lastline = 0;
293 	ds->lastmatchline = 0;
294 	ds->context_vec_ptr = ds->context_vec_start - 1;
295 	if (flags & D_IGNORECASE)
296 		ds->chrtran = cup2low;
297 	else
298 		ds->chrtran = clow2low;
299 	if (S_ISDIR(ds->stb1.st_mode) != S_ISDIR(ds->stb2.st_mode)) {
300 		*rval = (S_ISDIR(ds->stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
301 		return NULL;
302 	}
303 	if (flags & D_EMPTY1)
304 		f1 = fopen(_PATH_DEVNULL, "r");
305 	else if (f1 == NULL) {
306 		args->status |= 2;
307 		goto closem;
308 	}
309 
310 	if (flags & D_EMPTY2)
311 		f2 = fopen(_PATH_DEVNULL, "r");
312 	else if (f2 == NULL) {
313 		args->status |= 2;
314 		goto closem;
315 	}
316 
317 	switch (files_differ(ds, f1, f2, flags)) {
318 	case 0:
319 		goto closem;
320 	case 1:
321 		break;
322 	default:
323 		/* error */
324 		args->status |= 2;
325 		goto closem;
326 	}
327 
328 	if ((flags & D_FORCEASCII) == 0 &&
329 	    (!asciifile(f1) || !asciifile(f2))) {
330 		*rval = D_BINARY;
331 		args->status |= 1;
332 		goto closem;
333 	}
334 	prepare(ds, 0, f1, ds->stb1.st_size, flags);
335 	prepare(ds, 1, f2, ds->stb2.st_size, flags);
336 
337 	prune(ds);
338 	sort(ds->sfile[0], ds->slen[0]);
339 	sort(ds->sfile[1], ds->slen[1]);
340 
341 	ds->member = (int *)ds->file[1];
342 	equiv(ds->sfile[0], ds->slen[0], ds->sfile[1], ds->slen[1], ds->member);
343 	ds->member = reallocarray(ds->member, ds->slen[1] + 2, sizeof(*ds->member));
344 	if (ds->member == NULL) {
345 		err = got_error(GOT_ERR_NO_MEM);
346 		goto closem;
347 	}
348 
349 	ds->class = (int *)ds->file[0];
350 	unsort(ds->sfile[0], ds->slen[0], ds->class);
351 	ds->class = reallocarray(ds->class, ds->slen[0] + 2, sizeof(*ds->class));
352 	if (ds->class == NULL) {
353 		err = got_error(GOT_ERR_NO_MEM);
354 		goto closem;
355 	}
356 
357 	ds->klist = calloc(ds->slen[0] + 2, sizeof(*ds->klist));
358 	if (ds->klist == NULL) {
359 		err = got_error(GOT_ERR_NO_MEM);
360 		goto closem;
361 	}
362 	ds->clen = 0;
363 	ds->clistlen = 100;
364 	ds->clist = calloc(ds->clistlen, sizeof(*ds->clist));
365 	if (ds->clist == NULL) {
366 		err = got_error(GOT_ERR_NO_MEM);
367 		goto closem;
368 	}
369 	i = stone(ds, ds->class, ds->slen[0], ds->member, ds->klist, flags);
370 	free(ds->member);
371 	free(ds->class);
372 
373 	ds->J = reallocarray(ds->J, ds->len[0] + 2, sizeof(*ds->J));
374 	if (ds->J == NULL) {
375 		err = got_error(GOT_ERR_NO_MEM);
376 		goto closem;
377 	}
378 	unravel(ds, ds->klist[i]);
379 	free(ds->clist);
380 	ds->clist = NULL;
381 	free(ds->klist);
382 	ds->klist = NULL;
383 
384 	ds->ixold = reallocarray(ds->ixold, ds->len[0] + 2, sizeof(*ds->ixold));
385 	if (ds->ixold == NULL) {
386 		err = got_error(GOT_ERR_NO_MEM);
387 		goto closem;
388 	}
389 	ds->ixnew = reallocarray(ds->ixnew, ds->len[1] + 2, sizeof(*ds->ixnew));
390 	if (ds->ixnew == NULL) {
391 		err = got_error(GOT_ERR_NO_MEM);
392 		goto closem;
393 	}
394 	check(ds, f1, f2, flags);
395 	output(outfile, ds, args, args->label[0], f1, args->label[1], f2, flags);
396 closem:
397 	if (ds->anychange) {
398 		args->status |= 1;
399 		if (*rval == D_SAME)
400 			*rval = D_DIFFER;
401 	}
402 	if (f1 != NULL)
403 		fclose(f1);
404 	if (f2 != NULL)
405 		fclose(f2);
406 
407 	return (err);
408 }
409 
410 /*
411  * Check to see if the given files differ.
412  * Returns 0 if they are the same, 1 if different, and -1 on error.
413  * XXX - could use code from cmp(1) [faster]
414  */
415 static int
416 files_differ(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
417 {
418 	char buf1[BUFSIZ], buf2[BUFSIZ];
419 	size_t i, j;
420 
421 	if ((flags & (D_EMPTY1|D_EMPTY2)) || ds->stb1.st_size != ds->stb2.st_size ||
422 	    (ds->stb1.st_mode & S_IFMT) != (ds->stb2.st_mode & S_IFMT))
423 		return (1);
424 	for (;;) {
425 		i = fread(buf1, 1, sizeof(buf1), f1);
426 		j = fread(buf2, 1, sizeof(buf2), f2);
427 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
428 			return (-1);
429 		if (i != j)
430 			return (1);
431 		if (i == 0)
432 			return (0);
433 		if (memcmp(buf1, buf2, i) != 0)
434 			return (1);
435 	}
436 }
437 
438 char *
439 splice(char *dir, char *file)
440 {
441 	char *tail, *buf;
442 	size_t dirlen;
443 
444 	dirlen = strlen(dir);
445 	while (dirlen != 0 && dir[dirlen - 1] == '/')
446 	    dirlen--;
447 	if ((tail = strrchr(file, '/')) == NULL)
448 		tail = file;
449 	else
450 		tail++;
451 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
452 	return (buf);
453 }
454 
455 static void
456 prepare(struct got_diff_state *ds, int i, FILE *fd, off_t filesize, int flags)
457 {
458 	struct line *p;
459 	int j, h;
460 	size_t sz;
461 
462 	rewind(fd);
463 
464 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
465 	if (sz < 100)
466 		sz = 100;
467 
468 	p = xcalloc(sz + 3, sizeof(*p));
469 	for (j = 0; (h = readhash(ds, fd, flags));) {
470 		if (j == sz) {
471 			sz = sz * 3 / 2;
472 			p = xreallocarray(p, sz + 3, sizeof(*p));
473 		}
474 		p[++j].value = h;
475 	}
476 	ds->len[i] = j;
477 	ds->file[i] = p;
478 }
479 
480 static void
481 prune(struct got_diff_state *ds)
482 {
483 	int i, j;
484 
485 	for (ds->pref = 0; ds->pref < ds->len[0] && ds->pref < ds->len[1] &&
486 	    ds->file[0][ds->pref + 1].value == ds->file[1][ds->pref + 1].value;
487 	    ds->pref++)
488 		;
489 	for (ds->suff = 0; ds->suff < ds->len[0] - ds->pref && ds->suff < ds->len[1] - ds->pref &&
490 	    ds->file[0][ds->len[0] - ds->suff].value == ds->file[1][ds->len[1] - ds->suff].value;
491 	    ds->suff++)
492 		;
493 	for (j = 0; j < 2; j++) {
494 		ds->sfile[j] = ds->file[j] + ds->pref;
495 		ds->slen[j] = ds->len[j] - ds->pref - ds->suff;
496 		for (i = 0; i <= ds->slen[j]; i++)
497 			ds->sfile[j][i].serial = i;
498 	}
499 }
500 
501 static void
502 equiv(struct line *a, int n, struct line *b, int m, int *c)
503 {
504 	int i, j;
505 
506 	i = j = 1;
507 	while (i <= n && j <= m) {
508 		if (a[i].value < b[j].value)
509 			a[i++].value = 0;
510 		else if (a[i].value == b[j].value)
511 			a[i++].value = j;
512 		else
513 			j++;
514 	}
515 	while (i <= n)
516 		a[i++].value = 0;
517 	b[m + 1].value = 0;
518 	j = 0;
519 	while (++j <= m) {
520 		c[j] = -b[j].serial;
521 		while (b[j + 1].value == b[j].value) {
522 			j++;
523 			c[j] = b[j].serial;
524 		}
525 	}
526 	c[j] = -1;
527 }
528 
529 /* Code taken from ping.c */
530 static int
531 isqrt(int n)
532 {
533 	int y, x = 1;
534 
535 	if (n == 0)
536 		return (0);
537 
538 	do { /* newton was a stinker */
539 		y = x;
540 		x = n / x;
541 		x += y;
542 		x /= 2;
543 	} while ((x - y) > 1 || (x - y) < -1);
544 
545 	return (x);
546 }
547 
548 static int
549 stone(struct got_diff_state *ds, int *a, int n, int *b, int *c, int flags)
550 {
551 	int i, k, y, j, l;
552 	int oldc, tc, oldl, sq;
553 	u_int numtries, bound;
554 
555 	if (flags & D_MINIMAL)
556 		bound = UINT_MAX;
557 	else {
558 		sq = isqrt(n);
559 		bound = MAXIMUM(256, sq);
560 	}
561 
562 	k = 0;
563 	c[0] = newcand(ds, 0, 0, 0);
564 	for (i = 1; i <= n; i++) {
565 		j = a[i];
566 		if (j == 0)
567 			continue;
568 		y = -b[j];
569 		oldl = 0;
570 		oldc = c[0];
571 		numtries = 0;
572 		do {
573 			if (y <= ds->clist[oldc].y)
574 				continue;
575 			l = search(ds, c, k, y);
576 			if (l != oldl + 1)
577 				oldc = c[l - 1];
578 			if (l <= k) {
579 				if (ds->clist[c[l]].y <= y)
580 					continue;
581 				tc = c[l];
582 				c[l] = newcand(ds, i, y, oldc);
583 				oldc = tc;
584 				oldl = l;
585 				numtries++;
586 			} else {
587 				c[l] = newcand(ds, i, y, oldc);
588 				k++;
589 				break;
590 			}
591 		} while ((y = b[++j]) > 0 && numtries < bound);
592 	}
593 	return (k);
594 }
595 
596 static int
597 newcand(struct got_diff_state *ds, int x, int y, int pred)
598 {
599 	struct cand *q;
600 
601 	if (ds->clen == ds->clistlen) {
602 		ds->clistlen = ds->clistlen * 11 / 10;
603 		ds->clist = xreallocarray(ds->clist, ds->clistlen, sizeof(*ds->clist));
604 	}
605 	q = ds->clist + ds->clen;
606 	q->x = x;
607 	q->y = y;
608 	q->pred = pred;
609 	return (ds->clen++);
610 }
611 
612 static int
613 search(struct got_diff_state *ds, int *c, int k, int y)
614 {
615 	int i, j, l, t;
616 
617 	if (ds->clist[c[k]].y < y)	/* quick look for typical case */
618 		return (k + 1);
619 	i = 0;
620 	j = k + 1;
621 	for (;;) {
622 		l = (i + j) / 2;
623 		if (l <= i)
624 			break;
625 		t = ds->clist[c[l]].y;
626 		if (t > y)
627 			j = l;
628 		else if (t < y)
629 			i = l;
630 		else
631 			return (l);
632 	}
633 	return (l + 1);
634 }
635 
636 static void
637 unravel(struct got_diff_state *ds, int p)
638 {
639 	struct cand *q;
640 	int i;
641 
642 	for (i = 0; i <= ds->len[0]; i++)
643 		ds->J[i] = i <= ds->pref ? i :
644 		    i > ds->len[0] - ds->suff ? i + ds->len[1] - ds->len[0] : 0;
645 	for (q = ds->clist + p; q->y != 0; q = ds->clist + q->pred)
646 		ds->J[q->x + ds->pref] = q->y + ds->pref;
647 }
648 
649 /*
650  * Check does double duty:
651  *  1.	ferret out any fortuitous correspondences due
652  *	to confounding by hashing (which result in "jackpot")
653  *  2.  collect random access indexes to the two files
654  */
655 static void
656 check(struct got_diff_state *ds, FILE *f1, FILE *f2, int flags)
657 {
658 	int i, j, jackpot, c, d;
659 	long ctold, ctnew;
660 
661 	rewind(f1);
662 	rewind(f2);
663 	j = 1;
664 	ds->ixold[0] = ds->ixnew[0] = 0;
665 	jackpot = 0;
666 	ctold = ctnew = 0;
667 	for (i = 1; i <= ds->len[0]; i++) {
668 		if (ds->J[i] == 0) {
669 			ds->ixold[i] = ctold += skipline(f1);
670 			continue;
671 		}
672 		while (j < ds->J[i]) {
673 			ds->ixnew[j] = ctnew += skipline(f2);
674 			j++;
675 		}
676 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
677 			for (;;) {
678 				c = getc(f1);
679 				d = getc(f2);
680 				/*
681 				 * GNU diff ignores a missing newline
682 				 * in one file for -b or -w.
683 				 */
684 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
685 					if (c == EOF && d == '\n') {
686 						ctnew++;
687 						break;
688 					} else if (c == '\n' && d == EOF) {
689 						ctold++;
690 						break;
691 					}
692 				}
693 				ctold++;
694 				ctnew++;
695 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
696 				    isspace(d)) {
697 					do {
698 						if (c == '\n')
699 							break;
700 						ctold++;
701 					} while (isspace(c = getc(f1)));
702 					do {
703 						if (d == '\n')
704 							break;
705 						ctnew++;
706 					} while (isspace(d = getc(f2)));
707 				} else if ((flags & D_IGNOREBLANKS)) {
708 					while (isspace(c) && c != '\n') {
709 						c = getc(f1);
710 						ctold++;
711 					}
712 					while (isspace(d) && d != '\n') {
713 						d = getc(f2);
714 						ctnew++;
715 					}
716 				}
717 				if (ds->chrtran[c] != ds->chrtran[d]) {
718 					jackpot++;
719 					ds->J[i] = 0;
720 					if (c != '\n' && c != EOF)
721 						ctold += skipline(f1);
722 					if (d != '\n' && c != EOF)
723 						ctnew += skipline(f2);
724 					break;
725 				}
726 				if (c == '\n' || c == EOF)
727 					break;
728 			}
729 		} else {
730 			for (;;) {
731 				ctold++;
732 				ctnew++;
733 				if ((c = getc(f1)) != (d = getc(f2))) {
734 					/* jackpot++; */
735 					ds->J[i] = 0;
736 					if (c != '\n' && c != EOF)
737 						ctold += skipline(f1);
738 					if (d != '\n' && c != EOF)
739 						ctnew += skipline(f2);
740 					break;
741 				}
742 				if (c == '\n' || c == EOF)
743 					break;
744 			}
745 		}
746 		ds->ixold[i] = ctold;
747 		ds->ixnew[j] = ctnew;
748 		j++;
749 	}
750 	for (; j <= ds->len[1]; j++)
751 		ds->ixnew[j] = ctnew += skipline(f2);
752 	/*
753 	 * if (jackpot)
754 	 *	fprintf(stderr, "jackpot\n");
755 	 */
756 }
757 
758 /* shellsort CACM #201 */
759 static void
760 sort(struct line *a, int n)
761 {
762 	struct line *ai, *aim, w;
763 	int j, m = 0, k;
764 
765 	if (n == 0)
766 		return;
767 	for (j = 1; j <= n; j *= 2)
768 		m = 2 * j - 1;
769 	for (m /= 2; m != 0; m /= 2) {
770 		k = n - m;
771 		for (j = 1; j <= k; j++) {
772 			for (ai = &a[j]; ai > a; ai -= m) {
773 				aim = &ai[m];
774 				if (aim < ai)
775 					break;	/* wraparound */
776 				if (aim->value > ai[0].value ||
777 				    (aim->value == ai[0].value &&
778 					aim->serial > ai[0].serial))
779 					break;
780 				w.value = ai[0].value;
781 				ai[0].value = aim->value;
782 				aim->value = w.value;
783 				w.serial = ai[0].serial;
784 				ai[0].serial = aim->serial;
785 				aim->serial = w.serial;
786 			}
787 		}
788 	}
789 }
790 
791 static void
792 unsort(struct line *f, int l, int *b)
793 {
794 	int *a, i;
795 
796 	a = xcalloc(l + 1, sizeof(*a));
797 	for (i = 1; i <= l; i++)
798 		a[f[i].serial] = f[i].value;
799 	for (i = 1; i <= l; i++)
800 		b[i] = a[i];
801 	free(a);
802 }
803 
804 static int
805 skipline(FILE *f)
806 {
807 	int i, c;
808 
809 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
810 		continue;
811 	return (i);
812 }
813 
814 static void
815 output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
816     const char *file1, FILE *f1, const char *file2, FILE *f2, int flags)
817 {
818 	int m, i0, i1, j0, j1;
819 
820 	rewind(f1);
821 	rewind(f2);
822 	m = ds->len[0];
823 	ds->J[0] = 0;
824 	ds->J[m + 1] = ds->len[1] + 1;
825 	if (args->diff_format != D_EDIT) {
826 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
827 			while (i0 <= m && ds->J[i0] == ds->J[i0 - 1] + 1)
828 				i0++;
829 			j0 = ds->J[i0 - 1] + 1;
830 			i1 = i0 - 1;
831 			while (i1 < m && ds->J[i1 + 1] == 0)
832 				i1++;
833 			j1 = ds->J[i1 + 1] - 1;
834 			ds->J[i1] = j1;
835 			change(outfile, ds, args, file1, f1, file2, f2, i0, i1, j0, j1, &flags);
836 		}
837 	} else {
838 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
839 			while (i0 >= 1 && ds->J[i0] == ds->J[i0 + 1] - 1 && ds->J[i0] != 0)
840 				i0--;
841 			j0 = ds->J[i0 + 1] - 1;
842 			i1 = i0 + 1;
843 			while (i1 > 1 && ds->J[i1 - 1] == 0)
844 				i1--;
845 			j1 = ds->J[i1 - 1] + 1;
846 			ds->J[i1] = j1;
847 			change(outfile, ds, args, file1, f1, file2, f2, i1, i0, j1, j0, &flags);
848 		}
849 	}
850 	if (m == 0)
851 		change(outfile, ds, args, file1, f1, file2, f2, 1, 0, 1, ds->len[1], &flags);
852 	if (args->diff_format == D_IFDEF) {
853 		for (;;) {
854 #define	c i0
855 			if ((c = getc(f1)) == EOF)
856 				return;
857 			diff_output(outfile, "%c", c);
858 		}
859 #undef c
860 	}
861 	if (ds->anychange != 0) {
862 		if (args->diff_format == D_CONTEXT)
863 			dump_context_vec(outfile, ds, args, f1, f2, flags);
864 		else if (args->diff_format == D_UNIFIED)
865 			dump_unified_vec(outfile, ds, args, f1, f2, flags);
866 	}
867 }
868 
869 static void
870 range(FILE *outfile, int a, int b, char *separator)
871 {
872 	diff_output(outfile, "%d", a > b ? b : a);
873 	if (a < b)
874 		diff_output(outfile, "%s%d", separator, b);
875 }
876 
877 static void
878 uni_range(FILE *outfile, int a, int b)
879 {
880 	if (a < b)
881 		diff_output(outfile, "%d,%d", a, b - a + 1);
882 	else if (a == b)
883 		diff_output(outfile, "%d", b);
884 	else
885 		diff_output(outfile, "%d,0", b);
886 }
887 
888 static char *
889 preadline(int fd, size_t rlen, off_t off)
890 {
891 	char *line;
892 	ssize_t nr;
893 
894 	line = xmalloc(rlen + 1);
895 	if ((nr = pread(fd, line, rlen, off)) < 0)
896 		err(2, "preadline");
897 	if (nr > 0 && line[nr-1] == '\n')
898 		nr--;
899 	line[nr] = '\0';
900 	return (line);
901 }
902 
903 static int
904 ignoreline(char *line)
905 {
906 	return 0; /* do not ignore any lines */
907 }
908 
909 /*
910  * Indicate that there is a difference between lines a and b of the from file
911  * to get to lines c to d of the to file.  If a is greater then b then there
912  * are no lines in the from file involved and this means that there were
913  * lines appended (beginning at b).  If c is greater than d then there are
914  * lines missing from the to file.
915  */
916 static void
917 change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
918     const char *file1, FILE *f1, const char *file2, FILE *f2,
919     int a, int b, int c, int d, int *pflags)
920 {
921 	size_t max_context = 64;
922 	int i;
923 
924 restart:
925 	if (args->diff_format != D_IFDEF && a > b && c > d)
926 		return;
927 	if (args->ignore_pats != NULL) {
928 		char *line;
929 		/*
930 		 * All lines in the change, insert, or delete must
931 		 * match an ignore pattern for the change to be
932 		 * ignored.
933 		 */
934 		if (a <= b) {		/* Changes and deletes. */
935 			for (i = a; i <= b; i++) {
936 				line = preadline(fileno(f1),
937 				    ds->ixold[i] - ds->ixold[i - 1], ds->ixold[i - 1]);
938 				if (!ignoreline(line))
939 					goto proceed;
940 			}
941 		}
942 		if (a > b || c <= d) {	/* Changes and inserts. */
943 			for (i = c; i <= d; i++) {
944 				line = preadline(fileno(f2),
945 				    ds->ixnew[i] - ds->ixnew[i - 1], ds->ixnew[i - 1]);
946 				if (!ignoreline(line))
947 					goto proceed;
948 			}
949 		}
950 		return;
951 	}
952 proceed:
953 	if (*pflags & D_HEADER) {
954 		diff_output(outfile, "%s %s %s\n", args->diffargs, file1, file2);
955 		*pflags &= ~D_HEADER;
956 	}
957 	if (args->diff_format == D_CONTEXT || args->diff_format == D_UNIFIED) {
958 		/*
959 		 * Allocate change records as needed.
960 		 */
961 		if (ds->context_vec_ptr == ds->context_vec_end - 1) {
962 			ptrdiff_t offset = ds->context_vec_ptr - ds->context_vec_start;
963 			max_context <<= 1;
964 			ds->context_vec_start = xreallocarray(ds->context_vec_start,
965 			    max_context, sizeof(*ds->context_vec_start));
966 			ds->context_vec_end = ds->context_vec_start + max_context;
967 			ds->context_vec_ptr = ds->context_vec_start + offset;
968 		}
969 		if (ds->anychange == 0) {
970 			/*
971 			 * Print the context/unidiff header first time through.
972 			 */
973 			print_header(outfile, ds, args, file1, file2);
974 			ds->anychange = 1;
975 		} else if (a > ds->context_vec_ptr->b + (2 * args->diff_context) + 1 &&
976 		    c > ds->context_vec_ptr->d + (2 * args->diff_context) + 1) {
977 			/*
978 			 * If this change is more than 'diff_context' lines from the
979 			 * previous change, dump the record and reset it.
980 			 */
981 			if (args->diff_format == D_CONTEXT)
982 				dump_context_vec(outfile, ds, args, f1, f2, *pflags);
983 			else
984 				dump_unified_vec(outfile, ds, args, f1, f2, *pflags);
985 		}
986 		ds->context_vec_ptr++;
987 		ds->context_vec_ptr->a = a;
988 		ds->context_vec_ptr->b = b;
989 		ds->context_vec_ptr->c = c;
990 		ds->context_vec_ptr->d = d;
991 		return;
992 	}
993 	if (ds->anychange == 0)
994 		ds->anychange = 1;
995 	switch (args->diff_format) {
996 	case D_BRIEF:
997 		return;
998 	case D_NORMAL:
999 	case D_EDIT:
1000 		range(outfile, a, b, ",");
1001 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1002 		if (args->diff_format == D_NORMAL)
1003 			range(outfile, c, d, ",");
1004 		diff_output(outfile, "\n");
1005 		break;
1006 	case D_REVERSE:
1007 		diff_output(outfile, "%c", a > b ? 'a' : c > d ? 'd' : 'c');
1008 		range(outfile, a, b, " ");
1009 		diff_output(outfile, "\n");
1010 		break;
1011 	case D_NREVERSE:
1012 		if (a > b)
1013 			diff_output(outfile, "a%d %d\n", b, d - c + 1);
1014 		else {
1015 			diff_output(outfile, "d%d %d\n", a, b - a + 1);
1016 			if (!(c > d))
1017 				/* add changed lines */
1018 				diff_output(outfile, "a%d %d\n", b, d - c + 1);
1019 		}
1020 		break;
1021 	}
1022 	if (args->diff_format == D_NORMAL || args->diff_format == D_IFDEF) {
1023 		fetch(outfile, ds, args, ds->ixold, a, b, f1, '<', 1, *pflags);
1024 		if (a <= b && c <= d && args->diff_format == D_NORMAL)
1025 			diff_output(outfile, "---\n");
1026 	}
1027 	i = fetch(outfile, ds, args, ds->ixnew, c, d, f2, args->diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1028 	if (i != 0 && args->diff_format == D_EDIT) {
1029 		/*
1030 		 * A non-zero return value for D_EDIT indicates that the
1031 		 * last line printed was a bare dot (".") that has been
1032 		 * escaped as ".." to prevent ed(1) from misinterpreting
1033 		 * it.  We have to add a substitute command to change this
1034 		 * back and restart where we left off.
1035 		 */
1036 		diff_output(outfile, ".\n");
1037 		diff_output(outfile, "%ds/.//\n", a + i - 1);
1038 		b = a + i - 1;
1039 		a = b + 1;
1040 		c += i;
1041 		goto restart;
1042 	}
1043 	if ((args->diff_format == D_EDIT || args->diff_format == D_REVERSE) && c <= d)
1044 		diff_output(outfile, ".\n");
1045 	if (ds->inifdef) {
1046 		diff_output(outfile, "#endif /* %s */\n", args->ifdefname);
1047 		ds->inifdef = 0;
1048 	}
1049 }
1050 
1051 static int
1052 fetch(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1053     long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1054 {
1055 	int i, j, c, lastc, col, nc;
1056 
1057 	/*
1058 	 * When doing #ifdef's, copy down to current line
1059 	 * if this is the first file, so that stuff makes it to output.
1060 	 */
1061 	if (args->diff_format == D_IFDEF && oldfile) {
1062 		long curpos = ftell(lb);
1063 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1064 		nc = f[a > b ? b : a - 1] - curpos;
1065 		for (i = 0; i < nc; i++)
1066 			diff_output(outfile, "%c", getc(lb));
1067 	}
1068 	if (a > b)
1069 		return (0);
1070 	if (args->diff_format == D_IFDEF) {
1071 		if (ds->inifdef) {
1072 			diff_output(outfile, "#else /* %s%s */\n",
1073 			    oldfile == 1 ? "!" : "", args->ifdefname);
1074 		} else {
1075 			if (oldfile)
1076 				diff_output(outfile, "#ifndef %s\n", args->ifdefname);
1077 			else
1078 				diff_output(outfile, "#ifdef %s\n", args->ifdefname);
1079 		}
1080 		ds->inifdef = 1 + oldfile;
1081 	}
1082 	for (i = a; i <= b; i++) {
1083 		fseek(lb, f[i - 1], SEEK_SET);
1084 		nc = f[i] - f[i - 1];
1085 		if (args->diff_format != D_IFDEF && ch != '\0') {
1086 			diff_output(outfile, "%c", ch);
1087 			if (args->Tflag && (args->diff_format == D_NORMAL || args->diff_format == D_CONTEXT
1088 			    || args->diff_format == D_UNIFIED))
1089 				diff_output(outfile, "\t");
1090 			else if (args->diff_format != D_UNIFIED)
1091 				diff_output(outfile, " ");
1092 		}
1093 		col = 0;
1094 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1095 			if ((c = getc(lb)) == EOF) {
1096 				if (args->diff_format == D_EDIT || args->diff_format == D_REVERSE ||
1097 				    args->diff_format == D_NREVERSE)
1098 					warnx("No newline at end of file");
1099 				else
1100 					diff_output(outfile, "\n\\ No newline at end of "
1101 					    "file\n");
1102 				return (0);
1103 			}
1104 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1105 				do {
1106 					diff_output(outfile, " ");
1107 				} while (++col & 7);
1108 			} else {
1109 				if (args->diff_format == D_EDIT && j == 1 && c == '\n'
1110 				    && lastc == '.') {
1111 					/*
1112 					 * Don't print a bare "." line
1113 					 * since that will confuse ed(1).
1114 					 * Print ".." instead and return,
1115 					 * giving the caller an offset
1116 					 * from which to restart.
1117 					 */
1118 					diff_output(outfile, ".\n");
1119 					return (i - a + 1);
1120 				}
1121 				diff_output(outfile, "%c", c);
1122 				col++;
1123 			}
1124 		}
1125 	}
1126 	return (0);
1127 }
1128 
1129 /*
1130  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1131  */
1132 static int
1133 readhash(struct got_diff_state *ds, FILE *f, int flags)
1134 {
1135 	int i, t, space;
1136 	int sum;
1137 
1138 	sum = 1;
1139 	space = 0;
1140 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1141 		if (flags & D_IGNORECASE)
1142 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1143 				if (t == EOF) {
1144 					if (i == 0)
1145 						return (0);
1146 					break;
1147 				}
1148 				sum = sum * 127 + ds->chrtran[t];
1149 			}
1150 		else
1151 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1152 				if (t == EOF) {
1153 					if (i == 0)
1154 						return (0);
1155 					break;
1156 				}
1157 				sum = sum * 127 + t;
1158 			}
1159 	} else {
1160 		for (i = 0;;) {
1161 			switch (t = getc(f)) {
1162 			case '\t':
1163 			case '\r':
1164 			case '\v':
1165 			case '\f':
1166 			case ' ':
1167 				space++;
1168 				continue;
1169 			default:
1170 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1171 					i++;
1172 					space = 0;
1173 				}
1174 				sum = sum * 127 + ds->chrtran[t];
1175 				i++;
1176 				continue;
1177 			case EOF:
1178 				if (i == 0)
1179 					return (0);
1180 				/* FALLTHROUGH */
1181 			case '\n':
1182 				break;
1183 			}
1184 			break;
1185 		}
1186 	}
1187 	/*
1188 	 * There is a remote possibility that we end up with a zero sum.
1189 	 * Zero is used as an EOF marker, so return 1 instead.
1190 	 */
1191 	return (sum == 0 ? 1 : sum);
1192 }
1193 
1194 static int
1195 asciifile(FILE *f)
1196 {
1197 	unsigned char buf[BUFSIZ];
1198 	size_t cnt;
1199 
1200 	if (f == NULL)
1201 		return (1);
1202 
1203 	rewind(f);
1204 	cnt = fread(buf, 1, sizeof(buf), f);
1205 	return (memchr(buf, '\0', cnt) == NULL);
1206 }
1207 
1208 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1209 
1210 static char *
1211 match_function(struct got_diff_state *ds, const long *f, int pos, FILE *fp)
1212 {
1213 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1214 	size_t nc;
1215 	int last = ds->lastline;
1216 	char *state = NULL;
1217 
1218 	ds->lastline = pos;
1219 	while (pos > last) {
1220 		fseek(fp, f[pos - 1], SEEK_SET);
1221 		nc = f[pos] - f[pos - 1];
1222 		if (nc >= sizeof(buf))
1223 			nc = sizeof(buf) - 1;
1224 		nc = fread(buf, 1, nc, fp);
1225 		if (nc > 0) {
1226 			buf[nc] = '\0';
1227 			buf[strcspn(buf, "\n")] = '\0';
1228 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1229 				if (begins_with(buf, "private:")) {
1230 					if (!state)
1231 						state = " (private)";
1232 				} else if (begins_with(buf, "protected:")) {
1233 					if (!state)
1234 						state = " (protected)";
1235 				} else if (begins_with(buf, "public:")) {
1236 					if (!state)
1237 						state = " (public)";
1238 				} else {
1239 					strlcpy(ds->lastbuf, buf, sizeof ds->lastbuf);
1240 					if (state)
1241 						strlcat(ds->lastbuf, state,
1242 						    sizeof ds->lastbuf);
1243 					ds->lastmatchline = pos;
1244 					return ds->lastbuf;
1245 				}
1246 			}
1247 		}
1248 		pos--;
1249 	}
1250 	return ds->lastmatchline > 0 ? ds->lastbuf : NULL;
1251 }
1252 
1253 /* dump accumulated "context" diff changes */
1254 static void
1255 dump_context_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1256     FILE *f1, FILE *f2, int flags)
1257 {
1258 	struct context_vec *cvp = ds->context_vec_start;
1259 	int lowa, upb, lowc, upd, do_output;
1260 	int a, b, c, d;
1261 	char ch, *f;
1262 
1263 	if (ds->context_vec_start > ds->context_vec_ptr)
1264 		return;
1265 
1266 	b = d = 0;		/* gcc */
1267 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1268 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1269 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1270 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1271 
1272 	diff_output(outfile, "***************");
1273 	if ((flags & D_PROTOTYPE)) {
1274 		f = match_function(ds, ds->ixold, lowa-1, f1);
1275 		if (f != NULL)
1276 			diff_output(outfile, " %s", f);
1277 	}
1278 	diff_output(outfile, "\n*** ");
1279 	range(outfile, lowa, upb, ",");
1280 	diff_output(outfile, " ****\n");
1281 
1282 	/*
1283 	 * Output changes to the "old" file.  The first loop suppresses
1284 	 * output if there were no changes to the "old" file (we'll see
1285 	 * the "old" lines as context in the "new" list).
1286 	 */
1287 	do_output = 0;
1288 	for (; cvp <= ds->context_vec_ptr; cvp++)
1289 		if (cvp->a <= cvp->b) {
1290 			cvp = ds->context_vec_start;
1291 			do_output++;
1292 			break;
1293 		}
1294 	if (do_output) {
1295 		while (cvp <= ds->context_vec_ptr) {
1296 			a = cvp->a;
1297 			b = cvp->b;
1298 			c = cvp->c;
1299 			d = cvp->d;
1300 
1301 			if (a <= b && c <= d)
1302 				ch = 'c';
1303 			else
1304 				ch = (a <= b) ? 'd' : 'a';
1305 
1306 			if (ch == 'a')
1307 				fetch(outfile, ds, args, ds->ixold, lowa, b, f1, ' ', 0, flags);
1308 			else {
1309 				fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1310 				fetch(outfile, ds, args, ds->ixold, a, b, f1,
1311 				    ch == 'c' ? '!' : '-', 0, flags);
1312 			}
1313 			lowa = b + 1;
1314 			cvp++;
1315 		}
1316 		fetch(outfile, ds, args, ds->ixold, b + 1, upb, f1, ' ', 0, flags);
1317 	}
1318 	/* output changes to the "new" file */
1319 	diff_output(outfile, "--- ");
1320 	range(outfile, lowc, upd, ",");
1321 	diff_output(outfile, " ----\n");
1322 
1323 	do_output = 0;
1324 	for (cvp = ds->context_vec_start; cvp <= ds->context_vec_ptr; cvp++)
1325 		if (cvp->c <= cvp->d) {
1326 			cvp = ds->context_vec_start;
1327 			do_output++;
1328 			break;
1329 		}
1330 	if (do_output) {
1331 		while (cvp <= ds->context_vec_ptr) {
1332 			a = cvp->a;
1333 			b = cvp->b;
1334 			c = cvp->c;
1335 			d = cvp->d;
1336 
1337 			if (a <= b && c <= d)
1338 				ch = 'c';
1339 			else
1340 				ch = (a <= b) ? 'd' : 'a';
1341 
1342 			if (ch == 'd')
1343 				fetch(outfile, ds, args, ds->ixnew, lowc, d, f2, ' ', 0, flags);
1344 			else {
1345 				fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1346 				fetch(outfile, ds, args, ds->ixnew, c, d, f2,
1347 				    ch == 'c' ? '!' : '+', 0, flags);
1348 			}
1349 			lowc = d + 1;
1350 			cvp++;
1351 		}
1352 		fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1353 	}
1354 	ds->context_vec_ptr = ds->context_vec_start - 1;
1355 }
1356 
1357 /* dump accumulated "unified" diff changes */
1358 static void
1359 dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1360     FILE *f1, FILE *f2, int flags)
1361 {
1362 	struct context_vec *cvp = ds->context_vec_start;
1363 	int lowa, upb, lowc, upd;
1364 	int a, b, c, d;
1365 	char ch, *f;
1366 
1367 	if (ds->context_vec_start > ds->context_vec_ptr)
1368 		return;
1369 
1370 	b = d = 0;		/* gcc */
1371 	lowa = MAXIMUM(1, cvp->a - args->diff_context);
1372 	upb = MINIMUM(ds->len[0], ds->context_vec_ptr->b + args->diff_context);
1373 	lowc = MAXIMUM(1, cvp->c - args->diff_context);
1374 	upd = MINIMUM(ds->len[1], ds->context_vec_ptr->d + args->diff_context);
1375 
1376 	diff_output(outfile, "@@ -");
1377 	uni_range(outfile, lowa, upb);
1378 	diff_output(outfile, " +");
1379 	uni_range(outfile, lowc, upd);
1380 	diff_output(outfile, " @@");
1381 	if ((flags & D_PROTOTYPE)) {
1382 		f = match_function(ds, ds->ixold, lowa-1, f1);
1383 		if (f != NULL)
1384 			diff_output(outfile, " %s", f);
1385 	}
1386 	diff_output(outfile, "\n");
1387 
1388 	/*
1389 	 * Output changes in "unified" diff format--the old and new lines
1390 	 * are printed together.
1391 	 */
1392 	for (; cvp <= ds->context_vec_ptr; cvp++) {
1393 		a = cvp->a;
1394 		b = cvp->b;
1395 		c = cvp->c;
1396 		d = cvp->d;
1397 
1398 		/*
1399 		 * c: both new and old changes
1400 		 * d: only changes in the old file
1401 		 * a: only changes in the new file
1402 		 */
1403 		if (a <= b && c <= d)
1404 			ch = 'c';
1405 		else
1406 			ch = (a <= b) ? 'd' : 'a';
1407 
1408 		switch (ch) {
1409 		case 'c':
1410 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1411 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1412 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1413 			break;
1414 		case 'd':
1415 			fetch(outfile, ds, args, ds->ixold, lowa, a - 1, f1, ' ', 0, flags);
1416 			fetch(outfile, ds, args, ds->ixold, a, b, f1, '-', 0, flags);
1417 			break;
1418 		case 'a':
1419 			fetch(outfile, ds, args, ds->ixnew, lowc, c - 1, f2, ' ', 0, flags);
1420 			fetch(outfile, ds, args, ds->ixnew, c, d, f2, '+', 0, flags);
1421 			break;
1422 		}
1423 		lowa = b + 1;
1424 		lowc = d + 1;
1425 	}
1426 	fetch(outfile, ds, args, ds->ixnew, d + 1, upd, f2, ' ', 0, flags);
1427 
1428 	ds->context_vec_ptr = ds->context_vec_start - 1;
1429 }
1430 
1431 static void
1432 print_header(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args,
1433     const char *file1, const char *file2)
1434 {
1435 	if (args->label[0] != NULL)
1436 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "***" : "---",
1437 		    args->label[0]);
1438 	else
1439 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "***" : "---",
1440 		    file1, ctime(&ds->stb1.st_mtime));
1441 	if (args->label[1] != NULL)
1442 		diff_output(outfile, "%s %s\n", args->diff_format == D_CONTEXT ? "---" : "+++",
1443 		    args->label[1]);
1444 	else
1445 		diff_output(outfile, "%s %s\t%s", args->diff_format == D_CONTEXT ? "---" : "+++",
1446 		    file2, ctime(&ds->stb2.st_mtime));
1447 }