op public repos

Blob

Date:: Wed Nov 29 21:15:12 2017 UTC
Message:: wrap global diff state in a struct
Actions:: History | Blame | Raw File
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 #include <sys/queue.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <paths.h>
76 #include <stddef.h>
77 #include <stdint.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82 #include <limits.h>
83 #include <sha1.h>
84 #include <zlib.h>
85 
86 #include "got_error.h"
87 #include "got_object.h"
88 #include "got_diff.h"
89 
90 #include "diff.h"
91 #include "xmalloc.h"
92 
93 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
94 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
95 
96 /*
97  * diff - compare two files.
98  */
99 
100 /*
101  *	Uses an algorithm due to Harold Stone, which finds
102  *	a pair of longest identical subsequences in the two
103  *	files.
104  *
105  *	The major goal is to generate the match vector J.
106  *	J[i] is the index of the line in file1 corresponding
107  *	to line i file0. J[i] = 0 if there is no
108  *	such line in file1.
109  *
110  *	Lines are hashed so as to work in core. All potential
111  *	matches are located by sorting the lines of each file
112  *	on the hash (called ``value''). In particular, this
113  *	collects the equivalence classes in file1 together.
114  *	Subroutine equiv replaces the value of each line in
115  *	file0 by the index of the first element of its
116  *	matching equivalence in (the reordered) file1.
117  *	To save space equiv squeezes file1 into a single
118  *	array member in which the equivalence classes
119  *	are simply concatenated, except that their first
120  *	members are flagged by changing sign.
121  *
122  *	Next the indices that point into member are unsorted into
123  *	array class according to the original order of file0.
124  *
125  *	The cleverness lies in routine stone. This marches
126  *	through the lines of file0, developing a vector klist
127  *	of "k-candidates". At step i a k-candidate is a matched
128  *	pair of lines x,y (x in file0 y in file1) such that
129  *	there is a common subsequence of length k
130  *	between the first i lines of file0 and the first y
131  *	lines of file1, but there is no such subsequence for
132  *	any smaller y. x is the earliest possible mate to y
133  *	that occurs in such a subsequence.
134  *
135  *	Whenever any of the members of the equivalence class of
136  *	lines in file1 matable to a line in file0 has serial number
137  *	less than the y of some k-candidate, that k-candidate
138  *	with the smallest such y is replaced. The new
139  *	k-candidate is chained (via pred) to the current
140  *	k-1 candidate so that the actual subsequence can
141  *	be recovered. When a member has serial number greater
142  *	that the y of all k-candidates, the klist is extended.
143  *	At the end, the longest subsequence is pulled out
144  *	and placed in the array J by unravel
145  *
146  *	With J in hand, the matches there recorded are
147  *	check'ed against reality to assure that no spurious
148  *	matches have crept in due to hashing. If they have,
149  *	they are broken, and "jackpot" is recorded--a harmless
150  *	matter except that a true match for a spuriously
151  *	mated line may now be unnecessarily reported as a change.
152  *
153  *	Much of the complexity of the program comes simply
154  *	from trying to minimize core utilization and
155  *	maximize the range of doable problems by dynamically
156  *	allocating what is needed and reusing what is not.
157  *	The core requirements for problems larger than somewhat
158  *	are (in words) 2*length(file0) + length(file1) +
159  *	3*(number of k-candidates installed),  typically about
160  *	6n words for files of length n.
161  */
162 
163 struct cand {
164 	int	x;
165 	int	y;
166 	int	pred;
167 };
168 
169 struct line {
170 	int	serial;
171 	int	value;
172 } *file[2];
173 
174 /*
175  * The following struct is used to record change information when
176  * doing a "context" or "unified" diff.  (see routine "change" to
177  * understand the highly mnemonic field names)
178  */
179 struct context_vec {
180 	int	a;		/* start line in old file */
181 	int	b;		/* end line in old file */
182 	int	c;		/* start line in new file */
183 	int	d;		/* end line in new file */
184 };
185 
186 #define	diff_output	printf
187 static FILE	*opentemp(const char *);
188 static void	 output(char *, FILE *, char *, FILE *, int);
189 static void	 check(FILE *, FILE *, int);
190 static void	 range(int, int, char *);
191 static void	 uni_range(int, int);
192 static void	 dump_context_vec(FILE *, FILE *, int);
193 static void	 dump_unified_vec(FILE *, FILE *, int);
194 static void	 prepare(int, FILE *, off_t, int);
195 static void	 prune(void);
196 static void	 equiv(struct line *, int, struct line *, int, int *);
197 static void	 unravel(int);
198 static void	 unsort(struct line *, int, int *);
199 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
200 static void	 sort(struct line *, int);
201 static void	 print_header(const char *, const char *);
202 static int	 ignoreline(char *);
203 static int	 asciifile(FILE *);
204 static int	 fetch(long *, int, int, FILE *, int, int, int);
205 static int	 newcand(int, int, int);
206 static int	 search(int *, int, int);
207 static int	 skipline(FILE *);
208 static int	 isqrt(int);
209 static int	 stone(int *, int, int *, int *, int);
210 static int	 readhash(FILE *, int);
211 static int	 files_differ(FILE *, FILE *, int);
212 static char	*match_function(const long *, int, FILE *);
213 static char	*preadline(int, size_t, off_t);
214 
215 struct diff_state {
216 	int  *J;			/* will be overlaid on class */
217 	int  *class;		/* will be overlaid on file[0] */
218 	int  *klist;		/* will be overlaid on file[0] after class */
219 	int  *member;		/* will be overlaid on file[1] */
220 	int   clen;
221 	int   inifdef;		/* whether or not we are in a #ifdef block */
222 	int   len[2];
223 	int   pref, suff;	/* length of prefix and suffix */
224 	int   slen[2];
225 	int   anychange;
226 	long *ixnew;		/* will be overlaid on file[1] */
227 	long *ixold;		/* will be overlaid on klist */
228 	struct cand *clist;	/* merely a free storage pot for candidates */
229 	int   clistlen;		/* the length of clist */
230 	struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
231 	u_char *chrtran;		/* translation table for case-folding */
232 	struct context_vec *context_vec_start;
233 	struct context_vec *context_vec_end;
234 	struct context_vec *context_vec_ptr;
235 } ds;
236 
237 #define FUNCTION_CONTEXT_SIZE	55
238 static char lastbuf[FUNCTION_CONTEXT_SIZE];
239 static int lastline;
240 static int lastmatchline;
241 
242 struct diff_args {
243 	int	 Tflag;
244 	int	 diff_format, diff_context, status;
245 	char	*ifdefname, *diffargs, *label[2], *ignore_pats;
246 	struct stat stb1, stb2;
247 } diff_args;
248 
249 /*
250  * chrtran points to one of 2 translation tables: cup2low if folding upper to
251  * lower case clow2low if not folding case
252  */
253 u_char clow2low[256] = {
254 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
255 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
256 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
257 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
258 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
259 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
260 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
261 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
262 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
263 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
264 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
265 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
266 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
267 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
268 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
269 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
270 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
271 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
272 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
273 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
274 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
275 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
276 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
277 	0xfd, 0xfe, 0xff
278 };
279 
280 u_char cup2low[256] = {
281 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
282 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
283 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
284 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
285 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
286 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
287 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
288 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
289 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
290 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
291 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
292 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
293 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
294 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
295 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
296 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
297 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
298 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
299 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
300 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
301 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
302 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
303 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
304 	0xfd, 0xfe, 0xff
305 };
306 
307 const struct got_error *
308 got_diffreg(int *rval, char *file1, char *file2, int flags)
309 {
310 	const struct got_error *err = NULL;
311 	FILE *f1, *f2;
312 	int i;
313 
314 	diff_args.diff_format = D_UNIFIED;
315 
316 	if (strcmp(file1, "-") == 0)
317 		fstat(STDIN_FILENO, &diff_args.stb1);
318 	else if (stat(file1, &diff_args.stb1) != 0)
319 		return got_error(GOT_ERR_BAD_PATH);
320 
321 	if (strcmp(file2, "-") == 0)
322 		fstat(STDIN_FILENO, &diff_args.stb2);
323 	else if (stat(file2, &diff_args.stb2) != 0)
324 		return got_error(GOT_ERR_BAD_PATH);
325 
326 	f1 = f2 = NULL;
327 	*rval = D_SAME;
328 	ds.anychange = 0;
329 	lastline = 0;
330 	lastmatchline = 0;
331 	ds.context_vec_ptr = ds.context_vec_start - 1;
332 	if (flags & D_IGNORECASE)
333 		ds.chrtran = cup2low;
334 	else
335 		ds.chrtran = clow2low;
336 	if (S_ISDIR(diff_args.stb1.st_mode) != S_ISDIR(diff_args.stb2.st_mode)) {
337 		*rval = (S_ISDIR(diff_args.stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
338 		return NULL;
339 	}
340 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
341 		goto closem;
342 
343 	if (flags & D_EMPTY1)
344 		f1 = fopen(_PATH_DEVNULL, "r");
345 	else {
346 		if (!S_ISREG(diff_args.stb1.st_mode)) {
347 			if ((f1 = opentemp(file1)) == NULL ||
348 			    fstat(fileno(f1), &diff_args.stb1) < 0) {
349 				diff_args.status |= 2;
350 				goto closem;
351 			}
352 		} else if (strcmp(file1, "-") == 0)
353 			f1 = stdin;
354 		else
355 			f1 = fopen(file1, "r");
356 	}
357 	if (f1 == NULL) {
358 		diff_args.status |= 2;
359 		goto closem;
360 	}
361 
362 	if (flags & D_EMPTY2)
363 		f2 = fopen(_PATH_DEVNULL, "r");
364 	else {
365 		if (!S_ISREG(diff_args.stb2.st_mode)) {
366 			if ((f2 = opentemp(file2)) == NULL ||
367 			    fstat(fileno(f2), &diff_args.stb2) < 0) {
368 				diff_args.status |= 2;
369 				goto closem;
370 			}
371 		} else if (strcmp(file2, "-") == 0)
372 			f2 = stdin;
373 		else
374 			f2 = fopen(file2, "r");
375 	}
376 	if (f2 == NULL) {
377 		diff_args.status |= 2;
378 		goto closem;
379 	}
380 
381 	switch (files_differ(f1, f2, flags)) {
382 	case 0:
383 		goto closem;
384 	case 1:
385 		break;
386 	default:
387 		/* error */
388 		diff_args.status |= 2;
389 		goto closem;
390 	}
391 
392 	if ((flags & D_FORCEASCII) == 0 &&
393 	    (!asciifile(f1) || !asciifile(f2))) {
394 		*rval = D_BINARY;
395 		diff_args.status |= 1;
396 		goto closem;
397 	}
398 	prepare(0, f1, diff_args.stb1.st_size, flags);
399 	prepare(1, f2, diff_args.stb2.st_size, flags);
400 
401 	prune();
402 	sort(ds.sfile[0], ds.slen[0]);
403 	sort(ds.sfile[1], ds.slen[1]);
404 
405 	ds.member = (int *)file[1];
406 	equiv(ds.sfile[0], ds.slen[0], ds.sfile[1], ds.slen[1], ds.member);
407 	ds.member = reallocarray(ds.member, ds.slen[1] + 2, sizeof(*ds.member));
408 	if (ds.member == NULL) {
409 		err = got_error(GOT_ERR_NO_MEM);
410 		goto closem;
411 	}
412 
413 	ds.class = (int *)file[0];
414 	unsort(ds.sfile[0], ds.slen[0], ds.class);
415 	ds.class = reallocarray(ds.class, ds.slen[0] + 2, sizeof(*ds.class));
416 	if (ds.class == NULL) {
417 		err = got_error(GOT_ERR_NO_MEM);
418 		goto closem;
419 	}
420 
421 	ds.klist = calloc(ds.slen[0] + 2, sizeof(*ds.klist));
422 	if (ds.klist == NULL) {
423 		err = got_error(GOT_ERR_NO_MEM);
424 		goto closem;
425 	}
426 	ds.clen = 0;
427 	ds.clistlen = 100;
428 	ds.clist = calloc(ds.clistlen, sizeof(*ds.clist));
429 	if (ds.clist == NULL) {
430 		err = got_error(GOT_ERR_NO_MEM);
431 		goto closem;
432 	}
433 	i = stone(ds.class, ds.slen[0], ds.member, ds.klist, flags);
434 	free(ds.member);
435 	free(ds.class);
436 
437 	ds.J = reallocarray(ds.J, ds.len[0] + 2, sizeof(*ds.J));
438 	if (ds.J == NULL) {
439 		err = got_error(GOT_ERR_NO_MEM);
440 		goto closem;
441 	}
442 	unravel(ds.klist[i]);
443 	free(ds.clist);
444 	ds.clist = NULL;
445 	free(ds.klist);
446 	ds.klist = NULL;
447 
448 	ds.ixold = reallocarray(ds.ixold, ds.len[0] + 2, sizeof(*ds.ixold));
449 	if (ds.ixold == NULL) {
450 		err = got_error(GOT_ERR_NO_MEM);
451 		goto closem;
452 	}
453 	ds.ixnew = reallocarray(ds.ixnew, ds.len[1] + 2, sizeof(*ds.ixnew));
454 	if (ds.ixnew == NULL) {
455 		err = got_error(GOT_ERR_NO_MEM);
456 		goto closem;
457 	}
458 	check(f1, f2, flags);
459 	output(file1, f1, file2, f2, flags);
460 closem:
461 	if (ds.anychange) {
462 		diff_args.status |= 1;
463 		if (*rval == D_SAME)
464 			*rval = D_DIFFER;
465 	}
466 	if (f1 != NULL)
467 		fclose(f1);
468 	if (f2 != NULL)
469 		fclose(f2);
470 
471 	return (err);
472 }
473 
474 /*
475  * Check to see if the given files differ.
476  * Returns 0 if they are the same, 1 if different, and -1 on error.
477  * XXX - could use code from cmp(1) [faster]
478  */
479 static int
480 files_differ(FILE *f1, FILE *f2, int flags)
481 {
482 	char buf1[BUFSIZ], buf2[BUFSIZ];
483 	size_t i, j;
484 
485 	if ((flags & (D_EMPTY1|D_EMPTY2)) || diff_args.stb1.st_size != diff_args.stb2.st_size ||
486 	    (diff_args.stb1.st_mode & S_IFMT) != (diff_args.stb2.st_mode & S_IFMT))
487 		return (1);
488 	for (;;) {
489 		i = fread(buf1, 1, sizeof(buf1), f1);
490 		j = fread(buf2, 1, sizeof(buf2), f2);
491 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
492 			return (-1);
493 		if (i != j)
494 			return (1);
495 		if (i == 0)
496 			return (0);
497 		if (memcmp(buf1, buf2, i) != 0)
498 			return (1);
499 	}
500 }
501 
502 static FILE *
503 opentemp(const char *file)
504 {
505 	char buf[BUFSIZ], tempfile[PATH_MAX];
506 	ssize_t nread;
507 	int ifd, ofd;
508 
509 	if (strcmp(file, "-") == 0)
510 		ifd = STDIN_FILENO;
511 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
512 		return (NULL);
513 
514 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
515 
516 	if ((ofd = mkstemp(tempfile)) < 0) {
517 		close(ifd);
518 		return (NULL);
519 	}
520 	unlink(tempfile);
521 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
522 		if (write(ofd, buf, nread) != nread) {
523 			close(ifd);
524 			close(ofd);
525 			return (NULL);
526 		}
527 	}
528 	close(ifd);
529 	lseek(ofd, (off_t)0, SEEK_SET);
530 	return (fdopen(ofd, "r"));
531 }
532 
533 char *
534 splice(char *dir, char *file)
535 {
536 	char *tail, *buf;
537 	size_t dirlen;
538 
539 	dirlen = strlen(dir);
540 	while (dirlen != 0 && dir[dirlen - 1] == '/')
541 	    dirlen--;
542 	if ((tail = strrchr(file, '/')) == NULL)
543 		tail = file;
544 	else
545 		tail++;
546 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
547 	return (buf);
548 }
549 
550 static void
551 prepare(int i, FILE *fd, off_t filesize, int flags)
552 {
553 	struct line *p;
554 	int j, h;
555 	size_t sz;
556 
557 	rewind(fd);
558 
559 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
560 	if (sz < 100)
561 		sz = 100;
562 
563 	p = xcalloc(sz + 3, sizeof(*p));
564 	for (j = 0; (h = readhash(fd, flags));) {
565 		if (j == sz) {
566 			sz = sz * 3 / 2;
567 			p = xreallocarray(p, sz + 3, sizeof(*p));
568 		}
569 		p[++j].value = h;
570 	}
571 	ds.len[i] = j;
572 	file[i] = p;
573 }
574 
575 static void
576 prune(void)
577 {
578 	int i, j;
579 
580 	for (ds.pref = 0; ds.pref < ds.len[0] && ds.pref < ds.len[1] &&
581 	    file[0][ds.pref + 1].value == file[1][ds.pref + 1].value;
582 	    ds.pref++)
583 		;
584 	for (ds.suff = 0; ds.suff < ds.len[0] - ds.pref && ds.suff < ds.len[1] - ds.pref &&
585 	    file[0][ds.len[0] - ds.suff].value == file[1][ds.len[1] - ds.suff].value;
586 	    ds.suff++)
587 		;
588 	for (j = 0; j < 2; j++) {
589 		ds.sfile[j] = file[j] + ds.pref;
590 		ds.slen[j] = ds.len[j] - ds.pref - ds.suff;
591 		for (i = 0; i <= ds.slen[j]; i++)
592 			ds.sfile[j][i].serial = i;
593 	}
594 }
595 
596 static void
597 equiv(struct line *a, int n, struct line *b, int m, int *c)
598 {
599 	int i, j;
600 
601 	i = j = 1;
602 	while (i <= n && j <= m) {
603 		if (a[i].value < b[j].value)
604 			a[i++].value = 0;
605 		else if (a[i].value == b[j].value)
606 			a[i++].value = j;
607 		else
608 			j++;
609 	}
610 	while (i <= n)
611 		a[i++].value = 0;
612 	b[m + 1].value = 0;
613 	j = 0;
614 	while (++j <= m) {
615 		c[j] = -b[j].serial;
616 		while (b[j + 1].value == b[j].value) {
617 			j++;
618 			c[j] = b[j].serial;
619 		}
620 	}
621 	c[j] = -1;
622 }
623 
624 /* Code taken from ping.c */
625 static int
626 isqrt(int n)
627 {
628 	int y, x = 1;
629 
630 	if (n == 0)
631 		return (0);
632 
633 	do { /* newton was a stinker */
634 		y = x;
635 		x = n / x;
636 		x += y;
637 		x /= 2;
638 	} while ((x - y) > 1 || (x - y) < -1);
639 
640 	return (x);
641 }
642 
643 static int
644 stone(int *a, int n, int *b, int *c, int flags)
645 {
646 	int i, k, y, j, l;
647 	int oldc, tc, oldl, sq;
648 	u_int numtries, bound;
649 
650 	if (flags & D_MINIMAL)
651 		bound = UINT_MAX;
652 	else {
653 		sq = isqrt(n);
654 		bound = MAXIMUM(256, sq);
655 	}
656 
657 	k = 0;
658 	c[0] = newcand(0, 0, 0);
659 	for (i = 1; i <= n; i++) {
660 		j = a[i];
661 		if (j == 0)
662 			continue;
663 		y = -b[j];
664 		oldl = 0;
665 		oldc = c[0];
666 		numtries = 0;
667 		do {
668 			if (y <= ds.clist[oldc].y)
669 				continue;
670 			l = search(c, k, y);
671 			if (l != oldl + 1)
672 				oldc = c[l - 1];
673 			if (l <= k) {
674 				if (ds.clist[c[l]].y <= y)
675 					continue;
676 				tc = c[l];
677 				c[l] = newcand(i, y, oldc);
678 				oldc = tc;
679 				oldl = l;
680 				numtries++;
681 			} else {
682 				c[l] = newcand(i, y, oldc);
683 				k++;
684 				break;
685 			}
686 		} while ((y = b[++j]) > 0 && numtries < bound);
687 	}
688 	return (k);
689 }
690 
691 static int
692 newcand(int x, int y, int pred)
693 {
694 	struct cand *q;
695 
696 	if (ds.clen == ds.clistlen) {
697 		ds.clistlen = ds.clistlen * 11 / 10;
698 		ds.clist = xreallocarray(ds.clist, ds.clistlen, sizeof(*ds.clist));
699 	}
700 	q = ds.clist + ds.clen;
701 	q->x = x;
702 	q->y = y;
703 	q->pred = pred;
704 	return (ds.clen++);
705 }
706 
707 static int
708 search(int *c, int k, int y)
709 {
710 	int i, j, l, t;
711 
712 	if (ds.clist[c[k]].y < y)	/* quick look for typical case */
713 		return (k + 1);
714 	i = 0;
715 	j = k + 1;
716 	for (;;) {
717 		l = (i + j) / 2;
718 		if (l <= i)
719 			break;
720 		t = ds.clist[c[l]].y;
721 		if (t > y)
722 			j = l;
723 		else if (t < y)
724 			i = l;
725 		else
726 			return (l);
727 	}
728 	return (l + 1);
729 }
730 
731 static void
732 unravel(int p)
733 {
734 	struct cand *q;
735 	int i;
736 
737 	for (i = 0; i <= ds.len[0]; i++)
738 		ds.J[i] = i <= ds.pref ? i :
739 		    i > ds.len[0] - ds.suff ? i + ds.len[1] - ds.len[0] : 0;
740 	for (q = ds.clist + p; q->y != 0; q = ds.clist + q->pred)
741 		ds.J[q->x + ds.pref] = q->y + ds.pref;
742 }
743 
744 /*
745  * Check does double duty:
746  *  1.	ferret out any fortuitous correspondences due
747  *	to confounding by hashing (which result in "jackpot")
748  *  2.  collect random access indexes to the two files
749  */
750 static void
751 check(FILE *f1, FILE *f2, int flags)
752 {
753 	int i, j, jackpot, c, d;
754 	long ctold, ctnew;
755 
756 	rewind(f1);
757 	rewind(f2);
758 	j = 1;
759 	ds.ixold[0] = ds.ixnew[0] = 0;
760 	jackpot = 0;
761 	ctold = ctnew = 0;
762 	for (i = 1; i <= ds.len[0]; i++) {
763 		if (ds.J[i] == 0) {
764 			ds.ixold[i] = ctold += skipline(f1);
765 			continue;
766 		}
767 		while (j < ds.J[i]) {
768 			ds.ixnew[j] = ctnew += skipline(f2);
769 			j++;
770 		}
771 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
772 			for (;;) {
773 				c = getc(f1);
774 				d = getc(f2);
775 				/*
776 				 * GNU diff ignores a missing newline
777 				 * in one file for -b or -w.
778 				 */
779 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
780 					if (c == EOF && d == '\n') {
781 						ctnew++;
782 						break;
783 					} else if (c == '\n' && d == EOF) {
784 						ctold++;
785 						break;
786 					}
787 				}
788 				ctold++;
789 				ctnew++;
790 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
791 				    isspace(d)) {
792 					do {
793 						if (c == '\n')
794 							break;
795 						ctold++;
796 					} while (isspace(c = getc(f1)));
797 					do {
798 						if (d == '\n')
799 							break;
800 						ctnew++;
801 					} while (isspace(d = getc(f2)));
802 				} else if ((flags & D_IGNOREBLANKS)) {
803 					while (isspace(c) && c != '\n') {
804 						c = getc(f1);
805 						ctold++;
806 					}
807 					while (isspace(d) && d != '\n') {
808 						d = getc(f2);
809 						ctnew++;
810 					}
811 				}
812 				if (ds.chrtran[c] != ds.chrtran[d]) {
813 					jackpot++;
814 					ds.J[i] = 0;
815 					if (c != '\n' && c != EOF)
816 						ctold += skipline(f1);
817 					if (d != '\n' && c != EOF)
818 						ctnew += skipline(f2);
819 					break;
820 				}
821 				if (c == '\n' || c == EOF)
822 					break;
823 			}
824 		} else {
825 			for (;;) {
826 				ctold++;
827 				ctnew++;
828 				if ((c = getc(f1)) != (d = getc(f2))) {
829 					/* jackpot++; */
830 					ds.J[i] = 0;
831 					if (c != '\n' && c != EOF)
832 						ctold += skipline(f1);
833 					if (d != '\n' && c != EOF)
834 						ctnew += skipline(f2);
835 					break;
836 				}
837 				if (c == '\n' || c == EOF)
838 					break;
839 			}
840 		}
841 		ds.ixold[i] = ctold;
842 		ds.ixnew[j] = ctnew;
843 		j++;
844 	}
845 	for (; j <= ds.len[1]; j++)
846 		ds.ixnew[j] = ctnew += skipline(f2);
847 	/*
848 	 * if (jackpot)
849 	 *	fprintf(stderr, "jackpot\n");
850 	 */
851 }
852 
853 /* shellsort CACM #201 */
854 static void
855 sort(struct line *a, int n)
856 {
857 	struct line *ai, *aim, w;
858 	int j, m = 0, k;
859 
860 	if (n == 0)
861 		return;
862 	for (j = 1; j <= n; j *= 2)
863 		m = 2 * j - 1;
864 	for (m /= 2; m != 0; m /= 2) {
865 		k = n - m;
866 		for (j = 1; j <= k; j++) {
867 			for (ai = &a[j]; ai > a; ai -= m) {
868 				aim = &ai[m];
869 				if (aim < ai)
870 					break;	/* wraparound */
871 				if (aim->value > ai[0].value ||
872 				    (aim->value == ai[0].value &&
873 					aim->serial > ai[0].serial))
874 					break;
875 				w.value = ai[0].value;
876 				ai[0].value = aim->value;
877 				aim->value = w.value;
878 				w.serial = ai[0].serial;
879 				ai[0].serial = aim->serial;
880 				aim->serial = w.serial;
881 			}
882 		}
883 	}
884 }
885 
886 static void
887 unsort(struct line *f, int l, int *b)
888 {
889 	int *a, i;
890 
891 	a = xcalloc(l + 1, sizeof(*a));
892 	for (i = 1; i <= l; i++)
893 		a[f[i].serial] = f[i].value;
894 	for (i = 1; i <= l; i++)
895 		b[i] = a[i];
896 	free(a);
897 }
898 
899 static int
900 skipline(FILE *f)
901 {
902 	int i, c;
903 
904 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
905 		continue;
906 	return (i);
907 }
908 
909 static void
910 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
911 {
912 	int m, i0, i1, j0, j1;
913 
914 	rewind(f1);
915 	rewind(f2);
916 	m = ds.len[0];
917 	ds.J[0] = 0;
918 	ds.J[m + 1] = ds.len[1] + 1;
919 	if (diff_args.diff_format != D_EDIT) {
920 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
921 			while (i0 <= m && ds.J[i0] == ds.J[i0 - 1] + 1)
922 				i0++;
923 			j0 = ds.J[i0 - 1] + 1;
924 			i1 = i0 - 1;
925 			while (i1 < m && ds.J[i1 + 1] == 0)
926 				i1++;
927 			j1 = ds.J[i1 + 1] - 1;
928 			ds.J[i1] = j1;
929 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
930 		}
931 	} else {
932 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
933 			while (i0 >= 1 && ds.J[i0] == ds.J[i0 + 1] - 1 && ds.J[i0] != 0)
934 				i0--;
935 			j0 = ds.J[i0 + 1] - 1;
936 			i1 = i0 + 1;
937 			while (i1 > 1 && ds.J[i1 - 1] == 0)
938 				i1--;
939 			j1 = ds.J[i1 - 1] + 1;
940 			ds.J[i1] = j1;
941 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
942 		}
943 	}
944 	if (m == 0)
945 		change(file1, f1, file2, f2, 1, 0, 1, ds.len[1], &flags);
946 	if (diff_args.diff_format == D_IFDEF) {
947 		for (;;) {
948 #define	c i0
949 			if ((c = getc(f1)) == EOF)
950 				return;
951 			diff_output("%c", c);
952 		}
953 #undef c
954 	}
955 	if (ds.anychange != 0) {
956 		if (diff_args.diff_format == D_CONTEXT)
957 			dump_context_vec(f1, f2, flags);
958 		else if (diff_args.diff_format == D_UNIFIED)
959 			dump_unified_vec(f1, f2, flags);
960 	}
961 }
962 
963 static void
964 range(int a, int b, char *separator)
965 {
966 	diff_output("%d", a > b ? b : a);
967 	if (a < b)
968 		diff_output("%s%d", separator, b);
969 }
970 
971 static void
972 uni_range(int a, int b)
973 {
974 	if (a < b)
975 		diff_output("%d,%d", a, b - a + 1);
976 	else if (a == b)
977 		diff_output("%d", b);
978 	else
979 		diff_output("%d,0", b);
980 }
981 
982 static char *
983 preadline(int fd, size_t rlen, off_t off)
984 {
985 	char *line;
986 	ssize_t nr;
987 
988 	line = xmalloc(rlen + 1);
989 	if ((nr = pread(fd, line, rlen, off)) < 0)
990 		err(2, "preadline");
991 	if (nr > 0 && line[nr-1] == '\n')
992 		nr--;
993 	line[nr] = '\0';
994 	return (line);
995 }
996 
997 static int
998 ignoreline(char *line)
999 {
1000 	return 0; /* do not ignore any lines */
1001 }
1002 
1003 /*
1004  * Indicate that there is a difference between lines a and b of the from file
1005  * to get to lines c to d of the to file.  If a is greater then b then there
1006  * are no lines in the from file involved and this means that there were
1007  * lines appended (beginning at b).  If c is greater than d then there are
1008  * lines missing from the to file.
1009  */
1010 static void
1011 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1012     int *pflags)
1013 {
1014 	static size_t max_context = 64;
1015 	int i;
1016 
1017 restart:
1018 	if (diff_args.diff_format != D_IFDEF && a > b && c > d)
1019 		return;
1020 	if (diff_args.ignore_pats != NULL) {
1021 		char *line;
1022 		/*
1023 		 * All lines in the change, insert, or delete must
1024 		 * match an ignore pattern for the change to be
1025 		 * ignored.
1026 		 */
1027 		if (a <= b) {		/* Changes and deletes. */
1028 			for (i = a; i <= b; i++) {
1029 				line = preadline(fileno(f1),
1030 				    ds.ixold[i] - ds.ixold[i - 1], ds.ixold[i - 1]);
1031 				if (!ignoreline(line))
1032 					goto proceed;
1033 			}
1034 		}
1035 		if (a > b || c <= d) {	/* Changes and inserts. */
1036 			for (i = c; i <= d; i++) {
1037 				line = preadline(fileno(f2),
1038 				    ds.ixnew[i] - ds.ixnew[i - 1], ds.ixnew[i - 1]);
1039 				if (!ignoreline(line))
1040 					goto proceed;
1041 			}
1042 		}
1043 		return;
1044 	}
1045 proceed:
1046 	if (*pflags & D_HEADER) {
1047 		diff_output("%s %s %s\n", diff_args.diffargs, file1, file2);
1048 		*pflags &= ~D_HEADER;
1049 	}
1050 	if (diff_args.diff_format == D_CONTEXT || diff_args.diff_format == D_UNIFIED) {
1051 		/*
1052 		 * Allocate change records as needed.
1053 		 */
1054 		if (ds.context_vec_ptr == ds.context_vec_end - 1) {
1055 			ptrdiff_t offset = ds.context_vec_ptr - ds.context_vec_start;
1056 			max_context <<= 1;
1057 			ds.context_vec_start = xreallocarray(ds.context_vec_start,
1058 			    max_context, sizeof(*ds.context_vec_start));
1059 			ds.context_vec_end = ds.context_vec_start + max_context;
1060 			ds.context_vec_ptr = ds.context_vec_start + offset;
1061 		}
1062 		if (ds.anychange == 0) {
1063 			/*
1064 			 * Print the context/unidiff header first time through.
1065 			 */
1066 			print_header(file1, file2);
1067 			ds.anychange = 1;
1068 		} else if (a > ds.context_vec_ptr->b + (2 * diff_args.diff_context) + 1 &&
1069 		    c > ds.context_vec_ptr->d + (2 * diff_args.diff_context) + 1) {
1070 			/*
1071 			 * If this change is more than 'diff_context' lines from the
1072 			 * previous change, dump the record and reset it.
1073 			 */
1074 			if (diff_args.diff_format == D_CONTEXT)
1075 				dump_context_vec(f1, f2, *pflags);
1076 			else
1077 				dump_unified_vec(f1, f2, *pflags);
1078 		}
1079 		ds.context_vec_ptr++;
1080 		ds.context_vec_ptr->a = a;
1081 		ds.context_vec_ptr->b = b;
1082 		ds.context_vec_ptr->c = c;
1083 		ds.context_vec_ptr->d = d;
1084 		return;
1085 	}
1086 	if (ds.anychange == 0)
1087 		ds.anychange = 1;
1088 	switch (diff_args.diff_format) {
1089 	case D_BRIEF:
1090 		return;
1091 	case D_NORMAL:
1092 	case D_EDIT:
1093 		range(a, b, ",");
1094 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1095 		if (diff_args.diff_format == D_NORMAL)
1096 			range(c, d, ",");
1097 		diff_output("\n");
1098 		break;
1099 	case D_REVERSE:
1100 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1101 		range(a, b, " ");
1102 		diff_output("\n");
1103 		break;
1104 	case D_NREVERSE:
1105 		if (a > b)
1106 			diff_output("a%d %d\n", b, d - c + 1);
1107 		else {
1108 			diff_output("d%d %d\n", a, b - a + 1);
1109 			if (!(c > d))
1110 				/* add changed lines */
1111 				diff_output("a%d %d\n", b, d - c + 1);
1112 		}
1113 		break;
1114 	}
1115 	if (diff_args.diff_format == D_NORMAL || diff_args.diff_format == D_IFDEF) {
1116 		fetch(ds.ixold, a, b, f1, '<', 1, *pflags);
1117 		if (a <= b && c <= d && diff_args.diff_format == D_NORMAL)
1118 			diff_output("---\n");
1119 	}
1120 	i = fetch(ds.ixnew, c, d, f2, diff_args.diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1121 	if (i != 0 && diff_args.diff_format == D_EDIT) {
1122 		/*
1123 		 * A non-zero return value for D_EDIT indicates that the
1124 		 * last line printed was a bare dot (".") that has been
1125 		 * escaped as ".." to prevent ed(1) from misinterpreting
1126 		 * it.  We have to add a substitute command to change this
1127 		 * back and restart where we left off.
1128 		 */
1129 		diff_output(".\n");
1130 		diff_output("%ds/.//\n", a + i - 1);
1131 		b = a + i - 1;
1132 		a = b + 1;
1133 		c += i;
1134 		goto restart;
1135 	}
1136 	if ((diff_args.diff_format == D_EDIT || diff_args.diff_format == D_REVERSE) && c <= d)
1137 		diff_output(".\n");
1138 	if (ds.inifdef) {
1139 		diff_output("#endif /* %s */\n", diff_args.ifdefname);
1140 		ds.inifdef = 0;
1141 	}
1142 }
1143 
1144 static int
1145 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1146 {
1147 	int i, j, c, lastc, col, nc;
1148 
1149 	/*
1150 	 * When doing #ifdef's, copy down to current line
1151 	 * if this is the first file, so that stuff makes it to output.
1152 	 */
1153 	if (diff_args.diff_format == D_IFDEF && oldfile) {
1154 		long curpos = ftell(lb);
1155 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1156 		nc = f[a > b ? b : a - 1] - curpos;
1157 		for (i = 0; i < nc; i++)
1158 			diff_output("%c", getc(lb));
1159 	}
1160 	if (a > b)
1161 		return (0);
1162 	if (diff_args.diff_format == D_IFDEF) {
1163 		if (ds.inifdef) {
1164 			diff_output("#else /* %s%s */\n",
1165 			    oldfile == 1 ? "!" : "", diff_args.ifdefname);
1166 		} else {
1167 			if (oldfile)
1168 				diff_output("#ifndef %s\n", diff_args.ifdefname);
1169 			else
1170 				diff_output("#ifdef %s\n", diff_args.ifdefname);
1171 		}
1172 		ds.inifdef = 1 + oldfile;
1173 	}
1174 	for (i = a; i <= b; i++) {
1175 		fseek(lb, f[i - 1], SEEK_SET);
1176 		nc = f[i] - f[i - 1];
1177 		if (diff_args.diff_format != D_IFDEF && ch != '\0') {
1178 			diff_output("%c", ch);
1179 			if (diff_args.Tflag && (diff_args.diff_format == D_NORMAL || diff_args.diff_format == D_CONTEXT
1180 			    || diff_args.diff_format == D_UNIFIED))
1181 				diff_output("\t");
1182 			else if (diff_args.diff_format != D_UNIFIED)
1183 				diff_output(" ");
1184 		}
1185 		col = 0;
1186 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1187 			if ((c = getc(lb)) == EOF) {
1188 				if (diff_args.diff_format == D_EDIT || diff_args.diff_format == D_REVERSE ||
1189 				    diff_args.diff_format == D_NREVERSE)
1190 					warnx("No newline at end of file");
1191 				else
1192 					diff_output("\n\\ No newline at end of "
1193 					    "file\n");
1194 				return (0);
1195 			}
1196 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1197 				do {
1198 					diff_output(" ");
1199 				} while (++col & 7);
1200 			} else {
1201 				if (diff_args.diff_format == D_EDIT && j == 1 && c == '\n'
1202 				    && lastc == '.') {
1203 					/*
1204 					 * Don't print a bare "." line
1205 					 * since that will confuse ed(1).
1206 					 * Print ".." instead and return,
1207 					 * giving the caller an offset
1208 					 * from which to restart.
1209 					 */
1210 					diff_output(".\n");
1211 					return (i - a + 1);
1212 				}
1213 				diff_output("%c", c);
1214 				col++;
1215 			}
1216 		}
1217 	}
1218 	return (0);
1219 }
1220 
1221 /*
1222  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1223  */
1224 static int
1225 readhash(FILE *f, int flags)
1226 {
1227 	int i, t, space;
1228 	int sum;
1229 
1230 	sum = 1;
1231 	space = 0;
1232 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1233 		if (flags & D_IGNORECASE)
1234 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1235 				if (t == EOF) {
1236 					if (i == 0)
1237 						return (0);
1238 					break;
1239 				}
1240 				sum = sum * 127 + ds.chrtran[t];
1241 			}
1242 		else
1243 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1244 				if (t == EOF) {
1245 					if (i == 0)
1246 						return (0);
1247 					break;
1248 				}
1249 				sum = sum * 127 + t;
1250 			}
1251 	} else {
1252 		for (i = 0;;) {
1253 			switch (t = getc(f)) {
1254 			case '\t':
1255 			case '\r':
1256 			case '\v':
1257 			case '\f':
1258 			case ' ':
1259 				space++;
1260 				continue;
1261 			default:
1262 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1263 					i++;
1264 					space = 0;
1265 				}
1266 				sum = sum * 127 + ds.chrtran[t];
1267 				i++;
1268 				continue;
1269 			case EOF:
1270 				if (i == 0)
1271 					return (0);
1272 				/* FALLTHROUGH */
1273 			case '\n':
1274 				break;
1275 			}
1276 			break;
1277 		}
1278 	}
1279 	/*
1280 	 * There is a remote possibility that we end up with a zero sum.
1281 	 * Zero is used as an EOF marker, so return 1 instead.
1282 	 */
1283 	return (sum == 0 ? 1 : sum);
1284 }
1285 
1286 static int
1287 asciifile(FILE *f)
1288 {
1289 	unsigned char buf[BUFSIZ];
1290 	size_t cnt;
1291 
1292 	if (f == NULL)
1293 		return (1);
1294 
1295 	rewind(f);
1296 	cnt = fread(buf, 1, sizeof(buf), f);
1297 	return (memchr(buf, '\0', cnt) == NULL);
1298 }
1299 
1300 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1301 
1302 static char *
1303 match_function(const long *f, int pos, FILE *fp)
1304 {
1305 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1306 	size_t nc;
1307 	int last = lastline;
1308 	char *state = NULL;
1309 
1310 	lastline = pos;
1311 	while (pos > last) {
1312 		fseek(fp, f[pos - 1], SEEK_SET);
1313 		nc = f[pos] - f[pos - 1];
1314 		if (nc >= sizeof(buf))
1315 			nc = sizeof(buf) - 1;
1316 		nc = fread(buf, 1, nc, fp);
1317 		if (nc > 0) {
1318 			buf[nc] = '\0';
1319 			buf[strcspn(buf, "\n")] = '\0';
1320 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1321 				if (begins_with(buf, "private:")) {
1322 					if (!state)
1323 						state = " (private)";
1324 				} else if (begins_with(buf, "protected:")) {
1325 					if (!state)
1326 						state = " (protected)";
1327 				} else if (begins_with(buf, "public:")) {
1328 					if (!state)
1329 						state = " (public)";
1330 				} else {
1331 					strlcpy(lastbuf, buf, sizeof lastbuf);
1332 					if (state)
1333 						strlcat(lastbuf, state,
1334 						    sizeof lastbuf);
1335 					lastmatchline = pos;
1336 					return lastbuf;
1337 				}
1338 			}
1339 		}
1340 		pos--;
1341 	}
1342 	return lastmatchline > 0 ? lastbuf : NULL;
1343 }
1344 
1345 /* dump accumulated "context" diff changes */
1346 static void
1347 dump_context_vec(FILE *f1, FILE *f2, int flags)
1348 {
1349 	struct context_vec *cvp = ds.context_vec_start;
1350 	int lowa, upb, lowc, upd, do_output;
1351 	int a, b, c, d;
1352 	char ch, *f;
1353 
1354 	if (ds.context_vec_start > ds.context_vec_ptr)
1355 		return;
1356 
1357 	b = d = 0;		/* gcc */
1358 	lowa = MAXIMUM(1, cvp->a - diff_args.diff_context);
1359 	upb = MINIMUM(ds.len[0], ds.context_vec_ptr->b + diff_args.diff_context);
1360 	lowc = MAXIMUM(1, cvp->c - diff_args.diff_context);
1361 	upd = MINIMUM(ds.len[1], ds.context_vec_ptr->d + diff_args.diff_context);
1362 
1363 	diff_output("***************");
1364 	if ((flags & D_PROTOTYPE)) {
1365 		f = match_function(ds.ixold, lowa-1, f1);
1366 		if (f != NULL)
1367 			diff_output(" %s", f);
1368 	}
1369 	diff_output("\n*** ");
1370 	range(lowa, upb, ",");
1371 	diff_output(" ****\n");
1372 
1373 	/*
1374 	 * Output changes to the "old" file.  The first loop suppresses
1375 	 * output if there were no changes to the "old" file (we'll see
1376 	 * the "old" lines as context in the "new" list).
1377 	 */
1378 	do_output = 0;
1379 	for (; cvp <= ds.context_vec_ptr; cvp++)
1380 		if (cvp->a <= cvp->b) {
1381 			cvp = ds.context_vec_start;
1382 			do_output++;
1383 			break;
1384 		}
1385 	if (do_output) {
1386 		while (cvp <= ds.context_vec_ptr) {
1387 			a = cvp->a;
1388 			b = cvp->b;
1389 			c = cvp->c;
1390 			d = cvp->d;
1391 
1392 			if (a <= b && c <= d)
1393 				ch = 'c';
1394 			else
1395 				ch = (a <= b) ? 'd' : 'a';
1396 
1397 			if (ch == 'a')
1398 				fetch(ds.ixold, lowa, b, f1, ' ', 0, flags);
1399 			else {
1400 				fetch(ds.ixold, lowa, a - 1, f1, ' ', 0, flags);
1401 				fetch(ds.ixold, a, b, f1,
1402 				    ch == 'c' ? '!' : '-', 0, flags);
1403 			}
1404 			lowa = b + 1;
1405 			cvp++;
1406 		}
1407 		fetch(ds.ixold, b + 1, upb, f1, ' ', 0, flags);
1408 	}
1409 	/* output changes to the "new" file */
1410 	diff_output("--- ");
1411 	range(lowc, upd, ",");
1412 	diff_output(" ----\n");
1413 
1414 	do_output = 0;
1415 	for (cvp = ds.context_vec_start; cvp <= ds.context_vec_ptr; cvp++)
1416 		if (cvp->c <= cvp->d) {
1417 			cvp = ds.context_vec_start;
1418 			do_output++;
1419 			break;
1420 		}
1421 	if (do_output) {
1422 		while (cvp <= ds.context_vec_ptr) {
1423 			a = cvp->a;
1424 			b = cvp->b;
1425 			c = cvp->c;
1426 			d = cvp->d;
1427 
1428 			if (a <= b && c <= d)
1429 				ch = 'c';
1430 			else
1431 				ch = (a <= b) ? 'd' : 'a';
1432 
1433 			if (ch == 'd')
1434 				fetch(ds.ixnew, lowc, d, f2, ' ', 0, flags);
1435 			else {
1436 				fetch(ds.ixnew, lowc, c - 1, f2, ' ', 0, flags);
1437 				fetch(ds.ixnew, c, d, f2,
1438 				    ch == 'c' ? '!' : '+', 0, flags);
1439 			}
1440 			lowc = d + 1;
1441 			cvp++;
1442 		}
1443 		fetch(ds.ixnew, d + 1, upd, f2, ' ', 0, flags);
1444 	}
1445 	ds.context_vec_ptr = ds.context_vec_start - 1;
1446 }
1447 
1448 /* dump accumulated "unified" diff changes */
1449 static void
1450 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1451 {
1452 	struct context_vec *cvp = ds.context_vec_start;
1453 	int lowa, upb, lowc, upd;
1454 	int a, b, c, d;
1455 	char ch, *f;
1456 
1457 	if (ds.context_vec_start > ds.context_vec_ptr)
1458 		return;
1459 
1460 	b = d = 0;		/* gcc */
1461 	lowa = MAXIMUM(1, cvp->a - diff_args.diff_context);
1462 	upb = MINIMUM(ds.len[0], ds.context_vec_ptr->b + diff_args.diff_context);
1463 	lowc = MAXIMUM(1, cvp->c - diff_args.diff_context);
1464 	upd = MINIMUM(ds.len[1], ds.context_vec_ptr->d + diff_args.diff_context);
1465 
1466 	diff_output("@@ -");
1467 	uni_range(lowa, upb);
1468 	diff_output(" +");
1469 	uni_range(lowc, upd);
1470 	diff_output(" @@");
1471 	if ((flags & D_PROTOTYPE)) {
1472 		f = match_function(ds.ixold, lowa-1, f1);
1473 		if (f != NULL)
1474 			diff_output(" %s", f);
1475 	}
1476 	diff_output("\n");
1477 
1478 	/*
1479 	 * Output changes in "unified" diff format--the old and new lines
1480 	 * are printed together.
1481 	 */
1482 	for (; cvp <= ds.context_vec_ptr; cvp++) {
1483 		a = cvp->a;
1484 		b = cvp->b;
1485 		c = cvp->c;
1486 		d = cvp->d;
1487 
1488 		/*
1489 		 * c: both new and old changes
1490 		 * d: only changes in the old file
1491 		 * a: only changes in the new file
1492 		 */
1493 		if (a <= b && c <= d)
1494 			ch = 'c';
1495 		else
1496 			ch = (a <= b) ? 'd' : 'a';
1497 
1498 		switch (ch) {
1499 		case 'c':
1500 			fetch(ds.ixold, lowa, a - 1, f1, ' ', 0, flags);
1501 			fetch(ds.ixold, a, b, f1, '-', 0, flags);
1502 			fetch(ds.ixnew, c, d, f2, '+', 0, flags);
1503 			break;
1504 		case 'd':
1505 			fetch(ds.ixold, lowa, a - 1, f1, ' ', 0, flags);
1506 			fetch(ds.ixold, a, b, f1, '-', 0, flags);
1507 			break;
1508 		case 'a':
1509 			fetch(ds.ixnew, lowc, c - 1, f2, ' ', 0, flags);
1510 			fetch(ds.ixnew, c, d, f2, '+', 0, flags);
1511 			break;
1512 		}
1513 		lowa = b + 1;
1514 		lowc = d + 1;
1515 	}
1516 	fetch(ds.ixnew, d + 1, upd, f2, ' ', 0, flags);
1517 
1518 	ds.context_vec_ptr = ds.context_vec_start - 1;
1519 }
1520 
1521 static void
1522 print_header(const char *file1, const char *file2)
1523 {
1524 	if (diff_args.label[0] != NULL)
1525 		diff_output("%s %s\n", diff_args.diff_format == D_CONTEXT ? "***" : "---",
1526 		    diff_args.label[0]);
1527 	else
1528 		diff_output("%s %s\t%s", diff_args.diff_format == D_CONTEXT ? "***" : "---",
1529 		    file1, ctime(&diff_args.stb1.st_mtime));
1530 	if (diff_args.label[1] != NULL)
1531 		diff_output("%s %s\n", diff_args.diff_format == D_CONTEXT ? "---" : "+++",
1532 		    diff_args.label[1]);
1533 	else
1534 		diff_output("%s %s\t%s", diff_args.diff_format == D_CONTEXT ? "---" : "+++",
1535 		    file2, ctime(&diff_args.stb2.st_mtime));
1536 }