Blob


1 /*
2 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
3 * Copyright (c) 2020 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/mman.h>
19 #include <sys/stat.h>
20 #include <sys/queue.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
27 #include "got_object.h"
28 #include "got_opentemp.h"
29 #include "got_error.h"
31 #include "got_lib_diff.h"
33 const struct diff_algo_config myers_then_patience;
34 const struct diff_algo_config myers_then_myers_divide;
35 const struct diff_algo_config patience;
36 const struct diff_algo_config myers_divide;
38 const struct diff_algo_config myers_then_patience = (struct diff_algo_config){
39 .impl = diff_algo_myers,
40 .permitted_state_size = 1024 * 1024 * sizeof(int),
41 .fallback_algo = &patience,
42 };
44 const struct diff_algo_config myers_then_myers_divide =
45 (struct diff_algo_config){
46 .impl = diff_algo_myers,
47 .permitted_state_size = 1024 * 1024 * sizeof(int),
48 .fallback_algo = &myers_divide,
49 };
51 const struct diff_algo_config patience = (struct diff_algo_config){
52 .impl = diff_algo_patience,
53 /* After subdivision, do Patience again: */
54 .inner_algo = &patience,
55 /* If subdivision failed, do Myers Divide et Impera: */
56 .fallback_algo = &myers_then_myers_divide,
57 };
59 const struct diff_algo_config myers_divide = (struct diff_algo_config){
60 .impl = diff_algo_myers_divide,
61 /* When division succeeded, start from the top: */
62 .inner_algo = &myers_then_myers_divide,
63 /* (fallback_algo = NULL implies diff_algo_none). */
64 };
66 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
67 * do a Myers-divide. */
68 const struct diff_config diff_config_myers_then_myers_divide = {
69 .atomize_func = diff_atomize_text_by_line,
70 .algo = &myers_then_myers_divide,
71 };
73 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
74 * do a Patience. */
75 const struct diff_config diff_config_myers_then_patience = {
76 .atomize_func = diff_atomize_text_by_line,
77 .algo = &myers_then_patience,
78 };
80 /* Directly force Patience as a first divider of the source file. */
81 const struct diff_config diff_config_patience = {
82 .atomize_func = diff_atomize_text_by_line,
83 .algo = &patience,
84 };
86 /* Directly force Patience as a first divider of the source file. */
87 const struct diff_config diff_config_no_algo = {
88 .atomize_func = diff_atomize_text_by_line,
89 };
/*
 * Release the memory mappings and streams belonging to a pair of diffed
 * files. NULL mappings and NULL streams are skipped. Every release step is
 * attempted even if an earlier one failed; the error returned is the one
 * from the first step that failed, or NULL if all succeeded.
 */
const struct got_error *
got_diffreg_close(FILE *f1, char *p1, size_t size1,
    FILE *f2, char *p2, size_t size2)
{
	const struct got_error *first_err = NULL;

	if (p1 != NULL && munmap(p1, size1) == -1)
		first_err = got_error_from_errno("munmap");

	if (p2 != NULL && munmap(p2, size2) == -1) {
		if (first_err == NULL)
			first_err = got_error_from_errno("munmap");
	}

	if (f1 != NULL && fclose(f1) == EOF) {
		if (first_err == NULL)
			first_err = got_error_from_errno("fclose");
	}

	if (f2 != NULL && fclose(f2) == EOF) {
		if (first_err == NULL)
			first_err = got_error_from_errno("fclose");
	}

	return first_err;
}
108 const struct got_error *
109 got_diff_get_config(struct diff_config **cfg,
110 enum got_diff_algorithm algorithm,
111 diff_atomize_func_t atomize_func, void *atomize_func_data)
113 *cfg = calloc(1, sizeof(**cfg));
114 if (*cfg == NULL)
115 return got_error_from_errno("calloc");
117 switch (algorithm) {
118 case GOT_DIFF_ALGORITHM_PATIENCE:
119 (*cfg)->algo = &patience;
120 break;
121 case GOT_DIFF_ALGORITHM_MYERS:
122 (*cfg)->algo = &myers_then_myers_divide;
123 break;
124 default:
125 return got_error_msg(GOT_ERR_NOT_IMPL, "bad diff algorithm");
128 if (atomize_func) {
129 (*cfg)->atomize_func = atomize_func;
130 (*cfg)->atomize_func_data = atomize_func_data;
131 } else
132 (*cfg)->atomize_func = diff_atomize_text_by_line;
134 (*cfg)->max_recursion_depth = 0; /* use default recursion depth */
136 return NULL;
139 const struct got_error *
140 got_diff_prepare_file(FILE *f, char **p, size_t *size,
141 struct diff_data *diff_data, const struct diff_config *cfg,
142 int ignore_whitespace, int force_text_diff)
144 const struct got_error *err = NULL;
145 struct stat st;
146 int diff_flags = 0, rc;
148 *size = 0;
150 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
151 if (ignore_whitespace)
152 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
153 if (force_text_diff)
154 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
156 if (fstat(fileno(f), &st) == -1) {
157 err = got_error_from_errno("fstat");
158 goto done;
160 #ifndef GOT_DIFF_NO_MMAP
161 *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE,
162 fileno(f), 0);
163 if (*p == MAP_FAILED)
164 #endif
165 *p = NULL; /* fall back on file I/O */
167 rc = diff_atomize_file(diff_data, cfg, f, *p, st.st_size, diff_flags);
168 if (rc) {
169 err = got_error_set_errno(rc, "diff_atomize_file");
170 goto done;
172 done:
173 if (err)
174 diff_data_free(diff_data);
175 else
176 *size = st.st_size;
177 return err;
180 const struct got_error *
181 got_diffreg(struct got_diffreg_result **diffreg_result, FILE *f1, FILE *f2,
182 enum got_diff_algorithm algorithm, int ignore_whitespace,
183 int force_text_diff)
185 const struct got_error *err = NULL;
186 struct diff_config *cfg = NULL;
187 char *p1 = NULL, *p2 = NULL;
188 int f1_created = 0, f2_created = 0;
189 size_t size1, size2;
190 struct diff_data d_left, d_right;
191 struct diff_data *left, *right;
192 struct diff_result *diff_result;
194 if (diffreg_result) {
195 *diffreg_result = calloc(1, sizeof(**diffreg_result));
196 if (*diffreg_result == NULL)
197 return got_error_from_errno("calloc");
198 left = &(*diffreg_result)->left;
199 right = &(*diffreg_result)->right;
200 } else {
201 memset(&d_left, 0, sizeof(d_left));
202 memset(&d_right, 0, sizeof(d_right));
203 left = &d_left;
204 right = &d_right;
207 err = got_diff_get_config(&cfg, algorithm, NULL, NULL);
208 if (err)
209 goto done;
211 if (f1 == NULL) {
212 f1_created = 1;
213 f1 = got_opentemp();
214 if (f1 == NULL) {
215 err = got_error_from_errno("got_opentemp");
216 goto done;
219 if (f2 == NULL) {
220 f2_created = 1;
221 f2 = got_opentemp();
222 if (f2 == NULL) {
223 err = got_error_from_errno("got_opentemp");
224 goto done;
228 err = got_diff_prepare_file(f1, &p1, &size1, left, cfg,
229 ignore_whitespace, force_text_diff);
230 if (err)
231 goto done;
233 err = got_diff_prepare_file(f2, &p2, &size2, right, cfg,
234 ignore_whitespace, force_text_diff);
235 if (err)
236 goto done;
238 diff_result = diff_main(cfg, left, right);
239 if (diff_result == NULL) {
240 err = got_error_set_errno(ENOMEM, "malloc");
241 goto done;
243 if (diff_result->rc != DIFF_RC_OK) {
244 err = got_error_set_errno(diff_result->rc, "diff");
245 goto done;
248 if (diffreg_result) {
249 (*diffreg_result)->result = diff_result;
250 if (f1_created)
251 (*diffreg_result)->f1 = f1;
252 (*diffreg_result)->map1 = p1;
253 (*diffreg_result)->size1 = size1;
254 if (f2_created)
255 (*diffreg_result)->f2 = f2;
256 (*diffreg_result)->map2 = p2;
257 (*diffreg_result)->size2 = size2;
259 done:
260 free(cfg);
261 if (diffreg_result == NULL) {
262 diff_data_free(left);
263 diff_data_free(right);
265 if (err) {
266 got_diffreg_close(f1_created ? f1 : NULL, p1, size1,
267 f2_created ? f2 : NULL, p2, size2);
268 if (diffreg_result) {
269 diff_data_free(left);
270 diff_data_free(right);
271 free(*diffreg_result);
272 *diffreg_result = NULL;
276 return err;
279 const struct got_error *
280 got_diffreg_output(off_t **line_offsets, size_t *nlines,
281 struct got_diffreg_result *diff_result, int f1_exists, int f2_exists,
282 const char *path1, const char *path2,
283 enum got_diff_output_format output_format, int context_lines, FILE *outfile)
285 struct diff_input_info info = {
286 .left_path = path1,
287 .right_path = path2,
288 .flags = 0,
289 };
290 int rc;
291 struct diff_output_info *output_info;
293 if (!f1_exists)
294 info.flags |= DIFF_INPUT_LEFT_NONEXISTENT;
295 if (!f2_exists)
296 info.flags |= DIFF_INPUT_RIGHT_NONEXISTENT;
298 switch (output_format) {
299 case GOT_DIFF_OUTPUT_UNIDIFF:
300 rc = diff_output_unidiff(
301 line_offsets ? &output_info : NULL, outfile, &info,
302 diff_result->result, context_lines);
303 if (rc != DIFF_RC_OK)
304 return got_error_set_errno(rc, "diff_output_unidiff");
305 break;
306 case GOT_DIFF_OUTPUT_EDSCRIPT:
307 rc = diff_output_edscript(line_offsets ? &output_info : NULL,
308 outfile, &info, diff_result->result);
309 if (rc != DIFF_RC_OK)
310 return got_error_set_errno(rc, "diff_output_edscript");
311 break;
315 if (line_offsets && *line_offsets) {
316 if (output_info->line_offsets.len > 0) {
317 off_t prev_offset = 0, *p, *o;
318 int i, len;
319 if (*nlines > 0) {
320 prev_offset = (*line_offsets)[*nlines - 1];
321 /*
322 * First line offset is always zero. Skip it
323 * when appending to a pre-populated array.
324 */
325 o = &output_info->line_offsets.head[1];
326 len = output_info->line_offsets.len - 1;
327 } else {
328 o = &output_info->line_offsets.head[0];
329 len = output_info->line_offsets.len;
331 p = reallocarray(*line_offsets, *nlines + len,
332 sizeof(off_t));
333 if (p == NULL)
334 return got_error_from_errno("calloc");
335 for (i = 0; i < len; i++)
336 p[*nlines + i] = o[i] + prev_offset;
337 *line_offsets = p;
338 *nlines += len;
340 diff_output_info_free(output_info);
343 return NULL;
346 const struct got_error *
347 got_diffreg_result_free(struct got_diffreg_result *diffreg_result)
349 const struct got_error *err;
351 diff_result_free(diffreg_result->result);
352 diff_data_free(&diffreg_result->left);
353 diff_data_free(&diffreg_result->right);
354 err = got_diffreg_close(diffreg_result->f1, diffreg_result->map1,
355 diffreg_result->size1, diffreg_result->f2,
356 diffreg_result->map2, diffreg_result->size2);
357 free(diffreg_result);
358 return err;
361 const struct got_error *
362 got_diffreg_result_free_left(struct got_diffreg_result *diffreg_result)
364 diff_data_free(&diffreg_result->left);
365 memset(&diffreg_result->left, 0, sizeof(diffreg_result->left));
366 return got_diffreg_close(diffreg_result->f1, diffreg_result->map1,
367 diffreg_result->size1, NULL, NULL, 0);
370 const struct got_error *
371 got_diffreg_result_free_right(struct got_diffreg_result *diffreg_result)
373 diff_data_free(&diffreg_result->right);
374 memset(&diffreg_result->right, 0, sizeof(diffreg_result->right));
375 return got_diffreg_close(NULL, NULL, 0, diffreg_result->f2,
376 diffreg_result->map2, diffreg_result->size2);