Blob


1 /*
2 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
3 * Copyright (c) 2020 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/mman.h>
19 #include <sys/stat.h>
20 #include <sys/queue.h>
22 #include <errno.h>
23 #include <sha1.h>
24 #include <sha2.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
29 #include "got_object.h"
30 #include "got_opentemp.h"
31 #include "got_error.h"
32 #include "got_diff.h"
34 #include "got_lib_diff.h"
36 const struct diff_algo_config myers_then_patience;
37 const struct diff_algo_config myers_then_myers_divide;
38 const struct diff_algo_config patience;
39 const struct diff_algo_config myers_divide;
41 const struct diff_algo_config myers_then_patience = {
42 .impl = diff_algo_myers,
43 .permitted_state_size = 1024 * 1024 * sizeof(int),
44 .fallback_algo = &patience,
45 };
47 const struct diff_algo_config myers_then_myers_divide = {
48 .impl = diff_algo_myers,
49 .permitted_state_size = 1024 * 1024 * sizeof(int),
50 .fallback_algo = &myers_divide,
51 };
53 const struct diff_algo_config patience = {
54 .impl = diff_algo_patience,
55 /* After subdivision, do Patience again: */
56 .inner_algo = &patience,
57 /* If subdivision failed, do Myers Divide et Impera: */
58 .fallback_algo = &myers_then_myers_divide,
59 };
61 const struct diff_algo_config myers_divide = {
62 .impl = diff_algo_myers_divide,
63 /* When division succeeded, start from the top: */
64 .inner_algo = &myers_then_myers_divide,
65 /* (fallback_algo = NULL implies diff_algo_none). */
66 };
68 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
69 * do a Myers-divide. */
70 const struct diff_config diff_config_myers_then_myers_divide = {
71 .atomize_func = diff_atomize_text_by_line,
72 .algo = &myers_then_myers_divide,
73 };
75 /* If the state for a forward-Myers is small enough, use Myers, otherwise first
76 * do a Patience. */
77 const struct diff_config diff_config_myers_then_patience = {
78 .atomize_func = diff_atomize_text_by_line,
79 .algo = &myers_then_patience,
80 };
82 /* Directly force Patience as a first divider of the source file. */
83 const struct diff_config diff_config_patience = {
84 .atomize_func = diff_atomize_text_by_line,
85 .algo = &patience,
86 };
88 /* Directly force Patience as a first divider of the source file. */
89 const struct diff_config diff_config_no_algo = {
90 .atomize_func = diff_atomize_text_by_line,
91 };
93 const struct got_error *
94 got_diffreg_close(char *p1, size_t size1, char *p2, size_t size2)
95 {
96 const struct got_error *err = NULL;
98 if (p1 && munmap(p1, size1) == -1 && err == NULL)
99 err = got_error_from_errno("munmap");
100 if (p2 && munmap(p2, size2) == -1 && err == NULL)
101 err = got_error_from_errno("munmap");
102 return err;
105 const struct got_error *
106 got_diff_get_config(struct diff_config **cfg,
107 enum got_diff_algorithm algorithm,
108 diff_atomize_func_t atomize_func, void *atomize_func_data)
110 *cfg = calloc(1, sizeof(**cfg));
111 if (*cfg == NULL)
112 return got_error_from_errno("calloc");
114 switch (algorithm) {
115 case GOT_DIFF_ALGORITHM_PATIENCE:
116 (*cfg)->algo = &patience;
117 break;
118 case GOT_DIFF_ALGORITHM_MYERS:
119 (*cfg)->algo = &myers_then_myers_divide;
120 break;
121 default:
122 return got_error_msg(GOT_ERR_NOT_IMPL, "bad diff algorithm");
125 if (atomize_func) {
126 (*cfg)->atomize_func = atomize_func;
127 (*cfg)->atomize_func_data = atomize_func_data;
128 } else
129 (*cfg)->atomize_func = diff_atomize_text_by_line;
131 (*cfg)->max_recursion_depth = 0; /* use default recursion depth */
133 return NULL;
136 const struct got_error *
137 got_diff_prepare_file(FILE *f, char **p, size_t *size,
138 struct diff_data *diff_data, const struct diff_config *cfg,
139 int ignore_whitespace, int force_text_diff)
141 const struct got_error *err = NULL;
142 struct stat st;
143 int diff_flags = 0, rc;
145 *size = 0;
147 diff_flags |= DIFF_FLAG_SHOW_PROTOTYPES;
148 if (ignore_whitespace)
149 diff_flags |= DIFF_FLAG_IGNORE_WHITESPACE;
150 if (force_text_diff)
151 diff_flags |= DIFF_FLAG_FORCE_TEXT_DATA;
153 if (fstat(fileno(f), &st) == -1) {
154 err = got_error_from_errno("fstat");
155 goto done;
157 #ifndef GOT_DIFF_NO_MMAP
158 *p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE,
159 fileno(f), 0);
160 if (*p == MAP_FAILED)
161 #endif
162 *p = NULL; /* fall back on file I/O */
164 rc = diff_atomize_file(diff_data, cfg, f, *p, st.st_size, diff_flags);
165 if (rc) {
166 err = got_error_set_errno(rc, "diff_atomize_file");
167 goto done;
169 done:
170 if (err)
171 diff_data_free(diff_data);
172 else
173 *size = st.st_size;
174 return err;
177 const struct got_error *
178 got_diffreg(struct got_diffreg_result **diffreg_result, FILE *f1, FILE *f2,
179 enum got_diff_algorithm algorithm, int ignore_whitespace,
180 int force_text_diff)
182 const struct got_error *err = NULL;
183 struct diff_config *cfg = NULL;
184 char *p1 = NULL, *p2 = NULL;
185 size_t size1, size2;
186 struct diff_data d_left, d_right;
187 struct diff_data *left, *right;
188 struct diff_result *diff_result;
190 if (diffreg_result) {
191 *diffreg_result = calloc(1, sizeof(**diffreg_result));
192 if (*diffreg_result == NULL)
193 return got_error_from_errno("calloc");
194 left = &(*diffreg_result)->left;
195 right = &(*diffreg_result)->right;
196 } else {
197 memset(&d_left, 0, sizeof(d_left));
198 memset(&d_right, 0, sizeof(d_right));
199 left = &d_left;
200 right = &d_right;
203 err = got_diff_get_config(&cfg, algorithm, NULL, NULL);
204 if (err)
205 goto done;
207 err = got_diff_prepare_file(f1, &p1, &size1, left, cfg,
208 ignore_whitespace, force_text_diff);
209 if (err)
210 goto done;
212 err = got_diff_prepare_file(f2, &p2, &size2, right, cfg,
213 ignore_whitespace, force_text_diff);
214 if (err)
215 goto done;
217 diff_result = diff_main(cfg, left, right);
218 if (diff_result == NULL) {
219 err = got_error_set_errno(ENOMEM, "malloc");
220 goto done;
222 if (diff_result->rc != DIFF_RC_OK) {
223 err = got_error_set_errno(diff_result->rc, "diff");
224 goto done;
227 if (diffreg_result) {
228 (*diffreg_result)->result = diff_result;
229 (*diffreg_result)->map1 = p1;
230 (*diffreg_result)->size1 = size1;
231 (*diffreg_result)->map2 = p2;
232 (*diffreg_result)->size2 = size2;
234 done:
235 free(cfg);
236 if (diffreg_result == NULL) {
237 diff_data_free(left);
238 diff_data_free(right);
240 if (err) {
241 got_diffreg_close(p1, size1, p2, size2);
242 if (diffreg_result) {
243 diff_data_free(left);
244 diff_data_free(right);
245 free(*diffreg_result);
246 *diffreg_result = NULL;
250 return err;
253 const struct got_error *
254 got_diffreg_output(struct got_diff_line **lines, size_t *nlines,
255 struct got_diffreg_result *diff_result, int f1_exists, int f2_exists,
256 const char *path1, const char *path2,
257 enum got_diff_output_format output_format, int context_lines, FILE *outfile)
259 struct diff_input_info info = {
260 .left_path = path1,
261 .right_path = path2,
262 .flags = 0,
263 };
264 int rc;
265 struct diff_output_info *output_info;
267 if (!f1_exists)
268 info.flags |= DIFF_INPUT_LEFT_NONEXISTENT;
269 if (!f2_exists)
270 info.flags |= DIFF_INPUT_RIGHT_NONEXISTENT;
272 switch (output_format) {
273 case GOT_DIFF_OUTPUT_UNIDIFF:
274 rc = diff_output_unidiff(
275 lines ? &output_info : NULL, outfile, &info,
276 diff_result->result, context_lines);
277 if (rc != DIFF_RC_OK)
278 return got_error_set_errno(rc, "diff_output_unidiff");
279 break;
280 case GOT_DIFF_OUTPUT_PLAIN:
281 rc = diff_output_plain(lines ? &output_info : NULL,
282 outfile, &info, diff_result->result, 1);
283 if (rc != DIFF_RC_OK)
284 return got_error_set_errno(rc, "diff_output_edscript");
285 break;
289 if (lines && *lines) {
290 if (output_info->line_offsets.len > 0) {
291 struct got_diff_line *p;
292 off_t prev_offset = 0, *o;
293 uint8_t *o2;
294 int i, len;
295 if (*nlines > 0) {
296 prev_offset = (*lines)[*nlines - 1].offset;
297 /*
298 * First line offset is always zero. Skip it
299 * when appending to a pre-populated array.
300 */
301 o = &output_info->line_offsets.head[1];
302 o2 = &output_info->line_types.head[1];
303 len = output_info->line_offsets.len - 1;
304 } else {
305 o = &output_info->line_offsets.head[0];
306 o2 = &output_info->line_types.head[0];
307 len = output_info->line_offsets.len;
309 p = reallocarray(*lines, *nlines + len, sizeof(**lines));
310 if (p == NULL)
311 return got_error_from_errno("calloc");
312 for (i = 0; i < len; i++) {
313 p[*nlines + i].offset = o[i] + prev_offset;
314 p[*nlines + i].type = o2[i];
316 *lines = p;
317 *nlines += len;
319 diff_output_info_free(output_info);
322 return NULL;
325 const struct got_error *
326 got_diffreg_result_free(struct got_diffreg_result *diffreg_result)
328 const struct got_error *err;
330 diff_result_free(diffreg_result->result);
331 diff_data_free(&diffreg_result->left);
332 diff_data_free(&diffreg_result->right);
333 err = got_diffreg_close(diffreg_result->map1, diffreg_result->size1,
334 diffreg_result->map2, diffreg_result->size2);
335 free(diffreg_result);
336 return err;
339 const struct got_error *
340 got_diffreg_result_free_left(struct got_diffreg_result *diffreg_result)
342 diff_data_free(&diffreg_result->left);
343 memset(&diffreg_result->left, 0, sizeof(diffreg_result->left));
344 return got_diffreg_close(diffreg_result->map1, diffreg_result->size1,
345 NULL, 0);
348 const struct got_error *
349 got_diffreg_result_free_right(struct got_diffreg_result *diffreg_result)
351 diff_data_free(&diffreg_result->right);
352 memset(&diffreg_result->right, 0, sizeof(diffreg_result->right));
353 return got_diffreg_close(NULL, 0, diffreg_result->map2,
354 diffreg_result->size2);