Blob


1 /* Produce a unidiff output from a diff_result. */
2 /*
3 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <errno.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <assert.h>
25 #include <arraylist.h>
26 #include <diff_main.h>
27 #include <diff_output.h>
29 #include "diff_internal.h"
30 #include "diff_debug.h"
32 bool
33 diff_chunk_context_empty(const struct diff_chunk_context *cc)
34 {
35 return diff_range_empty(&cc->chunk);
36 }
38 int
39 diff_chunk_get_left_start(const struct diff_chunk *c,
40 const struct diff_result *r, int context_lines)
41 {
42 int left_start = diff_atom_root_idx(r->left, c->left_start);
43 return MAX(0, left_start - context_lines);
44 }
46 int
47 diff_chunk_get_left_end(const struct diff_chunk *c,
48 const struct diff_result *r, int context_lines)
49 {
50 int left_start = diff_chunk_get_left_start(c, r, 0);
51 return MIN(r->left->atoms.len,
52 left_start + c->left_count + context_lines);
53 }
55 int
56 diff_chunk_get_right_start(const struct diff_chunk *c,
57 const struct diff_result *r, int context_lines)
58 {
59 int right_start = diff_atom_root_idx(r->right, c->right_start);
60 return MAX(0, right_start - context_lines);
61 }
63 int
64 diff_chunk_get_right_end(const struct diff_chunk *c,
65 const struct diff_result *r, int context_lines)
66 {
67 int right_start = diff_chunk_get_right_start(c, r, 0);
68 return MIN(r->right->atoms.len,
69 right_start + c->right_count + context_lines);
70 }
72 struct diff_chunk *
73 diff_chunk_get(const struct diff_result *r, int chunk_idx)
74 {
75 return &r->chunks.head[chunk_idx];
76 }
78 int
79 diff_chunk_get_left_count(struct diff_chunk *c)
80 {
81 return c->left_count;
82 }
84 int
85 diff_chunk_get_right_count(struct diff_chunk *c)
86 {
87 return c->right_count;
88 }
90 void
91 diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r,
92 int chunk_idx, int context_lines)
93 {
94 const struct diff_chunk *c = &r->chunks.head[chunk_idx];
95 int left_start = diff_chunk_get_left_start(c, r, context_lines);
96 int left_end = diff_chunk_get_left_end(c, r, context_lines);
97 int right_start = diff_chunk_get_right_start(c, r, context_lines);
98 int right_end = diff_chunk_get_right_end(c, r, context_lines);
100 *cc = (struct diff_chunk_context){
101 .chunk = {
102 .start = chunk_idx,
103 .end = chunk_idx + 1,
104 },
105 .left = {
106 .start = left_start,
107 .end = left_end,
108 },
109 .right = {
110 .start = right_start,
111 .end = right_end,
112 },
113 };
116 bool
117 diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
118 const struct diff_chunk_context *other)
120 return diff_ranges_touch(&cc->chunk, &other->chunk)
121 || diff_ranges_touch(&cc->left, &other->left)
122 || diff_ranges_touch(&cc->right, &other->right);
125 void
126 diff_chunk_contexts_merge(struct diff_chunk_context *cc,
127 const struct diff_chunk_context *other)
129 diff_ranges_merge(&cc->chunk, &other->chunk);
130 diff_ranges_merge(&cc->left, &other->left);
131 diff_ranges_merge(&cc->right, &other->right);
134 void
135 diff_chunk_context_load_change(struct diff_chunk_context *cc,
136 int *nchunks_used,
137 struct diff_result *result,
138 int start_chunk_idx,
139 int context_lines)
141 int i;
142 int seen_minus = 0, seen_plus = 0;
144 if (nchunks_used)
145 *nchunks_used = 0;
147 for (i = start_chunk_idx; i < result->chunks.len; i++) {
148 struct diff_chunk *chunk = &result->chunks.head[i];
149 enum diff_chunk_type t = diff_chunk_type(chunk);
150 struct diff_chunk_context next;
152 if (t != CHUNK_MINUS && t != CHUNK_PLUS) {
153 if (nchunks_used)
154 (*nchunks_used)++;
155 if (seen_minus || seen_plus)
156 break;
157 else
158 continue;
159 } else if (t == CHUNK_MINUS)
160 seen_minus = 1;
161 else if (t == CHUNK_PLUS)
162 seen_plus = 1;
164 if (diff_chunk_context_empty(cc)) {
165 /* Note down the start point, any number of subsequent
166 * chunks may be joined up to this chunk by being
167 * directly adjacent. */
168 diff_chunk_context_get(cc, result, i, context_lines);
169 if (nchunks_used)
170 (*nchunks_used)++;
171 continue;
174 /* There already is a previous chunk noted down for being
175 * printed. Does it join up with this one? */
176 diff_chunk_context_get(&next, result, i, context_lines);
178 if (diff_chunk_contexts_touch(cc, &next)) {
179 /* This next context touches or overlaps the previous
180 * one, join. */
181 diff_chunk_contexts_merge(cc, &next);
182 if (nchunks_used)
183 (*nchunks_used)++;
184 continue;
185 } else
186 break;
/* State carried across successive unidiff chunk outputs for one diff. */
struct diff_output_unidiff_state {
	/* Set once the "---" / "+++" file header lines have been written. */
	bool header_printed;
};
194 struct diff_output_unidiff_state *
195 diff_output_unidiff_state_alloc(void)
197 struct diff_output_unidiff_state *state;
199 state = calloc(1, sizeof(struct diff_output_unidiff_state));
200 if (state != NULL)
201 diff_output_unidiff_state_reset(state);
202 return state;
205 void
206 diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
208 state->header_printed = false;
/* Release a state object; it is handed straight to free(). */
void
diff_output_unidiff_state_free(struct diff_output_unidiff_state *state)
{
	free(state);
}
217 static int
218 output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest,
219 struct diff_output_unidiff_state *state,
220 const struct diff_input_info *info,
221 const struct diff_result *result,
222 bool print_header, bool show_function_prototypes,
223 const struct diff_chunk_context *cc)
225 int rc, left_start, left_len, right_start, right_len;
226 off_t outoff = 0, *offp;
227 char *prototype = NULL;
229 if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
230 return DIFF_RC_OK;
232 if (outinfo && outinfo->line_offsets.len > 0) {
233 unsigned int idx = outinfo->line_offsets.len - 1;
234 outoff = outinfo->line_offsets.head[idx];
237 if (print_header && !(state->header_printed)) {
238 rc = fprintf(dest, "--- %s\n",
239 diff_output_get_label_left(info));
240 if (rc < 0)
241 return errno;
242 if (outinfo) {
243 ARRAYLIST_ADD(offp, outinfo->line_offsets);
244 if (offp == NULL)
245 return ENOMEM;
246 outoff += rc;
247 *offp = outoff;
250 rc = fprintf(dest, "+++ %s\n",
251 diff_output_get_label_right(info));
252 if (rc < 0)
253 return errno;
254 if (outinfo) {
255 ARRAYLIST_ADD(offp, outinfo->line_offsets);
256 if (offp == NULL)
257 return ENOMEM;
258 outoff += rc;
259 *offp = outoff;
262 state->header_printed = true;
265 left_len = cc->left.end - cc->left.start;
266 if (result->left->atoms.len == 0)
267 left_start = 0;
268 else if (left_len == 0 && cc->left.start > 0)
269 left_start = cc->left.start;
270 else
271 left_start = cc->left.start + 1;
273 right_len = cc->right.end - cc->right.start;
274 if (result->right->atoms.len == 0)
275 right_start = 0;
276 else if (right_len == 0 && cc->right.start > 0)
277 right_start = cc->right.start;
278 else
279 right_start = cc->right.start + 1;
281 if (show_function_prototypes) {
282 rc = diff_output_match_function_prototype(&prototype,
283 result, cc);
284 if (rc)
285 return rc;
288 if (left_len == 1 && right_len == 1) {
289 rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
290 left_start, right_start,
291 prototype ? " " : "",
292 prototype ? : "");
293 } else if (left_len == 1 && right_len != 1) {
294 rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
295 left_start, right_start, right_len,
296 prototype ? " " : "",
297 prototype ? : "");
298 } else if (left_len != 1 && right_len == 1) {
299 rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
300 left_start, left_len, right_start,
301 prototype ? " " : "",
302 prototype ? : "");
303 } else {
304 rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
305 left_start, left_len, right_start, right_len,
306 prototype ? " " : "",
307 prototype ? : "");
309 free(prototype);
310 if (rc < 0)
311 return errno;
312 if (outinfo) {
313 ARRAYLIST_ADD(offp, outinfo->line_offsets);
314 if (offp == NULL)
315 return ENOMEM;
316 outoff += rc;
317 *offp = outoff;
321 /* Got the absolute line numbers where to start printing, and the index
322 * of the interesting (non-context) chunk.
323 * To print context lines above the interesting chunk, nipping on the
324 * previous chunk index may be necessary.
325 * It is guaranteed to be only context lines where left == right, so it
326 * suffices to look on the left. */
327 const struct diff_chunk *first_chunk;
328 int chunk_start_line;
329 first_chunk = &result->chunks.head[cc->chunk.start];
330 chunk_start_line = diff_atom_root_idx(result->left,
331 first_chunk->left_start);
332 if (cc->left.start < chunk_start_line) {
333 rc = diff_output_lines(outinfo, dest, " ",
334 &result->left->atoms.head[cc->left.start],
335 chunk_start_line - cc->left.start);
336 if (rc)
337 return rc;
340 /* Now write out all the joined chunks and contexts between them */
341 int c_idx;
342 for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
343 const struct diff_chunk *c = &result->chunks.head[c_idx];
345 if (c->left_count && c->right_count)
346 rc = diff_output_lines(outinfo, dest,
347 c->solved ? " " : "?",
348 c->left_start, c->left_count);
349 else if (c->left_count && !c->right_count)
350 rc = diff_output_lines(outinfo, dest,
351 c->solved ? "-" : "?",
352 c->left_start, c->left_count);
353 else if (c->right_count && !c->left_count)
354 rc = diff_output_lines(outinfo, dest,
355 c->solved ? "+" : "?",
356 c->right_start, c->right_count);
357 if (rc)
358 return rc;
360 if (cc->chunk.end == result->chunks.len) {
361 rc = diff_output_trailing_newline_msg(outinfo, dest, c);
362 if (rc != DIFF_RC_OK)
363 return rc;
367 /* Trailing context? */
368 const struct diff_chunk *last_chunk;
369 int chunk_end_line;
370 last_chunk = &result->chunks.head[cc->chunk.end - 1];
371 chunk_end_line = diff_atom_root_idx(result->left,
372 last_chunk->left_start
373 + last_chunk->left_count);
374 if (cc->left.end > chunk_end_line) {
375 rc = diff_output_lines(outinfo, dest, " ",
376 &result->left->atoms.head[chunk_end_line],
377 cc->left.end - chunk_end_line);
378 if (rc)
379 return rc;
382 return DIFF_RC_OK;
385 int
386 diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
387 struct diff_output_unidiff_state *state,
388 const struct diff_input_info *info,
389 const struct diff_result *result,
390 const struct diff_chunk_context *cc)
392 struct diff_output_info *outinfo = NULL;
393 int flags = (result->left->root->diff_flags |
394 result->right->root->diff_flags);
395 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
397 if (output_info) {
398 *output_info = diff_output_info_alloc();
399 if (*output_info == NULL)
400 return ENOMEM;
401 outinfo = *output_info;
404 return output_unidiff_chunk(outinfo, dest, state, info,
405 result, false, show_function_prototypes, cc);
408 int
409 diff_output_unidiff(struct diff_output_info **output_info,
410 FILE *dest, const struct diff_input_info *info,
411 const struct diff_result *result,
412 unsigned int context_lines)
414 struct diff_output_unidiff_state *state;
415 struct diff_chunk_context cc = {};
416 struct diff_output_info *outinfo = NULL;
417 int atomizer_flags = (result->left->atomizer_flags|
418 result->right->atomizer_flags);
419 int flags = (result->left->root->diff_flags |
420 result->right->root->diff_flags);
421 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
422 bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
423 bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
424 int i;
426 if (!result)
427 return EINVAL;
428 if (result->rc != DIFF_RC_OK)
429 return result->rc;
431 if (output_info) {
432 *output_info = diff_output_info_alloc();
433 if (*output_info == NULL)
434 return ENOMEM;
435 outinfo = *output_info;
438 if (have_binary && !force_text) {
439 for (i = 0; i < result->chunks.len; i++) {
440 struct diff_chunk *c = &result->chunks.head[i];
441 enum diff_chunk_type t = diff_chunk_type(c);
443 if (t != CHUNK_MINUS && t != CHUNK_PLUS)
444 continue;
446 fprintf(dest, "Binary files %s and %s differ\n",
447 diff_output_get_label_left(info),
448 diff_output_get_label_right(info));
449 break;
452 return DIFF_RC_OK;
455 state = diff_output_unidiff_state_alloc();
456 if (state == NULL) {
457 if (output_info) {
458 diff_output_info_free(*output_info);
459 *output_info = NULL;
461 return ENOMEM;
464 #if DEBUG
465 unsigned int check_left_pos, check_right_pos;
466 check_left_pos = 0;
467 check_right_pos = 0;
468 for (i = 0; i < result->chunks.len; i++) {
469 struct diff_chunk *c = &result->chunks.head[i];
470 enum diff_chunk_type t = diff_chunk_type(c);
472 debug("[%d] %s lines L%d R%d @L %d @R %d\n",
473 i, (t == CHUNK_MINUS ? "minus" :
474 (t == CHUNK_PLUS ? "plus" :
475 (t == CHUNK_SAME ? "same" : "?"))),
476 c->left_count,
477 c->right_count,
478 c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1,
479 c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1);
480 assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start));
481 assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start));
482 check_left_pos += c->left_count;
483 check_right_pos += c->right_count;
486 assert(check_left_pos == result->left->atoms.len);
487 assert(check_right_pos == result->right->atoms.len);
488 #endif
490 for (i = 0; i < result->chunks.len; i++) {
491 struct diff_chunk *c = &result->chunks.head[i];
492 enum diff_chunk_type t = diff_chunk_type(c);
493 struct diff_chunk_context next;
495 if (t != CHUNK_MINUS && t != CHUNK_PLUS)
496 continue;
498 if (diff_chunk_context_empty(&cc)) {
499 /* These are the first lines being printed.
500 * Note down the start point, any number of subsequent
501 * chunks may be joined up to this unidiff chunk by
502 * context lines or by being directly adjacent. */
503 diff_chunk_context_get(&cc, result, i, context_lines);
504 debug("new chunk to be printed:"
505 " chunk %d-%d left %d-%d right %d-%d\n",
506 cc.chunk.start, cc.chunk.end,
507 cc.left.start, cc.left.end,
508 cc.right.start, cc.right.end);
509 continue;
512 /* There already is a previous chunk noted down for being
513 * printed. Does it join up with this one? */
514 diff_chunk_context_get(&next, result, i, context_lines);
515 debug("new chunk to be printed:"
516 " chunk %d-%d left %d-%d right %d-%d\n",
517 next.chunk.start, next.chunk.end,
518 next.left.start, next.left.end,
519 next.right.start, next.right.end);
521 if (diff_chunk_contexts_touch(&cc, &next)) {
522 /* This next context touches or overlaps the previous
523 * one, join. */
524 diff_chunk_contexts_merge(&cc, &next);
525 debug("new chunk to be printed touches previous chunk,"
526 " now: left %d-%d right %d-%d\n",
527 cc.left.start, cc.left.end,
528 cc.right.start, cc.right.end);
529 continue;
532 /* No touching, so the previous context is complete with a gap
533 * between it and this next one. Print the previous one and
534 * start fresh here. */
535 debug("new chunk to be printed does not touch previous chunk;"
536 " print left %d-%d right %d-%d\n",
537 cc.left.start, cc.left.end, cc.right.start, cc.right.end);
538 output_unidiff_chunk(outinfo, dest, state, info, result,
539 true, show_function_prototypes, &cc);
540 cc = next;
541 debug("new unprinted chunk is left %d-%d right %d-%d\n",
542 cc.left.start, cc.left.end, cc.right.start, cc.right.end);
545 if (!diff_chunk_context_empty(&cc))
546 output_unidiff_chunk(outinfo, dest, state, info, result,
547 true, show_function_prototypes, &cc);
548 diff_output_unidiff_state_free(state);
549 return DIFF_RC_OK;