Blob


/* Produce a unidiff output from a diff_result. */
/*
 * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18 #include <errno.h>
19 #include <stdbool.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <assert.h>
26 #include <arraylist.h>
27 #include <diff_main.h>
28 #include <diff_output.h>
30 #include "diff_internal.h"
31 #include "diff_debug.h"
33 bool
34 diff_chunk_context_empty(const struct diff_chunk_context *cc)
35 {
36 return diff_range_empty(&cc->chunk);
37 }
39 int
40 diff_chunk_get_left_start(const struct diff_chunk *c,
41 const struct diff_result *r, int context_lines)
42 {
43 int left_start = diff_atom_root_idx(r->left, c->left_start);
44 return MAX(0, left_start - context_lines);
45 }
47 int
48 diff_chunk_get_left_end(const struct diff_chunk *c,
49 const struct diff_result *r, int context_lines)
50 {
51 int left_start = diff_chunk_get_left_start(c, r, 0);
52 return MIN(r->left->atoms.len,
53 left_start + c->left_count + context_lines);
54 }
56 int
57 diff_chunk_get_right_start(const struct diff_chunk *c,
58 const struct diff_result *r, int context_lines)
59 {
60 int right_start = diff_atom_root_idx(r->right, c->right_start);
61 return MAX(0, right_start - context_lines);
62 }
64 int
65 diff_chunk_get_right_end(const struct diff_chunk *c,
66 const struct diff_result *r, int context_lines)
67 {
68 int right_start = diff_chunk_get_right_start(c, r, 0);
69 return MIN(r->right->atoms.len,
70 right_start + c->right_count + context_lines);
71 }
73 struct diff_chunk *
74 diff_chunk_get(const struct diff_result *r, int chunk_idx)
75 {
76 return &r->chunks.head[chunk_idx];
77 }
79 int
80 diff_chunk_get_left_count(struct diff_chunk *c)
81 {
82 return c->left_count;
83 }
85 int
86 diff_chunk_get_right_count(struct diff_chunk *c)
87 {
88 return c->right_count;
89 }
91 void
92 diff_chunk_context_get(struct diff_chunk_context *cc, const struct diff_result *r,
93 int chunk_idx, int context_lines)
94 {
95 const struct diff_chunk *c = &r->chunks.head[chunk_idx];
96 int left_start = diff_chunk_get_left_start(c, r, context_lines);
97 int left_end = diff_chunk_get_left_end(c, r, context_lines);
98 int right_start = diff_chunk_get_right_start(c, r, context_lines);
99 int right_end = diff_chunk_get_right_end(c, r, context_lines);
101 *cc = (struct diff_chunk_context){
102 .chunk = {
103 .start = chunk_idx,
104 .end = chunk_idx + 1,
105 },
106 .left = {
107 .start = left_start,
108 .end = left_end,
109 },
110 .right = {
111 .start = right_start,
112 .end = right_end,
113 },
114 };
117 bool
118 diff_chunk_contexts_touch(const struct diff_chunk_context *cc,
119 const struct diff_chunk_context *other)
121 return diff_ranges_touch(&cc->chunk, &other->chunk)
122 || diff_ranges_touch(&cc->left, &other->left)
123 || diff_ranges_touch(&cc->right, &other->right);
126 void
127 diff_chunk_contexts_merge(struct diff_chunk_context *cc,
128 const struct diff_chunk_context *other)
130 diff_ranges_merge(&cc->chunk, &other->chunk);
131 diff_ranges_merge(&cc->left, &other->left);
132 diff_ranges_merge(&cc->right, &other->right);
135 void
136 diff_chunk_context_load_change(struct diff_chunk_context *cc,
137 int *nchunks_used,
138 struct diff_result *result,
139 int start_chunk_idx,
140 int context_lines)
142 int i;
143 int seen_minus = 0, seen_plus = 0;
145 if (nchunks_used)
146 *nchunks_used = 0;
148 for (i = start_chunk_idx; i < result->chunks.len; i++) {
149 struct diff_chunk *chunk = &result->chunks.head[i];
150 enum diff_chunk_type t = diff_chunk_type(chunk);
151 struct diff_chunk_context next;
153 if (t != CHUNK_MINUS && t != CHUNK_PLUS) {
154 if (nchunks_used)
155 (*nchunks_used)++;
156 if (seen_minus || seen_plus)
157 break;
158 else
159 continue;
160 } else if (t == CHUNK_MINUS)
161 seen_minus = 1;
162 else if (t == CHUNK_PLUS)
163 seen_plus = 1;
165 if (diff_chunk_context_empty(cc)) {
166 /* Note down the start point, any number of subsequent
167 * chunks may be joined up to this chunk by being
168 * directly adjacent. */
169 diff_chunk_context_get(cc, result, i, context_lines);
170 if (nchunks_used)
171 (*nchunks_used)++;
172 continue;
175 /* There already is a previous chunk noted down for being
176 * printed. Does it join up with this one? */
177 diff_chunk_context_get(&next, result, i, context_lines);
179 if (diff_chunk_contexts_touch(cc, &next)) {
180 /* This next context touches or overlaps the previous
181 * one, join. */
182 diff_chunk_contexts_merge(cc, &next);
183 if (nchunks_used)
184 (*nchunks_used)++;
185 continue;
186 } else
187 break;
191 struct diff_output_unidiff_state {
192 bool header_printed;
193 char prototype[DIFF_FUNCTION_CONTEXT_SIZE];
194 int last_prototype_idx;
195 };
197 struct diff_output_unidiff_state *
198 diff_output_unidiff_state_alloc(void)
200 struct diff_output_unidiff_state *state;
202 state = calloc(1, sizeof(struct diff_output_unidiff_state));
203 if (state != NULL)
204 diff_output_unidiff_state_reset(state);
205 return state;
208 void
209 diff_output_unidiff_state_reset(struct diff_output_unidiff_state *state)
211 state->header_printed = false;
212 memset(state->prototype, 0, sizeof(state->prototype));
213 state->last_prototype_idx = 0;
/*
 * Release a state obtained from diff_output_unidiff_state_alloc().
 * The pointer is passed straight to free(), so NULL is accepted.
 */
void
diff_output_unidiff_state_free(struct diff_output_unidiff_state *state)
{
	free(state);
}
222 static int
223 output_unidiff_chunk(struct diff_output_info *outinfo, FILE *dest,
224 struct diff_output_unidiff_state *state,
225 const struct diff_input_info *info,
226 const struct diff_result *result,
227 bool print_header, bool show_function_prototypes,
228 const struct diff_chunk_context *cc, unsigned int ncontext)
230 int rc, left_start, left_len, right_start, right_len;
231 off_t outoff = 0, *offp;
232 uint8_t *typep;
234 if (diff_range_empty(&cc->left) && diff_range_empty(&cc->right))
235 return DIFF_RC_OK;
237 if (outinfo && outinfo->line_offsets.len > 0) {
238 unsigned int idx = outinfo->line_offsets.len - 1;
239 outoff = outinfo->line_offsets.head[idx];
242 if (print_header && !(state->header_printed)) {
243 rc = fprintf(dest, "--- %s\n",
244 diff_output_get_label_left(info));
245 if (rc < 0)
246 return errno;
247 if (outinfo) {
248 ARRAYLIST_ADD(offp, outinfo->line_offsets);
249 if (offp == NULL)
250 return ENOMEM;
251 outoff += rc;
252 *offp = outoff;
253 ARRAYLIST_ADD(typep, outinfo->line_types);
254 if (typep == NULL)
255 return ENOMEM;
256 *typep = DIFF_LINE_MINUS;
258 rc = fprintf(dest, "+++ %s\n",
259 diff_output_get_label_right(info));
260 if (rc < 0)
261 return errno;
262 if (outinfo) {
263 ARRAYLIST_ADD(offp, outinfo->line_offsets);
264 if (offp == NULL)
265 return ENOMEM;
266 outoff += rc;
267 *offp = outoff;
268 ARRAYLIST_ADD(typep, outinfo->line_types);
269 if (typep == NULL)
270 return ENOMEM;
271 *typep = DIFF_LINE_PLUS;
273 state->header_printed = true;
276 left_len = cc->left.end - cc->left.start;
277 if (result->left->atoms.len == 0)
278 left_start = 0;
279 else if (left_len == 0 && cc->left.start > 0)
280 left_start = cc->left.start;
281 else
282 left_start = cc->left.start + 1;
284 right_len = cc->right.end - cc->right.start;
285 if (result->right->atoms.len == 0)
286 right_start = 0;
287 else if (right_len == 0 && cc->right.start > 0)
288 right_start = cc->right.start;
289 else
290 right_start = cc->right.start + 1;
292 if (show_function_prototypes) {
293 rc = diff_output_match_function_prototype(state->prototype,
294 sizeof(state->prototype), &state->last_prototype_idx,
295 result, cc, ncontext);
296 if (rc)
297 return rc;
300 if (left_len == 1 && right_len == 1) {
301 rc = fprintf(dest, "@@ -%d +%d @@%s%s\n",
302 left_start, right_start,
303 state->prototype[0] ? " " : "",
304 state->prototype[0] ? state->prototype : "");
305 } else if (left_len == 1 && right_len != 1) {
306 rc = fprintf(dest, "@@ -%d +%d,%d @@%s%s\n",
307 left_start, right_start, right_len,
308 state->prototype[0] ? " " : "",
309 state->prototype[0] ? state->prototype : "");
310 } else if (left_len != 1 && right_len == 1) {
311 rc = fprintf(dest, "@@ -%d,%d +%d @@%s%s\n",
312 left_start, left_len, right_start,
313 state->prototype[0] ? " " : "",
314 state->prototype[0] ? state->prototype : "");
315 } else {
316 rc = fprintf(dest, "@@ -%d,%d +%d,%d @@%s%s\n",
317 left_start, left_len, right_start, right_len,
318 state->prototype[0] ? " " : "",
319 state->prototype[0] ? state->prototype : "");
321 if (rc < 0)
322 return errno;
323 if (outinfo) {
324 ARRAYLIST_ADD(offp, outinfo->line_offsets);
325 if (offp == NULL)
326 return ENOMEM;
327 outoff += rc;
328 *offp = outoff;
329 ARRAYLIST_ADD(typep, outinfo->line_types);
330 if (typep == NULL)
331 return ENOMEM;
332 *typep = DIFF_LINE_HUNK;
335 /* Got the absolute line numbers where to start printing, and the index
336 * of the interesting (non-context) chunk.
337 * To print context lines above the interesting chunk, nipping on the
338 * previous chunk index may be necessary.
339 * It is guaranteed to be only context lines where left == right, so it
340 * suffices to look on the left. */
341 const struct diff_chunk *first_chunk;
342 int chunk_start_line;
343 first_chunk = &result->chunks.head[cc->chunk.start];
344 chunk_start_line = diff_atom_root_idx(result->left,
345 first_chunk->left_start);
346 if (cc->left.start < chunk_start_line) {
347 rc = diff_output_lines(outinfo, dest, " ",
348 &result->left->atoms.head[cc->left.start],
349 chunk_start_line - cc->left.start);
350 if (rc)
351 return rc;
354 /* Now write out all the joined chunks and contexts between them */
355 int c_idx;
356 for (c_idx = cc->chunk.start; c_idx < cc->chunk.end; c_idx++) {
357 const struct diff_chunk *c = &result->chunks.head[c_idx];
359 if (c->left_count && c->right_count)
360 rc = diff_output_lines(outinfo, dest,
361 c->solved ? " " : "?",
362 c->left_start, c->left_count);
363 else if (c->left_count && !c->right_count)
364 rc = diff_output_lines(outinfo, dest,
365 c->solved ? "-" : "?",
366 c->left_start, c->left_count);
367 else if (c->right_count && !c->left_count)
368 rc = diff_output_lines(outinfo, dest,
369 c->solved ? "+" : "?",
370 c->right_start, c->right_count);
371 if (rc)
372 return rc;
374 if (cc->chunk.end == result->chunks.len) {
375 rc = diff_output_trailing_newline_msg(outinfo, dest, c);
376 if (rc != DIFF_RC_OK)
377 return rc;
381 /* Trailing context? */
382 const struct diff_chunk *last_chunk;
383 int chunk_end_line;
384 last_chunk = &result->chunks.head[cc->chunk.end - 1];
385 chunk_end_line = diff_atom_root_idx(result->left,
386 last_chunk->left_start
387 + last_chunk->left_count);
388 if (cc->left.end > chunk_end_line) {
389 rc = diff_output_lines(outinfo, dest, " ",
390 &result->left->atoms.head[chunk_end_line],
391 cc->left.end - chunk_end_line);
392 if (rc)
393 return rc;
395 rc = diff_output_trailing_newline_msg(outinfo, dest,
396 &result->chunks.head[result->chunks.len - 1]);
397 if (rc != DIFF_RC_OK)
398 return rc;
401 return DIFF_RC_OK;
404 int
405 diff_output_unidiff_chunk(struct diff_output_info **output_info, FILE *dest,
406 struct diff_output_unidiff_state *state,
407 const struct diff_input_info *info,
408 const struct diff_result *result,
409 const struct diff_chunk_context *cc)
411 struct diff_output_info *outinfo = NULL;
412 int flags = (result->left->root->diff_flags |
413 result->right->root->diff_flags);
414 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
416 if (output_info) {
417 *output_info = diff_output_info_alloc();
418 if (*output_info == NULL)
419 return ENOMEM;
420 outinfo = *output_info;
423 return output_unidiff_chunk(outinfo, dest, state, info,
424 result, false, show_function_prototypes, cc, 0);
427 int
428 diff_output_unidiff(struct diff_output_info **output_info,
429 FILE *dest, const struct diff_input_info *info,
430 const struct diff_result *result,
431 unsigned int context_lines)
433 struct diff_output_unidiff_state *state;
434 struct diff_chunk_context cc = {};
435 struct diff_output_info *outinfo = NULL;
436 int atomizer_flags = (result->left->atomizer_flags|
437 result->right->atomizer_flags);
438 int flags = (result->left->root->diff_flags |
439 result->right->root->diff_flags);
440 bool show_function_prototypes = (flags & DIFF_FLAG_SHOW_PROTOTYPES);
441 bool force_text = (flags & DIFF_FLAG_FORCE_TEXT_DATA);
442 bool have_binary = (atomizer_flags & DIFF_ATOMIZER_FOUND_BINARY_DATA);
443 off_t outoff = 0, *offp;
444 uint8_t *typep;
445 int rc, i;
447 if (!result)
448 return EINVAL;
449 if (result->rc != DIFF_RC_OK)
450 return result->rc;
452 if (output_info) {
453 *output_info = diff_output_info_alloc();
454 if (*output_info == NULL)
455 return ENOMEM;
456 outinfo = *output_info;
459 if (have_binary && !force_text) {
460 for (i = 0; i < result->chunks.len; i++) {
461 struct diff_chunk *c = &result->chunks.head[i];
462 enum diff_chunk_type t = diff_chunk_type(c);
464 if (t != CHUNK_MINUS && t != CHUNK_PLUS)
465 continue;
467 if (outinfo && outinfo->line_offsets.len > 0) {
468 unsigned int idx =
469 outinfo->line_offsets.len - 1;
470 outoff = outinfo->line_offsets.head[idx];
473 rc = fprintf(dest, "Binary files %s and %s differ\n",
474 diff_output_get_label_left(info),
475 diff_output_get_label_right(info));
476 if (outinfo) {
477 ARRAYLIST_ADD(offp, outinfo->line_offsets);
478 if (offp == NULL)
479 return ENOMEM;
480 outoff += rc;
481 *offp = outoff;
482 ARRAYLIST_ADD(typep, outinfo->line_types);
483 if (typep == NULL)
484 return ENOMEM;
485 *typep = DIFF_LINE_NONE;
487 break;
490 return DIFF_RC_OK;
493 state = diff_output_unidiff_state_alloc();
494 if (state == NULL) {
495 if (output_info) {
496 diff_output_info_free(*output_info);
497 *output_info = NULL;
499 return ENOMEM;
502 #if DEBUG
503 unsigned int check_left_pos, check_right_pos;
504 check_left_pos = 0;
505 check_right_pos = 0;
506 for (i = 0; i < result->chunks.len; i++) {
507 struct diff_chunk *c = &result->chunks.head[i];
508 enum diff_chunk_type t = diff_chunk_type(c);
510 debug("[%d] %s lines L%d R%d @L %d @R %d\n",
511 i, (t == CHUNK_MINUS ? "minus" :
512 (t == CHUNK_PLUS ? "plus" :
513 (t == CHUNK_SAME ? "same" : "?"))),
514 c->left_count,
515 c->right_count,
516 c->left_start ? diff_atom_root_idx(result->left, c->left_start) : -1,
517 c->right_start ? diff_atom_root_idx(result->right, c->right_start) : -1);
518 assert(check_left_pos == diff_atom_root_idx(result->left, c->left_start));
519 assert(check_right_pos == diff_atom_root_idx(result->right, c->right_start));
520 check_left_pos += c->left_count;
521 check_right_pos += c->right_count;
524 assert(check_left_pos == result->left->atoms.len);
525 assert(check_right_pos == result->right->atoms.len);
526 #endif
528 for (i = 0; i < result->chunks.len; i++) {
529 struct diff_chunk *c = &result->chunks.head[i];
530 enum diff_chunk_type t = diff_chunk_type(c);
531 struct diff_chunk_context next;
533 if (t != CHUNK_MINUS && t != CHUNK_PLUS)
534 continue;
536 if (diff_chunk_context_empty(&cc)) {
537 /* These are the first lines being printed.
538 * Note down the start point, any number of subsequent
539 * chunks may be joined up to this unidiff chunk by
540 * context lines or by being directly adjacent. */
541 diff_chunk_context_get(&cc, result, i, context_lines);
542 debug("new chunk to be printed:"
543 " chunk %d-%d left %d-%d right %d-%d\n",
544 cc.chunk.start, cc.chunk.end,
545 cc.left.start, cc.left.end,
546 cc.right.start, cc.right.end);
547 continue;
550 /* There already is a previous chunk noted down for being
551 * printed. Does it join up with this one? */
552 diff_chunk_context_get(&next, result, i, context_lines);
553 debug("new chunk to be printed:"
554 " chunk %d-%d left %d-%d right %d-%d\n",
555 next.chunk.start, next.chunk.end,
556 next.left.start, next.left.end,
557 next.right.start, next.right.end);
559 if (diff_chunk_contexts_touch(&cc, &next)) {
560 /* This next context touches or overlaps the previous
561 * one, join. */
562 diff_chunk_contexts_merge(&cc, &next);
563 debug("new chunk to be printed touches previous chunk,"
564 " now: left %d-%d right %d-%d\n",
565 cc.left.start, cc.left.end,
566 cc.right.start, cc.right.end);
567 continue;
570 /* No touching, so the previous context is complete with a gap
571 * between it and this next one. Print the previous one and
572 * start fresh here. */
573 debug("new chunk to be printed does not touch previous chunk;"
574 " print left %d-%d right %d-%d\n",
575 cc.left.start, cc.left.end, cc.right.start, cc.right.end);
576 output_unidiff_chunk(outinfo, dest, state, info, result,
577 true, show_function_prototypes, &cc, context_lines);
578 cc = next;
579 debug("new unprinted chunk is left %d-%d right %d-%d\n",
580 cc.left.start, cc.left.end, cc.right.start, cc.right.end);
583 if (!diff_chunk_context_empty(&cc))
584 output_unidiff_chunk(outinfo, dest, state, info, result,
585 true, show_function_prototypes, &cc, context_lines);
586 diff_output_unidiff_state_free(state);
587 return DIFF_RC_OK;