Commit Diff


commit - f1cbc3bc64f9e253d5c1239d55b3179e6fd215c4
commit + c27a5e6631524233c4a822aa306ab92c09f1d1ca
blob - 63258946e8404a1df4a6b0bd159a0fcc690a7932
blob + 0d4400c94065902d33850e15657291c27011c982
--- lib/blame.c
+++ lib/blame.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
+ * Copyright (c) 2020 Neels Hofmeyr <neels@hofmeyr.de>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -51,9 +52,7 @@ struct got_blame_line {
 
 struct got_blame {
 	FILE *f;
-	char *map;
-	struct diff_data left_data;
-	struct diff_data right_data;
+	off_t size;
 	const struct diff_config *cfg;
 	size_t filesize;
 	int nlines;
@@ -61,6 +60,15 @@ struct got_blame {
 	struct got_blame_line *lines; /* one per line */
 	off_t *line_offsets;		/* one per line */
 	int ncommits;
+
+	/*
+	 * Map line numbers of an older version of the file to valid line
+	 * numbers in blame->f. This map is updated with each commit we
+	 * traverse throughout the file's history.
+	 * Lines mapped to -1 do not correspond to any line in blame->f.
+	 */
+	int *linemap2;
+	int nlines2;
 };
 
 static const struct got_error *
@@ -71,10 +79,10 @@ annotate_line(struct got_blame *blame, int lineno, str
 	const struct got_error *err = NULL;
 	struct got_blame_line *line;
 
-	if (lineno < 1 || lineno > blame->nlines)
+	if (lineno < 0 || lineno >= blame->nlines)
 		return NULL;
 
-	line = &blame->lines[lineno - 1];
+	line = &blame->lines[lineno];
 	if (line->annotated)
 		return NULL;
 
@@ -82,38 +90,54 @@ annotate_line(struct got_blame *blame, int lineno, str
 	line->annotated = 1;
 	blame->nannotated++;
 	if (cb)
-		err = cb(arg, blame->nlines, lineno, id);
+		err = cb(arg, blame->nlines, lineno + 1, id);
 	return err;
 }
 
 static const struct got_error *
-blame_changes(struct got_blame *blame, struct diff_result *diff_result,
-    struct got_object_id *commit_id,
+blame_changes(struct got_blame *blame, int *linemap1,
+    struct diff_result *diff_result, struct got_object_id *commit_id,
     const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
     void *arg)
 {
 	const struct got_error *err = NULL;
 	int i;
+	int idx1 = 0, idx2 = 0;
 
-	for (i = 0; i < diff_result->chunks.len; i++) {
+	for (i = 0; i < diff_result->chunks.len &&
+	    blame->nannotated < blame->nlines; i++) {
 		struct diff_chunk *c = diff_chunk_get(diff_result, i);
+		unsigned int left_start, left_count;
 		unsigned int right_start, right_count;
-		int lineno, len;
-		int ln;
+		int j;
 
-		if (diff_chunk_get_left_count(c) != 0)
+		/*
+		 * We do not need to worry about idx1/idx2 growing out
+		 * of bounds because the diff implementation ensures
+		 * that chunk ranges never exceed the number of lines
+		 * in the left/right input files.
+		 */
+		left_start = diff_chunk_get_left_start(c, diff_result, 0);
+		left_count = diff_chunk_get_left_count(c);
+		right_start = diff_chunk_get_right_start(c, diff_result, 0);
+		right_count = diff_chunk_get_right_count(c);
+
+		if (left_count == right_count) {
+			for (j = 0; j < left_count; j++) {
+				linemap1[idx1++] = blame->linemap2[idx2++];
+			}
 			continue;
+		}
 
-		len = diff_chunk_get_right_count(c);
-		if (len == 0)
+		if (right_count == 0) {
+			for (j = 0; j < left_count; j++) {
+				linemap1[idx1++] = -1;
+			}
 			continue;
+		}
 
-		right_start = diff_chunk_get_right_start(c, diff_result, 0);
-		right_count = diff_chunk_get_right_count(c);
-
-		lineno = right_start + 1;
-		len = right_count;
-		for (ln = lineno; ln < lineno + len; ln++) {
+		for (j = 0; j < right_count; j++) {
+			int ln = blame->linemap2[idx2++];
 			err = annotate_line(blame, ln, commit_id, cb, arg);
 			if (err)
 				return err;
@@ -126,73 +150,117 @@ blame_changes(struct got_blame *blame, struct diff_res
 }
 
 static const struct got_error *
-blame_commit(struct got_blame *blame, struct got_object_id *parent_id,
-    struct got_object_id *id, const char *path, struct got_repository *repo,
+blame_commit(struct got_blame *blame, struct got_object_id *id,
+    const char *path, struct got_repository *repo,
     const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
     void *arg)
 {
 	const struct got_error *err = NULL;
-	struct got_object *obj = NULL;
-	struct got_object_id *obj_id = NULL;
 	struct got_commit_object *commit = NULL;
-	struct got_blob_object *blob = NULL;
+	struct got_object_qid *pid = NULL;
+	struct got_object_id *blob_id = NULL, *pblob_id = NULL;
+	struct got_blob_object *blob = NULL, *pblob = NULL;
 	struct got_diffreg_result *diffreg_result = NULL;
+	FILE *f1 = NULL, *f2 = NULL;
+	size_t size1, size2;
+	int nlines1, nlines2;
+	int *linemap1 = NULL;
 
-	err = got_object_open_as_commit(&commit, repo, parent_id);
+	err = got_object_open_as_commit(&commit, repo, id);
 	if (err)
 		return err;
 
-	err = got_object_id_by_path(&obj_id, repo, parent_id, path);
+	pid = SIMPLEQ_FIRST(got_object_commit_get_parent_ids(commit));
+	if (pid == NULL) {
+		got_object_commit_close(commit);
+		return NULL;
+	}
+
+	err = got_object_id_by_path(&blob_id, repo, id, path);
 	if (err) {
 		if (err->code == GOT_ERR_NO_TREE_ENTRY)
 			err = NULL;
 		goto done;
 	}
 
-	err = got_object_open(&obj, repo, obj_id);
+	err = got_object_open_as_blob(&blob, repo, blob_id, 8192);
 	if (err)
 		goto done;
 
-	if (obj->type != GOT_OBJ_TYPE_BLOB) {
-		err = got_error_path(path, GOT_ERR_OBJ_TYPE);
+	f2 = got_opentemp();
+	if (f2 == NULL) {
+		err = got_error_from_errno("got_opentemp");
 		goto done;
 	}
-
-	err = got_object_blob_open(&blob, repo, obj, 8192);
+	err = got_object_blob_dump_to_file(&size2, &nlines2, NULL,
+	    f2, blob);
 	if (err)
 		goto done;
 
-	if (fseek(blame->f, 0L, SEEK_SET) == -1) {
-		err = got_ferror(blame->f, GOT_ERR_IO);
+	err = got_object_id_by_path(&pblob_id, repo, pid->id, path);
+	if (err) {
+		if (err->code == GOT_ERR_NO_TREE_ENTRY)
+			err = NULL;
 		goto done;
 	}
 
-	diff_data_free(&blame->left_data);
-	memset(&blame->left_data, 0, sizeof(blame->left_data));
-	err = got_diff_blob_prepared_file(&diffreg_result, &blame->left_data,
-	    blob, &blame->right_data, blame->f, blame->map, blame->filesize,
-	    blame->cfg, 0);
+	err = got_object_open_as_blob(&pblob, repo, pblob_id, 8192);
 	if (err)
 		goto done;
 
+	f1 = got_opentemp();
+	if (f1 == NULL) {
+		err = got_error_from_errno("got_opentemp");
+		goto done;
+	}
+	err = got_object_blob_dump_to_file(&size1, &nlines1, NULL, f1, pblob);
+	if (err)
+		goto done;
+
+	err = got_diff_files(&diffreg_result, f1, "", f2, "",
+	    0, 0, NULL);
+	if (err)
+		goto done;
 	if (diffreg_result->result->chunks.len > 0) {
-		err = blame_changes(blame, diffreg_result->result, id, cb, arg);
+		if (nlines1 > 0) {
+			linemap1 = calloc(nlines1, sizeof(*linemap1));
+			if (linemap1 == NULL) {
+				err = got_error_from_errno("malloc");
+				goto done;
+			}
+		}
+		err = blame_changes(blame, linemap1,
+		    diffreg_result->result, id, cb, arg);
+		if (err) {
+			free(linemap1);
+			goto done;
+		}
+		if (linemap1) {
+			free(blame->linemap2);
+			blame->linemap2 = linemap1;
+			blame->nlines2 = nlines1;
+		}
 	} else if (cb)
 		err = cb(arg, blame->nlines, -1, id);
 done:
 	if (diffreg_result) {
 		const struct got_error *free_err;
-		free_err = got_diffreg_result_free_left(diffreg_result);
+		free_err = got_diffreg_result_free(diffreg_result);
 		if (free_err && err == NULL)
 			err = free_err;
 	}
 	if (commit)
 		got_object_commit_close(commit);
-	free(obj_id);
-	if (obj)
-		got_object_close(obj);
+	free(blob_id);
+	free(pblob_id);
 	if (blob)
 		got_object_blob_close(blob);
+	if (pblob)
+		got_object_blob_close(pblob);
+	if (f1 && fclose(f1) != 0 && err == NULL)
+		err = got_error_from_errno("fclose");
+	if (f2 && fclose(f2) != 0 && err == NULL)
+		err = got_error_from_errno("fclose");
 	return err;
 }
 
@@ -201,13 +269,10 @@ blame_close(struct got_blame *blame)
 {
 	const struct got_error *err = NULL;
 
-	diff_data_free(&blame->left_data);
-	diff_data_free(&blame->right_data);
-	if (blame->map && munmap(blame->map, blame->filesize) == -1)
-		err = got_error_from_errno("munmap");
 	if (blame->f && fclose(blame->f) != 0 && err == NULL)
 		err = got_error_from_errno("fclose");
 	free(blame->lines);
+	free(blame->linemap2);
 	free(blame);
 	return err;
 }
@@ -223,16 +288,15 @@ blame_open(struct got_blame **blamep, const char *path
 	struct got_object_id *obj_id = NULL;
 	struct got_blob_object *blob = NULL;
 	struct got_blame *blame = NULL;
-	struct got_object_id *id = NULL, *pid = NULL;
-	int lineno, created;
-	size_t size;
+	struct got_object_id *id = NULL;
+	int lineno;
 	struct got_commit_graph *graph = NULL;
 
 	*blamep = NULL;
 
 	err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
 	if (err)
-		return err;
+		goto done;
 
 	err = got_object_open(&obj, repo, obj_id);
 	if (err)
@@ -248,8 +312,10 @@ blame_open(struct got_blame **blamep, const char *path
 		goto done;
 
 	blame = calloc(1, sizeof(*blame));
-	if (blame == NULL)
-		return got_error_from_errno("calloc");
+	if (blame == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
 
 	blame->f = got_opentemp();
 	if (blame->f == NULL) {
@@ -261,11 +327,7 @@ blame_open(struct got_blame **blamep, const char *path
 	if (err || blame->nlines == 0)
 		goto done;
 
-	blame->cfg = got_diff_get_config(GOT_DIFF_ALGORITHM_PATIENCE),
-	err = got_diff_prepare_file(&blame->f, &blame->map, &created, &size,
-	    &blame->right_data, blame->cfg, 0);
-	if (err)
-		goto done;
+	blame->cfg = got_diff_get_config(GOT_DIFF_ALGORITHM_PATIENCE);
 
 	/* Don't include \n at EOF in the blame line count. */
 	if (blame->line_offsets[blame->nlines - 1] == blame->filesize)
@@ -277,38 +339,50 @@ blame_open(struct got_blame **blamep, const char *path
 		goto done;
 	}
 
+	blame->nlines2 = blame->nlines;
+	blame->linemap2 = calloc(blame->nlines2, sizeof(*blame->linemap2));
+	if (blame->linemap2 == NULL) {
+		err = got_error_from_errno("calloc");
+		goto done;
+	}
+	for (lineno = 0; lineno < blame->nlines2; lineno++)
+		blame->linemap2[lineno] = lineno;
+
 	err = got_commit_graph_open(&graph, path, 1);
 	if (err)
-		return err;
+		goto done;
+
 	err = got_commit_graph_iter_start(graph, start_commit_id, repo,
 	    cancel_cb, cancel_arg);
 	if (err)
 		goto done;
-	id = start_commit_id;
 	for (;;) {
-		err = got_commit_graph_iter_next(&pid, graph, repo,
+		struct got_object_id *next_id;
+		err = got_commit_graph_iter_next(&next_id, graph, repo,
 		    cancel_cb, cancel_arg);
 		if (err) {
-			if (err->code == GOT_ERR_ITER_COMPLETED)
+			if (err->code == GOT_ERR_ITER_COMPLETED) {
 				err = NULL;
-			break;
+				break;
+			}
+			goto done;
 		}
-		if (pid) {
-			err = blame_commit(blame, pid, id, path, repo, cb, arg);
+		if (next_id) {
+			id = next_id;
+			err = blame_commit(blame, id, path, repo, cb, arg);
 			if (err) {
 				if (err->code == GOT_ERR_ITER_COMPLETED)
 					err = NULL;
-				break;
+				goto done;
 			}
 			if (blame->nannotated == blame->nlines)
 				break;
 		}
-		id = pid;
 	}
 
 	if (id && blame->nannotated < blame->nlines) {
 		/* Annotate remaining non-annotated lines with last commit. */
-		for (lineno = 1; lineno <= blame->nlines; lineno++) {
+		for (lineno = 0; lineno < blame->nlines; lineno++) {
 			err = annotate_line(blame, lineno, id, cb, arg);
 			if (err)
 				goto done;
blob - 10d305bc5b630ca236e00ba304d323a7c271501d
blob + d666c6b3541b953a5c3463d3a8b2c9a0d3ade54c
--- regress/cmdline/blame.sh
+++ regress/cmdline/blame.sh
@@ -881,7 +881,105 @@ test_blame_symlink() {
 
 	test_done "$testroot" "$ret"
 }
+
+test_blame_lines_shifted_skip() {
+	local testroot=`test_init blame_lines_shifted_skip`
+
+	got checkout $testroot/repo $testroot/wt > /dev/null
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	cat > $testroot/wt/alpha <<EOF
+A
+B
+C
+D
+EOF
+	(cd $testroot/wt && got commit -m "change 1" > /dev/null)
+	local commit1=`git_show_head $testroot/repo`
+	local short_commit1=`trim_obj_id 32 $commit1`
+	local author_time=`git_show_author_time $testroot/repo`
 
+	cat > $testroot/wt/alpha <<EOF
+A
+B
+Y
+C
+P
+Q
+EOF
+	(cd $testroot/wt && got commit -m "change 2" > /dev/null)
+	local commit2=`git_show_head $testroot/repo`
+	local short_commit2=`trim_obj_id 32 $commit2`
+
+	cat > $testroot/wt/alpha <<EOF
+A
+B
+Y
+C
+D
+P
+Q
+EOF
+	(cd $testroot/wt && got commit -m "change 3" > /dev/null)
+	local commit3=`git_show_head $testroot/repo`
+	local short_commit3=`trim_obj_id 32 $commit3`
+	local author_time=`git_show_author_time $testroot/repo`
+
+	cat > $testroot/wt/alpha <<EOF
+A
+B
+C
+P
+Y
+Q
+EOF
+	(cd $testroot/wt && got commit -m "change 4" > /dev/null)
+	local commit4=`git_show_head $testroot/repo`
+	local short_commit4=`trim_obj_id 32 $commit4`
+	local author_time=`git_show_author_time $testroot/repo`
+
+	cat > $testroot/wt/alpha <<EOF
+X
+A
+B
+C
+P
+Y
+Q
+EOF
+	(cd $testroot/wt && got commit -m "change 5" > /dev/null)
+	local commit5=`git_show_head $testroot/repo`
+	local short_commit5=`trim_obj_id 32 $commit5`
+	local author_time=`git_show_author_time $testroot/repo`
+
+	(cd $testroot/wt && got blame alpha > $testroot/stdout)
+
+	d=`date -r $author_time +"%G-%m-%d"`
+	echo "1) $short_commit5 $d $GOT_AUTHOR_8 X" > $testroot/stdout.expected
+	echo "2) $short_commit1 $d $GOT_AUTHOR_8 A" >> $testroot/stdout.expected
+	echo "3) $short_commit1 $d $GOT_AUTHOR_8 B" >> $testroot/stdout.expected
+	echo "4) $short_commit1 $d $GOT_AUTHOR_8 C" >> $testroot/stdout.expected
+	echo "5) $short_commit2 $d $GOT_AUTHOR_8 P" >> $testroot/stdout.expected
+	echo "6) $short_commit4 $d $GOT_AUTHOR_8 Y" >> $testroot/stdout.expected
+	echo "7) $short_commit2 $d $GOT_AUTHOR_8 Q" >> $testroot/stdout.expected
+
+	cmp -s $testroot/stdout.expected $testroot/stdout
+	ret="$?"
+	if [ "$ret" != "0" ]; then
+		diff -u $testroot/stdout.expected $testroot/stdout
+		test_done "$testroot" "$ret"
+		return 1
+	fi
+
+	blame_cmp "$testroot" "alpha"
+	ret="$?"
+	test_done "$testroot" "$ret"
+}
+
 test_parseargs "$@"
 run_test test_blame_basic
 run_test test_blame_tag
@@ -895,3 +993,4 @@ run_test test_blame_blame_h
 run_test test_blame_added_on_branch
 run_test test_blame_submodule
 run_test test_blame_symlink
+run_test test_blame_lines_shifted_skip