Commit Diff


commit - 89dc8b78ad19e890f6dd1fe4259652a32ff89b16
commit + 8c35ff140a39c592c937c6ddbf5e723e95a6fbfa
blob - 18552faa524fd738b0f8f26b302df1e5785530ef
blob + af7e1af4697f37ad206124a5aef6deec87e7134f
--- lib/blame.c
+++ lib/blame.c
@@ -19,6 +19,7 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 
+#include <errno.h>
 #include <sha1.h>
 #include <string.h>
 #include <stdio.h>
@@ -50,24 +51,41 @@ struct got_blame_line {
 };
 
 struct got_blame {
-	FILE *f;
-	off_t size;
 	struct diff_config *cfg;
-	off_t filesize;
-	int nlines;
-	int nannotated;
+	int nlines;	/* number of lines in file being blamed */
+	int nannotated;	/* number of lines already annotated */
 	struct got_blame_line *lines; /* one per line */
-	off_t *line_offsets;		/* one per line */
 	int ncommits;
 
 	/*
+	 * These change with every traversed commit. After diffing
+	 * commits N:N-1, in preparation for diffing commits N-1:N-2,
+	 * the state held for commit N is released and the state held
+	 * for commit N-1 is retained as the new "newer" side.
+	 */
+	FILE *f1; /* older version from commit N-1. */
+	FILE *f2; /* newer version from commit N. */
+	unsigned char *map1;	/* mmap of f1; NULL when using file I/O */
+	unsigned char *map2;	/* mmap of f2; NULL when using file I/O */
+	off_t size1;		/* size of f1 in bytes */
+	off_t size2;		/* size of f2 in bytes */
+	int nlines1;		/* number of lines in f1 */
+	int nlines2;		/* number of lines in f2 */
+	off_t *line_offsets1;	/* offset of each line in f1 */
+	off_t *line_offsets2;	/* offset of each line in f2 */
+
+	/*
 	 * Map line numbers of an older version of the file to valid line
-	 * numbers in blame->f. This map is updated with each commit we
-	 * traverse throughout the file's history.
-	 * Lines mapped to -1 do not correspond to any line in blame->f.
+	 * numbers in the version of the file being blamed. This map is
+	 * updated with each commit we traverse throughout the file's history.
+	 * Lines mapped to -1 do not correspond to any line in the version
+	 * being blamed.
 	 */
+	int *linemap1;	/* for lines of f1; filled in by blame_changes() */
 	int *linemap2;
-	int nlines2;
+
+	struct diff_data *data1;	/* f1 as passed to diff_main() */
+	struct diff_data *data2;	/* f2 as passed to diff_main() */
 };
 
 static const struct got_error *
@@ -94,8 +112,8 @@ annotate_line(struct got_blame *blame, int lineno, str
 }
 
 static const struct got_error *
-blame_changes(struct got_blame *blame, int *linemap1,
-    struct diff_result *diff_result, struct got_object_id *commit_id,
+blame_changes(struct got_blame *blame, struct diff_result *diff_result,
+    struct got_object_id *commit_id,
     const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
     void *arg)
 {
@@ -123,14 +141,15 @@ blame_changes(struct got_blame *blame, int *linemap1,
 
 		if (left_count == right_count) {
 			for (j = 0; j < left_count; j++) {
-				linemap1[idx1++] = blame->linemap2[idx2++];
+				blame->linemap1[idx1++] =
+				    blame->linemap2[idx2++];
 			}
 			continue;
 		}
 
 		if (right_count == 0) {
 			for (j = 0; j < left_count; j++) {
-				linemap1[idx1++] = -1;
+				blame->linemap1[idx1++] = -1;
 			}
 			continue;
 		}
@@ -149,6 +168,32 @@ blame_changes(struct got_blame *blame, int *linemap1,
 }
 
 static const struct got_error *
+blame_prepare_file(FILE *f, unsigned char **p, off_t *size,
+    int *nlines, off_t **line_offsets, struct diff_data *diff_data,
+    const struct diff_config *cfg, struct got_blob_object *blob)
+{
+	const struct got_error *err = NULL;
+	int rc;	/* result of diff_atomize_file(); reported as an errno */
+	/* Prepare f for diffing: dump the blob, try to mmap, atomize. */
+	err = got_object_blob_dump_to_file(size, nlines, line_offsets,
+	    f, blob);
+	if (err)
+		return err;
+	/* Without mmap support only "*p = NULL" below gets compiled. */
+#ifndef GOT_DIFF_NO_MMAP
+	*p = mmap(NULL, *size, PROT_READ, MAP_PRIVATE, fileno(f), 0);
+	if (*p == MAP_FAILED)
+#endif /* the if above intentionally guards the next statement */
+		*p = NULL; /* fall back on file I/O */
+	/* Both f and the mapping (NULL if there is none) are passed on. */
+	rc = diff_atomize_file(diff_data, cfg, f, *p, *size, 0);
+	if (rc)
+		return got_error_set_errno(rc, "diff_atomize_file");
+
+	return NULL;
+}
+
+static const struct got_error *
 blame_commit(struct got_blame *blame, struct got_object_id *id,
     const char *path, struct got_repository *repo,
     const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
@@ -157,13 +202,9 @@ blame_commit(struct got_blame *blame, struct got_objec
 	const struct got_error *err = NULL;
 	struct got_commit_object *commit = NULL;
 	struct got_object_qid *pid = NULL;
-	struct got_object_id *blob_id = NULL, *pblob_id = NULL;
-	struct got_blob_object *blob = NULL, *pblob = NULL;
-	struct got_diffreg_result *diffreg_result = NULL;
-	FILE *f1 = NULL, *f2 = NULL;
-	off_t size1, size2;
-	int nlines1, nlines2;
-	int *linemap1 = NULL;
+	struct got_object_id *pblob_id = NULL;
+	struct got_blob_object *pblob = NULL;
+	struct diff_result *diff_result = NULL;
 
 	err = got_object_open_as_commit(&commit, repo, id);
 	if (err)
@@ -175,91 +216,60 @@ blame_commit(struct got_blame *blame, struct got_objec
 		return NULL;
 	}
 
-	err = got_object_id_by_path(&blob_id, repo, id, path);
+	err = got_object_id_by_path(&pblob_id, repo, pid->id, path);
 	if (err) {
 		if (err->code == GOT_ERR_NO_TREE_ENTRY)
 			err = NULL;
 		goto done;
 	}
 
-	err = got_object_open_as_blob(&blob, repo, blob_id, 8192);
+	err = got_object_open_as_blob(&pblob, repo, pblob_id, 8192);
 	if (err)
 		goto done;
 
-	f2 = got_opentemp();
-	if (f2 == NULL) {
+	blame->f1 = got_opentemp();
+	if (blame->f1 == NULL) {
 		err = got_error_from_errno("got_opentemp");
 		goto done;
 	}
-	err = got_object_blob_dump_to_file(&size2, &nlines2, NULL,
-	    f2, blob);
+
+	err = blame_prepare_file(blame->f1, &blame->map1, &blame->size1,
+	    &blame->nlines1, &blame->line_offsets1, blame->data1,
+	    blame->cfg, pblob);
 	if (err)
 		goto done;
 
-	err = got_object_id_by_path(&pblob_id, repo, pid->id, path);
-	if (err) {
-		if (err->code == GOT_ERR_NO_TREE_ENTRY)
-			err = NULL;
+	diff_result = diff_main(blame->cfg, blame->data1, blame->data2);
+	if (diff_result == NULL) {
+		err = got_error_set_errno(ENOMEM, "malloc");
 		goto done;
 	}
-
-	err = got_object_open_as_blob(&pblob, repo, pblob_id, 8192);
-	if (err)
+	if (diff_result->rc != DIFF_RC_OK) {
+		err = got_error_set_errno(diff_result->rc, "diff");
 		goto done;
-
-	f1 = got_opentemp();
-	if (f1 == NULL) {
-		err = got_error_from_errno("got_opentemp");
-		goto done;
 	}
-	err = got_object_blob_dump_to_file(&size1, &nlines1, NULL, f1, pblob);
-	if (err)
-		goto done;
-
-	err = got_diff_files(&diffreg_result, f1, "", f2, "",
-	    0, 0, NULL);
-	if (err)
-		goto done;
-	if (diffreg_result->result->chunks.len > 0) {
-		if (nlines1 > 0) {
-			linemap1 = calloc(nlines1, sizeof(*linemap1));
-			if (linemap1 == NULL) {
+	if (diff_result->chunks.len > 0) {
+		if (blame->nlines1 > 0) {
+			blame->linemap1 = calloc(blame->nlines1,
+			    sizeof(*blame->linemap1));
+			if (blame->linemap1 == NULL) {
 				err = got_error_from_errno("malloc");
 				goto done;
 			}
 		}
-		err = blame_changes(blame, linemap1,
-		    diffreg_result->result, id, cb, arg);
-		if (err) {
-			free(linemap1);
+		err = blame_changes(blame, diff_result, id, cb, arg);
+		if (err)
 			goto done;
-		}
-		if (linemap1) {
-			free(blame->linemap2);
-			blame->linemap2 = linemap1;
-			blame->nlines2 = nlines1;
-		}
 	} else if (cb)
 		err = cb(arg, blame->nlines, -1, id);
 done:
-	if (diffreg_result) {
-		const struct got_error *free_err;
-		free_err = got_diffreg_result_free(diffreg_result);
-		if (free_err && err == NULL)
-			err = free_err;
-	}
+	if (diff_result)
+		diff_result_free(diff_result);
 	if (commit)
 		got_object_commit_close(commit);
-	free(blob_id);
 	free(pblob_id);
-	if (blob)
-		got_object_blob_close(blob);
 	if (pblob)
 		got_object_blob_close(pblob);
-	if (f1 && fclose(f1) != 0 && err == NULL)
-		err = got_error_from_errno("fclose");
-	if (f2 && fclose(f2) != 0 && err == NULL)
-		err = got_error_from_errno("fclose");
 	return err;
 }
 
@@ -268,23 +278,213 @@ blame_close(struct got_blame *blame)
 {
 	const struct got_error *err = NULL;
 
-	if (blame->f && fclose(blame->f) != 0 && err == NULL)
+	diff_data_free(blame->data1);
+	free(blame->data1);
+	diff_data_free(blame->data2);
+	free(blame->data2);
+	if (blame->map1) {
+		if (munmap(blame->map1, blame->size1) == -1 && err == NULL)
+			err = got_error_from_errno("munmap");
+	}
+	if (blame->map2) {
+		if (munmap(blame->map2, blame->size2) == -1 && err == NULL)
+			err = got_error_from_errno("munmap");
+	}
+	if (blame->f1 && fclose(blame->f1) != 0 && err == NULL)
 		err = got_error_from_errno("fclose");
+	if (blame->f2 && fclose(blame->f2) != 0 && err == NULL)
+		err = got_error_from_errno("fclose");
 	free(blame->lines);
+	free(blame->line_offsets1);
+	free(blame->line_offsets2);
+	free(blame->linemap1);
 	free(blame->linemap2);
 	free(blame->cfg);
 	free(blame);
 	return err;
+}
+
+/*
+ * Atomize f line by line using stdio, for files which could not be
+ * mapped into memory. One atom is added to d->atoms per line.
+ * Returns DIFF_RC_OK, or an errno code after freeing d->atoms.
+ */
+static int
+atomize_file(struct diff_data *d, FILE *f, off_t filesize, int nlines,
+    off_t *line_offsets)
+{
+	int i, rc = DIFF_RC_OK;
+
+	ARRAYLIST_INIT(d->atoms, nlines);
+
+	for (i = 0; i < nlines; i++) {
+		struct diff_atom *atom;
+		off_t len, pos = line_offsets[i];
+		unsigned int hash = 0;
+		off_t j;	/* same type as len; an int could overflow */
+
+		ARRAYLIST_ADD(atom, d->atoms);
+		if (atom == NULL) {
+			rc = errno;
+			break;
+		}
+
+		/* A line ends where the next one starts, or at EOF. */
+		len = (i < nlines - 1 ? line_offsets[i + 1] : filesize) - pos;
+
+		if (fseeko(f, pos, SEEK_SET) == -1) {
+			rc = errno;
+			break;
+		}
+		for (j = 0; j < len; j++) {
+			int c = fgetc(f);
+			if (c == EOF) {
+				/* EIO signals an unexpected EOF. */
+				rc = feof(f) ? EIO : errno;
+				goto done;
+			}
+			hash = diff_atom_hash_update(hash, (unsigned char)c);
+		}
+		*atom = (struct diff_atom){
+			.root = d,
+			.pos = pos,
+			.at = NULL,	/* atom data is not memory-mapped */
+			.len = len,
+			.hash = hash,
+		};
+	}
+done:
+	if (rc)
+		ARRAYLIST_FREE(d->atoms);
+
+	return rc;
 }
 
+/*
+ * Atomize the memory-mapped file content at p, one atom per line; atoms
+ * point into p. Returns DIFF_RC_OK or, after freeing d->atoms, errno.
+ */
+static int
+atomize_file_mmap(struct diff_data *d, unsigned char *p,
+    off_t filesize, int nlines, off_t *line_offsets)
+{
+	int i, rc = DIFF_RC_OK;
+
+	ARRAYLIST_INIT(d->atoms, nlines);
+
+	for (i = 0; i < nlines; i++) {
+		struct diff_atom *atom;
+		off_t len, pos = line_offsets[i];
+		unsigned int hash = 0;
+		off_t j;	/* same type as len; an int could overflow */
+
+		ARRAYLIST_ADD(atom, d->atoms);
+		if (atom == NULL) {
+			rc = errno;
+			break;
+		}
+
+		len = (i < nlines - 1 ? line_offsets[i + 1] : filesize) - pos;
+
+		for (j = 0; j < len; j++)
+			hash = diff_atom_hash_update(hash, p[pos + j]);
+		*atom = (struct diff_atom){
+			.root = d,
+			.pos = pos,
+			.at = &p[pos],
+			.len = len,
+			.hash = hash,
+		};
+	}
+
+	if (rc)
+		ARRAYLIST_FREE(d->atoms);
+
+	return rc;
+}
+
+/* Implements diff_atomize_func_t; arg is the struct got_blame. */
+static int
+blame_atomize_file(void *arg, struct diff_data *d)
+{
+	struct got_blame *blame = arg;
+	unsigned char *map;
+	off_t size, *offsets;
+	int nlines;
+
+	if (d->f == blame->f1) {
+		map = blame->map1;
+		size = blame->size1;
+		nlines = blame->nlines1;
+		offsets = blame->line_offsets1;
+	} else if (d->f == blame->f2) {
+		/* Re-use data from previous commit. */
+		if (d->atoms.len > 0)
+			return DIFF_RC_OK;
+		map = blame->map2;
+		size = blame->size2;
+		nlines = blame->nlines2;
+		offsets = blame->line_offsets2;
+	} else
+		return DIFF_RC_OK; /* not one of our files */
+
+	/* Prefer the memory mapping; otherwise read via stdio. */
+	if (map)
+		return atomize_file_mmap(d, map, size, nlines, offsets);
+	return atomize_file(d, d->f, size, nlines, offsets);
+}
+
 static const struct got_error *
+close_file2_and_reuse_file1(struct got_blame *blame)
+{
+	struct diff_data *d;
+
+	free(blame->line_offsets2);
+	blame->line_offsets2 = blame->line_offsets1;
+	blame->line_offsets1 = NULL;
+
+	free(blame->linemap2);
+	blame->linemap2 = blame->linemap1;
+	blame->linemap1 = NULL;
+
+	if (blame->map2) {
+		if (munmap(blame->map2, blame->size2) == -1)
+			return got_error_from_errno("munmap");
+		blame->map2 = blame->map1;
+		blame->map2 = NULL;
+
+	}
+	blame->size2 = blame->size1;
+	blame->size1 = 0;
+
+	if (fclose(blame->f2) == EOF)
+		return got_error_from_errno("fclose");
+	blame->f2 = blame->f1;
+	blame->f1 = NULL;
+
+	blame->nlines2 = blame->nlines1;
+	blame->nlines1 = 0;
+
+	free(blame->line_offsets2);
+	blame->line_offsets2 = blame->line_offsets1;
+	blame->line_offsets2 = NULL;
+
+	diff_data_free(blame->data2); /* does not free pointer itself */
+	memset(blame->data2, 0, sizeof(*blame->data2));
+	d = blame->data2;
+	blame->data2 = blame->data1;
+	blame->data1 = d;
+
+	return NULL;
+}
+
+static const struct got_error *
 blame_open(struct got_blame **blamep, const char *path,
     struct got_object_id *start_commit_id, struct got_repository *repo,
     const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
     void *arg, got_cancel_cb cancel_cb, void *cancel_arg)
 {
 	const struct got_error *err = NULL;
-	struct got_object *obj = NULL;
 	struct got_object_id *obj_id = NULL;
 	struct got_blob_object *blob = NULL;
 	struct got_blame *blame = NULL;
@@ -298,42 +498,49 @@ blame_open(struct got_blame **blamep, const char *path
 	if (err)
 		goto done;
 
-	err = got_object_open(&obj, repo, obj_id);
+	err = got_object_open_as_blob(&blob, repo, obj_id, 8192);
 	if (err)
 		goto done;
 
-	if (obj->type != GOT_OBJ_TYPE_BLOB) {
-		err = got_error_path(path, GOT_ERR_OBJ_TYPE);
+	blame = calloc(1, sizeof(*blame));
+	if (blame == NULL) {
+		err = got_error_from_errno("calloc");
 		goto done;
 	}
 
-	err = got_object_blob_open(&blob, repo, obj, 8192);
-	if (err)
+	blame->data1 = calloc(1, sizeof(*blame->data1));
+	if (blame->data1 == NULL) {
+		err = got_error_from_errno("calloc");
 		goto done;
-
-	blame = calloc(1, sizeof(*blame));
-	if (blame == NULL) {
+	}
+	blame->data2 = calloc(1, sizeof(*blame->data2));
+	if (blame->data2 == NULL) {
 		err = got_error_from_errno("calloc");
 		goto done;
 	}
 
-	blame->f = got_opentemp();
-	if (blame->f == NULL) {
+	blame->f2 = got_opentemp();
+	if (blame->f2 == NULL) {
 		err = got_error_from_errno("got_opentemp");
 		goto done;
 	}
-	err = got_object_blob_dump_to_file(&blame->filesize, &blame->nlines,
-	    &blame->line_offsets, blame->f, blob);
-	if (err || blame->nlines == 0)
-		goto done;
-
 	err = got_diff_get_config(&blame->cfg, GOT_DIFF_ALGORITHM_PATIENCE,
-	    NULL, NULL);
+	    blame_atomize_file, blame);
 	if (err)
 		goto done;
 
+	err = blame_prepare_file(blame->f2, &blame->map2, &blame->size2,
+	    &blame->nlines2, &blame->line_offsets2, blame->data2,
+	    blame->cfg, blob);
+	blame->nlines = blame->nlines2;
+	if (err || blame->nlines == 0)
+		goto done;
+
+	got_object_blob_close(blob);
+	blob = NULL;
+
 	/* Don't include \n at EOF in the blame line count. */
-	if (blame->line_offsets[blame->nlines - 1] == blame->filesize)
+	if (blame->line_offsets2[blame->nlines - 1] == blame->size2)
 		blame->nlines--;
 
 	blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
@@ -342,7 +549,6 @@ blame_open(struct got_blame **blamep, const char *path
 		goto done;
 	}
 
-	blame->nlines2 = blame->nlines;
 	blame->linemap2 = calloc(blame->nlines2, sizeof(*blame->linemap2));
 	if (blame->linemap2 == NULL) {
 		err = got_error_from_errno("calloc");
@@ -380,6 +586,10 @@ blame_open(struct got_blame **blamep, const char *path
 			}
 			if (blame->nannotated == blame->nlines)
 				break;
+
+			err = close_file2_and_reuse_file1(blame);
+			if (err)
+				goto done;
 		}
 	}
 
@@ -396,8 +606,6 @@ done:
 	if (graph)
 		got_commit_graph_close(graph);
 	free(obj_id);
-	if (obj)
-		got_object_close(obj);
 	if (blob)
 		got_object_blob_close(blob);
 	if (err) {