commit 404c43c4fa13923747c649f395f5b2bcb9fcd484 from: Stefan Sperling date: Thu Jun 21 21:22:57 2018 UTC basic 'got blame' implementation commit - a19581a24bec8161b09eab50d721edc1c8f42be9 commit + 404c43c4fa13923747c649f395f5b2bcb9fcd484 blob - f3bd4030b0a0f870930ffa01cef63c30db0846af blob + 4ca05ec8487753b03115ac175e8878dac1c78bcb --- got/Makefile +++ got/Makefile @@ -1,7 +1,7 @@ .PATH:${.CURDIR}/../lib PROG= got -SRCS= got.c commit_graph.c delta.c diff.c diffreg.c error.c \ +SRCS= got.c blame.c commit_graph.c delta.c diff.c diffreg.c error.c \ fileindex.c object.c object_idset.c opentemp.c path.c \ pack.c privsep.c reference.c repository.c sha1.c \ worktree.c zbuf.c blob - cdf523b6507a59b3fd3dda104d5d87083b7af51c blob + bbdd8feb7717e3603a45f96b3801a65d6f196eb5 --- got/got.c +++ got/got.c @@ -36,6 +36,7 @@ #include "got_worktree.h" #include "got_diff.h" #include "got_commit_graph.h" +#include "got_blame.h" #ifndef nitems #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) @@ -52,10 +53,12 @@ __dead static void usage(void); __dead static void usage_checkout(void); __dead static void usage_log(void); __dead static void usage_diff(void); +__dead static void usage_blame(void); static const struct got_error* cmd_checkout(int, char *[]); static const struct got_error* cmd_log(int, char *[]); static const struct got_error* cmd_diff(int, char *[]); +static const struct got_error* cmd_blame(int, char *[]); #ifdef notyet static const struct got_error* cmd_status(int, char *[]); #endif @@ -67,6 +70,8 @@ static struct cmd got_commands[] = { "show repository history" }, { "diff", cmd_diff, usage_diff, "compare files and directories" }, + { "blame", cmd_blame, usage_blame, + " show when lines in a file were changed" }, #ifdef notyet { "status", cmd_status, usage_status, "show modification status of files" }, @@ -648,8 +653,94 @@ done: if (repo) got_repo_close(repo); return error; +} + +__dead static void +usage_blame(void) +{ + fprintf(stderr, "usage: %s blame [-c commit] [repository-path] path\n", + getprogname()); + exit(1); } +static const struct got_error * +cmd_blame(int argc, char *argv[]) +{ + const struct got_error *error; + struct got_repository *repo = NULL; + char *repo_path = NULL; + char *path = NULL; + struct got_object_id *commit_id = NULL; + char *commit_id_str = NULL; + int ch; + +#ifndef PROFILE + if (pledge("stdio rpath wpath cpath flock proc", NULL) == -1) + err(1, "pledge"); +#endif + + while ((ch = getopt(argc, argv, "c:")) != -1) { + switch (ch) { + case 'c': + commit_id_str = optarg; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + if (argc == 0) { + usage_blame(); + } else if (argc == 1) { + repo_path = getcwd(NULL, 0); + if (repo_path == NULL) + return got_error_from_errno(); + path = argv[0]; + } else if (argc == 2) { + repo_path = realpath(argv[0], NULL); + if (repo_path == NULL) + return got_error_from_errno(); + path = argv[1]; + } else + usage_blame(); + + error = got_repo_open(&repo, repo_path); + free(repo_path); + if (error != NULL) + goto done; + + if (commit_id_str == NULL) { + struct got_reference *head_ref; + error = got_ref_open(&head_ref, repo, GOT_REF_HEAD); + if (error != NULL) + return error; + error = got_ref_resolve(&commit_id, repo, head_ref); + got_ref_close(head_ref); + if (error != NULL) + return error; + } else { + struct got_object *obj; + error = got_object_open_by_id_str(&obj, repo, commit_id_str); + if (error != NULL) + return error; + commit_id = got_object_get_id(obj); + if (commit_id == NULL) + error = got_error_from_errno(); + got_object_close(obj); + } + + error = got_blame(path, commit_id, repo, stdout); +done: + free(commit_id); + if (repo) + got_repo_close(repo); + return error; +} + #ifdef notyet static const struct got_error * cmd_status(int argc __unused, char *argv[] __unused) blob - /dev/null blob + 0f683c8087cfa06140945242d684704f6e32d761 (mode 644) --- /dev/null +++ lib/blame.c @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2018 Stefan Sperling + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "got_error.h" +#include "got_object.h" +#include "got_blame.h" +#include "got_opentemp.h" + +#include "got_lib_zbuf.h" +#include "got_lib_delta.h" +#include "got_lib_object.h" +#include "got_lib_diff.h" + +struct got_blame_line { + int annotated; + struct got_object_id id; /* one per line */ +}; + +struct got_blame { + FILE *f; + size_t nlines; + struct got_blame_line *lines; /* one per line */ +}; + +static const struct got_error * +dump_blob_and_count_lines(size_t *nlines, FILE *outfile, + struct got_blob_object *blob) +{ + const struct got_error *err = NULL; + size_t len, hdrlen; + const uint8_t *buf; + int i; + + hdrlen = got_object_blob_get_hdrlen(blob); + *nlines = 0; + do { + err = got_object_blob_read_block(&len, blob); + if (err) + return err; + if (len == 0) + break; + buf = got_object_blob_get_read_buf(blob); + for (i = 0; i < len; i++) { + if (buf[i] == '\n') + (*nlines)++; + } + /* Skip blob object header first time around. */ + fwrite(buf + hdrlen, len - hdrlen, 1, outfile); + hdrlen = 0; + } while (len != 0); + + + fflush(outfile); + rewind(outfile); + + return NULL; +} + +static const struct got_error * +annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id) +{ + struct got_blame_line *line; + + if (lineno < 1 || lineno > blame->nlines) + return got_error(GOT_ERR_RANGE); + + line = &blame->lines[lineno - 1]; + if (line->annotated) + return NULL; + + memcpy(&line->id, id, sizeof(line->id)); + line->annotated = 1; + return NULL; +} + +static const struct got_error * +blame_commit(struct got_blame *blame, struct got_object_id *id, + struct got_object_id *pid, const char *path, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object *obj = NULL, *pobj = NULL; + struct got_blob_object *blob = NULL, *pblob = NULL; + struct got_diff_changes *changes = NULL; + + err = got_object_open_by_path(&obj, repo, id, path); + if (err) + goto done; + if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) { + err = got_error(GOT_ERR_OBJ_TYPE); + goto done; + } + + err = got_object_open_by_path(&pobj, repo, pid, path); + if (err) { + if (err->code == GOT_ERR_NO_OBJ) { + /* Blob's history began in previous commit. */ + err = got_error(GOT_ERR_ITER_COMPLETED); + } + goto done; + } + if (got_object_get_type(pobj) != GOT_OBJ_TYPE_BLOB) { + /* + * Encountered a non-blob at the path (probably a tree). + * Blob's history began in previous commit. + */ + err = got_error(GOT_ERR_ITER_COMPLETED); + goto done; + } + + /* If blob hashes match then don't bother with diffing. */ + if (got_object_id_cmp(&obj->id, &pobj->id) == 0) + goto done; + + err = got_object_blob_open(&blob, repo, obj, 8192); + if (err) + goto done; + + err = got_object_blob_open(&pblob, repo, pobj, 8192); + if (err) + goto done; + + err = got_diff_blob_lines_changed(&changes, blob, pblob); + if (err) + goto done; + + if (changes) { + struct got_diff_change *change; + char *id_str; + err = got_object_id_str(&id_str, id); + if (err) + goto done; + + SIMPLEQ_FOREACH(change, &changes->entries, entry) { + int a = change->cv.a; + int b = change->cv.b; + int lineno; + for (lineno = a; lineno <= b; lineno++) { + err = annotate_line(blame, lineno, id); + if (err) + goto done; + } + } + free(id_str); + } +done: + if (obj) + got_object_close(obj); + if (pobj) + got_object_close(pobj); + if (blob) + got_object_blob_close(blob); + if (pblob) + got_object_blob_close(pblob); + return err; +} + +static void +blame_close(struct got_blame *blame) +{ + if (blame->f) + fclose(blame->f); + free(blame->lines); + free(blame); +} + +static const struct got_error * +blame_open(struct got_blame **blamep, const char *path, + struct got_object_id *start_commit_id, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_object *obj = NULL; + struct got_blob_object *blob = NULL; + struct got_blame *blame = NULL; + struct got_commit_object *commit = NULL; + struct got_object_id *id = NULL; + int lineno; + + *blamep = NULL; + + err = got_object_open_by_path(&obj, repo, start_commit_id, path); + if (err) + return err; + if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) { + err = got_error(GOT_ERR_OBJ_TYPE); + goto done; + } + + err = got_object_blob_open(&blob, repo, obj, 8192); + if (err) + goto done; + + blame = calloc(1, sizeof(*blame)); + if (blame == NULL) + return got_error_from_errno(); + + blame->f = got_opentemp(); + if (blame->f == NULL) { + err = got_error_from_errno(); + goto done; + } + err = dump_blob_and_count_lines(&blame->nlines, blame->f, blob); + if (err) + goto done; + + blame->lines = calloc(blame->nlines, sizeof(*blame->lines)); + if (blame->lines == NULL) { + err = got_error_from_errno(); + goto done; + } + + /* Loop over first-parent history and try to blame commits. */ + err = got_object_open_as_commit(&commit, repo, start_commit_id); + if (err) + goto done; + id = got_object_id_dup(start_commit_id); + if (id == NULL) { + err = got_error_from_errno(); + goto done; + } + while (1) { + struct got_object_qid *pid; + struct got_commit_object *pcommit; + + pid = SIMPLEQ_FIRST(&commit->parent_ids); + if (pid == NULL) + break; + + err = got_object_open_as_commit(&pcommit, repo, pid->id); + if (err) + break; + + err = blame_commit(blame, id, pid->id, path, repo); + if (err) { + if (err->code == GOT_ERR_ITER_COMPLETED) + err = NULL; + got_object_commit_close(pcommit); + break; + } + free(id); + id = got_object_id_dup(pid->id); + got_object_commit_close(commit); + commit = pcommit; + if (id == NULL) { + err = got_error_from_errno(); + goto done; + } + } + + /* Annotate remaining non-annotated lines with last commit. */ + for (lineno = 1; lineno < blame->nlines; lineno++) { + err = annotate_line(blame, lineno, id); + if (err) + break; + } + +done: + free(id); + if (obj) + got_object_close(obj); + if (blob) + got_object_blob_close(blob); + if (commit) + got_object_commit_close(commit); + if (err) + blame_close(blame); + else + *blamep = blame; + + return err; +} + +static const struct got_error * +blame_line(struct got_object_id **id, struct got_blame *blame, int lineno) +{ + if (lineno < 1 || lineno > blame->nlines) + return got_error(GOT_ERR_RANGE); + *id = &blame->lines[lineno - 1].id; + return NULL; +} + +static char * +parse_next_line(FILE *f, size_t *len) +{ + char *line; + size_t linelen; + size_t lineno; + const char delim[3] = { '\0', '\0', '\0'}; + + line = fparseln(f, &linelen, &lineno, delim, 0); + if (len) + *len = linelen; + return line; +} + +const struct got_error * +got_blame(const char *path, struct got_object_id *start_commit_id, + struct got_repository *repo, FILE *outfile) +{ + const struct got_error *err = NULL; + struct got_blame *blame; + int lineno; + char *abspath; + + if (asprintf(&abspath, "%s%s", path[0] == '/' ? "" : "/", path) == -1) + return got_error_from_errno(); + + err = blame_open(&blame, abspath, start_commit_id, repo); + if (err) { + free(abspath); + return err; + } + + for (lineno = 1; lineno < blame->nlines; lineno++) { + struct got_object_id *id; + char *line, *id_str; + + line = parse_next_line(blame->f, NULL); + if (line == NULL) + break; + + err = blame_line(&id, blame, lineno); + if (err) + break; + + err = got_object_id_str(&id_str, id); + if (err) { + free(line); + break; + } + + fprintf(outfile, "%.8s %s\n", id_str, line); + free(line); + free(id_str); + } + + blame_close(blame); + free(abspath); + return err; +} blob - 2fb482f6e9a84df16124b0c3c805b68224bea7b4 blob + e4f7dbb99d52e456c8c1133c3467a5c9f4956ea3 --- lib/diff.c +++ lib/diff.c @@ -33,9 +33,10 @@ #include "got_lib_diff.h" #include "got_lib_path.h" -const struct got_error * -got_diff_blob(struct got_blob_object *blob1, struct got_blob_object *blob2, - const char *label1, const char *label2, FILE *outfile) +static const struct got_error * +diff_blobs(struct got_blob_object *blob1, struct got_blob_object *blob2, + const char *label1, const char *label2, FILE *outfile, + struct got_diff_changes *changes) { struct got_diff_state ds; struct got_diff_args args; @@ -100,7 +101,7 @@ got_diff_blob(struct got_blob_object *blob1, struct go args.diff_context = 3; flags |= D_PROTOTYPE; - err = got_diffreg(&res, f1, f2, flags, &args, &ds, outfile); + err = got_diffreg(&res, f1, f2, flags, &args, &ds, outfile, changes); done: if (f1) fclose(f1); @@ -109,6 +110,44 @@ done: return err; } +const struct got_error * +got_diff_blob(struct got_blob_object *blob1, struct got_blob_object *blob2, + const char *label1, const char *label2, FILE *outfile) +{ + return diff_blobs(blob1, blob2, label1, label2, outfile, NULL); +} + +const struct got_error * +got_diff_blob_lines_changed(struct got_diff_changes **changes, + struct got_blob_object *blob1, struct got_blob_object *blob2) +{ + const struct got_error *err = NULL; + + *changes = calloc(1, sizeof(**changes)); + if (*changes == NULL) + return got_error_from_errno(); + SIMPLEQ_INIT(&(*changes)->entries); + + err = diff_blobs(blob1, blob2, NULL, NULL, NULL, *changes); + if (err) { + got_diff_free_changes(*changes); + *changes = NULL; + } + return err; +} + +void +got_diff_free_changes(struct got_diff_changes *changes) +{ + struct got_diff_change *change; + while (!SIMPLEQ_EMPTY(&changes->entries)) { + change = SIMPLEQ_FIRST(&changes->entries); + SIMPLEQ_REMOVE_HEAD(&changes->entries, entry); + free(change); + } + free(changes); +} + struct got_tree_entry * match_entry_by_name(struct got_tree_entry *te1, struct got_tree_object *tree2) { blob - bae648774b96b2a6c828b2cd39cf1e0f49154897 blob + b2fe3246d9bd07001e60870521c45f5d37e08ac9 --- lib/diffreg.c +++ lib/diffreg.c @@ -171,29 +171,17 @@ struct line { int value; }; -/* - * The following struct is used to record change information when - * doing a "context" or "unified" diff. (see routine "change" to - * understand the highly mnemonic field names) - */ -struct context_vec { - int a; /* start line in old file */ - int b; /* end line in old file */ - int c; /* start line in new file */ - int d; /* end line in new file */ -}; - static void diff_output(FILE *, const char *, ...); -static int output(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int); +static int output(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int); static void check(struct got_diff_state *, FILE *, FILE *, int); static void uni_range(FILE *, int, int); -static void dump_unified_vec(FILE *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int); +static void dump_unified_vec(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, FILE *, FILE *, int); static int prepare(struct got_diff_state *, int, FILE *, off_t, int); static void prune(struct got_diff_state *); static void equiv(struct line *, int, struct line *, int, int *); static void unravel(struct got_diff_state *, int); static int unsort(struct line *, int, int *); -static int change(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *); +static int change(FILE *, struct got_diff_changes *, struct got_diff_state *, struct got_diff_args *, const char *, FILE *, const char *, FILE *, int, int, int, int, int *); static void sort(struct line *, int); static void print_header(FILE *, struct got_diff_state *, struct got_diff_args *, const char *, const char *); static int asciifile(FILE *); @@ -270,6 +258,9 @@ diff_output(FILE *outfile, const char *fmt, ...) { va_list ap; + if (outfile == NULL) + return; + va_start(ap, fmt); vfprintf(outfile, fmt, ap); va_end(ap); @@ -277,7 +268,8 @@ diff_output(FILE *outfile, const char *fmt, ...) const struct got_error * got_diffreg(int *rval, FILE *f1, FILE *f2, int flags, - struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile) + struct got_diff_args *args, struct got_diff_state *ds, FILE *outfile, + struct got_diff_changes *changes) { const struct got_error *err = NULL; int i, *p; @@ -410,8 +402,8 @@ got_diffreg(int *rval, FILE *f1, FILE *f2, int flags, } ds->ixnew = lp; check(ds, f1, f2, flags); - if (output(outfile, ds, args, args->label[0], f1, args->label[1], f2, - flags)) + if (output(outfile, changes, ds, args, args->label[0], f1, + args->label[1], f2, flags)) err = got_error_from_errno(); closem: free(ds->J); @@ -850,7 +842,8 @@ skipline(FILE *f) } static int -output(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args, +output(FILE *outfile, struct got_diff_changes *changes, + struct got_diff_state *ds, struct got_diff_args *args, const char *file1, FILE *f1, const char *file2, FILE *f2, int flags) { int m, i0, i1, j0, j1; @@ -870,19 +863,19 @@ output(FILE *outfile, struct got_diff_state *ds, struc i1++; j1 = ds->J[i1 + 1] - 1; ds->J[i1] = j1; - error = change(outfile, ds, args, file1, f1, file2, f2, + error = change(outfile, changes, ds, args, file1, f1, file2, f2, i0, i1, j0, j1, &flags); if (error) return (error); } if (m == 0) { - error = change(outfile, ds, args, file1, f1, file2, f2, 1, 0, - 1, ds->len[1], &flags); + error = change(outfile, changes, ds, args, file1, f1, file2, f2, + 1, 0, 1, ds->len[1], &flags); if (error) return (error); } if (ds->anychange != 0) - dump_unified_vec(outfile, ds, args, f1, f2, flags); + dump_unified_vec(outfile, changes, ds, args, f1, f2, flags); return (0); } @@ -906,7 +899,8 @@ uni_range(FILE *outfile, int a, int b) * lines missing from the to file. */ static int -change(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args, +change(FILE *outfile, struct got_diff_changes *changes, + struct got_diff_state *ds, struct got_diff_args *args, const char *file1, FILE *f1, const char *file2, FILE *f2, int a, int b, int c, int d, int *pflags) { @@ -952,7 +946,8 @@ change(FILE *outfile, struct got_diff_state *ds, struc * If this change is more than 'diff_context' lines from the * previous change, dump the record and reset it. */ - dump_unified_vec(outfile, ds, args, f1, f2, *pflags); + dump_unified_vec(outfile, changes, ds, args, f1, f2, + *pflags); } ds->context_vec_ptr++; ds->context_vec_ptr->a = a; @@ -1133,7 +1128,8 @@ match_function(struct got_diff_state *ds, const long * /* dump accumulated "unified" diff changes */ static void -dump_unified_vec(FILE *outfile, struct got_diff_state *ds, struct got_diff_args *args, +dump_unified_vec(FILE *outfile, struct got_diff_changes *changes, + struct got_diff_state *ds, struct got_diff_args *args, FILE *f1, FILE *f2, int flags) { struct context_vec *cvp = ds->context_vec_start; @@ -1167,6 +1163,17 @@ dump_unified_vec(FILE *outfile, struct got_diff_state * are printed together. */ for (; cvp <= ds->context_vec_ptr; cvp++) { + if (changes) { + struct got_diff_change *change; + change = calloc(1, sizeof(*change)); + if (change) { + memcpy(&change->cv, cvp, sizeof(change->cv)); + SIMPLEQ_INSERT_TAIL(&changes->entries, change, + entry); + changes->nchanges++; + } + } + a = cvp->a; b = cvp->b; c = cvp->c; blob - c317a77c20a024f9e60756d2993d242bce101ee9 blob + 93a2f44346f67706520e5e9383b00a94dd7244dd --- lib/got_lib_diff.h +++ lib/got_lib_diff.h @@ -74,6 +74,28 @@ struct excludes { struct excludes *next; }; +/* + * The following struct is used to record change information when + * doing a "context" or "unified" diff. (see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + +struct got_diff_change { + SIMPLEQ_ENTRY(got_diff_change) entry; + struct context_vec cv; +}; + +struct got_diff_changes { + size_t nchanges; + SIMPLEQ_HEAD(, got_diff_change) entries; +}; + struct got_diff_state { int *J; /* will be overlaid on class */ int *class; /* will be overlaid on file[0] */ @@ -110,4 +132,9 @@ struct got_diff_args { }; const struct got_error *got_diffreg(int *, FILE *, - FILE *, int, struct got_diff_args *, struct got_diff_state *, FILE *); + FILE *, int, struct got_diff_args *, struct got_diff_state *, FILE *, + struct got_diff_changes *); + +const struct got_error *got_diff_blob_lines_changed(struct got_diff_changes **, + struct got_blob_object *, struct got_blob_object *); +void got_diff_free_changes(struct got_diff_changes *);