Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
38 #include "got_commit_graph.h"
40 struct got_blame_line {
41 int annotated;
42 struct got_object_id id;
43 };
/*
 * Line-offset information recorded for one processed commit.
 * Records are linked into got_blame.diff_offsets_list.
 */
struct got_blame_diff_offsets {
	/* Offset chunks; allocated by got_diffoffset_alloc(). */
	struct got_diffoffset_chunks		*chunks;
	/* Private copy of the commit ID; released by free_diff_offsets(). */
	struct got_object_id			*commit_id;
	/* Singly-linked list linkage. */
	SLIST_ENTRY(got_blame_diff_offsets)	 entry;
};

/* List head type for a chain of per-commit diff offset records. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
53 struct got_blame {
54 FILE *f;
55 int nlines;
56 int nannotated;
57 struct got_blame_line *lines; /* one per line */
58 int ncommits;
59 struct got_blame_diff_offsets_list diff_offsets_list;
60 };
62 static void
63 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
64 {
65 if (diff_offsets->chunks)
66 got_diffoffset_free(diff_offsets->chunks);
67 free(diff_offsets->commit_id);
68 free(diff_offsets);
69 }
71 static const struct got_error *
72 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
73 struct got_object_id *commit_id)
74 {
75 const struct got_error *err = NULL;
77 *diff_offsets = calloc(1, sizeof(**diff_offsets));
78 if (*diff_offsets == NULL)
79 return got_error_prefix_errno("calloc");
81 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
82 if ((*diff_offsets)->commit_id == NULL) {
83 err = got_error_prefix_errno("got_object_id_dup");
84 free_diff_offsets(*diff_offsets);
85 *diff_offsets = NULL;
86 return err;
87 }
89 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
90 if (err) {
91 free_diff_offsets(*diff_offsets);
92 return err;
93 }
95 return NULL;
96 }
98 static const struct got_error *
99 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
100 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
101 void *arg)
103 const struct got_error *err = NULL;
104 struct got_blame_line *line;
106 if (lineno < 1 || lineno > blame->nlines)
107 return NULL;
109 line = &blame->lines[lineno - 1];
110 if (line->annotated)
111 return NULL;
113 memcpy(&line->id, id, sizeof(line->id));
114 line->annotated = 1;
115 blame->nannotated++;
116 if (cb)
117 err = cb(arg, blame->nlines, lineno, id);
118 return err;
121 static int
122 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
123 int lineno)
125 struct got_blame_diff_offsets *diff_offsets;
127 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
128 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
130 return lineno;
133 static const struct got_error *
134 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
135 struct got_object_id *commit_id,
136 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
137 void *arg)
139 const struct got_error *err = NULL;
140 struct got_diff_change *change;
141 struct got_blame_diff_offsets *diff_offsets;
143 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
144 int c = change->cv.c;
145 int d = change->cv.d;
146 int new_lineno = c;
147 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
148 int ln;
150 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
151 err = annotate_line(blame,
152 get_blamed_line(&blame->diff_offsets_list, ln),
153 commit_id, cb, arg);
154 if (err)
155 return err;
156 if (blame->nlines == blame->nannotated)
157 return NULL;
161 err = alloc_diff_offsets(&diff_offsets, commit_id);
162 if (err)
163 return err;
164 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
165 int a = change->cv.a;
166 int b = change->cv.b;
167 int c = change->cv.c;
168 int d = change->cv.d;
169 int old_lineno = a;
170 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
171 int new_lineno = c;
172 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
174 err = got_diffoffset_add(diff_offsets->chunks,
175 old_lineno, old_length, new_lineno, new_length);
176 if (err) {
177 free_diff_offsets(diff_offsets);
178 return err;
181 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
183 return NULL;
186 static const struct got_error *
187 blame_commit(struct got_blame *blame, struct got_object_id *id,
188 struct got_object_id *pid, const char *path, struct got_repository *repo,
189 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
190 void *arg)
192 const struct got_error *err = NULL;
193 struct got_object *obj = NULL, *pobj = NULL;
194 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
195 struct got_blob_object *blob = NULL, *pblob = NULL;
196 struct got_diff_changes *changes = NULL;
198 err = got_object_id_by_path(&obj_id, repo, id, path);
199 if (err)
200 goto done;
202 err = got_object_open(&obj, repo, obj_id);
203 if (err)
204 goto done;
206 if (obj->type != GOT_OBJ_TYPE_BLOB) {
207 err = got_error(GOT_ERR_OBJ_TYPE);
208 goto done;
211 err = got_object_id_by_path(&pobj_id, repo, pid, path);
212 if (err) {
213 if (err->code == GOT_ERR_NO_TREE_ENTRY) {
214 /* Blob's history began in previous commit. */
215 err = got_error(GOT_ERR_ITER_COMPLETED);
217 goto done;
220 /* If IDs match then don't bother with diffing. */
221 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
222 if (cb)
223 err = cb(arg, blame->nlines, -1, id);
224 goto done;
227 err = got_object_open(&pobj, repo, pobj_id);
228 if (err)
229 goto done;
231 if (pobj->type != GOT_OBJ_TYPE_BLOB) {
232 /*
233 * Encountered a non-blob at the path (probably a tree).
234 * Blob's history began in previous commit.
235 */
236 err = got_error(GOT_ERR_ITER_COMPLETED);
237 goto done;
240 err = got_object_blob_open(&blob, repo, obj, 8192);
241 if (err)
242 goto done;
244 err = got_object_blob_open(&pblob, repo, pobj, 8192);
245 if (err)
246 goto done;
248 err = got_diff_blob_lines_changed(&changes, pblob, blob);
249 if (err)
250 goto done;
252 if (changes) {
253 err = blame_changes(blame, changes, id, cb, arg);
254 got_diff_free_changes(changes);
255 } else if (cb)
256 err = cb(arg, blame->nlines, -1, id);
257 done:
258 free(obj_id);
259 free(pobj_id);
260 if (obj)
261 got_object_close(obj);
262 if (pobj)
263 got_object_close(pobj);
264 if (blob)
265 got_object_blob_close(blob);
266 if (pblob)
267 got_object_blob_close(pblob);
268 return err;
271 static const struct got_error *
272 blame_close(struct got_blame *blame)
274 const struct got_error *err = NULL;
275 struct got_blame_diff_offsets *diff_offsets;
277 if (blame->f && fclose(blame->f) != 0)
278 err = got_error_prefix_errno("fclose");
279 free(blame->lines);
280 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
281 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
282 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
283 free_diff_offsets(diff_offsets);
285 free(blame);
286 return err;
289 static const struct got_error *
290 blame_open(struct got_blame **blamep, const char *path,
291 struct got_object_id *start_commit_id, struct got_repository *repo,
292 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
293 void *arg)
295 const struct got_error *err = NULL;
296 struct got_object *obj = NULL;
297 struct got_object_id *obj_id = NULL;
298 struct got_blob_object *blob = NULL;
299 struct got_blame *blame = NULL;
300 struct got_object_id *id = NULL;
301 int lineno;
302 struct got_commit_graph *graph = NULL;
304 *blamep = NULL;
306 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
307 if (err)
308 return err;
310 err = got_object_open(&obj, repo, obj_id);
311 if (err)
312 goto done;
314 if (obj->type != GOT_OBJ_TYPE_BLOB) {
315 err = got_error(GOT_ERR_OBJ_TYPE);
316 goto done;
319 err = got_object_blob_open(&blob, repo, obj, 8192);
320 if (err)
321 goto done;
323 blame = calloc(1, sizeof(*blame));
324 if (blame == NULL)
325 return got_error_prefix_errno("calloc");
327 blame->f = got_opentemp();
328 if (blame->f == NULL) {
329 err = got_error_prefix_errno("got_opentemp");
330 goto done;
332 err = got_object_blob_dump_to_file(NULL, &blame->nlines, blame->f,
333 blob);
334 if (err)
335 goto done;
337 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
338 if (blame->lines == NULL) {
339 err = got_error_prefix_errno("calloc");
340 goto done;
343 err = got_commit_graph_open(&graph, start_commit_id, path, 0, repo);
344 if (err)
345 return err;
346 err = got_commit_graph_iter_start(graph, start_commit_id, repo);
347 if (err)
348 goto done;
350 id = NULL;
351 for (;;) {
352 struct got_object_id *next_id;
354 err = got_commit_graph_iter_next(&next_id, graph);
355 if (err) {
356 if (err->code == GOT_ERR_ITER_COMPLETED) {
357 err = NULL;
358 break;
360 if (err->code != GOT_ERR_ITER_NEED_MORE)
361 break;
362 err = got_commit_graph_fetch_commits(graph, 1, repo);
363 if (err)
364 break;
365 else
366 continue;
368 if (next_id == NULL)
369 break;
370 if (id) {
371 err = blame_commit(blame, id, next_id, path, repo,
372 cb, arg);
373 if (err) {
374 if (err->code == GOT_ERR_ITER_COMPLETED)
375 err = NULL;
376 break;
378 if (blame->nannotated == blame->nlines)
379 break;
381 id = next_id;
384 if (id && blame->nannotated < blame->nlines) {
385 /* Annotate remaining non-annotated lines with last commit. */
386 for (lineno = 1; lineno <= blame->nlines; lineno++) {
387 err = annotate_line(blame, lineno, id, cb, arg);
388 if (err)
389 goto done;
393 done:
394 if (graph)
395 got_commit_graph_close(graph);
396 free(obj_id);
397 if (obj)
398 got_object_close(obj);
399 if (blob)
400 got_object_blob_close(blob);
401 if (err) {
402 if (blame)
403 blame_close(blame);
404 } else
405 *blamep = blame;
407 return err;
410 static const struct got_error *
411 blame_line(struct got_object_id **id, struct got_blame *blame, int lineno)
413 if (lineno < 1 || lineno > blame->nlines)
414 return got_error(GOT_ERR_RANGE);
415 *id = &blame->lines[lineno - 1].id;
416 return NULL;
/*
 * Fetch the next line from 'f' using fparseln(3), passing NUL for all
 * three delimiter characters and no flags.
 *
 * Returns whatever fparseln() returned. If 'len' is not NULL it receives
 * the length reported by fparseln().
 */
static char *
parse_next_line(FILE *f, size_t *len)
{
	const char delim[3] = { '\0', '\0', '\0'};
	size_t nread, lineno;
	char *text;

	text = fparseln(f, &nread, &lineno, delim, 0);
	if (len != NULL)
		*len = nread;

	return text;
}
433 const struct got_error *
434 got_blame(const char *path, struct got_object_id *start_commit_id,
435 struct got_repository *repo, FILE *outfile)
437 const struct got_error *err = NULL, *close_err = NULL;
438 struct got_blame *blame;
439 int lineno;
440 char *abspath;
442 if (asprintf(&abspath, "%s%s", path[0] == '/' ? "" : "/", path) == -1)
443 return got_error_prefix_errno2("asprintf", path);
445 err = blame_open(&blame, abspath, start_commit_id, repo, NULL, NULL);
446 if (err) {
447 free(abspath);
448 return err;
451 for (lineno = 1; lineno <= blame->nlines; lineno++) {
452 struct got_object_id *id;
453 char *line, *id_str;
455 line = parse_next_line(blame->f, NULL);
456 if (line == NULL)
457 break;
459 err = blame_line(&id, blame, lineno);
460 if (err) {
461 free(line);
462 break;
465 err = got_object_id_str(&id_str, id);
466 /* Do not free id; It points into blame->lines. */
467 if (err) {
468 free(line);
469 break;
472 fprintf(outfile, "%.8s %s\n", id_str, line);
473 free(line);
474 free(id_str);
477 close_err = blame_close(blame);
478 free(abspath);
479 return err ? err : close_err;
/*
 * Blame the file at 'path' as of 'commit_id', delivering results solely
 * through the callback 'cb' (called with 'arg') while the blame is being
 * computed. Nothing is printed and the blame object is discarded before
 * returning.
 *
 * A leading '/' is prepended to 'path' if it lacks one. Returns the error
 * from computing the blame if there was one, otherwise any error from
 * closing the blame object.
 */
const struct got_error *
got_blame_incremental(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *err, *close_err = NULL;
	struct got_blame *blame;
	char *abspath;
	const char *prefix = (path[0] == '/') ? "" : "/";

	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_prefix_errno2("asprintf", path);

	err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	/* blame_open() leaves blame set to NULL whenever it fails. */
	if (blame != NULL)
		close_err = blame_close(blame);

	if (err)
		return err;
	return close_err;
}