Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
38 #include "got_commit_graph.h"
40 struct got_blame_line {
41 int annotated;
42 struct got_object_id id;
43 off_t offset;
44 };
/*
 * Line-number offsets recorded for one commit, kept in a singly-linked
 * list so that line numbers can be mapped across all commits processed
 * so far.
 */
struct got_blame_diff_offsets {
	/* Offset chunks managed through the got_diffoffset_* functions. */
	struct got_diffoffset_chunks *chunks;
	/* Private copy of the commit ID these offsets were recorded for. */
	struct got_object_id *commit_id;
	/* List linkage. */
	SLIST_ENTRY(got_blame_diff_offsets) entry;
};

/* Head type for a list of struct got_blame_diff_offsets. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
54 struct got_blame {
55 FILE *f;
56 size_t filesize;
57 int nlines;
58 int nannotated;
59 struct got_blame_line *lines; /* one per line */
60 off_t *line_offsets; /* one per line */
61 int ncommits;
62 struct got_blame_diff_offsets_list diff_offsets_list;
63 };
65 static void
66 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
67 {
68 if (diff_offsets->chunks)
69 got_diffoffset_free(diff_offsets->chunks);
70 free(diff_offsets->commit_id);
71 free(diff_offsets);
72 }
74 static const struct got_error *
75 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
76 struct got_object_id *commit_id)
77 {
78 const struct got_error *err = NULL;
80 *diff_offsets = calloc(1, sizeof(**diff_offsets));
81 if (*diff_offsets == NULL)
82 return got_error_from_errno("calloc");
84 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
85 if ((*diff_offsets)->commit_id == NULL) {
86 err = got_error_from_errno("got_object_id_dup");
87 free_diff_offsets(*diff_offsets);
88 *diff_offsets = NULL;
89 return err;
90 }
92 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
93 if (err) {
94 free_diff_offsets(*diff_offsets);
95 return err;
96 }
98 return NULL;
99 }
101 static const struct got_error *
102 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
103 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
104 void *arg)
106 const struct got_error *err = NULL;
107 struct got_blame_line *line;
109 if (lineno < 1 || lineno > blame->nlines)
110 return NULL;
112 line = &blame->lines[lineno - 1];
113 if (line->annotated)
114 return NULL;
116 memcpy(&line->id, id, sizeof(line->id));
117 line->annotated = 1;
118 blame->nannotated++;
119 if (cb)
120 err = cb(arg, blame->nlines, lineno, id);
121 return err;
124 static int
125 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
126 int lineno)
128 struct got_blame_diff_offsets *diff_offsets;
130 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
131 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
133 return lineno;
136 static const struct got_error *
137 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
138 struct got_object_id *commit_id,
139 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
140 void *arg)
142 const struct got_error *err = NULL;
143 struct got_diff_change *change;
144 struct got_blame_diff_offsets *diff_offsets;
146 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
147 int c = change->cv.c;
148 int d = change->cv.d;
149 int new_lineno = c;
150 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
151 int ln;
153 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
154 err = annotate_line(blame,
155 get_blamed_line(&blame->diff_offsets_list, ln),
156 commit_id, cb, arg);
157 if (err)
158 return err;
159 if (blame->nlines == blame->nannotated)
160 return NULL;
164 err = alloc_diff_offsets(&diff_offsets, commit_id);
165 if (err)
166 return err;
167 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
168 int a = change->cv.a;
169 int b = change->cv.b;
170 int c = change->cv.c;
171 int d = change->cv.d;
172 int old_lineno = a;
173 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
174 int new_lineno = c;
175 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
177 err = got_diffoffset_add(diff_offsets->chunks,
178 old_lineno, old_length, new_lineno, new_length);
179 if (err) {
180 free_diff_offsets(diff_offsets);
181 return err;
184 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
186 return NULL;
189 static const struct got_error *
190 blame_commit(struct got_blame *blame, struct got_object_id *id,
191 struct got_object_id *pid, const char *path, struct got_repository *repo,
192 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
193 void *arg)
195 const struct got_error *err = NULL;
196 struct got_object *obj = NULL, *pobj = NULL;
197 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
198 struct got_blob_object *blob = NULL, *pblob = NULL;
199 struct got_diff_changes *changes = NULL;
201 err = got_object_id_by_path(&obj_id, repo, id, path);
202 if (err)
203 goto done;
205 err = got_object_open(&obj, repo, obj_id);
206 if (err)
207 goto done;
209 if (obj->type != GOT_OBJ_TYPE_BLOB) {
210 err = got_error(GOT_ERR_OBJ_TYPE);
211 goto done;
214 if (pid) {
215 err = got_object_id_by_path(&pobj_id, repo, pid, path);
216 if (err) {
217 if (err->code == GOT_ERR_NO_TREE_ENTRY) {
218 /* Blob's history began in previous commit. */
219 err = got_error(GOT_ERR_ITER_COMPLETED);
221 goto done;
224 /* If IDs match then don't bother with diffing. */
225 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
226 if (cb)
227 err = cb(arg, blame->nlines, -1, id);
228 goto done;
231 err = got_object_open(&pobj, repo, pobj_id);
232 if (err)
233 goto done;
235 if (pobj->type != GOT_OBJ_TYPE_BLOB) {
236 /*
237 * Encountered a non-blob at the path (probably a tree).
238 * Blob's history began in previous commit.
239 */
240 err = got_error(GOT_ERR_ITER_COMPLETED);
241 goto done;
244 err = got_object_blob_open(&pblob, repo, pobj, 8192);
245 if (err)
246 goto done;
249 err = got_object_blob_open(&blob, repo, obj, 8192);
250 if (err)
251 goto done;
253 err = got_diff_blob_lines_changed(&changes, pblob, blob);
254 if (err)
255 goto done;
257 if (changes) {
258 err = blame_changes(blame, changes, id, cb, arg);
259 got_diff_free_changes(changes);
260 } else if (cb)
261 err = cb(arg, blame->nlines, -1, id);
262 done:
263 free(obj_id);
264 free(pobj_id);
265 if (obj)
266 got_object_close(obj);
267 if (pobj)
268 got_object_close(pobj);
269 if (blob)
270 got_object_blob_close(blob);
271 if (pblob)
272 got_object_blob_close(pblob);
273 return err;
276 static const struct got_error *
277 blame_close(struct got_blame *blame)
279 const struct got_error *err = NULL;
280 struct got_blame_diff_offsets *diff_offsets;
282 if (blame->f && fclose(blame->f) != 0)
283 err = got_error_from_errno("fclose");
284 free(blame->lines);
285 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
286 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
287 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
288 free_diff_offsets(diff_offsets);
290 free(blame);
291 return err;
294 static const struct got_error *
295 blame_open(struct got_blame **blamep, const char *path,
296 struct got_object_id *start_commit_id, struct got_repository *repo,
297 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
298 void *arg)
300 const struct got_error *err = NULL;
301 struct got_object *obj = NULL;
302 struct got_object_id *obj_id = NULL;
303 struct got_blob_object *blob = NULL;
304 struct got_blame *blame = NULL;
305 struct got_object_id *id = NULL, *parent_id = NULL;
306 int lineno;
307 struct got_commit_graph *graph = NULL;
309 *blamep = NULL;
311 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
312 if (err)
313 return err;
315 err = got_object_open(&obj, repo, obj_id);
316 if (err)
317 goto done;
319 if (obj->type != GOT_OBJ_TYPE_BLOB) {
320 err = got_error(GOT_ERR_OBJ_TYPE);
321 goto done;
324 err = got_object_blob_open(&blob, repo, obj, 8192);
325 if (err)
326 goto done;
328 blame = calloc(1, sizeof(*blame));
329 if (blame == NULL)
330 return got_error_from_errno("calloc");
332 blame->f = got_opentemp();
333 if (blame->f == NULL) {
334 err = got_error_from_errno("got_opentemp");
335 goto done;
337 err = got_object_blob_dump_to_file(&blame->filesize, &blame->nlines,
338 &blame->line_offsets, blame->f, blob);
339 if (err)
340 goto done;
342 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
343 if (blame->lines == NULL) {
344 err = got_error_from_errno("calloc");
345 goto done;
348 err = got_commit_graph_open(&graph, start_commit_id, path, 0, repo);
349 if (err)
350 return err;
351 err = got_commit_graph_iter_start(graph, start_commit_id, repo);
352 if (err)
353 goto done;
355 id = NULL;
356 for (;;) {
357 err = got_commit_graph_iter_next(&parent_id, graph);
358 if (err) {
359 if (err->code == GOT_ERR_ITER_COMPLETED) {
360 if (id)
361 err = blame_commit(blame, id,
362 parent_id, path, repo, cb, arg);
363 else
364 err = NULL;
365 break;
367 if (err->code != GOT_ERR_ITER_NEED_MORE)
368 break;
369 err = got_commit_graph_fetch_commits(graph, 1, repo);
370 if (err)
371 break;
372 continue;
374 if (id) {
375 err = blame_commit(blame, id, parent_id, path, repo,
376 cb, arg);
377 if (err) {
378 if (err->code == GOT_ERR_ITER_COMPLETED)
379 err = NULL;
380 break;
382 if (blame->nannotated == blame->nlines)
383 break;
385 id = parent_id;
388 if (id && blame->nannotated < blame->nlines) {
389 /* Annotate remaining non-annotated lines with last commit. */
390 for (lineno = 1; lineno <= blame->nlines; lineno++) {
391 err = annotate_line(blame, lineno, id, cb, arg);
392 if (err)
393 goto done;
397 done:
398 if (graph)
399 got_commit_graph_close(graph);
400 free(obj_id);
401 if (obj)
402 got_object_close(obj);
403 if (blob)
404 got_object_blob_close(blob);
405 if (err) {
406 if (blame)
407 blame_close(blame);
408 } else
409 *blamep = blame;
411 return err;
414 static const struct got_error *
415 blame_line(struct got_object_id **id, struct got_blame *blame, int lineno)
417 if (lineno < 1 || lineno > blame->nlines) {
418 *id = NULL;
419 return got_error(GOT_ERR_RANGE);
421 *id = &blame->lines[lineno - 1].id;
422 return NULL;
/*
 * Read the next line from 'f' using fparseln(3).
 *
 * All three special characters passed to fparseln() are NUL.
 * Per fparseln(3) these are the escape, continuation, and comment
 * characters, and NUL disables each of them.
 *
 * If 'len' is not NULL it receives the line length reported by
 * fparseln(). Returns whatever fparseln() returns; callers in this file
 * free() the result and treat NULL as "no more lines".
 */
static char *
parse_next_line(FILE *f, size_t *len)
{
	char *line;
	size_t linelen;
	/*
	 * fparseln() increments the line counter it is given, so the
	 * counter must not be left uninitialized.
	 */
	size_t lineno = 0;
	const char delim[3] = { '\0', '\0', '\0'};

	line = fparseln(f, &linelen, &lineno, delim, 0);
	if (len)
		*len = linelen;
	return line;
}
439 const struct got_error *
440 got_blame(const char *path, struct got_object_id *start_commit_id,
441 struct got_repository *repo, FILE *outfile)
443 const struct got_error *err = NULL, *close_err = NULL;
444 struct got_blame *blame;
445 int lineno;
446 char *abspath;
448 if (asprintf(&abspath, "%s%s", path[0] == '/' ? "" : "/", path) == -1)
449 return got_error_from_errno2("asprintf", path);
451 err = blame_open(&blame, abspath, start_commit_id, repo, NULL, NULL);
452 if (err) {
453 free(abspath);
454 return err;
457 for (lineno = 1; lineno <= blame->nlines; lineno++) {
458 struct got_object_id *id = NULL;
459 char *line, *id_str;
461 line = parse_next_line(blame->f, NULL);
462 if (line == NULL)
463 break;
465 err = blame_line(&id, blame, lineno);
466 if (err) {
467 free(line);
468 break;
471 err = got_object_id_str(&id_str, id);
472 /* Do not free id; It points into blame->lines. */
473 if (err) {
474 free(line);
475 break;
478 fprintf(outfile, "%.8s %s\n", id_str, line);
479 free(line);
480 free(id_str);
483 close_err = blame_close(blame);
484 free(abspath);
485 return err ? err : close_err;
/*
 * Blame the file at 'path' as of commit 'commit_id', reporting results
 * through the callback 'cb' (called with 'arg') instead of writing any
 * output.
 *
 * A leading slash is prepended to 'path' if it does not have one. The
 * blame state is discarded again before returning.
 *
 * Returns the error from the blame operation if there was one, else any
 * error from closing the blame state.
 */
const struct got_error *
got_blame_incremental(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *open_err, *close_err = NULL;
	struct got_blame *blame;
	char *abspath;
	const char *prefix = (path[0] == '/') ? "" : "/";

	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_from_errno2("asprintf", path);

	/* blame_open() sets 'blame' to NULL before it can fail. */
	open_err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	if (blame != NULL)
		close_err = blame_close(blame);

	if (open_err != NULL)
		return open_err;
	return close_err;
}