Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/stat.h>
20 #include <sha1.h>
21 #include <string.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <time.h>
25 #include <util.h>
26 #include <zlib.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_blame.h"
31 #include "got_opentemp.h"
33 #include "got_lib_inflate.h"
34 #include "got_lib_delta.h"
35 #include "got_lib_object.h"
36 #include "got_lib_diff.h"
37 #include "got_lib_diffoffset.h"
38 #include "got_commit_graph.h"
40 struct got_blame_line {
41 int annotated;
42 struct got_object_id id;
43 };
/*
 * Line offset chunks recorded for one commit. Records are linked together
 * through 'entry' into a got_blame_diff_offsets_list.
 */
struct got_blame_diff_offsets {
	/* Offset chunks; released with got_diffoffset_free(). */
	struct got_diffoffset_chunks *chunks;
	/* Heap-allocated commit ID; released with free(). */
	struct got_object_id *commit_id;
	/* Linkage within the singly-linked list. */
	SLIST_ENTRY(got_blame_diff_offsets) entry;
};

/* Head of a singly-linked list of diff offsets records. */
SLIST_HEAD(got_blame_diff_offsets_list, got_blame_diff_offsets);
53 struct got_blame {
54 FILE *f;
55 size_t nlines;
56 struct got_blame_line *lines; /* one per line */
57 int ncommits;
58 struct got_blame_diff_offsets_list diff_offsets_list;
59 };
61 static void
62 free_diff_offsets(struct got_blame_diff_offsets *diff_offsets)
63 {
64 if (diff_offsets->chunks)
65 got_diffoffset_free(diff_offsets->chunks);
66 free(diff_offsets->commit_id);
67 free(diff_offsets);
68 }
70 static const struct got_error *
71 alloc_diff_offsets(struct got_blame_diff_offsets **diff_offsets,
72 struct got_object_id *commit_id)
73 {
74 const struct got_error *err = NULL;
76 *diff_offsets = calloc(1, sizeof(**diff_offsets));
77 if (*diff_offsets == NULL)
78 return got_error_from_errno();
80 (*diff_offsets)->commit_id = got_object_id_dup(commit_id);
81 if ((*diff_offsets)->commit_id == NULL) {
82 err = got_error_from_errno();
83 free_diff_offsets(*diff_offsets);
84 *diff_offsets = NULL;
85 return err;
86 }
88 err = got_diffoffset_alloc(&(*diff_offsets)->chunks);
89 if (err) {
90 free_diff_offsets(*diff_offsets);
91 return err;
92 }
94 return NULL;
95 }
97 static const struct got_error *
98 annotate_line(struct got_blame *blame, int lineno, struct got_object_id *id,
99 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
100 void *arg)
102 const struct got_error *err = NULL;
103 struct got_blame_line *line;
105 if (lineno < 1 || lineno > blame->nlines)
106 return got_error(GOT_ERR_RANGE);
108 line = &blame->lines[lineno - 1];
109 if (line->annotated)
110 return NULL;
112 memcpy(&line->id, id, sizeof(line->id));
113 line->annotated = 1;
114 if (cb)
115 err = cb(arg, blame->nlines, lineno, id);
116 return err;
119 static int
120 get_blamed_line(struct got_blame_diff_offsets_list *diff_offsets_list,
121 int lineno)
123 struct got_blame_diff_offsets *diff_offsets;
125 SLIST_FOREACH(diff_offsets, diff_offsets_list, entry)
126 lineno = got_diffoffset_get(diff_offsets->chunks, lineno);
128 return lineno;
131 static const struct got_error *
132 blame_changes(struct got_blame *blame, struct got_diff_changes *changes,
133 struct got_object_id *commit_id,
134 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
135 void *arg)
137 const struct got_error *err = NULL;
138 struct got_diff_change *change;
139 struct got_blame_diff_offsets *diff_offsets;
141 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
142 int c = change->cv.c;
143 int d = change->cv.d;
144 int new_lineno = c;
145 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
146 int ln;
148 for (ln = new_lineno; ln < new_lineno + new_length; ln++) {
149 err = annotate_line(blame,
150 get_blamed_line(&blame->diff_offsets_list, ln),
151 commit_id, cb, arg);
152 if (err)
153 return err;
157 err = alloc_diff_offsets(&diff_offsets, commit_id);
158 if (err)
159 return err;
160 SIMPLEQ_FOREACH(change, &changes->entries, entry) {
161 int a = change->cv.a;
162 int b = change->cv.b;
163 int c = change->cv.c;
164 int d = change->cv.d;
165 int old_lineno = a;
166 int old_length = (a < b ? b - a + 1 : (a == b ? 1 : 0));
167 int new_lineno = c;
168 int new_length = (c < d ? d - c + 1 : (c == d ? 1 : 0));
170 err = got_diffoffset_add(diff_offsets->chunks,
171 old_lineno, old_length, new_lineno, new_length);
172 if (err) {
173 free_diff_offsets(diff_offsets);
174 return err;
177 SLIST_INSERT_HEAD(&blame->diff_offsets_list, diff_offsets, entry);
179 return NULL;
182 static const struct got_error *
183 blame_commit(struct got_blame *blame, struct got_object_id *id,
184 struct got_object_id *pid, const char *path, struct got_repository *repo,
185 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
186 void *arg)
188 const struct got_error *err = NULL;
189 struct got_object *obj = NULL, *pobj = NULL;
190 struct got_object_id *obj_id = NULL, *pobj_id = NULL;
191 struct got_blob_object *blob = NULL, *pblob = NULL;
192 struct got_diff_changes *changes = NULL;
194 err = got_object_id_by_path(&obj_id, repo, id, path);
195 if (err)
196 goto done;
198 err = got_object_open(&obj, repo, obj_id);
199 if (err)
200 goto done;
202 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
203 err = got_error(GOT_ERR_OBJ_TYPE);
204 goto done;
207 err = got_object_id_by_path(&pobj_id, repo, pid, path);
208 if (err) {
209 if (err->code == GOT_ERR_NO_OBJ) {
210 /* Blob's history began in previous commit. */
211 err = got_error(GOT_ERR_ITER_COMPLETED);
213 goto done;
216 /* If IDs match then don't bother with diffing. */
217 if (got_object_id_cmp(obj_id, pobj_id) == 0) {
218 if (cb)
219 err = cb(arg, blame->nlines, -1, id);
220 goto done;
223 err = got_object_open(&pobj, repo, pobj_id);
224 if (err)
225 goto done;
227 if (got_object_get_type(pobj) != GOT_OBJ_TYPE_BLOB) {
228 /*
229 * Encountered a non-blob at the path (probably a tree).
230 * Blob's history began in previous commit.
231 */
232 err = got_error(GOT_ERR_ITER_COMPLETED);
233 goto done;
236 err = got_object_blob_open(&blob, repo, obj, 8192);
237 if (err)
238 goto done;
240 err = got_object_blob_open(&pblob, repo, pobj, 8192);
241 if (err)
242 goto done;
244 err = got_diff_blob_lines_changed(&changes, pblob, blob);
245 if (err)
246 goto done;
248 if (changes) {
249 err = blame_changes(blame, changes, id, cb, arg);
250 got_diff_free_changes(changes);
251 } else if (cb)
252 err = cb(arg, blame->nlines, -1, id);
253 done:
254 free(obj_id);
255 free(pobj_id);
256 if (obj)
257 got_object_close(obj);
258 if (pobj)
259 got_object_close(pobj);
260 if (blob)
261 got_object_blob_close(blob);
262 if (pblob)
263 got_object_blob_close(pblob);
264 return err;
267 static void
268 blame_close(struct got_blame *blame)
270 struct got_blame_diff_offsets *diff_offsets;
272 if (blame->f)
273 fclose(blame->f);
274 free(blame->lines);
275 while (!SLIST_EMPTY(&blame->diff_offsets_list)) {
276 diff_offsets = SLIST_FIRST(&blame->diff_offsets_list);
277 SLIST_REMOVE_HEAD(&blame->diff_offsets_list, entry);
278 free_diff_offsets(diff_offsets);
280 free(blame);
283 static const struct got_error *
284 blame_open(struct got_blame **blamep, const char *path,
285 struct got_object_id *start_commit_id, struct got_repository *repo,
286 const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
287 void *arg)
289 const struct got_error *err = NULL;
290 struct got_object *obj = NULL;
291 struct got_object_id *obj_id = NULL;
292 struct got_blob_object *blob = NULL;
293 struct got_blame *blame = NULL;
294 struct got_object_id *id = NULL;
295 int lineno;
296 struct got_commit_graph *graph = NULL;
298 *blamep = NULL;
300 err = got_object_id_by_path(&obj_id, repo, start_commit_id, path);
301 if (err)
302 return err;
304 err = got_object_open(&obj, repo, obj_id);
305 if (err)
306 goto done;
308 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
309 err = got_error(GOT_ERR_OBJ_TYPE);
310 goto done;
313 err = got_object_blob_open(&blob, repo, obj, 8192);
314 if (err)
315 goto done;
317 blame = calloc(1, sizeof(*blame));
318 if (blame == NULL)
319 return got_error_from_errno();
321 blame->f = got_opentemp();
322 if (blame->f == NULL) {
323 err = got_error_from_errno();
324 goto done;
326 err = got_object_blob_dump_to_file(NULL, &blame->nlines, blame->f,
327 blob);
328 if (err)
329 goto done;
331 blame->lines = calloc(blame->nlines, sizeof(*blame->lines));
332 if (blame->lines == NULL) {
333 err = got_error_from_errno();
334 goto done;
337 err = got_commit_graph_open(&graph, start_commit_id, path, 0, repo);
338 if (err)
339 return err;
340 err = got_commit_graph_iter_start(graph, start_commit_id, repo);
341 if (err)
342 goto done;
344 id = NULL;
345 while (1) {
346 struct got_object_id *next_id;
348 err = got_commit_graph_iter_next(&next_id, graph);
349 if (err) {
350 if (err->code == GOT_ERR_ITER_COMPLETED) {
351 err = NULL;
352 break;
354 if (err->code != GOT_ERR_ITER_NEED_MORE)
355 break;
356 err = got_commit_graph_fetch_commits(graph, 1, repo);
357 if (err)
358 break;
359 else
360 continue;
362 if (next_id == NULL)
363 break;
364 if (id) {
365 err = blame_commit(blame, id, next_id, path, repo,
366 cb, arg);
367 if (err) {
368 if (err->code == GOT_ERR_ITER_COMPLETED)
369 err = NULL;
370 break;
373 id = next_id;
376 if (id) {
377 /* Annotate remaining non-annotated lines with last commit. */
378 for (lineno = 1; lineno <= blame->nlines; lineno++) {
379 err = annotate_line(blame, lineno, id, cb, arg);
380 if (err)
381 goto done;
385 done:
386 if (graph)
387 got_commit_graph_close(graph);
388 free(obj_id);
389 if (obj)
390 got_object_close(obj);
391 if (blob)
392 got_object_blob_close(blob);
393 if (err) {
394 if (blame)
395 blame_close(blame);
396 } else
397 *blamep = blame;
399 return err;
402 static const struct got_error *
403 blame_line(struct got_object_id **id, struct got_blame *blame, int lineno)
405 if (lineno < 1 || lineno > blame->nlines)
406 return got_error(GOT_ERR_RANGE);
407 *id = &blame->lines[lineno - 1].id;
408 return NULL;
/*
 * Read the next line from 'f' with fparseln(3).
 *
 * All three delimiter characters are NUL which, per fparseln(3), turns off
 * escape, continuation and comment processing.
 *
 * Returns whatever fparseln() returns: a buffer the caller must free(), or
 * NULL. If 'len' is not NULL it receives the length stored by fparseln(),
 * or 0 if fparseln() stored none.
 */
static char *
parse_next_line(FILE *f, size_t *len)
{
	const char delim[3] = { '\0', '\0', '\0' };
	size_t linelen = 0;
	char *line;

	/*
	 * No line counter is needed here. Pass NULL rather than a pointer
	 * to an uninitialized variable, which fparseln() would increment.
	 */
	line = fparseln(f, &linelen, NULL, delim, 0);
	if (len != NULL)
		*len = linelen;
	return line;
}
425 const struct got_error *
426 got_blame(const char *path, struct got_object_id *start_commit_id,
427 struct got_repository *repo, FILE *outfile)
429 const struct got_error *err = NULL;
430 struct got_blame *blame;
431 int lineno;
432 char *abspath;
434 if (asprintf(&abspath, "%s%s", path[0] == '/' ? "" : "/", path) == -1)
435 return got_error_from_errno();
437 err = blame_open(&blame, abspath, start_commit_id, repo, NULL, NULL);
438 if (err) {
439 free(abspath);
440 return err;
443 for (lineno = 1; lineno <= blame->nlines; lineno++) {
444 struct got_object_id *id;
445 char *line, *id_str;
447 line = parse_next_line(blame->f, NULL);
448 if (line == NULL)
449 break;
451 err = blame_line(&id, blame, lineno);
452 if (err) {
453 free(line);
454 break;
457 err = got_object_id_str(&id_str, id);
458 /* Do not free id; It points into blame->lines. */
459 if (err) {
460 free(line);
461 break;
464 fprintf(outfile, "%.8s %s\n", id_str, line);
465 free(line);
466 free(id_str);
469 blame_close(blame);
470 free(abspath);
471 return err;
/*
 * Blame the file at 'path' as of commit_id, reporting results through cb
 * only: the blame computed by blame_open() is closed again right away.
 *
 * A leading '/' is prepended to path if it lacks one. Returns the result
 * of blame_open(), or an errno-based error if building the path fails.
 */
const struct got_error *
got_blame_incremental(const char *path, struct got_object_id *commit_id,
    struct got_repository *repo,
    const struct got_error *(*cb)(void *, int, int, struct got_object_id *),
    void *arg)
{
	const struct got_error *err;
	const char *prefix = (path[0] == '/') ? "" : "/";
	struct got_blame *blame = NULL;
	char *abspath;

	if (asprintf(&abspath, "%s%s", prefix, path) == -1)
		return got_error_from_errno();

	err = blame_open(&blame, abspath, commit_id, repo, cb, arg);
	free(abspath);

	if (blame != NULL)
		blame_close(blame);
	return err;
}