Blob


1 /*
2 * Copyright (c) 2022 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/queue.h>
18 #include <sys/tree.h>
19 #include <sys/stat.h>
21 #include <errno.h>
22 #include <limits.h>
23 #include <sha1.h>
24 #include <sha2.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
30 #include "got_error.h"
31 #include "got_object.h"
32 #include "got_repository.h"
33 #include "got_path.h"
35 #include "got_lib_delta.h"
36 #include "got_lib_object.h"
37 #include "got_lib_object_cache.h"
38 #include "got_lib_object_parse.h"
39 #include "got_lib_pack.h"
40 #include "got_lib_repository.h"
42 const struct got_error *
43 got_object_open_packed(struct got_object **obj, struct got_object_id *id,
44 struct got_repository *repo)
45 {
46 const struct got_error *err = NULL;
47 struct got_pack *pack = NULL;
48 struct got_packidx *packidx = NULL;
49 int idx;
50 char *path_packfile;
52 err = got_repo_search_packidx(&packidx, &idx, repo, id);
53 if (err)
54 return err;
56 err = got_packidx_get_packfile_path(&path_packfile,
57 packidx->path_packidx);
58 if (err)
59 return err;
61 pack = got_repo_get_cached_pack(repo, path_packfile);
62 if (pack == NULL) {
63 err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
64 if (err)
65 goto done;
66 }
68 err = got_packfile_open_object(obj, pack, packidx, idx, id);
69 if (err)
70 return err;
71 (*obj)->refcnt++;
73 err = got_repo_cache_object(repo, id, *obj);
74 if (err) {
75 if (err->code == GOT_ERR_OBJ_EXISTS ||
76 err->code == GOT_ERR_OBJ_TOO_LARGE)
77 err = NULL;
78 }
79 done:
80 free(path_packfile);
81 return err;
82 }
84 const struct got_error *
85 got_object_open_from_packfile(struct got_object **obj, struct got_object_id *id,
86 struct got_pack *pack, struct got_packidx *packidx, int obj_idx,
87 struct got_repository *repo)
88 {
89 const struct got_error *err;
91 *obj = got_repo_get_cached_object(repo, id);
92 if (*obj != NULL) {
93 (*obj)->refcnt++;
94 return NULL;
95 }
97 err = got_packfile_open_object(obj, pack, packidx, obj_idx, id);
98 if (err)
99 return err;
100 (*obj)->refcnt++;
102 err = got_repo_cache_object(repo, id, *obj);
103 if (err) {
104 if (err->code == GOT_ERR_OBJ_EXISTS ||
105 err->code == GOT_ERR_OBJ_TOO_LARGE)
106 err = NULL;
107 return err;
109 (*obj)->refcnt++;
110 return NULL;
113 const struct got_error *
114 got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size,
115 off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset,
116 off_t *delta_out_offset, struct got_object_id **base_id, int delta_cache_fd,
117 struct got_packidx *packidx, int obj_idx, struct got_object_id *id,
118 struct got_repository *repo)
120 return got_error(GOT_ERR_NOT_IMPL);
123 const struct got_error *
124 got_object_open(struct got_object **obj, struct got_repository *repo,
125 struct got_object_id *id)
127 const struct got_error *err = NULL;
128 int fd;
130 *obj = got_repo_get_cached_object(repo, id);
131 if (*obj != NULL) {
132 (*obj)->refcnt++;
133 return NULL;
136 err = got_object_open_packed(obj, id, repo);
137 if (err) {
138 if (err->code != GOT_ERR_NO_OBJ)
139 return err;
140 } else
141 return NULL;
143 err = got_object_open_loose_fd(&fd, id, repo);
144 if (err) {
145 if (err->code == GOT_ERR_ERRNO && errno == ENOENT)
146 err = got_error_no_obj(id);
147 return err;
150 err = got_object_read_header(obj, fd);
151 if (err)
152 goto done;
154 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
155 (*obj)->refcnt++;
157 err = got_repo_cache_object(repo, id, *obj);
158 if (err) {
159 if (err->code == GOT_ERR_OBJ_EXISTS ||
160 err->code == GOT_ERR_OBJ_TOO_LARGE)
161 err = NULL;
163 done:
164 if (close(fd) == -1 && err == NULL)
165 err = got_error_from_errno("close");
166 return err;
/*
 * Truncate 'wrapped_fd' to zero length, rewind it, and open a "w+" stdio
 * stream on a dup(2) of it.  The stream is stored in *f; on fdopen(3)
 * failure *f is NULL and the duplicated descriptor is closed again.
 * 'wrapped_fd' itself is never closed here.
 */
static const struct got_error *
wrap_fd(FILE **f, int wrapped_fd)
{
	const struct got_error *err;
	FILE *stream;
	int dupfd;

	if (ftruncate(wrapped_fd, 0L) == -1)
		return got_error_from_errno("ftruncate");
	if (lseek(wrapped_fd, 0L, SEEK_SET) == -1)
		return got_error_from_errno("lseek");

	dupfd = dup(wrapped_fd);
	if (dupfd == -1)
		return got_error_from_errno("dup");

	stream = fdopen(dupfd, "w+");
	*f = stream;
	if (stream != NULL)
		return NULL;

	/* Capture the fdopen error before close(2) can touch errno. */
	err = got_error_from_errno("fdopen");
	close(dupfd);
	return err;
}
193 static const struct got_error *
194 read_packed_object_raw(uint8_t **outbuf, off_t *size, size_t *hdrlen,
195 int outfd, struct got_pack *pack, struct got_packidx *packidx, int idx,
196 struct got_object_id *id)
198 const struct got_error *err = NULL;
199 uint64_t raw_size = 0;
200 struct got_object *obj;
201 FILE *outfile = NULL, *basefile = NULL, *accumfile = NULL;
203 *outbuf = NULL;
204 *size = 0;
205 *hdrlen = 0;
207 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
208 if (err)
209 return err;
211 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
212 err = got_pack_get_max_delta_object_size(&raw_size, obj, pack);
213 if (err)
214 goto done;
215 } else
216 raw_size = obj->size;
218 if (raw_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
219 size_t len;
220 err = got_packfile_extract_object_to_mem(outbuf, &len,
221 obj, pack);
222 if (err)
223 goto done;
224 *size = (off_t)len;
225 } else {
226 /*
227 * XXX This uses 3 file extra descriptors for no good reason.
228 * We should have got_packfile_extract_object_to_fd().
229 */
230 err = wrap_fd(&outfile, outfd);
231 if (err)
232 goto done;
233 err = wrap_fd(&basefile, pack->basefd);
234 if (err)
235 goto done;
236 err = wrap_fd(&accumfile, pack->accumfd);
237 if (err)
238 goto done;
239 err = got_packfile_extract_object(pack, obj, outfile, basefile,
240 accumfile);
241 if (err)
242 goto done;
243 *size = obj->size;
246 *hdrlen = obj->hdrlen;
247 done:
248 got_object_close(obj);
249 if (outfile && fclose(outfile) == EOF && err == NULL)
250 err = got_error_from_errno("fclose");
251 if (basefile && fclose(basefile) == EOF && err == NULL)
252 err = got_error_from_errno("fclose");
253 if (accumfile && fclose(accumfile) == EOF && err == NULL)
254 err = got_error_from_errno("fclose");
255 return err;
259 static void
260 put_raw_object_tempfile(struct got_raw_object *obj)
262 struct got_repository *repo = obj->close_arg;
264 if (obj->tempfile_idx != -1)
265 got_repo_temp_fds_put(obj->tempfile_idx, repo);
268 /* *outfd must be initialized to -1 by caller */
269 const struct got_error *
270 got_object_raw_open(struct got_raw_object **obj, int *outfd,
271 struct got_repository *repo, struct got_object_id *id)
273 const struct got_error *err = NULL;
274 struct got_packidx *packidx = NULL;
275 int idx, tempfd, tempfile_idx;
276 uint8_t *outbuf = NULL;
277 off_t size = 0;
278 size_t hdrlen = 0;
279 char *path_packfile = NULL;
281 *obj = got_repo_get_cached_raw_object(repo, id);
282 if (*obj != NULL) {
283 (*obj)->refcnt++;
284 return NULL;
287 err = got_repo_temp_fds_get(&tempfd, &tempfile_idx, repo);
288 if (err)
289 return err;
291 err = got_repo_search_packidx(&packidx, &idx, repo, id);
292 if (err == NULL) {
293 struct got_pack *pack = NULL;
295 err = got_packidx_get_packfile_path(&path_packfile,
296 packidx->path_packidx);
297 if (err)
298 goto done;
300 pack = got_repo_get_cached_pack(repo, path_packfile);
301 if (pack == NULL) {
302 err = got_repo_cache_pack(&pack, repo, path_packfile,
303 packidx);
304 if (err)
305 goto done;
307 err = read_packed_object_raw(&outbuf, &size, &hdrlen,
308 tempfd, pack, packidx, idx, id);
309 if (err)
310 goto done;
311 } else if (err->code == GOT_ERR_NO_OBJ) {
312 int fd;
314 err = got_object_open_loose_fd(&fd, id, repo);
315 if (err)
316 goto done;
317 err = got_object_read_raw(&outbuf, &size, &hdrlen,
318 GOT_DELTA_RESULT_SIZE_CACHED_MAX, tempfd, id, fd);
319 if (close(fd) == -1 && err == NULL)
320 err = got_error_from_errno("close");
321 if (err)
322 goto done;
325 if (outbuf == NULL) {
326 if (*outfd != -1) {
327 err = got_error_msg(GOT_ERR_NOT_IMPL, "bad outfd");
328 goto done;
331 /*
332 * Duplicate tempfile descriptor to allow use of
333 * fdopen(3) inside got_object_raw_alloc().
334 */
335 *outfd = dup(tempfd);
336 if (*outfd == -1) {
337 err = got_error_from_errno("dup");
338 goto done;
342 err = got_object_raw_alloc(obj, outbuf, outfd,
343 GOT_DELTA_RESULT_SIZE_CACHED_MAX, hdrlen, size);
344 if (err)
345 goto done;
347 err = got_repo_cache_raw_object(repo, id, *obj);
348 if (err) {
349 if (err->code == GOT_ERR_OBJ_EXISTS ||
350 err->code == GOT_ERR_OBJ_TOO_LARGE)
351 err = NULL;
353 done:
354 free(path_packfile);
355 if (err) {
356 if (*obj) {
357 got_object_raw_close(*obj);
358 *obj = NULL;
360 free(outbuf);
361 got_repo_temp_fds_put(tempfile_idx, repo);
362 if (*outfd != -1) {
363 close(*outfd);
364 *outfd = -1;
366 } else {
367 if (((*obj)->f == NULL && (*obj)->fd == -1)) {
368 /* This raw object is not backed by a file. */
369 got_repo_temp_fds_put(tempfile_idx, repo);
370 if (*outfd != -1) {
371 close(*outfd);
372 *outfd = -1;
374 } else {
375 (*obj)->tempfile_idx = tempfile_idx;
376 (*obj)->close_cb = put_raw_object_tempfile;
377 (*obj)->close_arg = repo;
380 return err;
383 static const struct got_error *
384 open_commit(struct got_commit_object **commit,
385 struct got_repository *repo, struct got_object_id *id, int check_cache)
387 const struct got_error *err = NULL;
388 struct got_packidx *packidx = NULL;
389 int idx;
390 char *path_packfile = NULL;
392 if (check_cache) {
393 *commit = got_repo_get_cached_commit(repo, id);
394 if (*commit != NULL) {
395 (*commit)->refcnt++;
396 return NULL;
398 } else
399 *commit = NULL;
401 err = got_repo_search_packidx(&packidx, &idx, repo, id);
402 if (err == NULL) {
403 struct got_pack *pack = NULL;
404 struct got_object *obj;
405 uint8_t *buf;
406 size_t len;
408 err = got_packidx_get_packfile_path(&path_packfile,
409 packidx->path_packidx);
410 if (err)
411 return err;
413 pack = got_repo_get_cached_pack(repo, path_packfile);
414 if (pack == NULL) {
415 err = got_repo_cache_pack(&pack, repo, path_packfile,
416 packidx);
417 if (err)
418 goto done;
420 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
421 if (err)
422 goto done;
423 err = got_packfile_extract_object_to_mem(&buf, &len,
424 obj, pack);
425 got_object_close(obj);
426 if (err)
427 goto done;
428 err = got_object_parse_commit(commit, buf, len);
429 free(buf);
430 } else if (err->code == GOT_ERR_NO_OBJ) {
431 int fd;
433 err = got_object_open_loose_fd(&fd, id, repo);
434 if (err)
435 return err;
436 err = got_object_read_commit(commit, fd, id, 0);
437 if (close(fd) == -1 && err == NULL)
438 err = got_error_from_errno("close");
439 if (err)
440 return err;
443 if (err == NULL) {
444 (*commit)->refcnt++;
445 err = got_repo_cache_commit(repo, id, *commit);
446 if (err) {
447 if (err->code == GOT_ERR_OBJ_EXISTS ||
448 err->code == GOT_ERR_OBJ_TOO_LARGE)
449 err = NULL;
452 done:
453 free(path_packfile);
454 return err;
457 const struct got_error *
458 got_object_open_as_commit(struct got_commit_object **commit,
459 struct got_repository *repo, struct got_object_id *id)
461 *commit = got_repo_get_cached_commit(repo, id);
462 if (*commit != NULL) {
463 (*commit)->refcnt++;
464 return NULL;
467 return open_commit(commit, repo, id, 0);
/*
 * Open the commit that 'obj' refers to: its ID is taken from
 * got_object_get_id() and passed to open_commit() with the cache check
 * enabled.
 */
const struct got_error *
got_object_commit_open(struct got_commit_object **commit,
    struct got_repository *repo, struct got_object *obj)
{
	const int check_cache = 1;

	return open_commit(commit, repo, got_object_get_id(obj), check_cache);
}
477 static const struct got_error *
478 open_tree(struct got_tree_object **tree,
479 struct got_repository *repo, struct got_object_id *id, int check_cache)
481 const struct got_error *err = NULL;
482 struct got_packidx *packidx = NULL;
483 int idx;
484 char *path_packfile = NULL;
485 struct got_parsed_tree_entry *entries = NULL;
486 size_t nentries = 0, nentries_alloc = 0, i;
487 uint8_t *buf = NULL;
489 if (check_cache) {
490 *tree = got_repo_get_cached_tree(repo, id);
491 if (*tree != NULL) {
492 (*tree)->refcnt++;
493 return NULL;
495 } else
496 *tree = NULL;
498 err = got_repo_search_packidx(&packidx, &idx, repo, id);
499 if (err == NULL) {
500 struct got_pack *pack = NULL;
501 struct got_object *obj;
502 size_t len;
504 err = got_packidx_get_packfile_path(&path_packfile,
505 packidx->path_packidx);
506 if (err)
507 return err;
509 pack = got_repo_get_cached_pack(repo, path_packfile);
510 if (pack == NULL) {
511 err = got_repo_cache_pack(&pack, repo, path_packfile,
512 packidx);
513 if (err)
514 goto done;
516 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
517 if (err)
518 goto done;
519 err = got_packfile_extract_object_to_mem(&buf, &len,
520 obj, pack);
521 got_object_close(obj);
522 if (err)
523 goto done;
524 err = got_object_parse_tree(&entries, &nentries,
525 &nentries_alloc, buf, len);
526 if (err)
527 goto done;
528 } else if (err->code == GOT_ERR_NO_OBJ) {
529 int fd;
531 err = got_object_open_loose_fd(&fd, id, repo);
532 if (err)
533 return err;
534 err = got_object_read_tree(&entries, &nentries,
535 &nentries_alloc, &buf, fd, id);
536 if (close(fd) == -1 && err == NULL)
537 err = got_error_from_errno("close");
538 if (err)
539 goto done;
540 } else
541 goto done;
543 *tree = malloc(sizeof(**tree));
544 if (*tree == NULL) {
545 err = got_error_from_errno("malloc");
546 goto done;
548 (*tree)->entries = calloc(nentries, sizeof(struct got_tree_entry));
549 if ((*tree)->entries == NULL) {
550 err = got_error_from_errno("malloc");
551 goto done;
553 (*tree)->nentries = nentries;
554 (*tree)->refcnt = 0;
556 for (i = 0; i < nentries; i++) {
557 struct got_parsed_tree_entry *pe = &entries[i];
558 struct got_tree_entry *te = &(*tree)->entries[i];
560 if (strlcpy(te->name, pe->name,
561 sizeof(te->name)) >= sizeof(te->name)) {
562 err = got_error(GOT_ERR_NO_SPACE);
563 goto done;
565 memcpy(te->id.sha1, pe->id, SHA1_DIGEST_LENGTH);
566 te->mode = pe->mode;
567 te->idx = i;
569 done:
570 free(path_packfile);
571 free(entries);
572 free(buf);
573 if (err == NULL) {
574 (*tree)->refcnt++;
575 err = got_repo_cache_tree(repo, id, *tree);
576 if (err) {
577 if (err->code == GOT_ERR_OBJ_EXISTS ||
578 err->code == GOT_ERR_OBJ_TOO_LARGE)
579 err = NULL;
582 if (err) {
583 if (*tree)
584 free((*tree)->entries);
585 free(*tree);
586 *tree = NULL;
588 return err;
591 const struct got_error *
592 got_object_open_as_tree(struct got_tree_object **tree,
593 struct got_repository *repo, struct got_object_id *id)
595 *tree = got_repo_get_cached_tree(repo, id);
596 if (*tree != NULL) {
597 (*tree)->refcnt++;
598 return NULL;
601 return open_tree(tree, repo, id, 0);
/*
 * Open the tree that 'obj' refers to: its ID is taken from
 * got_object_get_id() and passed to open_tree() with the cache check
 * enabled.
 */
const struct got_error *
got_object_tree_open(struct got_tree_object **tree,
    struct got_repository *repo, struct got_object *obj)
{
	const int check_cache = 1;

	return open_tree(tree, repo, got_object_get_id(obj), check_cache);
}
611 const struct got_error *
612 got_object_open_as_blob(struct got_blob_object **blob,
613 struct got_repository *repo, struct got_object_id *id, size_t blocksize,
614 int outfd)
616 return got_error(GOT_ERR_NOT_IMPL);
619 const struct got_error *
620 got_object_blob_open(struct got_blob_object **blob,
621 struct got_repository *repo, struct got_object *obj, size_t blocksize,
622 int outfd)
624 return got_error(GOT_ERR_NOT_IMPL);
627 static const struct got_error *
628 open_tag(struct got_tag_object **tag, struct got_repository *repo,
629 struct got_object_id *id, int check_cache)
631 const struct got_error *err = NULL;
632 struct got_packidx *packidx = NULL;
633 int idx;
634 char *path_packfile = NULL;
635 struct got_object *obj = NULL;
636 int obj_type = GOT_OBJ_TYPE_ANY;
638 if (check_cache) {
639 *tag = got_repo_get_cached_tag(repo, id);
640 if (*tag != NULL) {
641 (*tag)->refcnt++;
642 return NULL;
644 } else
645 *tag = NULL;
647 err = got_repo_search_packidx(&packidx, &idx, repo, id);
648 if (err == NULL) {
649 struct got_pack *pack = NULL;
650 uint8_t *buf = NULL;
651 size_t len;
653 err = got_packidx_get_packfile_path(&path_packfile,
654 packidx->path_packidx);
655 if (err)
656 return err;
658 pack = got_repo_get_cached_pack(repo, path_packfile);
659 if (pack == NULL) {
660 err = got_repo_cache_pack(&pack, repo, path_packfile,
661 packidx);
662 if (err)
663 goto done;
666 /* Beware of "lightweight" tags: Check object type first. */
667 err = got_packfile_open_object(&obj, pack, packidx, idx, id);
668 if (err)
669 goto done;
670 obj_type = obj->type;
671 if (obj_type != GOT_OBJ_TYPE_TAG) {
672 err = got_error(GOT_ERR_OBJ_TYPE);
673 got_object_close(obj);
674 goto done;
676 err = got_packfile_extract_object_to_mem(&buf, &len,
677 obj, pack);
678 got_object_close(obj);
679 if (err)
680 goto done;
681 err = got_object_parse_tag(tag, buf, len);
682 free(buf);
683 } else if (err->code == GOT_ERR_NO_OBJ) {
684 int fd;
686 err = got_object_open_loose_fd(&fd, id, repo);
687 if (err)
688 return err;
689 err = got_object_read_header(&obj, fd);
690 if (close(fd) == -1 && err == NULL)
691 err = got_error_from_errno("close");
692 if (err)
693 return err;
694 obj_type = obj->type;
695 got_object_close(obj);
696 if (obj_type != GOT_OBJ_TYPE_TAG)
697 return got_error(GOT_ERR_OBJ_TYPE);
699 err = got_object_open_loose_fd(&fd, id, repo);
700 if (err)
701 return err;
702 err = got_object_read_tag(tag, fd, id, 0);
703 if (close(fd) == -1 && err == NULL)
704 err = got_error_from_errno("close");
705 if (err)
706 return err;
709 if (err == NULL) {
710 (*tag)->refcnt++;
711 err = got_repo_cache_tag(repo, id, *tag);
712 if (err) {
713 if (err->code == GOT_ERR_OBJ_EXISTS ||
714 err->code == GOT_ERR_OBJ_TOO_LARGE)
715 err = NULL;
718 done:
719 free(path_packfile);
720 return err;
723 const struct got_error *
724 got_object_open_as_tag(struct got_tag_object **tag,
725 struct got_repository *repo, struct got_object_id *id)
727 *tag = got_repo_get_cached_tag(repo, id);
728 if (*tag != NULL) {
729 (*tag)->refcnt++;
730 return NULL;
733 return open_tag(tag, repo, id, 0);
/*
 * Open the tag that 'obj' refers to: its ID is taken from
 * got_object_get_id() and passed to open_tag() with the cache check
 * enabled.
 */
const struct got_error *
got_object_tag_open(struct got_tag_object **tag,
    struct got_repository *repo, struct got_object *obj)
{
	const int check_cache = 1;

	return open_tag(tag, repo, got_object_get_id(obj), check_cache);
}