Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/uio.h>
21 #include <sys/socket.h>
22 #include <sys/wait.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdint.h>
30 #include <sha1.h>
31 #include <zlib.h>
32 #include <ctype.h>
33 #include <limits.h>
34 #include <imsg.h>
35 #include <time.h>
37 #include "got_error.h"
38 #include "got_object.h"
39 #include "got_repository.h"
40 #include "got_opentemp.h"
42 #include "got_lib_sha1.h"
43 #include "got_lib_delta.h"
44 #include "got_lib_pack.h"
45 #include "got_lib_path.h"
46 #include "got_lib_inflate.h"
47 #include "got_lib_object.h"
48 #include "got_lib_privsep.h"
49 #include "got_lib_object_parse.h"
50 #include "got_lib_repository.h"
/*
 * Fallback definition of MIN() for systems whose headers do not provide it.
 * Both arguments may be evaluated more than once, so avoid side effects.
 */
#if !defined(MIN)
#define MIN(_x, _y) ((_x) < (_y) ? (_x) : (_y))
#endif
56 const struct got_error *
57 got_object_id_str(char **outbuf, struct got_object_id *id)
58 {
59 static const size_t len = SHA1_DIGEST_STRING_LENGTH;
61 *outbuf = malloc(len);
62 if (*outbuf == NULL)
63 return got_error_from_errno();
65 if (got_sha1_digest_to_str(id->sha1, *outbuf, len) == NULL) {
66 free(*outbuf);
67 *outbuf = NULL;
68 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
69 }
71 return NULL;
72 }
74 int
75 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
76 {
77 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
78 }
80 struct got_object_id *
81 got_object_id_dup(struct got_object_id *id1)
82 {
83 struct got_object_id *id2;
85 id2 = malloc(sizeof(*id2));
86 if (id2 == NULL)
87 return NULL;
88 memcpy(id2, id1, sizeof(*id2));
89 return id2;
90 }
92 struct got_object_id *
93 got_object_get_id(struct got_object *obj)
94 {
95 return got_object_id_dup(&obj->id);
96 }
98 const struct got_error *
99 got_object_get_id_str(char **outbuf, struct got_object *obj)
101 return got_object_id_str(outbuf, &obj->id);
104 int
105 got_object_get_type(struct got_object *obj)
107 switch (obj->type) {
108 case GOT_OBJ_TYPE_COMMIT:
109 case GOT_OBJ_TYPE_TREE:
110 case GOT_OBJ_TYPE_BLOB:
111 case GOT_OBJ_TYPE_TAG:
112 return obj->type;
113 default:
114 abort();
115 break;
118 /* not reached */
119 return 0;
122 static const struct got_error *
123 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
125 const struct got_error *err = NULL;
126 char *hex = NULL;
127 char *path_objects = got_repo_get_path_objects(repo);
129 *path = NULL;
131 if (path_objects == NULL)
132 return got_error_from_errno();
134 err = got_object_id_str(&hex, id);
135 if (err)
136 goto done;
138 if (asprintf(path, "%s/%.2x/%s", path_objects,
139 id->sha1[0], hex + 2) == -1)
140 err = got_error_from_errno();
142 done:
143 free(hex);
144 free(path_objects);
145 return err;
148 static const struct got_error *
149 open_loose_object(int *fd, struct got_object *obj, struct got_repository *repo)
151 const struct got_error *err = NULL;
152 char *path;
154 err = object_path(&path, &obj->id, repo);
155 if (err)
156 return err;
157 *fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
158 if (*fd == -1) {
159 err = got_error_from_errno();
160 goto done;
162 done:
163 free(path);
164 return err;
167 const struct got_error *
168 got_object_open(struct got_object **obj, struct got_repository *repo,
169 struct got_object_id *id)
171 const struct got_error *err = NULL;
172 char *path;
173 int fd;
175 *obj = got_repo_get_cached_object(repo, id);
176 if (*obj != NULL) {
177 (*obj)->refcnt++;
178 return NULL;
181 err = object_path(&path, id, repo);
182 if (err)
183 return err;
185 fd = open(path, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
186 if (fd == -1) {
187 if (errno != ENOENT) {
188 err = got_error_from_errno();
189 goto done;
191 err = got_packfile_open_object(obj, id, repo);
192 if (err)
193 goto done;
194 if (*obj == NULL)
195 err = got_error(GOT_ERR_NO_OBJ);
196 } else {
197 err = got_object_read_header_privsep(obj, repo, fd);
198 if (err)
199 goto done;
200 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
203 if (err == NULL) {
204 (*obj)->refcnt++;
205 err = got_repo_cache_object(repo, id, *obj);
207 done:
208 free(path);
209 if (fd != -1)
210 close(fd);
211 return err;
215 const struct got_error *
216 got_object_open_by_id_str(struct got_object **obj, struct got_repository *repo,
217 const char *id_str)
219 struct got_object_id id;
221 if (!got_parse_sha1_digest(id.sha1, id_str))
222 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
224 return got_object_open(obj, repo, &id);
227 const struct got_error *
228 got_object_open_as_commit(struct got_commit_object **commit,
229 struct got_repository *repo, struct got_object_id *id)
231 const struct got_error *err;
232 struct got_object *obj;
234 *commit = NULL;
236 err = got_object_open(&obj, repo, id);
237 if (err)
238 return err;
239 if (got_object_get_type(obj) != GOT_OBJ_TYPE_COMMIT) {
240 err = got_error(GOT_ERR_OBJ_TYPE);
241 goto done;
244 err = got_object_commit_open(commit, repo, obj);
245 done:
246 got_object_close(obj);
247 return err;
250 const struct got_error *
251 got_object_qid_alloc(struct got_object_qid **qid, struct got_object_id *id)
253 const struct got_error *err = NULL;
255 *qid = calloc(1, sizeof(**qid));
256 if (*qid == NULL)
257 return got_error_from_errno();
259 (*qid)->id = got_object_id_dup(id);
260 if ((*qid)->id == NULL) {
261 err = got_error_from_errno();
262 got_object_qid_free(*qid);
263 *qid = NULL;
264 return err;
267 return NULL;
270 const struct got_error *
271 got_object_commit_open(struct got_commit_object **commit,
272 struct got_repository *repo, struct got_object *obj)
274 const struct got_error *err = NULL;
276 *commit = got_repo_get_cached_commit(repo, &obj->id);
277 if (*commit != NULL) {
278 (*commit)->refcnt++;
279 return NULL;
282 if (obj->type != GOT_OBJ_TYPE_COMMIT)
283 return got_error(GOT_ERR_OBJ_TYPE);
285 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
286 uint8_t *buf;
287 size_t len;
288 err = got_packfile_extract_object_to_mem(&buf, &len, obj, repo);
289 if (err)
290 return err;
291 obj->size = len;
292 err = got_object_parse_commit(commit, buf, len);
293 free(buf);
294 } else {
295 int fd;
296 err = open_loose_object(&fd, obj, repo);
297 if (err)
298 return err;
299 err = got_object_read_commit_privsep(commit, obj, fd, repo);
300 close(fd);
303 if (err == NULL) {
304 (*commit)->refcnt++;
305 err = got_repo_cache_commit(repo, &obj->id, *commit);
308 return err;
311 const struct got_error *
312 got_object_tree_open(struct got_tree_object **tree,
313 struct got_repository *repo, struct got_object *obj)
315 const struct got_error *err = NULL;
317 *tree = got_repo_get_cached_tree(repo, &obj->id);
318 if (*tree != NULL) {
319 (*tree)->refcnt++;
320 return NULL;
323 if (obj->type != GOT_OBJ_TYPE_TREE)
324 return got_error(GOT_ERR_OBJ_TYPE);
326 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
327 uint8_t *buf;
328 size_t len;
329 err = got_packfile_extract_object_to_mem(&buf, &len, obj, repo);
330 if (err)
331 return err;
332 obj->size = len;
333 err = got_object_parse_tree(tree, buf, len);
334 free(buf);
335 } else {
336 int fd;
337 err = open_loose_object(&fd, obj, repo);
338 if (err)
339 return err;
340 err = got_object_read_tree_privsep(tree, obj, fd, repo);
341 close(fd);
344 if (err == NULL) {
345 (*tree)->refcnt++;
346 err = got_repo_cache_tree(repo, &obj->id, *tree);
349 return err;
352 const struct got_error *
353 got_object_open_as_tree(struct got_tree_object **tree,
354 struct got_repository *repo, struct got_object_id *id)
356 const struct got_error *err;
357 struct got_object *obj;
359 *tree = NULL;
361 err = got_object_open(&obj, repo, id);
362 if (err)
363 return err;
364 if (got_object_get_type(obj) != GOT_OBJ_TYPE_TREE) {
365 err = got_error(GOT_ERR_OBJ_TYPE);
366 goto done;
369 err = got_object_tree_open(tree, repo, obj);
370 done:
371 got_object_close(obj);
372 return err;
375 const struct got_tree_entries *
376 got_object_tree_get_entries(struct got_tree_object *tree)
378 return &tree->entries;
381 const struct got_error *
382 got_object_blob_open(struct got_blob_object **blob,
383 struct got_repository *repo, struct got_object *obj, size_t blocksize)
385 const struct got_error *err = NULL;
387 if (obj->type != GOT_OBJ_TYPE_BLOB)
388 return got_error(GOT_ERR_OBJ_TYPE);
390 if (blocksize < obj->hdrlen)
391 return got_error(GOT_ERR_NO_SPACE);
393 *blob = calloc(1, sizeof(**blob));
394 if (*blob == NULL)
395 return got_error_from_errno();
397 (*blob)->read_buf = malloc(blocksize);
398 if ((*blob)->read_buf == NULL) {
399 err = got_error_from_errno();
400 goto done;
402 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
403 err = got_packfile_extract_object(&((*blob)->f), obj, repo);
404 if (err)
405 goto done;
406 } else {
407 int infd, outfd;
408 size_t size;
409 struct stat sb;
411 err = open_loose_object(&infd, obj, repo);
412 if (err)
413 goto done;
416 outfd = got_opentempfd();
417 if (outfd == -1) {
418 err = got_error_from_errno();
419 close(infd);
420 goto done;
423 err = got_object_read_blob_privsep(&size, outfd, infd, repo);
424 close(infd);
425 if (err)
426 goto done;
428 if (size != obj->hdrlen + obj->size) {
429 err = got_error(GOT_ERR_PRIVSEP_LEN);
430 close(outfd);
431 goto done;
434 if (fstat(outfd, &sb) == -1) {
435 err = got_error_from_errno();
436 close(outfd);
437 goto done;
440 if (sb.st_size != size) {
441 err = got_error(GOT_ERR_PRIVSEP_LEN);
442 close(outfd);
443 goto done;
446 (*blob)->f = fdopen(outfd, "rb");
447 if ((*blob)->f == NULL) {
448 err = got_error_from_errno();
449 close(outfd);
450 goto done;
454 (*blob)->hdrlen = obj->hdrlen;
455 (*blob)->blocksize = blocksize;
456 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
458 done:
459 if (err && *blob) {
460 if ((*blob)->f)
461 fclose((*blob)->f);
462 free((*blob)->read_buf);
463 free(*blob);
464 *blob = NULL;
466 return err;
469 const struct got_error *
470 got_object_open_as_blob(struct got_blob_object **blob,
471 struct got_repository *repo, struct got_object_id *id,
472 size_t blocksize)
474 const struct got_error *err;
475 struct got_object *obj;
477 *blob = NULL;
479 err = got_object_open(&obj, repo, id);
480 if (err)
481 return err;
482 if (got_object_get_type(obj) != GOT_OBJ_TYPE_BLOB) {
483 err = got_error(GOT_ERR_OBJ_TYPE);
484 goto done;
487 err = got_object_blob_open(blob, repo, obj, blocksize);
488 done:
489 got_object_close(obj);
490 return err;
493 void
494 got_object_blob_close(struct got_blob_object *blob)
496 free(blob->read_buf);
497 fclose(blob->f);
498 free(blob);
501 char *
502 got_object_blob_id_str(struct got_blob_object *blob, char *buf, size_t size)
504 return got_sha1_digest_to_str(blob->id.sha1, buf, size);
507 size_t
508 got_object_blob_get_hdrlen(struct got_blob_object *blob)
510 return blob->hdrlen;
513 const uint8_t *
514 got_object_blob_get_read_buf(struct got_blob_object *blob)
516 return blob->read_buf;
519 const struct got_error *
520 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
522 size_t n;
524 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
525 if (n == 0 && ferror(blob->f))
526 return got_ferror(blob->f, GOT_ERR_IO);
527 *outlenp = n;
528 return NULL;
531 const struct got_error *
532 got_object_blob_dump_to_file(size_t *total_len, size_t *nlines,
533 FILE *outfile, struct got_blob_object *blob)
535 const struct got_error *err = NULL;
536 size_t len, hdrlen;
537 const uint8_t *buf;
538 int i;
540 if (total_len)
541 *total_len = 0;
542 if (nlines)
543 *nlines = 0;
545 hdrlen = got_object_blob_get_hdrlen(blob);
546 do {
547 err = got_object_blob_read_block(&len, blob);
548 if (err)
549 return err;
550 if (len == 0)
551 break;
552 if (total_len)
553 *total_len += len;
554 buf = got_object_blob_get_read_buf(blob);
555 if (nlines) {
556 for (i = 0; i < len; i++) {
557 if (buf[i] == '\n')
558 (*nlines)++;
561 /* Skip blob object header first time around. */
562 fwrite(buf + hdrlen, len - hdrlen, 1, outfile);
563 hdrlen = 0;
564 } while (len != 0);
566 fflush(outfile);
567 rewind(outfile);
569 return NULL;
572 static struct got_tree_entry *
573 find_entry_by_name(struct got_tree_object *tree, const char *name)
575 struct got_tree_entry *te;
577 SIMPLEQ_FOREACH(te, &tree->entries.head, entry) {
578 if (strcmp(te->name, name) == 0)
579 return te;
581 return NULL;
584 const struct got_error *
585 got_object_open_by_path(struct got_object **obj, struct got_repository *repo,
586 struct got_object_id *commit_id, const char *path)
588 const struct got_error *err = NULL;
589 struct got_commit_object *commit = NULL;
590 struct got_tree_object *tree = NULL;
591 struct got_tree_entry *te = NULL;
592 char *seg, *s, *s0 = NULL;
593 size_t len = strlen(path);
595 *obj = NULL;
597 /* We are expecting an absolute in-repository path. */
598 if (path[0] != '/')
599 return got_error(GOT_ERR_NOT_ABSPATH);
601 err = got_object_open_as_commit(&commit, repo, commit_id);
602 if (err)
603 goto done;
605 /* Handle opening of root of commit's tree. */
606 if (path[1] == '\0') {
607 err = got_object_open(obj, repo, commit->tree_id);
608 goto done;
611 err = got_object_open_as_tree(&tree, repo, commit->tree_id);
612 if (err)
613 goto done;
615 s0 = strdup(path);
616 if (s0 == NULL) {
617 err = got_error_from_errno();
618 goto done;
620 err = got_canonpath(path, s0, len + 1);
621 if (err)
622 goto done;
624 s = s0;
625 s++; /* skip leading '/' */
626 len--;
627 seg = s;
628 while (len > 0) {
629 struct got_tree_object *next_tree;
631 if (*s != '/') {
632 s++;
633 len--;
634 if (*s)
635 continue;
638 /* end of path segment */
639 *s = '\0';
641 te = find_entry_by_name(tree, seg);
642 if (te == NULL) {
643 err = got_error(GOT_ERR_NO_OBJ);
644 goto done;
647 if (len == 0)
648 break;
650 seg = s + 1;
651 s++;
652 len--;
653 if (*s) {
654 err = got_object_open_as_tree(&next_tree, repo,
655 te->id);
656 te = NULL;
657 if (err)
658 goto done;
659 got_object_tree_close(tree);
660 tree = next_tree;
664 if (te)
665 err = got_object_open(obj, repo, te->id);
666 else
667 err = got_error(GOT_ERR_NO_OBJ);
668 done:
669 free(s0);
670 if (commit)
671 got_object_commit_close(commit);
672 if (tree)
673 got_object_tree_close(tree);
674 return err;