Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/stat.h>
18 #include <sys/queue.h>
20 #include <errno.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sha1.h>
25 #include <zlib.h>
26 #include <ctype.h>
27 #include <limits.h>
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_repository.h"
32 #include "got_sha1.h"
33 #include "pack.h"
34 #include "delta.h"
35 #include "object.h"
36 #include "zb.h"
/*
 * Fallback for systems whose headers do not define MIN().
 * Note that the argument selected as the result is evaluated twice.
 */
38 #ifndef MIN
39 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
40 #endif
/*
 * Fallback for systems whose headers do not define nitems().
 * Yields the element count of _a, which must be a real array
 * and not a pointer.
 */
42 #ifndef nitems
43 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
44 #endif
/* Type names matched against the start of an object header. */
46 #define GOT_OBJ_TAG_COMMIT "commit"
47 #define GOT_OBJ_TAG_TREE "tree"
48 #define GOT_OBJ_TAG_BLOB "blob"
/*
 * Line prefixes, trailing space included, matched while parsing the
 * body of a commit object.
 */
50 #define GOT_COMMIT_TAG_TREE "tree "
51 #define GOT_COMMIT_TAG_PARENT "parent "
52 #define GOT_COMMIT_TAG_AUTHOR "author "
53 #define GOT_COMMIT_TAG_COMMITTER "committer "
55 char *
56 got_object_id_str(struct got_object_id *id, char *buf, size_t size)
57 {
58 return got_sha1_digest_to_str(id->sha1, buf, size);
59 }
61 int
62 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
63 {
64 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
65 }
67 int
68 got_object_get_type(struct got_object *obj)
69 {
70 switch (obj->type) {
71 case GOT_OBJ_TYPE_COMMIT:
72 case GOT_OBJ_TYPE_TREE:
73 case GOT_OBJ_TYPE_BLOB:
74 case GOT_OBJ_TYPE_TAG:
75 return obj->type;
76 default:
77 abort();
78 break;
79 }
81 /* not reached */
82 return 0;
83 }
85 static const struct got_error *
86 parse_object_header(struct got_object **obj, char *buf, size_t len)
87 {
88 const char *obj_tags[] = {
89 GOT_OBJ_TAG_COMMIT,
90 GOT_OBJ_TAG_TREE,
91 GOT_OBJ_TAG_BLOB
92 };
93 const int obj_types[] = {
94 GOT_OBJ_TYPE_COMMIT,
95 GOT_OBJ_TYPE_TREE,
96 GOT_OBJ_TYPE_BLOB,
97 };
98 int type = 0;
99 size_t size = 0, hdrlen = 0;
100 int i;
101 char *p = strchr(buf, '\0');
103 if (p == NULL)
104 return got_error(GOT_ERR_BAD_OBJ_HDR);
106 hdrlen = strlen(buf) + 1 /* '\0' */;
108 for (i = 0; i < nitems(obj_tags); i++) {
109 const char *tag = obj_tags[i];
110 size_t tlen = strlen(tag);
111 const char *errstr;
113 if (strncmp(buf, tag, tlen) != 0)
114 continue;
116 type = obj_types[i];
117 if (len <= tlen)
118 return got_error(GOT_ERR_BAD_OBJ_HDR);
119 size = strtonum(buf + tlen, 0, LONG_MAX, &errstr);
120 if (errstr != NULL)
121 return got_error(GOT_ERR_BAD_OBJ_HDR);
122 break;
125 if (type == 0)
126 return got_error(GOT_ERR_BAD_OBJ_HDR);
128 *obj = calloc(1, sizeof(**obj));
129 if (*obj == NULL)
130 return got_error(GOT_ERR_NO_MEM);
131 (*obj)->type = type;
132 (*obj)->hdrlen = hdrlen;
133 (*obj)->size = size;
134 return NULL;
137 static const struct got_error *
138 read_object_header(struct got_object **obj, struct got_repository *repo,
139 FILE *f)
141 const struct got_error *err;
142 struct got_zstream_buf zb;
143 char *buf;
144 size_t len;
145 const size_t zbsize = 64;
146 size_t outlen, totlen;
147 int i, ret;
149 buf = calloc(zbsize, sizeof(char));
150 if (buf == NULL)
151 return got_error(GOT_ERR_NO_MEM);
153 err = got_inflate_init(&zb, zbsize);
154 if (err)
155 return err;
157 i = 0;
158 totlen = 0;
159 do {
160 err = got_inflate_read(&zb, f, &outlen);
161 if (err)
162 goto done;
163 if (strchr(zb.outbuf, '\0') == NULL) {
164 buf = recallocarray(buf, 1 + i, 2 + i, zbsize);
165 if (buf == NULL) {
166 err = got_error(GOT_ERR_NO_MEM);
167 goto done;
170 memcpy(buf + totlen, zb.outbuf, outlen);
171 totlen += outlen;
172 i++;
173 } while (strchr(zb.outbuf, '\0') == NULL);
175 err = parse_object_header(obj, buf, totlen);
176 done:
177 got_inflate_end(&zb);
178 return err;
181 static const struct got_error *
182 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
184 const struct got_error *err = NULL;
185 char hex[SHA1_DIGEST_STRING_LENGTH];
186 char *path_objects = got_repo_get_path_objects(repo);
188 if (path_objects == NULL)
189 return got_error(GOT_ERR_NO_MEM);
191 got_object_id_str(id, hex, sizeof(hex));
193 if (asprintf(path, "%s/%.2x/%s", path_objects,
194 id->sha1[0], hex + 2) == -1)
195 err = got_error(GOT_ERR_NO_MEM);
197 free(path_objects);
198 return err;
201 static const struct got_error *
202 open_loose_object(FILE **f, struct got_object *obj, struct got_repository *repo)
204 const struct got_error *err = NULL;
205 char *path;
207 err = object_path(&path, &obj->id, repo);
208 if (err)
209 return err;
210 *f = fopen(path, "rb");
211 if (*f == NULL) {
212 err = got_error_from_errno();
213 goto done;
215 done:
216 free(path);
217 return err;
220 const struct got_error *
221 got_object_open(struct got_object **obj, struct got_repository *repo,
222 struct got_object_id *id)
224 const struct got_error *err = NULL;
225 char *path;
226 FILE *f;
228 err = object_path(&path, id, repo);
229 if (err)
230 return err;
232 f = fopen(path, "rb");
233 if (f == NULL) {
234 if (errno != ENOENT) {
235 err = got_error_from_errno();
236 goto done;
238 err = got_packfile_open_object(obj, id, repo);
239 if (err)
240 goto done;
241 if (*obj == NULL)
242 err = got_error(GOT_ERR_NO_OBJ);
243 } else {
244 err = read_object_header(obj, repo, f);
245 if (err)
246 goto done;
247 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
249 done:
250 free(path);
251 if (err && f)
252 fclose(f);
253 return err;
257 void
258 got_object_close(struct got_object *obj)
260 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
261 struct got_delta *delta;
262 while (!SIMPLEQ_EMPTY(&obj->deltas.entries)) {
263 delta = SIMPLEQ_FIRST(&obj->deltas.entries);
264 SIMPLEQ_REMOVE_HEAD(&obj->deltas.entries, entry);
265 got_delta_close(delta);
268 if (obj->flags & GOT_OBJ_FLAG_PACKED)
269 free(obj->path_packfile);
270 free(obj);
273 static int
274 commit_object_valid(struct got_commit_object *commit)
276 int i;
277 int n;
279 if (commit == NULL)
280 return 0;
282 n = 0;
283 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
284 if (commit->tree_id.sha1[i] == 0)
285 n++;
287 if (n == SHA1_DIGEST_LENGTH)
288 return 0;
290 return 1;
293 static const struct got_error *
294 parse_commit_object(struct got_commit_object **commit, char *buf, size_t len)
296 const struct got_error *err = NULL;
297 char *s = buf;
298 size_t tlen;
299 ssize_t remain = (ssize_t)len;
301 *commit = calloc(1, sizeof(**commit));
302 if (*commit == NULL)
303 return got_error(GOT_ERR_NO_MEM);
305 SIMPLEQ_INIT(&(*commit)->parent_ids);
307 tlen = strlen(GOT_COMMIT_TAG_TREE);
308 if (strncmp(s, GOT_COMMIT_TAG_TREE, tlen) == 0) {
309 remain -= tlen;
310 if (remain < SHA1_DIGEST_STRING_LENGTH) {
311 err = got_error(GOT_ERR_BAD_OBJ_DATA);
312 goto done;
314 s += tlen;
315 if (!got_parse_sha1_digest((*commit)->tree_id.sha1, s)) {
316 err = got_error(GOT_ERR_BAD_OBJ_DATA);
317 goto done;
319 remain -= SHA1_DIGEST_STRING_LENGTH;
320 s += SHA1_DIGEST_STRING_LENGTH;
321 } else {
322 err = got_error(GOT_ERR_BAD_OBJ_DATA);
323 goto done;
326 tlen = strlen(GOT_COMMIT_TAG_PARENT);
327 while (strncmp(s, GOT_COMMIT_TAG_PARENT, tlen) == 0) {
328 struct got_parent_id *pid;
330 remain -= tlen;
331 if (remain < SHA1_DIGEST_STRING_LENGTH) {
332 err = got_error(GOT_ERR_BAD_OBJ_DATA);
333 goto done;
336 pid = calloc(1, sizeof(*pid));
337 if (pid == NULL) {
338 err = got_error(GOT_ERR_NO_MEM);
339 goto done;
341 s += tlen;
342 if (!got_parse_sha1_digest(pid->id.sha1, s)) {
343 err = got_error(GOT_ERR_BAD_OBJ_DATA);
344 goto done;
346 SIMPLEQ_INSERT_TAIL(&(*commit)->parent_ids, pid, entry);
347 (*commit)->nparents++;
349 remain -= SHA1_DIGEST_STRING_LENGTH;
350 s += SHA1_DIGEST_STRING_LENGTH;
353 tlen = strlen(GOT_COMMIT_TAG_AUTHOR);
354 if (strncmp(s, GOT_COMMIT_TAG_AUTHOR, tlen) == 0) {
355 char *p;
357 remain -= tlen;
358 if (remain <= 0) {
359 err = got_error(GOT_ERR_BAD_OBJ_DATA);
360 goto done;
362 s += tlen;
363 p = strchr(s, '\n');
364 if (p == NULL) {
365 err = got_error(GOT_ERR_BAD_OBJ_DATA);
366 goto done;
368 *p = '\0';
369 (*commit)->author = strdup(s);
370 if ((*commit)->author == NULL) {
371 err = got_error(GOT_ERR_NO_MEM);
372 goto done;
374 s += strlen((*commit)->author) + 1;
375 remain -= strlen((*commit)->author) + 1;
378 tlen = strlen(GOT_COMMIT_TAG_COMMITTER);
379 if (strncmp(s, GOT_COMMIT_TAG_COMMITTER, tlen) == 0) {
380 char *p;
382 remain -= tlen;
383 if (remain <= 0) {
384 err = got_error(GOT_ERR_BAD_OBJ_DATA);
385 goto done;
387 s += tlen;
388 p = strchr(s, '\n');
389 if (p == NULL) {
390 err = got_error(GOT_ERR_BAD_OBJ_DATA);
391 goto done;
393 *p = '\0';
394 (*commit)->committer = strdup(s);
395 if ((*commit)->committer == NULL) {
396 err = got_error(GOT_ERR_NO_MEM);
397 goto done;
399 s += strlen((*commit)->committer) + 1;
400 remain -= strlen((*commit)->committer) + 1;
403 (*commit)->logmsg = strndup(s, remain);
404 if ((*commit)->logmsg == NULL) {
405 err = got_error(GOT_ERR_NO_MEM);
406 goto done;
408 done:
409 if (err)
410 got_object_commit_close(*commit);
411 return err;
414 static void
415 tree_entry_close(struct got_tree_entry *te)
417 free(te->name);
418 free(te);
421 static const struct got_error *
422 parse_tree_entry(struct got_tree_entry **te, size_t *elen, char *buf,
423 size_t maxlen)
425 char *p = buf, *space;
426 const struct got_error *err = NULL;
427 char hex[SHA1_DIGEST_STRING_LENGTH];
429 *te = calloc(1, sizeof(**te));
430 if (*te == NULL)
431 return got_error(GOT_ERR_NO_MEM);
433 *elen = strlen(buf) + 1;
434 if (*elen > maxlen) {
435 free(*te);
436 return got_error(GOT_ERR_BAD_OBJ_DATA);
439 space = strchr(buf, ' ');
440 if (space == NULL) {
441 free(*te);
442 return got_error(GOT_ERR_BAD_OBJ_DATA);
444 while (*p != ' ') {
445 if (*p < '0' && *p > '7') {
446 err = got_error(GOT_ERR_BAD_OBJ_DATA);
447 goto done;
449 (*te)->mode <<= 3;
450 (*te)->mode |= *p - '0';
451 p++;
454 (*te)->name = strdup(space + 1);
455 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH) {
456 err = got_error(GOT_ERR_BAD_OBJ_DATA);
457 goto done;
459 buf += strlen(buf) + 1;
460 memcpy((*te)->id.sha1, buf, SHA1_DIGEST_LENGTH);
461 *elen += SHA1_DIGEST_LENGTH;
462 done:
463 if (err)
464 tree_entry_close(*te);
465 return err;
468 static const struct got_error *
469 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
470 char *buf, size_t len)
472 const struct got_error *err;
473 size_t remain = len;
474 int nentries;
476 *tree = calloc(1, sizeof(**tree));
477 if (*tree == NULL)
478 return got_error(GOT_ERR_NO_MEM);
480 SIMPLEQ_INIT(&(*tree)->entries);
482 while (remain > 0) {
483 struct got_tree_entry *te;
484 size_t elen;
486 err = parse_tree_entry(&te, &elen, buf, remain);
487 if (err)
488 return err;
489 (*tree)->nentries++;
490 SIMPLEQ_INSERT_TAIL(&(*tree)->entries, te, entry);
491 buf += elen;
492 remain -= elen;
495 if (remain != 0) {
496 got_object_tree_close(*tree);
497 return got_error(GOT_ERR_BAD_OBJ_DATA);
500 return NULL;
503 static const struct got_error *
504 read_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
506 const struct got_error *err = NULL;
507 static const size_t blocksize = 512;
508 size_t n, total, remain;
509 uint8_t *buf;
511 *outbuf = NULL;
512 *outlen = 0;
514 buf = calloc(1, blocksize);
515 if (buf == NULL)
516 return got_error(GOT_ERR_NO_MEM);
518 remain = blocksize;
519 total = 0;
520 while (1) {
521 if (remain == 0) {
522 uint8_t *newbuf;
523 newbuf = reallocarray(buf, 1, total + blocksize);
524 if (newbuf == NULL) {
525 err = got_error(GOT_ERR_NO_MEM);
526 goto done;
528 buf = newbuf;
529 remain += blocksize;
531 n = fread(buf, 1, remain, f);
532 if (n == 0) {
533 if (ferror(f)) {
534 err = got_ferror(f, GOT_ERR_IO);
535 goto done;
537 break; /* EOF */
539 remain -= n;
540 total += n;
541 };
543 done:
544 if (err == NULL) {
545 *outbuf = buf;
546 *outlen = total;
547 } else
548 free(buf);
549 return err;
552 static const struct got_error *
553 read_commit_object(struct got_commit_object **commit,
554 struct got_repository *repo, struct got_object *obj, FILE *f)
556 const struct got_error *err = NULL;
557 size_t len;
558 uint8_t *p;
559 int i, ret;
561 if (obj->flags & GOT_OBJ_FLAG_PACKED)
562 err = read_to_mem(&p, &len, f);
563 else
564 err = got_inflate_to_mem(&p, &len, f);
565 if (err)
566 return err;
568 if (len < obj->hdrlen + obj->size) {
569 err = got_error(GOT_ERR_BAD_OBJ_DATA);
570 goto done;
573 /* Skip object header. */
574 len -= obj->hdrlen;
575 err = parse_commit_object(commit, p + obj->hdrlen, len);
576 free(p);
577 done:
578 return err;
581 const struct got_error *
582 got_object_commit_open(struct got_commit_object **commit,
583 struct got_repository *repo, struct got_object *obj)
585 const struct got_error *err = NULL;
586 FILE *f;
588 if (obj->type != GOT_OBJ_TYPE_COMMIT)
589 return got_error(GOT_ERR_OBJ_TYPE);
591 if (obj->flags & GOT_OBJ_FLAG_PACKED)
592 err = got_packfile_extract_object(&f, obj, repo);
593 else
594 err = open_loose_object(&f, obj, repo);
595 if (err)
596 return err;
598 err = read_commit_object(commit, repo, obj, f);
599 fclose(f);
600 return err;
603 void
604 got_object_commit_close(struct got_commit_object *commit)
606 struct got_parent_id *pid;
608 while (!SIMPLEQ_EMPTY(&commit->parent_ids)) {
609 pid = SIMPLEQ_FIRST(&commit->parent_ids);
610 SIMPLEQ_REMOVE_HEAD(&commit->parent_ids, entry);
611 free(pid);
614 free(commit->author);
615 free(commit->committer);
616 free(commit->logmsg);
617 free(commit);
620 static const struct got_error *
621 read_tree_object(struct got_tree_object **tree,
622 struct got_repository *repo, struct got_object *obj, FILE *f)
624 const struct got_error *err = NULL;
625 size_t len;
626 uint8_t *p;
627 int i, ret;
629 if (obj->flags & GOT_OBJ_FLAG_PACKED)
630 err = read_to_mem(&p, &len, f);
631 else
632 err = got_inflate_to_mem(&p, &len, f);
633 if (err)
634 return err;
636 if (len < obj->hdrlen + obj->size) {
637 err = got_error(GOT_ERR_BAD_OBJ_DATA);
638 goto done;
641 /* Skip object header. */
642 len -= obj->hdrlen;
643 err = parse_tree_object(tree, repo, p + obj->hdrlen, len);
644 free(p);
645 done:
646 return err;
649 const struct got_error *
650 got_object_tree_open(struct got_tree_object **tree,
651 struct got_repository *repo, struct got_object *obj)
653 const struct got_error *err = NULL;
654 FILE *f;
656 if (obj->type != GOT_OBJ_TYPE_TREE)
657 return got_error(GOT_ERR_OBJ_TYPE);
659 if (obj->flags & GOT_OBJ_FLAG_PACKED)
660 err = got_packfile_extract_object(&f, obj, repo);
661 else
662 err = open_loose_object(&f, obj, repo);
663 if (err)
664 return err;
666 err = read_tree_object(tree, repo, obj, f);
667 fclose(f);
668 return err;
671 void
672 got_object_tree_close(struct got_tree_object *tree)
674 struct got_tree_entry *te;
676 while (!SIMPLEQ_EMPTY(&tree->entries)) {
677 te = SIMPLEQ_FIRST(&tree->entries);
678 SIMPLEQ_REMOVE_HEAD(&tree->entries, entry);
679 tree_entry_close(te);
682 free(tree);
685 const struct got_error *
686 got_object_blob_open(struct got_blob_object **blob,
687 struct got_repository *repo, struct got_object *obj, size_t blocksize)
689 const struct got_error *err = NULL;
691 if (obj->type != GOT_OBJ_TYPE_BLOB)
692 return got_error(GOT_ERR_OBJ_TYPE);
694 if (blocksize < obj->hdrlen)
695 return got_error(GOT_ERR_NO_SPACE);
697 *blob = calloc(1, sizeof(**blob));
698 if (*blob == NULL)
699 return got_error(GOT_ERR_NO_MEM);
701 if (obj->flags & GOT_OBJ_FLAG_PACKED) {
702 (*blob)->read_buf = calloc(1, blocksize);
703 if ((*blob)->read_buf == NULL)
704 return got_error(GOT_ERR_NO_MEM);
705 err = got_packfile_extract_object(&((*blob)->f), obj, repo);
706 if (err)
707 return err;
708 } else {
709 err = open_loose_object(&((*blob)->f), obj, repo);
710 if (err) {
711 free(*blob);
712 return err;
715 err = got_inflate_init(&(*blob)->zb, blocksize);
716 if (err != NULL) {
717 fclose((*blob)->f);
718 free(*blob);
719 return err;
722 (*blob)->read_buf = (*blob)->zb.outbuf;
723 (*blob)->flags |= GOT_BLOB_F_COMPRESSED;
726 (*blob)->hdrlen = obj->hdrlen;
727 (*blob)->blocksize = blocksize;
728 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
730 return err;
733 void
734 got_object_blob_close(struct got_blob_object *blob)
736 if (blob->flags & GOT_BLOB_F_COMPRESSED)
737 got_inflate_end(&blob->zb);
738 else
739 free(blob->read_buf);
740 fclose(blob->f);
741 free(blob);
744 const struct got_error *
745 got_object_blob_read_block(size_t *outlenp, struct got_blob_object *blob)
747 size_t n;
749 if (blob->flags & GOT_BLOB_F_COMPRESSED)
750 return got_inflate_read(&blob->zb, blob->f, outlenp);
752 n = fread(blob->read_buf, 1, blob->blocksize, blob->f);
753 if (n == 0 && ferror(blob->f))
754 return got_ferror(blob->f, GOT_ERR_IO);
755 *outlenp = n;
756 return NULL;