Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/stat.h>
18 #include <sys/queue.h>
20 #include <errno.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sha1.h>
25 #include <zlib.h>
26 #include <ctype.h>
27 #include <limits.h>
29 #include "got_error.h"
30 #include "got_object.h"
31 #include "got_repository.h"
32 #include "got_sha1.h"
33 #include "pack.h"
34 #include "delta.h"
35 #include "object.h"
/* Smaller of two values. Either argument may be evaluated twice. */
#ifndef MIN
#define MIN(x, y)	(((x) < (y)) ? (x) : (y))
#endif

/* Element count of a true array (not meaningful for pointers). */
#ifndef nitems
#define nitems(arr)	(sizeof(arr) / sizeof((arr)[0]))
#endif

/* Type tags matched at the start of an object header. */
#define GOT_OBJ_TAG_COMMIT	"commit"
#define GOT_OBJ_TAG_TREE	"tree"
#define GOT_OBJ_TAG_BLOB	"blob"

/* Field tags matched at the start of lines in commit object data. */
#define GOT_COMMIT_TAG_TREE		"tree "
#define GOT_COMMIT_TAG_PARENT		"parent "
#define GOT_COMMIT_TAG_AUTHOR		"author "
#define GOT_COMMIT_TAG_COMMITTER	"committer "
54 char *
55 got_object_id_str(struct got_object_id *id, char *buf, size_t size)
56 {
57 return got_sha1_digest_to_str(id->sha1, buf, size);
58 }
60 int
61 got_object_id_cmp(struct got_object_id *id1, struct got_object_id *id2)
62 {
63 return memcmp(id1->sha1, id2->sha1, SHA1_DIGEST_LENGTH);
64 }
66 int
67 got_object_get_type(struct got_object *obj)
68 {
69 switch (obj->type) {
70 case GOT_OBJ_TYPE_COMMIT:
71 case GOT_OBJ_TYPE_TREE:
72 case GOT_OBJ_TYPE_BLOB:
73 case GOT_OBJ_TYPE_TAG:
74 return obj->type;
75 default:
76 abort();
77 break;
78 }
80 /* not reached */
81 return 0;
82 }
84 static void
85 inflate_end(struct got_zstream_buf *zb)
86 {
87 free(zb->inbuf);
88 free(zb->outbuf);
89 inflateEnd(&zb->z);
90 }
92 static const struct got_error *
93 inflate_init(struct got_zstream_buf *zb, size_t bufsize)
94 {
95 const struct got_error *err = NULL;
97 memset(zb, 0, sizeof(*zb));
99 zb->z.zalloc = Z_NULL;
100 zb->z.zfree = Z_NULL;
101 if (inflateInit(&zb->z) != Z_OK) {
102 err = got_error(GOT_ERR_IO);
103 goto done;
106 zb->inlen = zb->outlen = bufsize;
108 zb->inbuf = calloc(1, zb->inlen);
109 if (zb->inbuf == NULL) {
110 err = got_error(GOT_ERR_NO_MEM);
111 goto done;
114 zb->outbuf = calloc(1, zb->outlen);
115 if (zb->outbuf == NULL) {
116 err = got_error(GOT_ERR_NO_MEM);
117 goto done;
120 done:
121 if (err)
122 inflate_end(zb);
123 return err;
126 static const struct got_error *
127 inflate_read(struct got_zstream_buf *zb, FILE *f, size_t *outlenp)
129 size_t last_total_out = zb->z.total_out;
130 z_stream *z = &zb->z;
131 int n, ret;
133 z->next_out = zb->outbuf;
134 z->avail_out = zb->outlen;
136 do {
137 if (z->avail_in == 0) {
138 int i;
139 n = fread(zb->inbuf, 1, zb->inlen, f);
140 if (n == 0) {
141 if (ferror(f))
142 return got_ferror(f, GOT_ERR_IO);
143 *outlenp = 0;
144 return NULL;
146 z->next_in = zb->inbuf;
147 z->avail_in = n;
149 ret = inflate(z, Z_SYNC_FLUSH);
150 } while (ret == Z_OK && z->avail_out > 0);
152 if (ret != Z_OK) {
153 if (ret != Z_STREAM_END)
154 return got_error(GOT_ERR_DECOMPRESSION);
155 zb->flags |= GOT_ZSTREAM_F_HAVE_MORE;
158 *outlenp = z->total_out - last_total_out;
159 return NULL;
162 static const struct got_error *
163 parse_object_header(struct got_object **obj, char *buf, size_t len)
165 const char *obj_tags[] = {
166 GOT_OBJ_TAG_COMMIT,
167 GOT_OBJ_TAG_TREE,
168 GOT_OBJ_TAG_BLOB
169 };
170 const int obj_types[] = {
171 GOT_OBJ_TYPE_COMMIT,
172 GOT_OBJ_TYPE_TREE,
173 GOT_OBJ_TYPE_BLOB,
174 };
175 int type = 0;
176 size_t size = 0, hdrlen = 0;
177 int i;
178 char *p = strchr(buf, '\0');
180 if (p == NULL)
181 return got_error(GOT_ERR_BAD_OBJ_HDR);
183 hdrlen = strlen(buf) + 1 /* '\0' */;
185 for (i = 0; i < nitems(obj_tags); i++) {
186 const char *tag = obj_tags[i];
187 size_t tlen = strlen(tag);
188 const char *errstr;
190 if (strncmp(buf, tag, tlen) != 0)
191 continue;
193 type = obj_types[i];
194 if (len <= tlen)
195 return got_error(GOT_ERR_BAD_OBJ_HDR);
196 size = strtonum(buf + tlen, 0, LONG_MAX, &errstr);
197 if (errstr != NULL)
198 return got_error(GOT_ERR_BAD_OBJ_HDR);
199 break;
202 if (type == 0)
203 return got_error(GOT_ERR_BAD_OBJ_HDR);
205 *obj = calloc(1, sizeof(**obj));
206 if (*obj == NULL)
207 return got_error(GOT_ERR_NO_MEM);
208 (*obj)->type = type;
209 (*obj)->hdrlen = hdrlen;
210 (*obj)->size = size;
211 return NULL;
214 static const struct got_error *
215 read_object_header(struct got_object **obj, struct got_repository *repo,
216 FILE *f)
218 const struct got_error *err;
219 struct got_zstream_buf zb;
220 char *buf;
221 size_t len;
222 const size_t zbsize = 64;
223 size_t outlen, totlen;
224 int i, ret;
226 buf = calloc(zbsize, sizeof(char));
227 if (buf == NULL)
228 return got_error(GOT_ERR_NO_MEM);
230 err = inflate_init(&zb, zbsize);
231 if (err)
232 return err;
234 i = 0;
235 totlen = 0;
236 do {
237 err = inflate_read(&zb, f, &outlen);
238 if (err)
239 goto done;
240 if (strchr(zb.outbuf, '\0') == NULL) {
241 buf = recallocarray(buf, 1 + i, 2 + i, zbsize);
242 if (buf == NULL) {
243 err = got_error(GOT_ERR_NO_MEM);
244 goto done;
247 memcpy(buf + totlen, zb.outbuf, outlen);
248 totlen += outlen;
249 i++;
250 } while (strchr(zb.outbuf, '\0') == NULL);
252 err = parse_object_header(obj, buf, totlen);
253 done:
254 inflate_end(&zb);
255 return err;
258 static const struct got_error *
259 object_path(char **path, struct got_object_id *id, struct got_repository *repo)
261 const struct got_error *err = NULL;
262 char hex[SHA1_DIGEST_STRING_LENGTH];
263 char *path_objects = got_repo_get_path_objects(repo);
265 if (path_objects == NULL)
266 return got_error(GOT_ERR_NO_MEM);
268 got_object_id_str(id, hex, sizeof(hex));
270 if (asprintf(path, "%s/%.2x/%s", path_objects,
271 id->sha1[0], hex + 2) == -1)
272 err = got_error(GOT_ERR_NO_MEM);
274 free(path_objects);
275 return err;
278 static const struct got_error *
279 fopen_object(FILE **f, struct got_object *obj, struct got_repository *repo)
281 const struct got_error *err = NULL;
282 char *path;
284 if (obj->flags & GOT_OBJ_FLAG_PACKED)
285 return got_packfile_extract_object(f, obj, repo);
287 err = object_path(&path, &obj->id, repo);
288 if (err)
289 return err;
290 *f = fopen(path, "rb");
291 if (*f == NULL) {
292 err = got_error_from_errno();
293 goto done;
295 done:
296 free(path);
297 return err;
300 const struct got_error *
301 got_object_open(struct got_object **obj, struct got_repository *repo,
302 struct got_object_id *id)
304 const struct got_error *err = NULL;
305 char *path;
306 FILE *f;
308 err = object_path(&path, id, repo);
309 if (err)
310 return err;
312 f = fopen(path, "rb");
313 if (f == NULL) {
314 if (errno != ENOENT) {
315 err = got_error_from_errno();
316 goto done;
318 err = got_packfile_open_object(obj, id, repo);
319 if (err)
320 goto done;
321 if (*obj == NULL)
322 err = got_error(GOT_ERR_NO_OBJ);
323 } else {
324 err = read_object_header(obj, repo, f);
325 if (err)
326 goto done;
327 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
329 done:
330 free(path);
331 if (err && f)
332 fclose(f);
333 return err;
337 void
338 got_object_close(struct got_object *obj)
340 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
341 struct got_delta *delta;
342 while (!SIMPLEQ_EMPTY(&obj->deltas.entries)) {
343 delta = SIMPLEQ_FIRST(&obj->deltas.entries);
344 SIMPLEQ_REMOVE_HEAD(&obj->deltas.entries, entry);
345 got_delta_close(delta);
348 if (obj->flags & GOT_OBJ_FLAG_PACKED)
349 free(obj->path_packfile);
350 free(obj);
353 static int
354 commit_object_valid(struct got_commit_object *commit)
356 int i;
357 int n;
359 if (commit == NULL)
360 return 0;
362 n = 0;
363 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
364 if (commit->tree_id.sha1[i] == 0)
365 n++;
367 if (n == SHA1_DIGEST_LENGTH)
368 return 0;
370 return 1;
373 static const struct got_error *
374 parse_commit_object(struct got_commit_object **commit, char *buf, size_t len)
376 const struct got_error *err = NULL;
377 char *s = buf;
378 size_t tlen;
379 ssize_t remain = (ssize_t)len;
381 *commit = calloc(1, sizeof(**commit));
382 if (*commit == NULL)
383 return got_error(GOT_ERR_NO_MEM);
385 SIMPLEQ_INIT(&(*commit)->parent_ids);
387 tlen = strlen(GOT_COMMIT_TAG_TREE);
388 if (strncmp(s, GOT_COMMIT_TAG_TREE, tlen) == 0) {
389 remain -= tlen;
390 if (remain < SHA1_DIGEST_STRING_LENGTH) {
391 err = got_error(GOT_ERR_BAD_OBJ_DATA);
392 goto done;
394 s += tlen;
395 if (!got_parse_sha1_digest((*commit)->tree_id.sha1, s)) {
396 err = got_error(GOT_ERR_BAD_OBJ_DATA);
397 goto done;
399 remain -= SHA1_DIGEST_STRING_LENGTH;
400 s += SHA1_DIGEST_STRING_LENGTH;
401 } else {
402 err = got_error(GOT_ERR_BAD_OBJ_DATA);
403 goto done;
406 tlen = strlen(GOT_COMMIT_TAG_PARENT);
407 while (strncmp(s, GOT_COMMIT_TAG_PARENT, tlen) == 0) {
408 struct got_parent_id *pid;
410 remain -= tlen;
411 if (remain < SHA1_DIGEST_STRING_LENGTH) {
412 err = got_error(GOT_ERR_BAD_OBJ_DATA);
413 goto done;
416 pid = calloc(1, sizeof(*pid));
417 if (pid == NULL) {
418 err = got_error(GOT_ERR_NO_MEM);
419 goto done;
421 s += tlen;
422 if (!got_parse_sha1_digest(pid->id.sha1, s)) {
423 err = got_error(GOT_ERR_BAD_OBJ_DATA);
424 goto done;
426 SIMPLEQ_INSERT_TAIL(&(*commit)->parent_ids, pid, entry);
427 (*commit)->nparents++;
429 s += SHA1_DIGEST_STRING_LENGTH;
432 tlen = strlen(GOT_COMMIT_TAG_AUTHOR);
433 if (strncmp(s, GOT_COMMIT_TAG_AUTHOR, tlen) == 0) {
434 char *p;
436 remain -= tlen;
437 if (remain <= 0) {
438 err = got_error(GOT_ERR_BAD_OBJ_DATA);
439 goto done;
441 s += tlen;
442 p = strchr(s, '\n');
443 if (p == NULL) {
444 err = got_error(GOT_ERR_BAD_OBJ_DATA);
445 goto done;
447 *p = '\0';
448 (*commit)->author = strdup(s);
449 if ((*commit)->author == NULL) {
450 err = got_error(GOT_ERR_NO_MEM);
451 goto done;
453 s += strlen((*commit)->author) + 1;
456 tlen = strlen(GOT_COMMIT_TAG_COMMITTER);
457 if (strncmp(s, GOT_COMMIT_TAG_COMMITTER, tlen) == 0) {
458 char *p;
460 remain -= tlen;
461 if (remain <= 0) {
462 err = got_error(GOT_ERR_BAD_OBJ_DATA);
463 goto done;
465 s += tlen;
466 p = strchr(s, '\n');
467 if (p == NULL) {
468 err = got_error(GOT_ERR_BAD_OBJ_DATA);
469 goto done;
471 *p = '\0';
472 (*commit)->committer = strdup(s);
473 if ((*commit)->committer == NULL) {
474 err = got_error(GOT_ERR_NO_MEM);
475 goto done;
477 s += strlen((*commit)->committer) + 1;
480 (*commit)->logmsg = strdup(s);
481 done:
482 if (err)
483 got_object_commit_close(*commit);
484 return err;
487 static void
488 tree_entry_close(struct got_tree_entry *te)
490 free(te->name);
491 free(te);
494 static const struct got_error *
495 parse_tree_entry(struct got_tree_entry **te, size_t *elen, char *buf,
496 size_t maxlen)
498 char *p = buf, *space;
499 const struct got_error *err = NULL;
500 char hex[SHA1_DIGEST_STRING_LENGTH];
502 *te = calloc(1, sizeof(**te));
503 if (*te == NULL)
504 return got_error(GOT_ERR_NO_MEM);
506 *elen = strlen(buf) + 1;
507 if (*elen > maxlen) {
508 free(*te);
509 return got_error(GOT_ERR_BAD_OBJ_DATA);
512 space = strchr(buf, ' ');
513 if (space == NULL) {
514 free(*te);
515 return got_error(GOT_ERR_BAD_OBJ_DATA);
517 while (*p != ' ') {
518 if (*p < '0' && *p > '7') {
519 err = got_error(GOT_ERR_BAD_OBJ_DATA);
520 goto done;
522 (*te)->mode <<= 3;
523 (*te)->mode |= *p - '0';
524 p++;
527 (*te)->name = strdup(space + 1);
528 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH) {
529 err = got_error(GOT_ERR_BAD_OBJ_DATA);
530 goto done;
532 buf += strlen(buf) + 1;
533 memcpy((*te)->id.sha1, buf, SHA1_DIGEST_LENGTH);
534 *elen += SHA1_DIGEST_LENGTH;
535 done:
536 if (err)
537 tree_entry_close(*te);
538 return err;
541 static const struct got_error *
542 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
543 char *buf, size_t len)
545 size_t remain = len;
546 int nentries;
548 *tree = calloc(1, sizeof(**tree));
549 if (*tree == NULL)
550 return got_error(GOT_ERR_NO_MEM);
552 SIMPLEQ_INIT(&(*tree)->entries);
554 while (remain > 0) {
555 struct got_tree_entry *te;
556 size_t elen;
558 parse_tree_entry(&te, &elen, buf, remain);
559 (*tree)->nentries++;
560 SIMPLEQ_INSERT_TAIL(&(*tree)->entries, te, entry);
561 buf += elen;
562 remain -= elen;
565 if (remain != 0) {
566 got_object_tree_close(*tree);
567 return got_error(GOT_ERR_BAD_OBJ_DATA);
570 return NULL;
573 static const struct got_error *
574 read_commit_object(struct got_commit_object **commit,
575 struct got_repository *repo, struct got_object *obj, FILE *f)
577 const struct got_error *err = NULL;
578 struct got_zstream_buf zb;
579 size_t len;
580 char *p;
581 int i, ret;
583 err = inflate_init(&zb, 8192);
584 if (err)
585 return err;
587 do {
588 err = inflate_read(&zb, f, &len);
589 if (err || len == 0)
590 break;
591 } while (len < obj->hdrlen + obj->size);
593 if (len < obj->hdrlen + obj->size) {
594 err = got_error(GOT_ERR_BAD_OBJ_DATA);
595 goto done;
598 /* Skip object header. */
599 len -= obj->hdrlen;
600 err = parse_commit_object(commit, zb.outbuf + obj->hdrlen, len);
601 done:
602 inflate_end(&zb);
603 return err;
606 const struct got_error *
607 got_object_commit_open(struct got_commit_object **commit,
608 struct got_repository *repo, struct got_object *obj)
610 const struct got_error *err = NULL;
611 FILE *f;
613 if (obj->type != GOT_OBJ_TYPE_COMMIT)
614 return got_error(GOT_ERR_OBJ_TYPE);
616 err = fopen_object(&f, obj, repo);
617 if (err)
618 return err;
620 err = read_commit_object(commit, repo, obj, f);
621 fclose(f);
622 return err;
625 void
626 got_object_commit_close(struct got_commit_object *commit)
628 struct got_parent_id *pid;
630 while (!SIMPLEQ_EMPTY(&commit->parent_ids)) {
631 pid = SIMPLEQ_FIRST(&commit->parent_ids);
632 SIMPLEQ_REMOVE_HEAD(&commit->parent_ids, entry);
633 free(pid);
636 free(commit->author);
637 free(commit->committer);
638 free(commit->logmsg);
639 free(commit);
642 static const struct got_error *
643 read_tree_object(struct got_tree_object **tree,
644 struct got_repository *repo, struct got_object *obj, FILE *f)
646 const struct got_error *err = NULL;
647 struct got_zstream_buf zb;
648 size_t len;
649 char *p;
650 int i, ret;
652 err = inflate_init(&zb, 8192);
653 if (err)
654 return err;
656 do {
657 err = inflate_read(&zb, f, &len);
658 if (err || len == 0)
659 break;
660 } while (len < obj->hdrlen + obj->size);
662 if (len < obj->hdrlen + obj->size) {
663 err = got_error(GOT_ERR_BAD_OBJ_DATA);
664 goto done;
667 /* Skip object header. */
668 len -= obj->hdrlen;
669 err = parse_tree_object(tree, repo, zb.outbuf + obj->hdrlen, len);
670 done:
671 inflate_end(&zb);
672 return err;
675 const struct got_error *
676 got_object_tree_open(struct got_tree_object **tree,
677 struct got_repository *repo, struct got_object *obj)
679 const struct got_error *err = NULL;
680 FILE *f;
682 if (obj->type != GOT_OBJ_TYPE_TREE)
683 return got_error(GOT_ERR_OBJ_TYPE);
685 err = fopen_object(&f, obj, repo);
686 if (err)
687 return err;
689 err = read_tree_object(tree, repo, obj, f);
690 fclose(f);
691 return err;
694 void
695 got_object_tree_close(struct got_tree_object *tree)
697 struct got_tree_entry *te;
699 while (!SIMPLEQ_EMPTY(&tree->entries)) {
700 te = SIMPLEQ_FIRST(&tree->entries);
701 SIMPLEQ_REMOVE_HEAD(&tree->entries, entry);
702 tree_entry_close(te);
705 free(tree);
708 const struct got_error *
709 got_object_blob_open(struct got_blob_object **blob,
710 struct got_repository *repo, struct got_object *obj, size_t blocksize)
712 const struct got_error *err = NULL;
714 if (obj->type != GOT_OBJ_TYPE_BLOB)
715 return got_error(GOT_ERR_OBJ_TYPE);
717 if (blocksize < obj->hdrlen)
718 return got_error(GOT_ERR_NO_SPACE);
720 *blob = calloc(1, sizeof(**blob));
721 if (*blob == NULL)
722 return got_error(GOT_ERR_NO_MEM);
724 err = fopen_object(&((*blob)->f), obj, repo);
725 if (err) {
726 free(*blob);
727 return err;
730 err = inflate_init(&(*blob)->zb, blocksize);
731 if (err != NULL) {
732 fclose((*blob)->f);
733 free(*blob);
734 return err;
737 (*blob)->hdrlen = obj->hdrlen;
738 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
740 return err;
743 void
744 got_object_blob_close(struct got_blob_object *blob)
746 inflate_end(&blob->zb);
747 fclose(blob->f);
748 free(blob);
751 const struct got_error *
752 got_object_blob_read_block(struct got_blob_object *blob, size_t *outlenp)
754 return inflate_read(&blob->zb, blob->f, outlenp);