Blob


1 /*
2 * Copyright (c) 2017 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/stat.h>
18 #include <sys/queue.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <sha1.h>
24 #include <zlib.h>
25 #include <ctype.h>
26 #include <limits.h>
28 #include "got_error.h"
29 #include "got_object.h"
30 #include "got_repository.h"
31 #include "got_sha1.h"
33 #ifndef MIN
34 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
35 #endif
37 #ifndef nitems
38 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
39 #endif
41 #define GOT_OBJ_TAG_COMMIT "commit"
42 #define GOT_OBJ_TAG_TREE "tree"
43 #define GOT_OBJ_TAG_BLOB "blob"
45 #define GOT_COMMIT_TAG_TREE "tree "
46 #define GOT_COMMIT_TAG_PARENT "parent "
47 #define GOT_COMMIT_TAG_AUTHOR "author "
48 #define GOT_COMMIT_TAG_COMMITTER "committer "
50 char *
51 got_object_id_str(struct got_object_id *id, char *buf, size_t size)
52 {
53 char *p = buf;
54 char hex[3];
55 int i;
57 if (size < SHA1_DIGEST_STRING_LENGTH)
58 return NULL;
60 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
61 snprintf(hex, sizeof(hex), "%.2x", id->sha1[i]);
62 p[0] = hex[0];
63 p[1] = hex[1];
64 p += 2;
65 }
66 p[0] = '\0';
68 return buf;
69 }
71 static void
72 inflate_end(struct got_zstream_buf *zb)
73 {
74 free(zb->inbuf);
75 free(zb->outbuf);
76 inflateEnd(&zb->z);
77 }
79 static const struct got_error *
80 inflate_init(struct got_zstream_buf *zb, size_t bufsize)
81 {
82 const struct got_error *err = NULL;
84 memset(zb, 0, sizeof(*zb));
86 zb->z.zalloc = Z_NULL;
87 zb->z.zfree = Z_NULL;
88 if (inflateInit(&zb->z) != Z_OK) {
89 err = got_error(GOT_ERR_IO);
90 goto done;
91 }
93 zb->inlen = zb->outlen = bufsize;
95 zb->inbuf = calloc(1, zb->inlen);
96 if (zb->inbuf == NULL) {
97 err = got_error(GOT_ERR_NO_MEM);
98 goto done;
99 }
101 zb->outbuf = calloc(1, zb->outlen);
102 if (zb->outbuf == NULL) {
103 err = got_error(GOT_ERR_NO_MEM);
104 goto done;
107 done:
108 if (err)
109 inflate_end(zb);
110 return err;
113 static const struct got_error *
114 inflate_read(struct got_zstream_buf *zb, FILE *f, size_t *outlenp)
116 size_t last_total_out = zb->z.total_out;
117 z_stream *z = &zb->z;
118 int n, ret;
120 z->next_out = zb->outbuf;
121 z->avail_out = zb->outlen;
123 do {
124 if (z->avail_in == 0) {
125 int i;
126 n = fread(zb->inbuf, 1, zb->inlen, f);
127 if (n == 0) {
128 if (ferror(f))
129 return got_error(GOT_ERR_IO);
130 *outlenp = 0;
131 return NULL;
133 z->next_in = zb->inbuf;
134 z->avail_in = n;
136 ret = inflate(z, Z_SYNC_FLUSH);
137 } while (ret == Z_OK && z->avail_out > 0);
139 if (ret != Z_OK) {
140 if (ret != Z_STREAM_END)
141 return got_error(GOT_ERR_DECOMPRESSION);
142 zb->flags |= GOT_ZSTREAM_F_HAVE_MORE;
145 *outlenp = z->total_out - last_total_out;
146 return NULL;
149 static const struct got_error *
150 parse_object_header(struct got_object **obj, char *buf, size_t len)
152 const char *obj_tags[] = {
153 GOT_OBJ_TAG_COMMIT,
154 GOT_OBJ_TAG_TREE,
155 GOT_OBJ_TAG_BLOB
156 };
157 const int obj_types[] = {
158 GOT_OBJ_TYPE_COMMIT,
159 GOT_OBJ_TYPE_TREE,
160 GOT_OBJ_TYPE_BLOB,
161 };
162 int type = 0;
163 size_t size = 0, hdrlen = 0;
164 int i;
165 char *p = strchr(buf, '\0');
167 if (p == NULL)
168 return got_error(GOT_ERR_BAD_OBJ_HDR);
170 hdrlen = strlen(buf) + 1 /* '\0' */;
172 for (i = 0; i < nitems(obj_tags); i++) {
173 const char *tag = obj_tags[i];
174 size_t tlen = strlen(tag);
175 const char *errstr;
177 if (strncmp(buf, tag, tlen) != 0)
178 continue;
180 type = obj_types[i];
181 if (len <= tlen)
182 return got_error(GOT_ERR_BAD_OBJ_HDR);
183 size = strtonum(buf + tlen, 0, LONG_MAX, &errstr);
184 if (errstr != NULL)
185 return got_error(GOT_ERR_BAD_OBJ_HDR);
186 break;
189 if (type == 0)
190 return got_error(GOT_ERR_BAD_OBJ_HDR);
192 *obj = calloc(1, sizeof(**obj));
193 (*obj)->type = type;
194 (*obj)->hdrlen = hdrlen;
195 (*obj)->size = size;
196 return NULL;
199 static const struct got_error *
200 read_object_header(struct got_object **obj, struct got_repository *repo,
201 const char *path)
203 const struct got_error *err;
204 FILE *f;
205 struct got_zstream_buf zb;
206 char *buf;
207 size_t len;
208 const size_t zbsize = 64;
209 size_t outlen, totlen;
210 int i, ret;
212 f = fopen(path, "rb");
213 if (f == NULL)
214 return got_error(GOT_ERR_BAD_PATH);
216 buf = calloc(zbsize, sizeof(char));
217 if (buf == NULL)
218 return got_error(GOT_ERR_NO_MEM);
220 err = inflate_init(&zb, zbsize);
221 if (err) {
222 fclose(f);
223 return err;
226 i = 0;
227 totlen = 0;
228 do {
229 err = inflate_read(&zb, f, &outlen);
230 if (err)
231 goto done;
232 if (strchr(zb.outbuf, '\0') == NULL) {
233 buf = recallocarray(buf, 1 + i, 2 + i, zbsize);
234 if (buf == NULL) {
235 err = got_error(GOT_ERR_NO_MEM);
236 goto done;
238 buf += totlen;
240 memcpy(buf, zb.outbuf, outlen);
241 totlen += outlen;
242 i++;
243 } while (strchr(zb.outbuf, '\0') == NULL);
245 err = parse_object_header(obj, buf, totlen);
246 done:
247 inflate_end(&zb);
248 fclose(f);
249 return err;
252 static const struct got_error *
253 object_path(char **path, struct got_object_id *id,
254 struct got_repository *repo)
256 const struct got_error *err = NULL;
257 char hex[SHA1_DIGEST_STRING_LENGTH];
258 char *path_objects = got_repo_get_path_objects(repo);
260 if (path_objects == NULL)
261 return got_error(GOT_ERR_NO_MEM);
263 got_object_id_str(id, hex, sizeof(hex));
265 if (asprintf(path, "%s/%.2x/%s", path_objects,
266 id->sha1[0], hex + 2) == -1)
267 err = got_error(GOT_ERR_NO_MEM);
269 free(path_objects);
270 return err;
273 const struct got_error *
274 got_object_open(struct got_object **obj, struct got_repository *repo,
275 struct got_object_id *id)
277 const struct got_error *err = NULL;
278 char *path = NULL;
280 err = object_path(&path, id, repo);
281 if (err)
282 return err;
284 err = read_object_header(obj, repo, path);
285 if (err == NULL)
286 memcpy((*obj)->id.sha1, id->sha1, SHA1_DIGEST_LENGTH);
287 done:
288 free(path);
289 return err;
/* Release an object previously stored in *obj by got_object_open(). */
void
got_object_close(struct got_object *obj)
{
	free(obj);
}
298 static int
299 commit_object_valid(struct got_commit_object *commit)
301 int i;
302 int n;
304 if (commit == NULL)
305 return 0;
307 n = 0;
308 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
309 if (commit->tree_id.sha1[i] == 0)
310 n++;
312 if (n == SHA1_DIGEST_LENGTH)
313 return 0;
315 return 1;
318 static const struct got_error *
319 parse_commit_object(struct got_commit_object **commit, char *buf, size_t len)
321 const struct got_error *err = NULL;
322 char *s = buf;
323 size_t tlen;
324 ssize_t remain = (ssize_t)len;
326 *commit = calloc(1, sizeof(**commit));
327 if (*commit == NULL)
328 return got_error(GOT_ERR_NO_MEM);
330 SIMPLEQ_INIT(&(*commit)->parent_ids);
332 tlen = strlen(GOT_COMMIT_TAG_TREE);
333 if (strncmp(s, GOT_COMMIT_TAG_TREE, tlen) == 0) {
334 remain -= tlen;
335 if (remain < SHA1_DIGEST_STRING_LENGTH) {
336 err = got_error(GOT_ERR_BAD_OBJ_DATA);
337 goto done;
339 s += tlen;
340 if (!got_parse_sha1_digest((*commit)->tree_id.sha1, s)) {
341 err = got_error(GOT_ERR_BAD_OBJ_DATA);
342 goto done;
344 remain -= SHA1_DIGEST_STRING_LENGTH;
345 s += SHA1_DIGEST_STRING_LENGTH;
346 } else {
347 err = got_error(GOT_ERR_BAD_OBJ_DATA);
348 goto done;
351 tlen = strlen(GOT_COMMIT_TAG_PARENT);
352 while (strncmp(s, GOT_COMMIT_TAG_PARENT, tlen) == 0) {
353 struct got_parent_id *pid;
355 remain -= tlen;
356 if (remain < SHA1_DIGEST_STRING_LENGTH) {
357 err = got_error(GOT_ERR_BAD_OBJ_DATA);
358 goto done;
361 pid = calloc(1, sizeof(*pid));
362 if (pid == NULL) {
363 err = got_error(GOT_ERR_NO_MEM);
364 goto done;
366 s += tlen;
367 if (!got_parse_sha1_digest(pid->id.sha1, s)) {
368 err = got_error(GOT_ERR_BAD_OBJ_DATA);
369 goto done;
371 SIMPLEQ_INSERT_TAIL(&(*commit)->parent_ids, pid, entry);
372 (*commit)->nparents++;
374 s += SHA1_DIGEST_STRING_LENGTH;
377 tlen = strlen(GOT_COMMIT_TAG_AUTHOR);
378 if (strncmp(s, GOT_COMMIT_TAG_AUTHOR, tlen) == 0) {
379 char *p;
381 remain -= tlen;
382 if (remain <= 0) {
383 err = got_error(GOT_ERR_BAD_OBJ_DATA);
384 goto done;
386 s += tlen;
387 p = strchr(s, '\n');
388 if (p == NULL) {
389 err = got_error(GOT_ERR_BAD_OBJ_DATA);
390 goto done;
392 *p = '\0';
393 (*commit)->author = strdup(s);
394 if ((*commit)->author == NULL) {
395 err = got_error(GOT_ERR_NO_MEM);
396 goto done;
398 s += strlen((*commit)->author) + 1;
401 tlen = strlen(GOT_COMMIT_TAG_COMMITTER);
402 if (strncmp(s, GOT_COMMIT_TAG_COMMITTER, tlen) == 0) {
403 char *p;
405 remain -= tlen;
406 if (remain <= 0) {
407 err = got_error(GOT_ERR_BAD_OBJ_DATA);
408 goto done;
410 s += tlen;
411 p = strchr(s, '\n');
412 if (p == NULL) {
413 err = got_error(GOT_ERR_BAD_OBJ_DATA);
414 goto done;
416 *p = '\0';
417 (*commit)->committer = strdup(s);
418 if ((*commit)->committer == NULL) {
419 err = got_error(GOT_ERR_NO_MEM);
420 goto done;
422 s += strlen((*commit)->committer) + 1;
425 (*commit)->logmsg = strdup(s);
426 done:
427 if (err)
428 got_object_commit_close(*commit);
429 return err;
432 static void
433 tree_entry_close(struct got_tree_entry *te)
435 free(te->name);
436 free(te);
439 static const struct got_error *
440 parse_tree_entry(struct got_tree_entry **te, size_t *elen, char *buf,
441 size_t maxlen)
443 char *p = buf, *space;
444 const struct got_error *err = NULL;
445 char hex[SHA1_DIGEST_STRING_LENGTH];
447 *te = calloc(1, sizeof(**te));
448 if (*te == NULL)
449 return got_error(GOT_ERR_NO_MEM);
451 *elen = strlen(buf) + 1;
452 if (*elen > maxlen) {
453 free(*te);
454 return got_error(GOT_ERR_BAD_OBJ_DATA);
457 space = strchr(buf, ' ');
458 if (space == NULL) {
459 free(*te);
460 return got_error(GOT_ERR_BAD_OBJ_DATA);
462 while (*p != ' ') {
463 if (*p < '0' && *p > '7') {
464 err = got_error(GOT_ERR_BAD_OBJ_DATA);
465 goto done;
467 (*te)->mode <<= 3;
468 (*te)->mode |= *p - '0';
469 p++;
472 (*te)->name = strdup(space + 1);
473 if (*elen > maxlen || maxlen - *elen < SHA1_DIGEST_LENGTH) {
474 err = got_error(GOT_ERR_BAD_OBJ_DATA);
475 goto done;
477 buf += strlen(buf) + 1;
478 memcpy((*te)->id.sha1, buf, SHA1_DIGEST_LENGTH);
479 *elen += SHA1_DIGEST_LENGTH;
480 done:
481 if (err)
482 tree_entry_close(*te);
483 return err;
486 static const struct got_error *
487 parse_tree_object(struct got_tree_object **tree, struct got_repository *repo,
488 char *buf, size_t len)
490 size_t remain = len;
491 int nentries;
493 *tree = calloc(1, sizeof(**tree));
494 if (*tree == NULL)
495 return got_error(GOT_ERR_NO_MEM);
497 SIMPLEQ_INIT(&(*tree)->entries);
499 while (remain > 0) {
500 struct got_tree_entry *te;
501 size_t elen;
503 parse_tree_entry(&te, &elen, buf, remain);
504 (*tree)->nentries++;
505 SIMPLEQ_INSERT_TAIL(&(*tree)->entries, te, entry);
506 buf += elen;
507 remain -= elen;
510 if (remain != 0) {
511 got_object_tree_close(*tree);
512 return got_error(GOT_ERR_BAD_OBJ_DATA);
515 return NULL;
518 static const struct got_error *
519 read_commit_object(struct got_commit_object **commit,
520 struct got_repository *repo, struct got_object *obj, const char *path)
522 const struct got_error *err = NULL;
523 FILE *f;
524 struct got_zstream_buf zb;
525 size_t len;
526 char *p;
527 int i, ret;
529 f = fopen(path, "rb");
530 if (f == NULL)
531 return got_error(GOT_ERR_BAD_PATH);
533 err = inflate_init(&zb, 8192);
534 if (err) {
535 fclose(f);
536 return err;
539 do {
540 err = inflate_read(&zb, f, &len);
541 if (err || len == 0)
542 break;
543 } while (len < obj->hdrlen + obj->size);
545 if (len < obj->hdrlen + obj->size) {
546 err = got_error(GOT_ERR_BAD_OBJ_DATA);
547 goto done;
550 /* Skip object header. */
551 len -= obj->hdrlen;
552 err = parse_commit_object(commit, zb.outbuf + obj->hdrlen, len);
553 done:
554 inflate_end(&zb);
555 fclose(f);
556 return err;
559 const struct got_error *
560 got_object_commit_open(struct got_commit_object **commit,
561 struct got_repository *repo, struct got_object *obj)
563 const struct got_error *err = NULL;
564 char *path = NULL;
566 if (obj->type != GOT_OBJ_TYPE_COMMIT)
567 return got_error(GOT_ERR_OBJ_TYPE);
569 err = object_path(&path, &obj->id, repo);
570 if (err)
571 return err;
573 err = read_commit_object(commit, repo, obj, path);
574 free(path);
575 return err;
578 void
579 got_object_commit_close(struct got_commit_object *commit)
581 struct got_parent_id *pid;
583 while (!SIMPLEQ_EMPTY(&commit->parent_ids)) {
584 pid = SIMPLEQ_FIRST(&commit->parent_ids);
585 SIMPLEQ_REMOVE_HEAD(&commit->parent_ids, entry);
586 free(pid);
589 free(commit->author);
590 free(commit->committer);
591 free(commit->logmsg);
592 free(commit);
595 static const struct got_error *
596 read_tree_object(struct got_tree_object **tree,
597 struct got_repository *repo, struct got_object *obj, const char *path)
599 const struct got_error *err = NULL;
600 FILE *f;
601 struct got_zstream_buf zb;
602 size_t len;
603 char *p;
604 int i, ret;
606 f = fopen(path, "rb");
607 if (f == NULL)
608 return got_error(GOT_ERR_BAD_PATH);
610 err = inflate_init(&zb, 8192);
611 if (err) {
612 fclose(f);
613 return err;
616 do {
617 err = inflate_read(&zb, f, &len);
618 if (err || len == 0)
619 break;
620 } while (len < obj->hdrlen + obj->size);
622 if (len < obj->hdrlen + obj->size) {
623 err = got_error(GOT_ERR_BAD_OBJ_DATA);
624 goto done;
627 /* Skip object header. */
628 len -= obj->hdrlen;
629 err = parse_tree_object(tree, repo, zb.outbuf + obj->hdrlen, len);
630 done:
631 inflate_end(&zb);
632 fclose(f);
633 return err;
636 const struct got_error *
637 got_object_tree_open(struct got_tree_object **tree,
638 struct got_repository *repo, struct got_object *obj)
640 const struct got_error *err = NULL;
641 char *path = NULL;
643 if (obj->type != GOT_OBJ_TYPE_TREE)
644 return got_error(GOT_ERR_OBJ_TYPE);
646 err = object_path(&path, &obj->id, repo);
647 if (err)
648 return err;
650 err = read_tree_object(tree, repo, obj, path);
651 free(path);
652 return err;
655 void
656 got_object_tree_close(struct got_tree_object *tree)
658 struct got_tree_entry *te;
660 while (!SIMPLEQ_EMPTY(&tree->entries)) {
661 te = SIMPLEQ_FIRST(&tree->entries);
662 SIMPLEQ_REMOVE_HEAD(&tree->entries, entry);
663 tree_entry_close(te);
666 free(tree);
669 const struct got_error *
670 got_object_blob_open(struct got_blob_object **blob,
671 struct got_repository *repo, struct got_object *obj, size_t blocksize)
673 const struct got_error *err = NULL;
674 char *path;
676 if (obj->type != GOT_OBJ_TYPE_BLOB)
677 return got_error(GOT_ERR_OBJ_TYPE);
679 if (blocksize < obj->hdrlen)
680 return got_error(GOT_ERR_NO_SPACE);
682 err = object_path(&path, &obj->id, repo);
683 if (err)
684 return err;
686 *blob = calloc(1, sizeof(**blob));
687 if (*blob == NULL) {
688 free(path);
689 return got_error(GOT_ERR_NO_MEM);
692 (*blob)->f = fopen(path, "rb");
693 if ((*blob)->f == NULL) {
694 free(*blob);
695 free(path);
696 return got_error(GOT_ERR_BAD_PATH);
699 err = inflate_init(&(*blob)->zb, blocksize);
700 if (err != NULL) {
701 fclose((*blob)->f);
702 free(*blob);
703 free(path);
704 return err;
707 (*blob)->hdrlen = obj->hdrlen;
708 memcpy(&(*blob)->id.sha1, obj->id.sha1, SHA1_DIGEST_LENGTH);
710 free(path);
711 return err;
714 void
715 got_object_blob_close(struct got_blob_object *blob)
717 inflate_end(&blob->zb);
718 fclose(blob->f);
719 free(blob);
722 const struct got_error *
723 got_object_blob_read_block(struct got_blob_object *blob, size_t *outlenp)
725 return inflate_read(&blob->zb, blob->f, outlenp);