Blob


1 /*
2 * Copyright (c) 2018, 2019, 2020 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/socket.h>
23 #include <sys/wait.h>
24 #include <sys/mman.h>
26 #include <errno.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <stdint.h>
31 #include <sha1.h>
32 #include <sha2.h>
33 #include <zlib.h>
34 #include <ctype.h>
35 #include <limits.h>
36 #include <imsg.h>
37 #include <time.h>
38 #include <unistd.h>
40 #include "got_error.h"
41 #include "got_object.h"
42 #include "got_repository.h"
43 #include "got_opentemp.h"
44 #include "got_path.h"
46 #include "got_lib_hash.h"
47 #include "got_lib_delta.h"
48 #include "got_lib_inflate.h"
49 #include "got_lib_object.h"
50 #include "got_lib_object_parse.h"
51 #include "got_lib_object_cache.h"
52 #include "got_lib_pack.h"
53 #include "got_lib_repository.h"
/* Element count of a true array (not a pointer), unless already provided. */
#ifndef nitems
#define nitems(_a)	(sizeof((_a)) / sizeof((_a)[0]))
#endif
59 struct got_object_id *
60 got_object_id_dup(struct got_object_id *id1)
61 {
62 struct got_object_id *id2;
64 id2 = malloc(sizeof(*id2));
65 if (id2 == NULL)
66 return NULL;
67 memcpy(id2, id1, sizeof(*id2));
68 return id2;
69 }
71 int
72 got_object_id_cmp(const struct got_object_id *id1,
73 const struct got_object_id *id2)
74 {
75 if (id1->algo != id2->algo)
76 abort(); // return -1;
77 if (id1->algo == GOT_HASH_SHA1)
78 return memcmp(id1->hash, id2->hash, SHA1_DIGEST_LENGTH);
79 if (id1->algo == GOT_HASH_SHA256)
80 return memcmp(id1->hash, id2->hash, SHA256_DIGEST_LENGTH);
81 abort();
82 return -1;
83 }
85 const struct got_error *
86 got_object_qid_alloc_partial(struct got_object_qid **qid)
87 {
88 *qid = calloc(1, sizeof(**qid));
89 if (*qid == NULL)
90 return got_error_from_errno("malloc");
92 (*qid)->data = NULL;
93 return NULL;
94 }
96 const struct got_error *
97 got_object_id_str(char **outbuf, struct got_object_id *id)
98 {
99 static const size_t len = GOT_OBJECT_ID_HEX_MAXLEN;
101 *outbuf = malloc(len);
102 if (*outbuf == NULL)
103 return got_error_from_errno("malloc");
105 if (got_object_id_hex(id, *outbuf, len) == NULL) {
106 free(*outbuf);
107 *outbuf = NULL;
108 return got_error(GOT_ERR_BAD_OBJ_ID_STR);
111 return NULL;
114 char *
115 got_object_id_hex(struct got_object_id *id, char *buf, size_t len)
117 return got_hash_digest_to_str(id->hash, buf, len, id->algo);
120 void
121 got_object_close(struct got_object *obj)
123 if (obj->refcnt > 0) {
124 obj->refcnt--;
125 if (obj->refcnt > 0)
126 return;
129 if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
130 struct got_delta *delta;
131 while (!STAILQ_EMPTY(&obj->deltas.entries)) {
132 delta = STAILQ_FIRST(&obj->deltas.entries);
133 STAILQ_REMOVE_HEAD(&obj->deltas.entries, entry);
134 free(delta);
137 free(obj);
140 const struct got_error *
141 got_object_raw_close(struct got_raw_object *obj)
143 const struct got_error *err = NULL;
145 if (obj->refcnt > 0) {
146 obj->refcnt--;
147 if (obj->refcnt > 0)
148 return NULL;
151 if (obj->close_cb)
152 obj->close_cb(obj);
154 if (obj->f == NULL) {
155 if (obj->fd != -1) {
156 if (munmap(obj->data, obj->hdrlen + obj->size) == -1)
157 err = got_error_from_errno("munmap");
158 if (close(obj->fd) == -1 && err == NULL)
159 err = got_error_from_errno("close");
160 } else
161 free(obj->data);
162 } else {
163 if (fclose(obj->f) == EOF && err == NULL)
164 err = got_error_from_errno("fclose");
166 free(obj);
167 return err;
/* Release a queue entry obtained from got_object_qid_alloc_partial(). */
void
got_object_qid_free(struct got_object_qid *qid)
{
	free(qid);
}
176 void
177 got_object_id_queue_free(struct got_object_id_queue *ids)
179 struct got_object_qid *qid;
181 while (!STAILQ_EMPTY(ids)) {
182 qid = STAILQ_FIRST(ids);
183 STAILQ_REMOVE_HEAD(ids, entry);
184 got_object_qid_free(qid);
188 const struct got_error *
189 got_object_parse_header(struct got_object **obj, char *buf, size_t len)
191 const char *obj_labels[] = {
192 GOT_OBJ_LABEL_COMMIT,
193 GOT_OBJ_LABEL_TREE,
194 GOT_OBJ_LABEL_BLOB,
195 GOT_OBJ_LABEL_TAG,
196 };
197 const int obj_types[] = {
198 GOT_OBJ_TYPE_COMMIT,
199 GOT_OBJ_TYPE_TREE,
200 GOT_OBJ_TYPE_BLOB,
201 GOT_OBJ_TYPE_TAG,
202 };
203 int type = 0;
204 size_t size = 0;
205 size_t i;
206 char *end;
208 *obj = NULL;
210 end = memchr(buf, '\0', len);
211 if (end == NULL)
212 return got_error(GOT_ERR_BAD_OBJ_HDR);
214 for (i = 0; i < nitems(obj_labels); i++) {
215 const char *label = obj_labels[i];
216 size_t label_len = strlen(label);
217 const char *errstr;
219 if (len <= label_len || buf + label_len >= end ||
220 strncmp(buf, label, label_len) != 0)
221 continue;
223 type = obj_types[i];
224 size = strtonum(buf + label_len, 0, LONG_MAX, &errstr);
225 if (errstr != NULL)
226 return got_error(GOT_ERR_BAD_OBJ_HDR);
227 break;
230 if (type == 0)
231 return got_error(GOT_ERR_BAD_OBJ_HDR);
233 *obj = calloc(1, sizeof(**obj));
234 if (*obj == NULL)
235 return got_error_from_errno("calloc");
236 (*obj)->type = type;
237 (*obj)->hdrlen = end - buf + 1;
238 (*obj)->size = size;
239 return NULL;
242 const struct got_error *
243 got_object_read_header(struct got_object **obj, int fd)
245 const struct got_error *err;
246 struct got_inflate_buf zb;
247 uint8_t *buf;
248 const size_t zbsize = 64;
249 size_t outlen, totlen;
250 int nbuf = 1;
252 *obj = NULL;
254 buf = malloc(zbsize);
255 if (buf == NULL)
256 return got_error_from_errno("malloc");
257 buf[0] = '\0';
259 err = got_inflate_init(&zb, buf, zbsize, NULL);
260 if (err)
261 return err;
263 totlen = 0;
264 do {
265 err = got_inflate_read_fd(&zb, fd, &outlen, NULL);
266 if (err)
267 goto done;
268 if (outlen == 0)
269 break;
270 totlen += outlen;
271 if (memchr(zb.outbuf, '\0', outlen) == NULL) {
272 uint8_t *newbuf;
273 nbuf++;
274 newbuf = recallocarray(buf, nbuf - 1, nbuf, zbsize);
275 if (newbuf == NULL) {
276 err = got_error_from_errno("recallocarray");
277 goto done;
279 buf = newbuf;
280 zb.outbuf = newbuf + totlen;
281 zb.outlen = (nbuf * zbsize) - totlen;
283 } while (memchr(zb.outbuf, '\0', outlen) == NULL);
285 err = got_object_parse_header(obj, buf, totlen);
286 done:
287 free(buf);
288 got_inflate_end(&zb);
289 return err;
292 const struct got_error *
293 got_object_read_raw(uint8_t **outbuf, off_t *size, size_t *hdrlen,
294 size_t max_in_mem_size, int outfd, struct got_object_id *expected_id,
295 int infd)
297 const struct got_error *err = NULL;
298 struct got_object *obj;
299 struct got_inflate_checksum csum;
300 uint8_t hash[GOT_OBJECT_ID_MAXLEN];
301 struct got_hash ctx;
302 size_t len, consumed;
303 FILE *f = NULL;
305 *outbuf = NULL;
306 *size = 0;
307 *hdrlen = 0;
309 got_hash_init(&ctx, expected_id->algo);
310 memset(&csum, 0, sizeof(csum));
311 csum.output_ctx = &ctx;
313 if (lseek(infd, SEEK_SET, 0) == -1)
314 return got_error_from_errno("lseek");
316 err = got_object_read_header(&obj, infd);
317 if (err)
318 return err;
320 if (lseek(infd, SEEK_SET, 0) == -1)
321 return got_error_from_errno("lseek");
323 if (obj->size + obj->hdrlen <= max_in_mem_size) {
324 err = got_inflate_to_mem_fd(outbuf, &len, &consumed, &csum,
325 obj->size + obj->hdrlen, infd);
326 } else {
327 int fd;
328 /*
329 * XXX This uses an extra file descriptor for no good reason.
330 * We should have got_inflate_fd_to_fd().
331 */
332 fd = dup(infd);
333 if (fd == -1)
334 return got_error_from_errno("dup");
335 f = fdopen(fd, "r");
336 if (f == NULL) {
337 err = got_error_from_errno("fdopen");
338 abort();
339 close(fd);
340 goto done;
342 err = got_inflate_to_fd(&len, f, &csum, outfd);
344 if (err)
345 goto done;
347 if (len < obj->hdrlen || len != obj->hdrlen + obj->size) {
348 err = got_error(GOT_ERR_BAD_OBJ_HDR);
349 goto done;
352 got_hash_final(&ctx, hash);
353 if (got_hash_cmp(&ctx, expected_id->hash, hash) != 0) {
354 err = got_error_checksum(expected_id);
355 goto done;
358 *size = obj->size;
359 *hdrlen = obj->hdrlen;
360 done:
361 got_object_close(obj);
362 if (f && fclose(f) == EOF && err == NULL)
363 err = got_error_from_errno("fclose");
364 return err;
367 struct got_commit_object *
368 got_object_commit_alloc_partial(void)
370 struct got_commit_object *commit;
372 commit = calloc(1, sizeof(*commit));
373 if (commit == NULL)
374 return NULL;
375 commit->tree_id = malloc(sizeof(*commit->tree_id));
376 if (commit->tree_id == NULL) {
377 free(commit);
378 return NULL;
381 STAILQ_INIT(&commit->parent_ids);
383 return commit;
386 const struct got_error *
387 got_object_commit_add_parent(struct got_commit_object *commit,
388 const char *id_str, enum got_hash_algorithm algo)
390 const struct got_error *err = NULL;
391 struct got_object_qid *qid;
393 err = got_object_qid_alloc_partial(&qid);
394 if (err)
395 return err;
397 qid->id.algo = algo;
398 if (!got_parse_hash_digest(qid->id.hash, id_str, algo)) {
399 err = got_error(GOT_ERR_BAD_OBJ_DATA);
400 got_object_qid_free(qid);
401 return err;
404 STAILQ_INSERT_TAIL(&commit->parent_ids, qid, entry);
405 commit->nparents++;
407 return NULL;
410 static const struct got_error *
411 parse_gmtoff(time_t *gmtoff, const char *tzstr)
413 int sign = 1;
414 const char *p = tzstr;
415 time_t h, m;
417 *gmtoff = 0;
419 if (*p == '-')
420 sign = -1;
421 else if (*p != '+')
422 return got_error(GOT_ERR_BAD_OBJ_DATA);
423 p++;
424 if (!isdigit((unsigned char)*p) &&
425 !isdigit((unsigned char)*(p + 1)))
426 return got_error(GOT_ERR_BAD_OBJ_DATA);
427 h = (((*p - '0') * 10) + (*(p + 1) - '0'));
429 p += 2;
430 if (!isdigit((unsigned char)*p) &&
431 !isdigit((unsigned char)*(p + 1)))
432 return got_error(GOT_ERR_BAD_OBJ_DATA);
433 m = ((*p - '0') * 10) + (*(p + 1) - '0');
435 *gmtoff = (h * 60 * 60 + m * 60) * sign;
436 return NULL;
439 static const struct got_error *
440 parse_commit_time(time_t *time, time_t *gmtoff, char *committer)
442 const struct got_error *err = NULL;
443 const char *errstr;
444 char *space, *tzstr;
446 /* Parse and strip off trailing timezone indicator string. */
447 space = strrchr(committer, ' ');
448 if (space == NULL)
449 return got_error(GOT_ERR_BAD_OBJ_DATA);
450 tzstr = strdup(space + 1);
451 if (tzstr == NULL)
452 return got_error_from_errno("strdup");
453 err = parse_gmtoff(gmtoff, tzstr);
454 free(tzstr);
455 if (err) {
456 if (err->code != GOT_ERR_BAD_OBJ_DATA)
457 return err;
458 /* Old versions of Git omitted the timestamp. */
459 *time = 0;
460 *gmtoff = 0;
461 return NULL;
463 *space = '\0';
465 /* Timestamp is separated from committer name + email by space. */
466 space = strrchr(committer, ' ');
467 if (space == NULL)
468 return got_error(GOT_ERR_BAD_OBJ_DATA);
470 /* Timestamp parsed here is expressed as UNIX timestamp (UTC). */
471 *time = strtonum(space + 1, 0, INT64_MAX, &errstr);
472 if (errstr)
473 return got_error(GOT_ERR_BAD_OBJ_DATA);
475 /* Strip off parsed time information, leaving just author and email. */
476 *space = '\0';
478 return NULL;
481 void
482 got_object_commit_close(struct got_commit_object *commit)
484 if (commit->refcnt > 0) {
485 commit->refcnt--;
486 if (commit->refcnt > 0)
487 return;
490 got_object_id_queue_free(&commit->parent_ids);
491 free(commit->tree_id);
492 free(commit->author);
493 free(commit->committer);
494 free(commit->logmsg);
495 free(commit);
498 struct got_object_id *
499 got_object_commit_get_tree_id(struct got_commit_object *commit)
501 return commit->tree_id;
504 int
505 got_object_commit_get_nparents(struct got_commit_object *commit)
507 return commit->nparents;
510 const struct got_object_id_queue *
511 got_object_commit_get_parent_ids(struct got_commit_object *commit)
513 return &commit->parent_ids;
516 const char *
517 got_object_commit_get_author(struct got_commit_object *commit)
519 return commit->author;
522 time_t
523 got_object_commit_get_author_time(struct got_commit_object *commit)
525 return commit->author_time;
528 time_t got_object_commit_get_author_gmtoff(struct got_commit_object *commit)
530 return commit->author_gmtoff;
533 const char *
534 got_object_commit_get_committer(struct got_commit_object *commit)
536 return commit->committer;
539 time_t
540 got_object_commit_get_committer_time(struct got_commit_object *commit)
542 return commit->committer_time;
545 time_t
546 got_object_commit_get_committer_gmtoff(struct got_commit_object *commit)
548 return commit->committer_gmtoff;
551 const struct got_error *
552 got_object_commit_get_logmsg(char **logmsg, struct got_commit_object *commit)
554 const struct got_error *err = NULL;
555 const char *src;
556 char *dst;
557 size_t len;
559 len = strlen(commit->logmsg);
560 *logmsg = malloc(len + 2); /* leave room for a trailing \n and \0 */
561 if (*logmsg == NULL)
562 return got_error_from_errno("malloc");
564 /*
565 * Strip out unusual headers. Headers are separated from the commit
566 * message body by a single empty line.
567 */
568 src = commit->logmsg;
569 dst = *logmsg;
570 while (*src != '\0' && *src != '\n') {
571 int copy_header = 1, eol = 0;
572 if (strncmp(src, GOT_COMMIT_LABEL_TREE,
573 strlen(GOT_COMMIT_LABEL_TREE)) != 0 &&
574 strncmp(src, GOT_COMMIT_LABEL_AUTHOR,
575 strlen(GOT_COMMIT_LABEL_AUTHOR)) != 0 &&
576 strncmp(src, GOT_COMMIT_LABEL_PARENT,
577 strlen(GOT_COMMIT_LABEL_PARENT)) != 0 &&
578 strncmp(src, GOT_COMMIT_LABEL_COMMITTER,
579 strlen(GOT_COMMIT_LABEL_COMMITTER)) != 0)
580 copy_header = 0;
582 while (*src != '\0' && !eol) {
583 if (copy_header) {
584 *dst = *src;
585 dst++;
587 if (*src == '\n')
588 eol = 1;
589 src++;
592 *dst = '\0';
594 if (strlcat(*logmsg, src, len + 1) >= len + 1) {
595 err = got_error(GOT_ERR_NO_SPACE);
596 goto done;
599 /* Trim redundant trailing whitespace. */
600 len = strlen(*logmsg);
601 while (len > 1 && isspace((unsigned char)(*logmsg)[len - 2]) &&
602 isspace((unsigned char)(*logmsg)[len - 1])) {
603 (*logmsg)[len - 1] = '\0';
604 len--;
607 /* Append a trailing newline if missing. */
608 if (len > 0 && (*logmsg)[len - 1] != '\n') {
609 (*logmsg)[len] = '\n';
610 (*logmsg)[len + 1] = '\0';
612 done:
613 if (err) {
614 free(*logmsg);
615 *logmsg = NULL;
617 return err;
620 const char *
621 got_object_commit_get_logmsg_raw(struct got_commit_object *commit)
623 return commit->logmsg;
626 const struct got_error *
627 got_object_parse_commit(struct got_commit_object **commit, char *buf,
628 size_t len, enum got_hash_algorithm algo)
630 const struct got_error *err = NULL;
631 char *s = buf;
632 size_t label_len, idlen;
633 ssize_t remain = (ssize_t)len;
635 idlen = got_hash_digest_string_length(algo);
637 if (remain == 0)
638 return got_error(GOT_ERR_BAD_OBJ_DATA);
640 *commit = got_object_commit_alloc_partial();
641 if (*commit == NULL)
642 return got_error_from_errno("got_object_commit_alloc_partial");
644 label_len = strlen(GOT_COMMIT_LABEL_TREE);
645 if (strncmp(s, GOT_COMMIT_LABEL_TREE, label_len) == 0) {
646 remain -= label_len;
647 if (remain < idlen) {
648 err = got_error(GOT_ERR_BAD_OBJ_DATA);
649 goto done;
651 s += label_len;
652 if (!got_parse_hash_digest((*commit)->tree_id->hash, s, algo)) {
653 err = got_error(GOT_ERR_BAD_OBJ_DATA);
654 goto done;
656 (*commit)->tree_id->algo = algo;
657 remain -= idlen;
658 s += idlen;
659 } else {
660 err = got_error(GOT_ERR_BAD_OBJ_DATA);
661 goto done;
664 label_len = strlen(GOT_COMMIT_LABEL_PARENT);
665 while (strncmp(s, GOT_COMMIT_LABEL_PARENT, label_len) == 0) {
666 remain -= label_len;
667 if (remain < idlen) {
668 err = got_error(GOT_ERR_BAD_OBJ_DATA);
669 goto done;
671 s += label_len;
672 err = got_object_commit_add_parent(*commit, s, algo);
673 if (err)
674 goto done;
676 remain -= idlen;
677 s += idlen;
680 label_len = strlen(GOT_COMMIT_LABEL_AUTHOR);
681 if (strncmp(s, GOT_COMMIT_LABEL_AUTHOR, label_len) == 0) {
682 char *p;
683 size_t slen;
685 remain -= label_len;
686 if (remain <= 0) {
687 err = got_error(GOT_ERR_BAD_OBJ_DATA);
688 goto done;
690 s += label_len;
691 p = memchr(s, '\n', remain);
692 if (p == NULL) {
693 err = got_error(GOT_ERR_BAD_OBJ_DATA);
694 goto done;
696 *p = '\0';
697 slen = strlen(s);
698 err = parse_commit_time(&(*commit)->author_time,
699 &(*commit)->author_gmtoff, s);
700 if (err)
701 goto done;
702 (*commit)->author = strdup(s);
703 if ((*commit)->author == NULL) {
704 err = got_error_from_errno("strdup");
705 goto done;
707 s += slen + 1;
708 remain -= slen + 1;
711 label_len = strlen(GOT_COMMIT_LABEL_COMMITTER);
712 if (strncmp(s, GOT_COMMIT_LABEL_COMMITTER, label_len) == 0) {
713 char *p;
714 size_t slen;
716 remain -= label_len;
717 if (remain <= 0) {
718 err = got_error(GOT_ERR_BAD_OBJ_DATA);
719 goto done;
721 s += label_len;
722 p = memchr(s, '\n', remain);
723 if (p == NULL) {
724 err = got_error(GOT_ERR_BAD_OBJ_DATA);
725 goto done;
727 *p = '\0';
728 slen = strlen(s);
729 err = parse_commit_time(&(*commit)->committer_time,
730 &(*commit)->committer_gmtoff, s);
731 if (err)
732 goto done;
733 (*commit)->committer = strdup(s);
734 if ((*commit)->committer == NULL) {
735 err = got_error_from_errno("strdup");
736 goto done;
738 s += slen + 1;
739 remain -= slen + 1;
742 (*commit)->logmsg = strndup(s, remain);
743 if ((*commit)->logmsg == NULL) {
744 err = got_error_from_errno("strndup");
745 goto done;
747 done:
748 if (err) {
749 got_object_commit_close(*commit);
750 *commit = NULL;
752 return err;
755 const struct got_error *
756 got_object_read_commit(struct got_commit_object **commit, int fd,
757 struct got_object_id *expected_id, size_t expected_size)
759 struct got_object *obj = NULL;
760 const struct got_error *err = NULL;
761 size_t len;
762 uint8_t *p;
763 struct got_inflate_checksum csum;
764 struct got_hash ctx;
765 struct got_object_id id;
767 memset(&id, 0, sizeof(id));
768 id.algo = expected_id->algo;
770 got_hash_init(&ctx, expected_id->algo);
771 memset(&csum, 0, sizeof(csum));
772 csum.output_ctx = &ctx;
774 err = got_inflate_to_mem_fd(&p, &len, NULL, &csum, expected_size, fd);
775 if (err)
776 return err;
778 got_hash_final(&ctx, id.hash);
779 if (got_object_id_cmp(expected_id, &id) != 0) {
780 err = got_error_checksum(expected_id);
781 goto done;
784 err = got_object_parse_header(&obj, p, len);
785 if (err)
786 goto done;
788 if (len < obj->hdrlen + obj->size) {
789 err = got_error(GOT_ERR_BAD_OBJ_DATA);
790 goto done;
793 if (obj->type != GOT_OBJ_TYPE_COMMIT) {
794 err = got_error(GOT_ERR_OBJ_TYPE);
795 goto done;
798 /* Skip object header. */
799 len -= obj->hdrlen;
800 err = got_object_parse_commit(commit, p + obj->hdrlen, len,
801 expected_id->algo);
802 done:
803 free(p);
804 if (obj)
805 got_object_close(obj);
806 return err;
809 void
810 got_object_tree_close(struct got_tree_object *tree)
812 if (tree->refcnt > 0) {
813 tree->refcnt--;
814 if (tree->refcnt > 0)
815 return;
818 free(tree->entries);
819 free(tree);
822 static const struct got_error *
823 parse_tree_entry(struct got_parsed_tree_entry *pte, size_t *elen, char *buf,
824 size_t maxlen, enum got_hash_algorithm algo, size_t idlen)
826 char *p, *space;
828 *elen = 0;
830 *elen = strnlen(buf, maxlen) + 1;
831 if (*elen > maxlen)
832 return got_error(GOT_ERR_BAD_OBJ_DATA);
834 space = memchr(buf, ' ', *elen);
835 if (space == NULL || space <= buf)
836 return got_error(GOT_ERR_BAD_OBJ_DATA);
838 pte->mode = 0;
839 p = buf;
840 while (p < space) {
841 if (*p < '0' || *p > '7')
842 return got_error(GOT_ERR_BAD_OBJ_DATA);
843 pte->mode <<= 3;
844 pte->mode |= *p - '0';
845 p++;
848 if (*elen > maxlen || maxlen - *elen < idlen)
849 return got_error(GOT_ERR_BAD_OBJ_DATA);
851 pte->name = space + 1;
852 pte->namelen = strlen(pte->name);
853 buf += *elen;
854 pte->id = buf;
855 pte->idlen = idlen;
856 pte->algo = algo;
857 *elen += idlen;
858 return NULL;
861 static int
862 pte_cmp(const void *pa, const void *pb)
864 const struct got_parsed_tree_entry *a = pa, *b = pb;
866 return got_path_cmp(a->name, b->name, a->namelen, b->namelen);
869 const struct got_error *
870 got_object_parse_tree(struct got_parsed_tree_entry **entries, size_t *nentries,
871 size_t *nentries_alloc, uint8_t *buf, size_t len,
872 enum got_hash_algorithm algo)
874 const struct got_error *err = NULL;
875 size_t idlen, remain = len;
876 const size_t nalloc = 16;
877 struct got_parsed_tree_entry *pte;
878 int i;
880 idlen = got_hash_digest_length(algo);
882 *nentries = 0;
883 if (remain == 0)
884 return NULL; /* tree is empty */
886 while (remain > 0) {
887 size_t elen;
889 if (*nentries >= *nentries_alloc) {
890 pte = recallocarray(*entries, *nentries_alloc,
891 *nentries_alloc + nalloc, sizeof(**entries));
892 if (pte == NULL) {
893 err = got_error_from_errno("recallocarray");
894 goto done;
896 *entries = pte;
897 *nentries_alloc += nalloc;
900 pte = &(*entries)[*nentries];
901 err = parse_tree_entry(pte, &elen, buf, remain, algo, idlen);
902 if (err)
903 goto done;
904 buf += elen;
905 remain -= elen;
906 (*nentries)++;
909 if (remain != 0) {
910 err = got_error(GOT_ERR_BAD_OBJ_DATA);
911 goto done;
914 if (*nentries > 1) {
915 mergesort(*entries, *nentries, sizeof(**entries), pte_cmp);
917 for (i = 0; i < *nentries - 1; i++) {
918 struct got_parsed_tree_entry *prev = &(*entries)[i];
919 pte = &(*entries)[i + 1];
920 if (got_path_cmp(prev->name, pte->name,
921 prev->namelen, pte->namelen) == 0) {
922 err = got_error(GOT_ERR_TREE_DUP_ENTRY);
923 break;
927 done:
928 if (err)
929 *nentries = 0;
930 return err;
933 const struct got_error *
934 got_object_read_tree(struct got_parsed_tree_entry **entries, size_t *nentries,
935 size_t *nentries_alloc, uint8_t **p, int fd,
936 struct got_object_id *expected_id)
938 const struct got_error *err = NULL;
939 struct got_object *obj = NULL;
940 size_t len;
941 struct got_inflate_checksum csum;
942 struct got_hash ctx;
943 struct got_object_id id;
945 memset(&id, 0, sizeof(id));
946 id.algo = expected_id->algo;
948 got_hash_init(&ctx, expected_id->algo);
949 memset(&csum, 0, sizeof(csum));
950 csum.output_ctx = &ctx;
952 err = got_inflate_to_mem_fd(p, &len, NULL, &csum, 0, fd);
953 if (err)
954 return err;
956 got_hash_final(&ctx, id.hash);
957 if (got_object_id_cmp(expected_id, &id) != 0) {
958 err = got_error_checksum(expected_id);
959 goto done;
962 err = got_object_parse_header(&obj, *p, len);
963 if (err)
964 goto done;
966 if (len < obj->hdrlen + obj->size) {
967 err = got_error(GOT_ERR_BAD_OBJ_DATA);
968 goto done;
971 /* Skip object header. */
972 len -= obj->hdrlen;
973 err = got_object_parse_tree(entries, nentries, nentries_alloc,
974 *p + obj->hdrlen, len, expected_id->algo);
975 done:
976 if (obj)
977 got_object_close(obj);
978 return err;
981 void
982 got_object_tag_close(struct got_tag_object *tag)
984 if (tag->refcnt > 0) {
985 tag->refcnt--;
986 if (tag->refcnt > 0)
987 return;
990 free(tag->tag);
991 free(tag->tagger);
992 free(tag->tagmsg);
993 free(tag);
996 const struct got_error *
997 got_object_parse_tag(struct got_tag_object **tag, uint8_t *buf, size_t len,
998 enum got_hash_algorithm algo)
1000 const struct got_error *err = NULL;
1001 size_t remain = len;
1002 char *s = buf;
1003 size_t label_len, id_len;
1005 id_len = got_hash_digest_string_length(algo);
1007 if (remain == 0)
1008 return got_error(GOT_ERR_BAD_OBJ_DATA);
1010 *tag = calloc(1, sizeof(**tag));
1011 if (*tag == NULL)
1012 return got_error_from_errno("calloc");
1014 label_len = strlen(GOT_TAG_LABEL_OBJECT);
1015 if (strncmp(s, GOT_TAG_LABEL_OBJECT, label_len) == 0) {
1016 remain -= label_len;
1017 if (remain < id_len) {
1018 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1019 goto done;
1021 s += label_len;
1022 if (!got_parse_hash_digest((*tag)->id.hash, s, algo)) {
1023 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1024 goto done;
1026 (*tag)->id.algo = algo;
1027 remain -= id_len;
1028 s += id_len;
1029 } else {
1030 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1031 goto done;
1034 if (remain <= 0) {
1035 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1036 goto done;
1039 label_len = strlen(GOT_TAG_LABEL_TYPE);
1040 if (strncmp(s, GOT_TAG_LABEL_TYPE, label_len) == 0) {
1041 remain -= label_len;
1042 if (remain <= 0) {
1043 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1044 goto done;
1046 s += label_len;
1047 if (strncmp(s, GOT_OBJ_LABEL_COMMIT,
1048 strlen(GOT_OBJ_LABEL_COMMIT)) == 0) {
1049 (*tag)->obj_type = GOT_OBJ_TYPE_COMMIT;
1050 label_len = strlen(GOT_OBJ_LABEL_COMMIT);
1051 s += label_len;
1052 remain -= label_len;
1053 } else if (strncmp(s, GOT_OBJ_LABEL_TREE,
1054 strlen(GOT_OBJ_LABEL_TREE)) == 0) {
1055 (*tag)->obj_type = GOT_OBJ_TYPE_TREE;
1056 label_len = strlen(GOT_OBJ_LABEL_TREE);
1057 s += label_len;
1058 remain -= label_len;
1059 } else if (strncmp(s, GOT_OBJ_LABEL_BLOB,
1060 strlen(GOT_OBJ_LABEL_BLOB)) == 0) {
1061 (*tag)->obj_type = GOT_OBJ_TYPE_BLOB;
1062 label_len = strlen(GOT_OBJ_LABEL_BLOB);
1063 s += label_len;
1064 remain -= label_len;
1065 } else if (strncmp(s, GOT_OBJ_LABEL_TAG,
1066 strlen(GOT_OBJ_LABEL_TAG)) == 0) {
1067 (*tag)->obj_type = GOT_OBJ_TYPE_TAG;
1068 label_len = strlen(GOT_OBJ_LABEL_TAG);
1069 s += label_len;
1070 remain -= label_len;
1071 } else {
1072 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1073 goto done;
1076 if (remain <= 0 || *s != '\n') {
1077 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1078 goto done;
1080 s++;
1081 remain--;
1082 if (remain <= 0) {
1083 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1084 goto done;
1086 } else {
1087 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1088 goto done;
1091 label_len = strlen(GOT_TAG_LABEL_TAG);
1092 if (strncmp(s, GOT_TAG_LABEL_TAG, label_len) == 0) {
1093 char *p;
1094 size_t slen;
1095 remain -= label_len;
1096 if (remain <= 0) {
1097 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1098 goto done;
1100 s += label_len;
1101 p = memchr(s, '\n', remain);
1102 if (p == NULL) {
1103 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1104 goto done;
1106 *p = '\0';
1107 slen = strlen(s);
1108 (*tag)->tag = strndup(s, slen);
1109 if ((*tag)->tag == NULL) {
1110 err = got_error_from_errno("strndup");
1111 goto done;
1113 s += slen + 1;
1114 remain -= slen + 1;
1115 if (remain <= 0) {
1116 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1117 goto done;
1119 } else {
1120 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1121 goto done;
1124 label_len = strlen(GOT_TAG_LABEL_TAGGER);
1125 if (strncmp(s, GOT_TAG_LABEL_TAGGER, label_len) == 0) {
1126 char *p;
1127 size_t slen;
1129 remain -= label_len;
1130 if (remain <= 0) {
1131 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1132 goto done;
1134 s += label_len;
1135 p = memchr(s, '\n', remain);
1136 if (p == NULL) {
1137 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1138 goto done;
1140 *p = '\0';
1141 slen = strlen(s);
1142 err = parse_commit_time(&(*tag)->tagger_time,
1143 &(*tag)->tagger_gmtoff, s);
1144 if (err)
1145 goto done;
1146 (*tag)->tagger = strdup(s);
1147 if ((*tag)->tagger == NULL) {
1148 err = got_error_from_errno("strdup");
1149 goto done;
1151 s += slen + 1;
1152 remain -= slen + 1;
1153 if (remain < 0) {
1154 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1155 goto done;
1157 } else {
1158 /* Some old tags in the Linux git repo have no tagger. */
1159 (*tag)->tagger = strdup("");
1160 if ((*tag)->tagger == NULL) {
1161 err = got_error_from_errno("strdup");
1162 goto done;
1166 (*tag)->tagmsg = strndup(s, remain);
1167 if ((*tag)->tagmsg == NULL) {
1168 err = got_error_from_errno("strndup");
1169 goto done;
1171 done:
1172 if (err) {
1173 got_object_tag_close(*tag);
1174 *tag = NULL;
1176 return err;
1179 const struct got_error *
1180 got_object_read_tag(struct got_tag_object **tag, int fd,
1181 struct got_object_id *expected_id, size_t expected_size)
1183 const struct got_error *err = NULL;
1184 struct got_object *obj = NULL;
1185 size_t len;
1186 uint8_t *p;
1187 struct got_inflate_checksum csum;
1188 struct got_hash ctx;
1189 struct got_object_id id;
1191 memset(&id, 0, sizeof(id));
1192 id.algo = expected_id->algo;
1194 got_hash_init(&ctx, expected_id->algo);
1195 memset(&csum, 0, sizeof(csum));
1196 csum.output_ctx = &ctx;
1198 err = got_inflate_to_mem_fd(&p, &len, NULL, &csum,
1199 expected_size, fd);
1200 if (err)
1201 return err;
1203 got_hash_final(&ctx, id.hash);
1204 if (got_object_id_cmp(expected_id, &id) != 0) {
1205 err = got_error_checksum(expected_id);
1206 goto done;
1209 err = got_object_parse_header(&obj, p, len);
1210 if (err)
1211 goto done;
1213 if (len < obj->hdrlen + obj->size) {
1214 err = got_error(GOT_ERR_BAD_OBJ_DATA);
1215 goto done;
1218 /* Skip object header. */
1219 len -= obj->hdrlen;
1220 err = got_object_parse_tag(tag, p + obj->hdrlen, len,
1221 expected_id->algo);
1222 done:
1223 free(p);
1224 if (obj)
1225 got_object_close(obj);
1226 return err;
1229 const struct got_error *
1230 got_read_file_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f)
1232 const struct got_error *err = NULL;
1233 static const size_t blocksize = 512;
1234 size_t n, total, remain;
1235 uint8_t *buf;
1237 *outbuf = NULL;
1238 *outlen = 0;
1240 buf = malloc(blocksize);
1241 if (buf == NULL)
1242 return got_error_from_errno("malloc");
1244 remain = blocksize;
1245 total = 0;
1246 for (;;) {
1247 if (remain == 0) {
1248 uint8_t *newbuf;
1249 newbuf = reallocarray(buf, 1, total + blocksize);
1250 if (newbuf == NULL) {
1251 err = got_error_from_errno("reallocarray");
1252 goto done;
1254 buf = newbuf;
1255 remain += blocksize;
1257 n = fread(buf + total, 1, remain, f);
1258 if (n == 0) {
1259 if (ferror(f)) {
1260 err = got_ferror(f, GOT_ERR_IO);
1261 goto done;
1263 break; /* EOF */
1265 remain -= n;
1266 total += n;
1269 done:
1270 if (err == NULL) {
1271 *outbuf = buf;
1272 *outlen = total;
1273 } else
1274 free(buf);
1275 return err;