Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
21 #include <dirent.h>
22 #include <fcntl.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <limits.h>
29 #include <sha1.h>
30 #include <endian.h>
31 #include <zlib.h>
33 #include "got_error.h"
34 #include "got_object.h"
35 #include "got_repository.h"
36 #include "got_opentemp.h"
38 #include "got_lib_sha1.h"
39 #include "got_lib_pack.h"
40 #include "got_lib_path.h"
41 #include "got_lib_delta.h"
42 #include "got_lib_zbuf.h"
43 #include "got_lib_object.h"
44 #include "got_lib_repository.h"
46 #ifndef nitems
47 #define nitems(_a) (sizeof(_a) / sizeof((_a)[0]))
48 #endif
50 #define GOT_PACK_PREFIX "pack-"
51 #define GOT_PACKFILE_SUFFIX ".pack"
52 #define GOT_PACKIDX_SUFFIX ".idx"
53 #define GOT_PACKFILE_NAMELEN (strlen(GOT_PACK_PREFIX) + \
54 SHA1_DIGEST_STRING_LENGTH - 1 + \
55 strlen(GOT_PACKFILE_SUFFIX))
56 #define GOT_PACKIDX_NAMELEN (strlen(GOT_PACK_PREFIX) + \
57 SHA1_DIGEST_STRING_LENGTH - 1 + \
58 strlen(GOT_PACKIDX_SUFFIX))
60 #ifndef MIN
61 #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
62 #endif
64 static const struct got_error *
65 verify_fanout_table(uint32_t *fanout_table)
66 {
67 int i;
69 for (i = 0; i < 0xff - 1; i++) {
70 if (be32toh(fanout_table[i]) > be32toh(fanout_table[i + 1]))
71 return got_error(GOT_ERR_BAD_PACKIDX);
72 }
74 return NULL;
75 }
77 static const struct got_error *
78 get_packfile_size(size_t *size, const char *path)
79 {
80 struct stat sb;
81 char *dot;
83 *size = 0;
85 dot = strrchr(path, '.');
86 if (dot == NULL)
87 return got_error(GOT_ERR_BAD_PATH);
89 /* Path must point to a pack index or to a pack file. */
90 if (strcmp(dot, GOT_PACKIDX_SUFFIX) == 0) {
91 const struct got_error *err = NULL;
92 char *path_pack;
93 char base_path[PATH_MAX];
95 /* Convert pack index path to pack file path. */
96 if (strlcpy(base_path, path, PATH_MAX) > PATH_MAX)
97 return got_error(GOT_ERR_NO_SPACE);
98 dot = strrchr(base_path, '.');
99 if (dot == NULL)
100 return got_error(GOT_ERR_BAD_PATH);
101 *dot = '\0';
102 if (asprintf(&path_pack, "%s.pack", base_path) == -1)
103 return got_error_from_errno();
105 if (stat(path_pack, &sb) != 0)
106 err = got_error_from_errno();
107 free(path_pack);
108 if (err)
109 return err;
110 } else if (strcmp(dot, GOT_PACKFILE_SUFFIX) == 0) {
111 if (stat(path, &sb) != 0)
112 return got_error_from_errno();
113 } else
114 return got_error(GOT_ERR_BAD_PATH);
116 *size = sb.st_size;
117 return 0;
120 const struct got_error *
121 got_packidx_open(struct got_packidx **packidx, const char *path)
123 struct got_packidx *p;
124 struct got_packidx_v2_hdr *h;
125 FILE *f;
126 const struct got_error *err = NULL;
127 size_t n, nobj, packfile_size;
128 SHA1_CTX ctx;
129 uint8_t sha1[SHA1_DIGEST_LENGTH];
131 *packidx = NULL;
133 SHA1Init(&ctx);
135 f = fopen(path, "rb");
136 if (f == NULL)
137 return got_error_from_errno();
139 err = get_packfile_size(&packfile_size, path);
140 if (err)
141 return err;
143 p = calloc(1, sizeof(*p));
144 if (p == NULL)
145 return got_error_from_errno();
146 p->path_packidx = strdup(path);
147 if (p->path_packidx == NULL) {
148 err = got_error_from_errno();
149 free(p->path_packidx);
150 free(p);
151 return err;
154 h = &p->hdr;
155 n = fread(&h->magic, sizeof(h->magic), 1, f);
156 if (n != 1) {
157 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
158 goto done;
161 if (betoh32(h->magic) != GOT_PACKIDX_V2_MAGIC) {
162 err = got_error(GOT_ERR_BAD_PACKIDX);
163 goto done;
166 SHA1Update(&ctx, (uint8_t *)&h->magic, sizeof(h->magic));
168 n = fread(&h->version, sizeof(h->version), 1, f);
169 if (n != 1) {
170 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
171 goto done;
174 if (betoh32(h->version) != GOT_PACKIDX_VERSION) {
175 err = got_error(GOT_ERR_BAD_PACKIDX);
176 goto done;
179 SHA1Update(&ctx, (uint8_t *)&h->version, sizeof(h->version));
181 n = fread(&h->fanout_table, sizeof(h->fanout_table), 1, f);
182 if (n != 1) {
183 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
184 goto done;
187 err = verify_fanout_table(h->fanout_table);
188 if (err)
189 goto done;
191 SHA1Update(&ctx, (uint8_t *)h->fanout_table, sizeof(h->fanout_table));
193 nobj = betoh32(h->fanout_table[0xff]);
195 h->sorted_ids = calloc(nobj, sizeof(*h->sorted_ids));
196 if (h->sorted_ids == NULL) {
197 err = got_error_from_errno();
198 goto done;
201 n = fread(h->sorted_ids, sizeof(*h->sorted_ids), nobj, f);
202 if (n != nobj) {
203 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
204 goto done;
207 SHA1Update(&ctx, (uint8_t *)h->sorted_ids,
208 nobj * sizeof(*h->sorted_ids));
210 h->crc32 = calloc(nobj, sizeof(*h->crc32));
211 if (h->crc32 == NULL) {
212 err = got_error_from_errno();
213 goto done;
216 n = fread(h->crc32, sizeof(*h->crc32), nobj, f);
217 if (n != nobj) {
218 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
219 goto done;
222 SHA1Update(&ctx, (uint8_t *)h->crc32, nobj * sizeof(*h->crc32));
224 h->offsets = calloc(nobj, sizeof(*h->offsets));
225 if (h->offsets == NULL) {
226 err = got_error_from_errno();
227 goto done;
230 n = fread(h->offsets, sizeof(*h->offsets), nobj, f);
231 if (n != nobj) {
232 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
233 goto done;
236 SHA1Update(&ctx, (uint8_t *)h->offsets, nobj * sizeof(*h->offsets));
238 /* Large file offsets are contained only in files > 2GB. */
239 if (packfile_size <= 0x80000000)
240 goto checksum;
242 h->large_offsets = calloc(nobj, sizeof(*h->large_offsets));
243 if (h->large_offsets == NULL) {
244 err = got_error_from_errno();
245 goto done;
248 n = fread(h->large_offsets, sizeof(*h->large_offsets), nobj, f);
249 if (n != nobj) {
250 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
251 goto done;
254 SHA1Update(&ctx, (uint8_t*)h->large_offsets,
255 nobj * sizeof(*h->large_offsets));
257 checksum:
258 n = fread(&h->trailer, sizeof(h->trailer), 1, f);
259 if (n != 1) {
260 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
261 goto done;
264 SHA1Update(&ctx, h->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
265 SHA1Final(sha1, &ctx);
266 if (memcmp(h->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
267 err = got_error(GOT_ERR_PACKIDX_CSUM);
268 done:
269 fclose(f);
270 if (err)
271 got_packidx_close(p);
272 else
273 *packidx = p;
274 return err;
277 void
278 got_packidx_close(struct got_packidx *packidx)
280 free(packidx->hdr.sorted_ids);
281 free(packidx->hdr.offsets);
282 free(packidx->hdr.crc32);
283 free(packidx->hdr.large_offsets);
284 free(packidx->path_packidx);
285 free(packidx);
288 static int
289 is_packidx_filename(const char *name, size_t len)
291 if (len != GOT_PACKIDX_NAMELEN)
292 return 0;
294 if (strncmp(name, GOT_PACK_PREFIX, strlen(GOT_PACK_PREFIX)) != 0)
295 return 0;
297 if (strcmp(name + strlen(GOT_PACK_PREFIX) +
298 SHA1_DIGEST_STRING_LENGTH - 1, GOT_PACKIDX_SUFFIX) != 0)
299 return 0;
301 return 1;
304 static off_t
305 get_object_offset(struct got_packidx *packidx, int idx)
307 uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]);
308 uint32_t offset = betoh32(packidx->hdr.offsets[idx]);
309 if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
310 uint64_t loffset;
311 idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
312 if (idx < 0 || idx > totobj ||
313 packidx->hdr.large_offsets == NULL)
314 return -1;
315 loffset = betoh64(packidx->hdr.large_offsets[idx]);
316 return (loffset > INT64_MAX ? -1 : (off_t)loffset);
318 return (off_t)(offset & GOT_PACKIDX_OFFSET_VAL_MASK);
321 static int
322 get_object_idx(struct got_packidx *packidx, struct got_object_id *id,
323 struct got_repository *repo)
325 u_int8_t id0 = id->sha1[0];
326 uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]);
327 int left = 0, right = totobj - 1;
329 if (id0 > 0)
330 left = betoh32(packidx->hdr.fanout_table[id0 - 1]);
332 while (left <= right) {
333 struct got_object_id *oid;
334 int i, cmp;
336 i = ((left + right) / 2);
337 oid = &packidx->hdr.sorted_ids[i];
338 cmp = got_object_id_cmp(id, oid);
339 if (cmp == 0)
340 return i;
341 else if (cmp > 0)
342 left = i + 1;
343 else if (cmp < 0)
344 right = i - 1;
347 return -1;
350 static struct got_packidx *
351 dup_packidx(struct got_packidx *packidx)
353 struct got_packidx *p;
354 size_t nobj;
356 p = calloc(1, sizeof(*p));
357 if (p == NULL)
358 return NULL;
360 p->path_packidx = strdup(packidx->path_packidx);
361 if (p->path_packidx == NULL) {
362 free(p);
363 return NULL;
365 memcpy(&p->hdr, &packidx->hdr, sizeof(p->hdr));
366 p->hdr.sorted_ids = NULL;
367 p->hdr.crc32 = NULL;
368 p->hdr.offsets = NULL;
369 p->hdr.large_offsets = NULL;
371 nobj = betoh32(p->hdr.fanout_table[0xff]);
373 p->hdr.sorted_ids = calloc(nobj, sizeof(*p->hdr.sorted_ids));
374 if (p->hdr.sorted_ids == NULL)
375 goto err;
376 memcpy(p->hdr.sorted_ids, packidx->hdr.sorted_ids,
377 nobj * sizeof(*p->hdr.sorted_ids));
379 p->hdr.crc32 = calloc(nobj, sizeof(*p->hdr.crc32));
380 if (p->hdr.crc32 == NULL)
381 goto err;
382 memcpy(p->hdr.crc32, packidx->hdr.crc32, nobj * sizeof(*p->hdr.crc32));
384 p->hdr.offsets = calloc(nobj, sizeof(*p->hdr.offsets));
385 if (p->hdr.offsets == NULL)
386 goto err;
387 memcpy(p->hdr.offsets, packidx->hdr.offsets,
388 nobj * sizeof(*p->hdr.offsets));
390 if (p->hdr.large_offsets) {
391 p->hdr.large_offsets = calloc(nobj,
392 sizeof(*p->hdr.large_offsets));
393 if (p->hdr.large_offsets == NULL)
394 goto err;
395 memcpy(p->hdr.large_offsets, packidx->hdr.large_offsets,
396 nobj * sizeof(*p->hdr.large_offsets));
399 return p;
401 err:
402 free(p->hdr.large_offsets);
403 free(p->hdr.offsets);
404 free(p->hdr.crc32);
405 free(p->hdr.sorted_ids);
406 free(p->path_packidx);
407 free(p);
408 return NULL;
411 static void
412 cache_packidx(struct got_packidx *packidx, struct got_repository *repo)
414 int i;
416 for (i = 0; i < nitems(repo->packidx_cache); i++) {
417 if (repo->packidx_cache[i] == NULL)
418 break;
421 if (i == nitems(repo->packidx_cache)) {
422 got_packidx_close(repo->packidx_cache[i - 1]);
423 memmove(&repo->packidx_cache[1], &repo->packidx_cache[0],
424 sizeof(repo->packidx_cache) -
425 sizeof(repo->packidx_cache[0]));
426 i = 0;
429 repo->packidx_cache[i] = dup_packidx(packidx);
432 static const struct got_error *
433 search_packidx(struct got_packidx **packidx, int *idx,
434 struct got_repository *repo, struct got_object_id *id)
436 const struct got_error *err;
437 char *path_packdir;
438 DIR *packdir;
439 struct dirent *dent;
440 char *path_packidx;
441 int i;
443 /* Search pack index cache. */
444 for (i = 0; i < nitems(repo->packidx_cache); i++) {
445 if (repo->packidx_cache[i] == NULL)
446 break;
447 *idx = get_object_idx(repo->packidx_cache[i], id, repo);
448 if (*idx != -1) {
449 *packidx = repo->packidx_cache[i];
450 return NULL;
453 /* No luck. Search the filesystem. */
455 path_packdir = got_repo_get_path_objects_pack(repo);
456 if (path_packdir == NULL)
457 return got_error_from_errno();
459 packdir = opendir(path_packdir);
460 if (packdir == NULL) {
461 err = got_error_from_errno();
462 goto done;
465 while ((dent = readdir(packdir)) != NULL) {
466 if (!is_packidx_filename(dent->d_name, dent->d_namlen))
467 continue;
469 if (asprintf(&path_packidx, "%s/%s", path_packdir,
470 dent->d_name) == -1) {
471 err = got_error_from_errno();
472 goto done;
475 err = got_packidx_open(packidx, path_packidx);
476 free(path_packidx);
477 if (err)
478 goto done;
480 *idx = get_object_idx(*packidx, id, repo);
481 if (*idx != -1) {
482 err = NULL; /* found the object */
483 cache_packidx(*packidx, repo);
484 goto done;
487 got_packidx_close(*packidx);
488 *packidx = NULL;
491 err = got_error(GOT_ERR_NO_OBJ);
492 done:
493 free(path_packdir);
494 if (packdir && closedir(packdir) != 0 && err == 0)
495 err = got_error_from_errno();
496 return err;
499 static const struct got_error *
500 get_packfile_path(char **path_packfile, struct got_repository *repo,
501 struct got_packidx *packidx)
503 size_t size;
505 /* Packfile path contains ".pack" instead of ".idx", so add one byte. */
506 size = strlen(packidx->path_packidx) + 2;
507 if (size < GOT_PACKFILE_NAMELEN + 1)
508 return got_error(GOT_ERR_BAD_PATH);
510 *path_packfile = calloc(size, sizeof(**path_packfile));
511 if (*path_packfile == NULL)
512 return got_error_from_errno();
514 /* Copy up to and excluding ".idx". */
515 if (strlcpy(*path_packfile, packidx->path_packidx,
516 size - strlen(GOT_PACKIDX_SUFFIX) - 1) >= size)
517 return got_error(GOT_ERR_NO_SPACE);
519 if (strlcat(*path_packfile, GOT_PACKFILE_SUFFIX, size) >= size)
520 return got_error(GOT_ERR_NO_SPACE);
522 return NULL;
525 const struct got_error *
526 read_packfile_hdr(int fd, struct got_packidx *packidx)
528 const struct got_error *err = NULL;
529 uint32_t totobj = betoh32(packidx->hdr.fanout_table[0xff]);
530 struct got_packfile_hdr hdr;
531 ssize_t n;
533 n = read(fd, &hdr, sizeof(hdr));
534 if (n < 0)
535 return got_error_from_errno();
536 if (n != sizeof(hdr))
537 return got_error(GOT_ERR_BAD_PACKFILE);
539 if (betoh32(hdr.signature) != GOT_PACKFILE_SIGNATURE ||
540 betoh32(hdr.version) != GOT_PACKFILE_VERSION ||
541 betoh32(hdr.nobjects) != totobj)
542 err = got_error(GOT_ERR_BAD_PACKFILE);
544 return err;
547 static const struct got_error *
548 open_packfile(int *fd, const char *path_packfile,
549 struct got_repository *repo, struct got_packidx *packidx)
551 const struct got_error *err = NULL;
553 *fd = open(path_packfile, O_RDONLY | O_NOFOLLOW, GOT_DEFAULT_FILE_MODE);
554 if (*fd == -1)
555 return got_error_from_errno();
557 if (packidx) {
558 err = read_packfile_hdr(*fd, packidx);
559 if (err) {
560 close(*fd);
561 *fd = -1;
564 return err;
567 void
568 got_pack_close(struct got_pack *pack)
570 close(pack->fd);
571 pack->fd = -1;
572 free(pack->path_packfile);
573 pack->path_packfile = NULL;
574 pack->filesize = 0;
577 static const struct got_error *
578 cache_pack(struct got_pack **packp, const char *path_packfile,
579 struct got_packidx *packidx, struct got_repository *repo)
581 const struct got_error *err = NULL;
582 struct got_pack *pack = NULL;
583 int i;
585 if (packp)
586 *packp = NULL;
588 for (i = 0; i < nitems(repo->packs); i++) {
589 pack = &repo->packs[i];
590 if (pack->path_packfile == NULL)
591 break;
592 if (strcmp(pack->path_packfile, path_packfile) == 0)
593 return NULL;
596 if (i == nitems(repo->packs) - 1) {
597 got_pack_close(&repo->packs[i - 1]);
598 memmove(&repo->packs[1], &repo->packs[0],
599 sizeof(repo->packs) - sizeof(repo->packs[0]));
600 i = 0;
603 pack = &repo->packs[i];
605 pack->path_packfile = strdup(path_packfile);
606 if (pack->path_packfile == NULL) {
607 err = got_error_from_errno();
608 goto done;
611 err = open_packfile(&pack->fd, path_packfile, repo, packidx);
612 if (err)
613 goto done;
615 err = get_packfile_size(&pack->filesize, path_packfile);
616 done:
617 if (err) {
618 if (pack) {
619 free(pack->path_packfile);
620 memset(pack, 0, sizeof(*pack));
622 } else if (packp)
623 *packp = pack;
624 return err;
627 struct got_pack *
628 get_cached_pack(const char *path_packfile, struct got_repository *repo)
630 struct got_pack *pack = NULL;
631 int i;
633 for (i = 0; i < nitems(repo->packs); i++) {
634 pack = &repo->packs[i];
635 if (pack->path_packfile == NULL)
636 break;
637 if (strcmp(pack->path_packfile, path_packfile) == 0)
638 return pack;
641 return NULL;
644 static const struct got_error *
645 parse_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len, int fd)
647 uint8_t t = 0;
648 uint64_t s = 0;
649 uint8_t sizeN;
650 ssize_t n;
651 int i = 0;
653 do {
654 /* We do not support size values which don't fit in 64 bit. */
655 if (i > 9)
656 return got_error(GOT_ERR_NO_SPACE);
658 n = read(fd, &sizeN, sizeof(sizeN));
659 if (n < 0)
660 return got_error_from_errno();
661 if (n != sizeof(sizeN))
662 return got_error(GOT_ERR_BAD_PACKFILE);
664 if (i == 0) {
665 t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >>
666 GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT;
667 s = (sizeN & GOT_PACK_OBJ_SIZE0_VAL_MASK);
668 } else {
669 size_t shift = 4 + 7 * (i - 1);
670 s |= ((sizeN & GOT_PACK_OBJ_SIZE_VAL_MASK) << shift);
672 i++;
673 } while (sizeN & GOT_PACK_OBJ_SIZE_MORE);
675 *type = t;
676 *size = s;
677 *len = i * sizeof(sizeN);
678 return NULL;
681 static const struct got_error *
682 open_plain_object(struct got_object **obj, const char *path_packfile,
683 struct got_object_id *id, uint8_t type, off_t offset, size_t size)
685 *obj = calloc(1, sizeof(**obj));
686 if (*obj == NULL)
687 return got_error_from_errno();
689 (*obj)->path_packfile = strdup(path_packfile);
690 if ((*obj)->path_packfile == NULL) {
691 const struct got_error *err = got_error_from_errno();
692 free(*obj);
693 *obj = NULL;
694 return err;
697 (*obj)->type = type;
698 (*obj)->flags = GOT_OBJ_FLAG_PACKED;
699 (*obj)->hdrlen = 0;
700 (*obj)->size = size;
701 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
702 (*obj)->pack_offset = offset;
704 return NULL;
707 static const struct got_error *
708 parse_negative_offset(int64_t *offset, size_t *len, int fd)
710 int64_t o = 0;
711 uint8_t offN;
712 ssize_t n;
713 int i = 0;
715 do {
716 /* We do not support offset values which don't fit in 64 bit. */
717 if (i > 8)
718 return got_error(GOT_ERR_NO_SPACE);
720 n = read(fd, &offN, sizeof(offN));
721 if (n < 0)
722 return got_error_from_errno();
723 if (n != sizeof(offN))
724 return got_error(GOT_ERR_BAD_PACKFILE);
726 if (i == 0)
727 o = (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
728 else {
729 o++;
730 o <<= 7;
731 o += (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
733 i++;
734 } while (offN & GOT_PACK_OBJ_DELTA_OFF_MORE);
736 *offset = o;
737 *len = i * sizeof(offN);
738 return NULL;
741 static const struct got_error *
742 parse_offset_delta(off_t *base_offset, int fd, off_t offset)
744 const struct got_error *err;
745 int64_t negoffset;
746 size_t negofflen;
748 err = parse_negative_offset(&negoffset, &negofflen, fd);
749 if (err)
750 return err;
752 /* Compute the base object's offset (must be in the same pack file). */
753 *base_offset = (offset - negoffset);
754 if (*base_offset <= 0)
755 return got_error(GOT_ERR_BAD_PACKFILE);
757 return NULL;
/*
 * Forward declaration: resolve_delta_chain() and the two delta
 * resolvers below call each other.
 */
static const struct got_error *
resolve_delta_chain(struct got_delta_chain *deltas,
    struct got_repository *repo, int fd, size_t packfile_size,
    const char *path_packfile, off_t delta_offset, size_t tslen,
    int delta_type, size_t delta_size, unsigned int recursion);
764 static const struct got_error *
765 add_delta(struct got_delta_chain *deltas, const char *path_packfile,
766 off_t delta_offset, size_t tslen, int delta_type, size_t delta_size,
767 size_t delta_data_offset, uint8_t *delta_buf, size_t delta_len)
769 struct got_delta *delta;
771 delta = got_delta_open(path_packfile, delta_offset, tslen,
772 delta_type, delta_size, delta_data_offset, delta_buf,
773 delta_len);
774 if (delta == NULL)
775 return got_error_from_errno();
776 /* delta is freed in got_object_close() */
777 deltas->nentries++;
778 SIMPLEQ_INSERT_HEAD(&deltas->entries, delta, entry);
779 return NULL;
782 static const struct got_error *
783 resolve_offset_delta(struct got_delta_chain *deltas,
784 struct got_repository *repo, int fd, size_t packfile_size,
785 const char *path_packfile, off_t delta_offset, size_t tslen,
786 int delta_type, size_t delta_size, unsigned int recursion)
789 const struct got_error *err;
790 off_t base_offset;
791 uint8_t base_type;
792 uint64_t base_size;
793 size_t base_tslen;
794 off_t delta_data_offset;
795 uint8_t *delta_buf;
796 size_t delta_len;
798 err = parse_offset_delta(&base_offset, fd, delta_offset);
799 if (err)
800 return err;
802 delta_data_offset = lseek(fd, 0, SEEK_CUR);
803 if (delta_data_offset == -1)
804 return got_error_from_errno();
806 err = got_inflate_to_mem_fd(&delta_buf, &delta_len, fd);
807 if (err)
808 return err;
810 err = add_delta(deltas, path_packfile, delta_offset, tslen,
811 delta_type, delta_size, delta_data_offset, delta_buf, delta_len);
812 if (err)
813 return err;
815 /* An offset delta must be in the same packfile. */
816 if (base_offset >= packfile_size)
817 return got_error(GOT_ERR_PACK_OFFSET);
818 if (lseek(fd, base_offset, SEEK_SET) == -1)
819 return got_error_from_errno();
821 err = parse_object_type_and_size(&base_type, &base_size, &base_tslen,
822 fd);
823 if (err)
824 return err;
826 return resolve_delta_chain(deltas, repo, fd, packfile_size,
827 path_packfile, base_offset, base_tslen, base_type, base_size,
828 recursion - 1);
831 static const struct got_error *
832 resolve_ref_delta(struct got_delta_chain *deltas, struct got_repository *repo,
833 int fd, const char *path_packfile, off_t delta_offset,
834 size_t tslen, int delta_type, size_t delta_size, unsigned int recursion)
836 const struct got_error *err;
837 struct got_object_id id;
838 struct got_packidx *packidx;
839 int idx;
840 off_t base_offset;
841 uint8_t base_type;
842 uint64_t base_size;
843 size_t base_tslen;
844 ssize_t n;
845 char *path_base_packfile;
846 struct got_pack *base_pack;
847 off_t delta_data_offset;
848 uint8_t *delta_buf;
849 size_t delta_len;
851 n = read(fd, &id, sizeof(id));
852 if (n < 0)
853 return got_error_from_errno();
854 if (n != sizeof(id))
855 return got_error(GOT_ERR_BAD_PACKFILE);
857 delta_data_offset = lseek(fd, 0, SEEK_CUR);
858 if (delta_data_offset == -1)
859 return got_error_from_errno();
861 err = got_inflate_to_mem_fd(&delta_buf, &delta_len, fd);
862 if (err)
863 return err;
865 err = add_delta(deltas, path_packfile, delta_offset, tslen,
866 delta_type, delta_size, delta_data_offset, delta_buf, delta_len);
867 if (err)
868 return err;
870 err = search_packidx(&packidx, &idx, repo, &id);
871 if (err)
872 return err;
874 base_offset = get_object_offset(packidx, idx);
875 if (base_offset == (uint64_t)-1) {
876 return got_error(GOT_ERR_BAD_PACKIDX);
879 err = get_packfile_path(&path_base_packfile, repo, packidx);
880 if (err)
881 return err;
883 base_pack = get_cached_pack(path_base_packfile, repo);
884 if (base_pack == NULL) {
885 err = cache_pack(&base_pack, path_base_packfile, NULL, repo);
886 if (err)
887 goto done;
890 if (base_offset >= base_pack->filesize) {
891 err = got_error(GOT_ERR_PACK_OFFSET);
892 goto done;
894 if (lseek(base_pack->fd, base_offset, SEEK_SET) == -1) {
895 err = got_error_from_errno();
896 goto done;
899 err = parse_object_type_and_size(&base_type, &base_size, &base_tslen,
900 base_pack->fd);
901 if (err)
902 goto done;
904 err = resolve_delta_chain(deltas, repo, base_pack->fd,
905 base_pack->filesize, path_base_packfile, base_offset,
906 base_tslen, base_type, base_size, recursion - 1);
907 done:
908 free(path_base_packfile);
909 return err;
912 static const struct got_error *
913 resolve_delta_chain(struct got_delta_chain *deltas, struct got_repository *repo,
914 int fd, size_t packfile_size, const char *path_packfile, off_t delta_offset,
915 size_t tslen, int delta_type, size_t delta_size, unsigned int recursion)
917 const struct got_error *err = NULL;
919 if (--recursion == 0)
920 return got_error(GOT_ERR_RECURSION);
922 switch (delta_type) {
923 case GOT_OBJ_TYPE_COMMIT:
924 case GOT_OBJ_TYPE_TREE:
925 case GOT_OBJ_TYPE_BLOB:
926 case GOT_OBJ_TYPE_TAG:
927 /* Plain types are the final delta base. Recursion ends. */
928 err = add_delta(deltas, path_packfile, delta_offset, tslen,
929 delta_type, delta_size, 0, NULL, 0);
930 break;
931 case GOT_OBJ_TYPE_OFFSET_DELTA:
932 err = resolve_offset_delta(deltas, repo, fd, packfile_size,
933 path_packfile, delta_offset, tslen, delta_type, delta_size,
934 recursion - 1);
935 break;
936 case GOT_OBJ_TYPE_REF_DELTA:
937 err = resolve_ref_delta(deltas, repo, fd, path_packfile,
938 delta_offset, tslen, delta_type, delta_size, recursion - 1);
939 break;
940 default:
941 return got_error(GOT_ERR_OBJ_TYPE);
944 return err;
947 static const struct got_error *
948 open_delta_object(struct got_object **obj, struct got_repository *repo,
949 const char *path_packfile, int fd, size_t packfile_size,
950 struct got_object_id *id, off_t offset, size_t tslen,
951 int delta_type, size_t delta_size)
953 const struct got_error *err = NULL;
954 int resolved_type;
956 *obj = calloc(1, sizeof(**obj));
957 if (*obj == NULL)
958 return got_error_from_errno();
960 (*obj)->flags = 0;
961 (*obj)->hdrlen = 0;
962 (*obj)->size = 0; /* Not known because deltas aren't applied yet. */
963 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
964 (*obj)->pack_offset = offset + tslen;
966 (*obj)->path_packfile = strdup(path_packfile);
967 if ((*obj)->path_packfile == NULL) {
968 err = got_error_from_errno();
969 goto done;
971 (*obj)->flags |= GOT_OBJ_FLAG_PACKED;
973 SIMPLEQ_INIT(&(*obj)->deltas.entries);
974 (*obj)->flags |= GOT_OBJ_FLAG_DELTIFIED;
976 err = resolve_delta_chain(&(*obj)->deltas, repo, fd, packfile_size,
977 path_packfile, offset, tslen, delta_type, delta_size,
978 GOT_DELTA_CHAIN_RECURSION_MAX);
979 if (err)
980 goto done;
982 err = got_delta_chain_get_base_type(&resolved_type, &(*obj)->deltas);
983 if (err)
984 goto done;
985 (*obj)->type = resolved_type;
987 done:
988 if (err) {
989 got_object_close(*obj);
990 *obj = NULL;
992 return err;
995 static const struct got_error *
996 open_packed_object(struct got_object **obj, struct got_repository *repo,
997 struct got_packidx *packidx, int idx, struct got_object_id *id)
999 const struct got_error *err = NULL;
1000 off_t offset;
1001 char *path_packfile;
1002 struct got_pack *pack;
1003 uint8_t type;
1004 uint64_t size;
1005 size_t tslen;
1007 *obj = NULL;
1009 offset = get_object_offset(packidx, idx);
1010 if (offset == (uint64_t)-1)
1011 return got_error(GOT_ERR_BAD_PACKIDX);
1013 err = get_packfile_path(&path_packfile, repo, packidx);
1014 if (err)
1015 return err;
1017 pack = get_cached_pack(path_packfile, repo);
1018 if (pack == NULL) {
1019 err = cache_pack(&pack, path_packfile, packidx, repo);
1020 if (err)
1021 goto done;
1024 if (offset >= pack->filesize) {
1025 err = got_error(GOT_ERR_PACK_OFFSET);
1026 goto done;
1028 if (lseek(pack->fd, offset, SEEK_SET) == -1) {
1029 err = got_error_from_errno();
1030 goto done;
1033 err = parse_object_type_and_size(&type, &size, &tslen, pack->fd);
1034 if (err)
1035 goto done;
1037 switch (type) {
1038 case GOT_OBJ_TYPE_COMMIT:
1039 case GOT_OBJ_TYPE_TREE:
1040 case GOT_OBJ_TYPE_BLOB:
1041 case GOT_OBJ_TYPE_TAG:
1042 err = open_plain_object(obj, path_packfile, id, type,
1043 offset + tslen, size);
1044 break;
1046 case GOT_OBJ_TYPE_OFFSET_DELTA:
1047 case GOT_OBJ_TYPE_REF_DELTA:
1048 err = open_delta_object(obj, repo, path_packfile, pack->fd,
1049 pack->filesize, id, offset, tslen, type, size);
1050 break;
1052 default:
1053 err = got_error(GOT_ERR_OBJ_TYPE);
1054 goto done;
1056 done:
1057 free(path_packfile);
1058 return err;
1061 const struct got_error *
1062 got_packfile_open_object(struct got_object **obj, struct got_object_id *id,
1063 struct got_repository *repo)
1065 const struct got_error *err = NULL;
1066 struct got_packidx *packidx = NULL;
1067 int idx;
1069 err = search_packidx(&packidx, &idx, repo, id);
1070 if (err)
1071 return err;
1073 err = open_packed_object(obj, repo, packidx, idx, id);
1074 if (err)
1075 return err;
1077 err = cache_pack(NULL, (*obj)->path_packfile, packidx, repo);
1078 return err;
1081 static const struct got_error *
1082 get_delta_chain_max_size(uint64_t *max_size, struct got_delta_chain *deltas)
1084 struct got_delta *delta;
1085 uint64_t base_size = 0, result_size = 0;
1087 *max_size = 0;
1088 SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
1089 /* Plain object types are the delta base. */
1090 if (delta->type != GOT_OBJ_TYPE_COMMIT &&
1091 delta->type != GOT_OBJ_TYPE_TREE &&
1092 delta->type != GOT_OBJ_TYPE_BLOB &&
1093 delta->type != GOT_OBJ_TYPE_TAG) {
1094 const struct got_error *err;
1095 err = got_delta_get_sizes(&base_size, &result_size,
1096 delta->delta_buf, delta->delta_len);
1097 if (err)
1098 return err;
1099 } else
1100 base_size = delta->size;
1101 if (base_size > *max_size)
1102 *max_size = base_size;
1103 if (result_size > *max_size)
1104 *max_size = result_size;
1107 return NULL;
1110 static const struct got_error *
1111 dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas,
1112 FILE *outfile, struct got_repository *repo)
1114 const struct got_error *err = NULL;
1115 struct got_delta *delta;
1116 FILE *base_file = NULL, *accum_file = NULL;
1117 uint8_t *base_buf = NULL, *accum_buf = NULL;
1118 size_t accum_size = 0;
1119 uint64_t max_size;
1120 int n = 0;
1122 *result_size = 0;
1124 if (SIMPLEQ_EMPTY(&deltas->entries))
1125 return got_error(GOT_ERR_BAD_DELTA_CHAIN);
1127 /* We process small enough files entirely in memory for speed. */
1128 err = get_delta_chain_max_size(&max_size, deltas);
1129 if (err)
1130 return err;
1131 if (max_size < GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
1132 accum_buf = malloc(max_size);
1133 if (accum_buf == NULL)
1134 return got_error_from_errno();
1135 } else {
1136 base_file = got_opentemp();
1137 if (base_file == NULL)
1138 return got_error_from_errno();
1140 accum_file = got_opentemp();
1141 if (accum_file == NULL) {
1142 err = got_error_from_errno();
1143 fclose(base_file);
1144 return err;
1148 /* Deltas are ordered in ascending order. */
1149 SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
1150 if (n == 0) {
1151 struct got_pack *pack;
1152 size_t base_len;
1153 off_t delta_data_offset;
1155 /* Plain object types are the delta base. */
1156 if (delta->type != GOT_OBJ_TYPE_COMMIT &&
1157 delta->type != GOT_OBJ_TYPE_TREE &&
1158 delta->type != GOT_OBJ_TYPE_BLOB &&
1159 delta->type != GOT_OBJ_TYPE_TAG) {
1160 err = got_error(GOT_ERR_BAD_DELTA_CHAIN);
1161 goto done;
1164 pack = get_cached_pack(delta->path_packfile, repo);
1165 if (pack == NULL) {
1166 err = got_error(GOT_ERR_BAD_DELTA_CHAIN);
1167 goto done;
1170 delta_data_offset = delta->offset + delta->tslen;
1171 if (delta_data_offset >= pack->filesize) {
1172 err = got_error(GOT_ERR_PACK_OFFSET);
1173 goto done;
1175 if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) {
1176 err = got_error_from_errno();
1177 goto done;
1179 if (base_file)
1180 err = got_inflate_to_file_fd(&base_len,
1181 pack->fd, base_file);
1182 else {
1183 err = got_inflate_to_mem_fd(&base_buf,
1184 &base_len, pack->fd);
1185 if (base_len < max_size) {
1186 uint8_t *p;
1187 p = reallocarray(base_buf, 1, max_size);
1188 if (p == NULL) {
1189 err = got_error_from_errno();
1190 goto done;
1192 base_buf = p;
1195 if (err)
1196 goto done;
1197 n++;
1198 if (base_file)
1199 rewind(base_file);
1200 continue;
1203 if (base_buf) {
1204 err = got_delta_apply_in_mem(base_buf, delta->delta_buf,
1205 delta->delta_len, accum_buf, &accum_size);
1206 n++;
1207 } else {
1208 err = got_delta_apply(base_file, delta->delta_buf,
1209 delta->delta_len,
1210 /* Final delta application writes to output file. */
1211 ++n < deltas->nentries ? accum_file : outfile,
1212 &accum_size);
1214 if (err)
1215 goto done;
1217 if (n < deltas->nentries) {
1218 /* Accumulated delta becomes the new base. */
1219 if (base_buf) {
1220 uint8_t *tmp = accum_buf;
1221 accum_buf = base_buf;
1222 base_buf = tmp;
1223 } else {
1224 FILE *tmp = accum_file;
1225 accum_file = base_file;
1226 base_file = tmp;
1227 rewind(base_file);
1228 rewind(accum_file);
1233 done:
1234 free(base_buf);
1235 if (accum_buf) {
1236 size_t len = fwrite(accum_buf, 1, accum_size, outfile);
1237 free(accum_buf);
1238 if (len != accum_size)
1239 return got_ferror(outfile, GOT_ERR_IO);
1241 if (base_file)
1242 fclose(base_file);
1243 if (accum_file)
1244 fclose(accum_file);
1245 rewind(outfile);
1246 if (err == NULL)
1247 *result_size = accum_size;
1248 return err;
1251 static const struct got_error *
1252 dump_delta_chain_to_mem(uint8_t **outbuf, size_t *outlen,
1253 struct got_delta_chain *deltas, struct got_repository *repo)
1255 const struct got_error *err = NULL;
1256 struct got_delta *delta;
1257 uint8_t *base_buf = NULL, *accum_buf = NULL;
1258 size_t accum_size;
1259 uint64_t max_size;
1260 int n = 0;
1262 *outbuf = NULL;
1263 *outlen = 0;
1265 if (SIMPLEQ_EMPTY(&deltas->entries))
1266 return got_error(GOT_ERR_BAD_DELTA_CHAIN);
1268 err = get_delta_chain_max_size(&max_size, deltas);
1269 if (err)
1270 return err;
1271 accum_buf = malloc(max_size);
1272 if (accum_buf == NULL)
1273 return got_error_from_errno();
1275 /* Deltas are ordered in ascending order. */
1276 SIMPLEQ_FOREACH(delta, &deltas->entries, entry) {
1277 if (n == 0) {
1278 struct got_pack *pack;
1279 size_t base_len;
1280 size_t delta_data_offset;
1282 /* Plain object types are the delta base. */
1283 if (delta->type != GOT_OBJ_TYPE_COMMIT &&
1284 delta->type != GOT_OBJ_TYPE_TREE &&
1285 delta->type != GOT_OBJ_TYPE_BLOB &&
1286 delta->type != GOT_OBJ_TYPE_TAG) {
1287 err = got_error(GOT_ERR_BAD_DELTA_CHAIN);
1288 goto done;
1291 pack = get_cached_pack(delta->path_packfile, repo);
1292 if (pack == NULL) {
1293 err = got_error(GOT_ERR_BAD_DELTA_CHAIN);
1294 goto done;
1297 delta_data_offset = delta->offset + delta->tslen;
1298 if (delta_data_offset >= pack->filesize) {
1299 err = got_error(GOT_ERR_PACK_OFFSET);
1300 goto done;
1302 if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) {
1303 err = got_error_from_errno();
1304 goto done;
1306 err = got_inflate_to_mem_fd(&base_buf, &base_len,
1307 pack->fd);
1308 if (base_len < max_size) {
1309 uint8_t *p;
1310 p = reallocarray(base_buf, 1, max_size);
1311 if (p == NULL) {
1312 err = got_error_from_errno();
1313 goto done;
1315 base_buf = p;
1317 if (err)
1318 goto done;
1319 n++;
1320 continue;
1323 err = got_delta_apply_in_mem(base_buf, delta->delta_buf,
1324 delta->delta_len, accum_buf, &accum_size);
1325 n++;
1326 if (err)
1327 goto done;
1329 if (n < deltas->nentries) {
1330 /* Accumulated delta becomes the new base. */
1331 uint8_t *tmp = accum_buf;
1332 accum_buf = base_buf;
1333 base_buf = tmp;
1337 done:
1338 free(base_buf);
1339 if (err) {
1340 free(accum_buf);
1341 *outbuf = NULL;
1342 *outlen = 0;
1343 } else {
1344 *outbuf = accum_buf;
1345 *outlen = accum_size;
1347 return err;
1350 const struct got_error *
1351 got_packfile_extract_object(FILE **f, struct got_object *obj,
1352 struct got_repository *repo)
1354 const struct got_error *err = NULL;
1356 *f = NULL;
1358 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
1359 return got_error(GOT_ERR_OBJ_NOT_PACKED);
1361 *f = got_opentemp();
1362 if (*f == NULL) {
1363 err = got_error_from_errno();
1364 goto done;
1367 if ((obj->flags & GOT_OBJ_FLAG_DELTIFIED) == 0) {
1368 struct got_pack *pack;
1370 pack = get_cached_pack(obj->path_packfile, repo);
1371 if (pack == NULL) {
1372 err = cache_pack(&pack, obj->path_packfile, NULL, repo);
1373 if (err)
1374 goto done;
1377 if (obj->pack_offset >= pack->filesize) {
1378 err = got_error(GOT_ERR_PACK_OFFSET);
1379 goto done;
1381 if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) {
1382 err = got_error_from_errno();
1383 goto done;
1386 err = got_inflate_to_file_fd(&obj->size, pack->fd, *f);
1387 } else
1388 err = dump_delta_chain_to_file(&obj->size,
1389 &obj->deltas, *f, repo);
1390 done:
1391 if (err && *f) {
1392 fclose(*f);
1393 *f = NULL;
1395 return err;
1398 const struct got_error *
1399 got_packfile_extract_object_to_mem(uint8_t **buf, size_t *len,
1400 struct got_object *obj, struct got_repository *repo)
1402 const struct got_error *err = NULL;
1404 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
1405 return got_error(GOT_ERR_OBJ_NOT_PACKED);
1407 if ((obj->flags & GOT_OBJ_FLAG_DELTIFIED) == 0) {
1408 struct got_pack *pack;
1410 pack = get_cached_pack(obj->path_packfile, repo);
1411 if (pack == NULL) {
1412 err = cache_pack(&pack, obj->path_packfile, NULL, repo);
1413 if (err)
1414 goto done;
1417 if (obj->pack_offset >= pack->filesize) {
1418 err = got_error(GOT_ERR_PACK_OFFSET);
1419 goto done;
1421 if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) {
1422 err = got_error_from_errno();
1423 goto done;
1426 err = got_inflate_to_mem_fd(buf, len, pack->fd);
1427 } else
1428 err = dump_delta_chain_to_mem(buf, len, &obj->deltas, repo);
1429 done:
1430 return err;