/*
 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
17 #include <sys/types.h>
19 #include <sys/queue.h>
32 #include "got_error.h"
33 #include "got_object.h"
34 #include "got_repository.h"
/* File name components shared by pack files and their index files. */
#define GOT_PACK_PREFIX		"pack-"
#define GOT_PACKFILE_SUFFIX	".pack"
#define GOT_PACKIDX_SUFFIX	".idx"

/*
 * Expected length of a pack file name and of a pack index file name:
 * prefix + hex SHA1 digest + suffix. The "- 1" presumably drops the NUL
 * terminator counted by SHA1_DIGEST_STRING_LENGTH (verify against <sha1.h>).
 */
#define GOT_PACKFILE_NAMELEN	(strlen(GOT_PACK_PREFIX) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				strlen(GOT_PACKFILE_SUFFIX))
#define GOT_PACKIDX_NAMELEN	(strlen(GOT_PACK_PREFIX) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				strlen(GOT_PACKIDX_SUFFIX))

/*
 * Smaller of two values. Both arguments may be evaluated twice, so do not
 * pass expressions with side effects. Guarded because system headers such
 * as <sys/param.h> may already define MIN.
 */
#ifndef MIN
#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
#endif
55 static const struct got_error *
56 verify_fanout_table(uint32_t *fanout_table)
60 for (i = 0; i < 0xff - 1; i++) {
61 if (be32toh(fanout_table[i]) > be32toh(fanout_table[i + 1]))
62 return got_error(GOT_ERR_BAD_PACKIDX);
68 static const struct got_error *
69 get_packfile_size(size_t *size, const char *path_idx)
73 char base_path[PATH_MAX];
76 if (strlcpy(base_path, path_idx, PATH_MAX) > PATH_MAX)
77 return got_error(GOT_ERR_NO_SPACE);
79 dot = strrchr(base_path, '.');
81 return got_error(GOT_ERR_BAD_PATH);
83 if (asprintf(&path_pack, "%s.pack", base_path) == -1)
84 return got_error(GOT_ERR_NO_MEM);
86 if (stat(path_pack, &sb) != 0) {
88 return got_error_from_errno();
96 const struct got_error *
97 got_packidx_open(struct got_packidx_v2_hdr **packidx, const char *path)
99 struct got_packidx_v2_hdr *p;
101 const struct got_error *err = NULL;
102 size_t n, nobj, packfile_size;
104 uint8_t sha1[SHA1_DIGEST_LENGTH];
108 f = fopen(path, "rb");
110 return got_error(GOT_ERR_BAD_PATH);
112 err = get_packfile_size(&packfile_size, path);
116 p = calloc(1, sizeof(*p));
118 err = got_error(GOT_ERR_NO_MEM);
122 n = fread(&p->magic, sizeof(p->magic), 1, f);
124 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
128 if (betoh32(p->magic) != GOT_PACKIDX_V2_MAGIC) {
129 err = got_error(GOT_ERR_BAD_PACKIDX);
133 SHA1Update(&ctx, (uint8_t *)&p->magic, sizeof(p->magic));
135 n = fread(&p->version, sizeof(p->version), 1, f);
137 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
141 if (betoh32(p->version) != GOT_PACKIDX_VERSION) {
142 err = got_error(GOT_ERR_BAD_PACKIDX);
146 SHA1Update(&ctx, (uint8_t *)&p->version, sizeof(p->version));
148 n = fread(&p->fanout_table, sizeof(p->fanout_table), 1, f);
150 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
154 err = verify_fanout_table(p->fanout_table);
158 SHA1Update(&ctx, (uint8_t *)p->fanout_table, sizeof(p->fanout_table));
160 nobj = betoh32(p->fanout_table[0xff]);
162 p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
163 if (p->sorted_ids == NULL) {
164 err = got_error(GOT_ERR_NO_MEM);
168 n = fread(p->sorted_ids, sizeof(*p->sorted_ids), nobj, f);
170 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
174 SHA1Update(&ctx, (uint8_t *)p->sorted_ids,
175 nobj * sizeof(*p->sorted_ids));
177 p->crc32 = calloc(nobj, sizeof(*p->crc32));
178 if (p->crc32 == NULL) {
179 err = got_error(GOT_ERR_NO_MEM);
183 n = fread(p->crc32, sizeof(*p->crc32), nobj, f);
185 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
189 SHA1Update(&ctx, (uint8_t *)p->crc32, nobj * sizeof(*p->crc32));
191 p->offsets = calloc(nobj, sizeof(*p->offsets));
192 if (p->offsets == NULL) {
193 err = got_error(GOT_ERR_NO_MEM);
197 n = fread(p->offsets, sizeof(*p->offsets), nobj, f);
199 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
203 SHA1Update(&ctx, (uint8_t *)p->offsets, nobj * sizeof(*p->offsets));
205 /* Large file offsets are contained only in files > 2GB. */
206 if (packfile_size <= 0x80000000)
209 p->large_offsets = calloc(nobj, sizeof(*p->large_offsets));
210 if (p->large_offsets == NULL) {
211 err = got_error(GOT_ERR_NO_MEM);
215 n = fread(p->large_offsets, sizeof(*p->large_offsets), nobj, f);
217 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
221 SHA1Update(&ctx, (uint8_t*)p->large_offsets,
222 nobj * sizeof(*p->large_offsets));
225 n = fread(&p->trailer, sizeof(p->trailer), 1, f);
227 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
231 SHA1Update(&ctx, p->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
232 SHA1Final(sha1, &ctx);
233 if (memcmp(p->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
234 err = got_error(GOT_ERR_PACKIDX_CSUM);
238 got_packidx_close(p);
245 got_packidx_close(struct got_packidx_v2_hdr *packidx)
247 free(packidx->sorted_ids);
248 free(packidx->offsets);
249 free(packidx->crc32);
250 free(packidx->large_offsets);
255 is_packidx_filename(const char *name, size_t len)
257 if (len != GOT_PACKIDX_NAMELEN)
260 if (strncmp(name, GOT_PACK_PREFIX, strlen(GOT_PACK_PREFIX)) != 0)
263 if (strcmp(name + strlen(GOT_PACK_PREFIX) +
264 SHA1_DIGEST_STRING_LENGTH - 1, GOT_PACKIDX_SUFFIX) != 0)
271 get_object_offset(struct got_packidx_v2_hdr *packidx, int idx)
273 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
274 uint32_t offset = betoh32(packidx->offsets[idx]);
275 if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
277 idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
278 if (idx < 0 || idx > totobj || packidx->large_offsets == NULL)
280 loffset = betoh64(packidx->large_offsets[idx]);
281 return (loffset > INT64_MAX ? -1 : (off_t)loffset);
283 return (off_t)(offset & GOT_PACKIDX_OFFSET_VAL_MASK);
/*
 * Find the position of an object ID in the pack index's sorted ID table.
 *
 * NOTE(review): several short lines of this function (declarations, the
 * loop header, the comparison handling and the return statements) are not
 * visible in this copy of the file; the comments below describe only what
 * the visible lines show. The caller in this file treats a result of -1 as
 * "object not found".
 */
287 get_object_idx(struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
/* First byte of the ID; used below to select a fanout table entry. */
289 u_int8_t id0 = id->sha1[0];
/* Total number of objects, taken from the last fanout entry. */
290 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
/*
 * Start position taken from the fanout entry preceding id0.
 * NOTE(review): presumably guarded by an id0 > 0 check on a line not
 * shown here; otherwise id0 - 1 would index entry -1 -- confirm.
 */
294 i = betoh32(packidx->fanout_table[id0 - 1]);
/* Candidate ID at position i of the sorted table. */
297 struct got_object_id *oid = &packidx->sorted_ids[i];
/* Compare the wanted ID against the candidate. */
299 int cmp = got_object_id_cmp(id, oid);
311 const struct got_error *
312 read_packfile_hdr(FILE *f, struct got_packidx_v2_hdr *packidx)
314 const struct got_error *err = NULL;
315 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
316 struct got_packfile_hdr hdr;
319 n = fread(&hdr, sizeof(hdr), 1, f);
321 return got_ferror(f, GOT_ERR_BAD_PACKIDX);
323 if (betoh32(hdr.signature) != GOT_PACKFILE_SIGNATURE ||
324 betoh32(hdr.version) != GOT_PACKFILE_VERSION ||
325 betoh32(hdr.nobjects) != totobj)
326 err = got_error(GOT_ERR_BAD_PACKFILE);
331 static const struct got_error *
332 decode_object_type_and_size(uint8_t *type, uint64_t *size, size_t *len,
342 /* We do not support size values which don't fit in 64 bit. */
344 return got_error(GOT_ERR_NO_SPACE);
346 n = fread(&sizeN, sizeof(sizeN), 1, packfile);
348 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
351 t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >>
352 GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT;
353 s = (sizeN & GOT_PACK_OBJ_SIZE0_VAL_MASK);
355 size_t shift = 4 + 7 * (i - 1);
356 s |= ((sizeN & GOT_PACK_OBJ_SIZE_VAL_MASK) << shift);
359 } while (sizeN & GOT_PACK_OBJ_SIZE_MORE);
363 *len = i * sizeof(sizeN);
367 static const struct got_error *
368 open_plain_object(struct got_object **obj, const char *path_packfile,
369 struct got_object_id *id, uint8_t type, off_t offset, size_t size)
371 *obj = calloc(1, sizeof(**obj));
373 return got_error(GOT_ERR_NO_MEM);
375 (*obj)->path_packfile = strdup(path_packfile);
376 if ((*obj)->path_packfile == NULL) {
379 return got_error(GOT_ERR_NO_MEM);
383 (*obj)->flags = GOT_OBJ_FLAG_PACKED;
386 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
387 (*obj)->pack_offset = offset;
392 static const struct got_error *
393 decode_negative_offset(int64_t *offset, size_t *len, FILE *packfile)
401 /* We do not support offset values which don't fit in 64 bit. */
403 return got_error(GOT_ERR_NO_SPACE);
405 n = fread(&offN, sizeof(offN), 1, packfile);
407 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
410 o = (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
414 o += (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
417 } while (offN & GOT_PACK_OBJ_DELTA_OFF_MORE);
420 *len = i * sizeof(offN);
424 static const struct got_error *
425 parse_offset_delta(off_t *base_offset, FILE *packfile, off_t offset)
427 const struct got_error *err;
431 err = decode_negative_offset(&negoffset, &negofflen, packfile);
435 /* Compute the base object's offset (must be in the same pack file). */
436 *base_offset = (offset - negoffset);
437 if (*base_offset <= 0)
438 return got_error(GOT_ERR_BAD_PACKFILE);
/* Declared here because it recurses mutually with resolve_offset_delta(). */
static const struct got_error *resolve_delta_chain(struct got_delta_chain *,
    FILE *, const char *, int, off_t, size_t);

/*
 * Follow one offset delta to its base object and continue resolving the
 * delta chain from there.
 *
 * deltas:        delta chain being built.
 * packfile:      pack file stream, positioned at the delta's encoded base
 *                distance.
 * path_packfile: path of the pack file.
 * delta_offset:  offset the encoded base distance is relative to.
 *
 * Returns NULL on success or the first error encountered.
 *
 * NOTE(review): resolve_delta_chain() is handed base_offset + base_tslen,
 * i.e. the position after the base's type/size header. If the base is
 * itself an offset delta, that value becomes the next delta_offset --
 * confirm this is the offset the pack format measures the distance from.
 */
static const struct got_error *
resolve_offset_delta(struct got_delta_chain *deltas, FILE *packfile,
    const char *path_packfile, off_t delta_offset)
{
	const struct got_error *err;
	off_t base_offset;
	uint8_t base_type;
	uint64_t base_size;
	size_t base_tslen;

	err = parse_offset_delta(&base_offset, packfile, delta_offset);
	if (err)
		return err;

	/* An offset delta must be in the same packfile. */
	if (fseeko(packfile, base_offset, SEEK_SET) != 0)
		return got_error_from_errno();

	err = decode_object_type_and_size(&base_type, &base_size, &base_tslen,
	    packfile);
	if (err)
		return err;

	return resolve_delta_chain(deltas, packfile, path_packfile,
	    base_type, base_offset + base_tslen, base_size);
}
/*
 * Append one entry to a delta chain and, for offset deltas, keep following
 * the chain via resolve_offset_delta().
 *
 * NOTE(review): short lines of this function (braces, "break"/"default"
 * lines, continuation lines of calls, and at least one statement between
 * the allocation check and the queue insertion) are not visible in this
 * copy of the file; the comments describe the visible lines only.
 */
473 static const struct got_error *
474 resolve_delta_chain(struct got_delta_chain *deltas, FILE *packfile,
475 const char *path_packfile, int delta_type, off_t delta_offset,
478 const struct got_error *err = NULL;
479 struct got_delta *delta;
/* Create the chain entry describing this delta (or plain base object). */
481 delta = got_delta_open(path_packfile, delta_type, delta_offset,
/* Presumably taken when got_delta_open() returned NULL -- confirm. */
484 return got_error(GOT_ERR_NO_MEM);
/* The chain owns the entry from here on. */
486 SIMPLEQ_INSERT_TAIL(&deltas->entries, delta, entry);
487 /* In case of error below, delta is freed in got_object_close(). */
/* Decide whether the chain continues, based on the entry's type. */
489 switch (delta_type) {
490 case GOT_OBJ_TYPE_COMMIT:
491 case GOT_OBJ_TYPE_TREE:
492 case GOT_OBJ_TYPE_BLOB:
493 case GOT_OBJ_TYPE_TAG:
494 /* Plain types are the final delta base. Recursion ends. */
496 case GOT_OBJ_TYPE_OFFSET_DELTA:
/* Recurse into the base object this offset delta points at. */
497 err = resolve_offset_delta(deltas, packfile, path_packfile,
500 case GOT_OBJ_TYPE_REF_DELTA:
/* Reference deltas are not supported by this function. */
502 return got_error(GOT_ERR_NOT_IMPL);
/*
 * Allocate an object handle for a packed object stored as an offset delta
 * and resolve its delta chain.
 *
 * NOTE(review): short lines of this function (several declarations, error
 * checks, "goto"/label lines and the final return) are not visible in this
 * copy of the file; the comments describe the visible lines only.
 */
508 static const struct got_error *
509 open_offset_delta_object(struct got_object **obj,
510 struct got_repository *repo, struct got_packidx_v2_hdr *packidx,
511 const char *path_packfile, FILE *packfile, struct got_object_id *id,
512 off_t offset, size_t tslen, size_t delta_size)
514 const struct got_error *err = NULL;
515 struct got_object_id base_id;
/* Zero-initialised object handle. */
521 *obj = calloc(1, sizeof(**obj));
523 return got_error(GOT_ERR_NO_MEM);
527 (*obj)->size = 0; /* Not yet known because deltas aren't combined. */
528 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
/* Position just past the object's type/size header. */
529 (*obj)->pack_offset = offset + tslen;
/* The object keeps its own copy of the pack file path. */
531 (*obj)->path_packfile = strdup(path_packfile);
532 if ((*obj)->path_packfile == NULL) {
533 err = got_error(GOT_ERR_NO_MEM);
536 (*obj)->flags |= GOT_OBJ_FLAG_PACKED;
/* Start with an empty delta chain and mark the object as deltified. */
538 SIMPLEQ_INIT(&(*obj)->deltas.entries);
539 (*obj)->flags |= GOT_OBJ_FLAG_DELTIFIED;
/*
 * Build the chain starting with this object's own delta. Note that the
 * object's start offset is passed here, not offset + tslen.
 */
541 err = resolve_delta_chain(&(*obj)->deltas, packfile, path_packfile,
542 GOT_OBJ_TYPE_OFFSET_DELTA, offset, delta_size);
/* The object's type is taken from the base of the resolved chain. */
546 err = got_delta_chain_get_base_type(&resolved_type, &(*obj)->deltas);
549 (*obj)->type = resolved_type;
/* Presumably the error path: releases the partially built object. */
553 got_object_close(*obj);
/*
 * Open the object with the given ID from the pack file which belongs to
 * an already opened pack index.
 *
 * NOTE(review): short lines of this function (declarations, error checks,
 * "goto"/"break"/label lines, the switch header and the final return) are
 * not visible in this copy of the file; the comments describe the visible
 * lines only. In particular it cannot be seen here whether path_packfile
 * and sha1str are released on every path.
 */
559 static const struct got_error *
560 open_packed_object(struct got_object **obj, struct got_repository *repo,
561 const char *path_packdir, struct got_packidx_v2_hdr *packidx,
562 struct got_object_id *id)
564 const struct got_error *err = NULL;
/* Position of the object in the index tables, or -1. */
565 int idx = get_object_idx(packidx, id);
567 char hex[SHA1_DIGEST_STRING_LENGTH];
576 if (idx == -1) /* object not found in pack index */
/* Offset of the object within the pack file. */
579 offset = get_object_offset(packidx, idx);
/* get_object_offset() signals failure with -1. */
580 if (offset == (uint64_t)-1)
581 return got_error(GOT_ERR_BAD_PACKIDX);
/* The pack file is named after the pack checksum stored in the index. */
583 sha1str = got_sha1_digest_to_str(packidx->trailer.packfile_sha1,
586 return got_error(GOT_ERR_PACKIDX_CSUM);
/* <packdir>/pack-<sha1>.pack */
588 if (asprintf(&path_packfile, "%s/%s%s%s", path_packdir,
589 GOT_PACK_PREFIX, sha1str, GOT_PACKFILE_SUFFIX) == -1)
590 return got_error(GOT_ERR_NO_MEM);
592 packfile = fopen(path_packfile, "rb");
593 if (packfile == NULL) {
594 err = got_error_from_errno();
/* Check the pack file header against the index. */
598 err = read_packfile_hdr(packfile, packidx);
/* Move to the object and decode its type/size header. */
602 if (fseeko(packfile, offset, SEEK_SET) != 0) {
603 err = got_error_from_errno();
607 err = decode_object_type_and_size(&type, &size, &tslen, packfile);
/* Plain objects: the handle points just past the type/size header. */
612 case GOT_OBJ_TYPE_COMMIT:
613 case GOT_OBJ_TYPE_TREE:
614 case GOT_OBJ_TYPE_BLOB:
615 err = open_plain_object(obj, path_packfile, id, type,
616 offset + tslen, size);
/* Offset deltas: resolve the delta chain. */
619 case GOT_OBJ_TYPE_OFFSET_DELTA:
620 err = open_offset_delta_object(obj, repo, packidx,
621 path_packfile, packfile, id, offset, tslen, size);
/* Reference deltas and tags are not supported here. */
624 case GOT_OBJ_TYPE_REF_DELTA:
625 case GOT_OBJ_TYPE_TAG:
627 err = got_error(GOT_ERR_NOT_IMPL);
/* A failing fclose() is reported only if nothing else went wrong. */
632 if (packfile && fclose(packfile) == -1 && err == 0)
633 err = got_error_from_errno();
/*
 * Search the pack indexes in the repository's pack directory for an
 * object and open it.
 *
 * NOTE(review): short lines of this function (declarations, "continue"/
 * "goto"/"break" lines, error checks, cleanup and the final return) are
 * not visible in this copy of the file; the comments describe the visible
 * lines only. In particular it cannot be seen here how the loop ends once
 * an object was found, or where path_packidx and path_packdir are freed.
 */
637 const struct got_error *
638 got_packfile_open_object(struct got_object **obj, struct got_object_id *id,
639 struct got_repository *repo)
641 const struct got_error *err = NULL;
/* Directory which holds the repository's pack files. */
644 char *path_packdir = got_repo_get_path_objects_pack(repo);
646 if (path_packdir == NULL) {
647 err = got_error(GOT_ERR_NO_MEM);
651 packdir = opendir(path_packdir);
652 if (packdir == NULL) {
653 err = got_error_from_errno();
/* Visit every directory entry. */
657 while ((dent = readdir(packdir)) != NULL) {
658 struct got_packidx_v2_hdr *packidx;
659 char *path_packidx, *path_object;
/*
 * Only pack index files are of interest.
 * NOTE(review): d_namlen is not available in struct dirent on every
 * platform -- confirm the portability requirements.
 */
661 if (!is_packidx_filename(dent->d_name, dent->d_namlen))
664 if (asprintf(&path_packidx, "%s/%s", path_packdir,
665 dent->d_name) == -1) {
666 err = got_error(GOT_ERR_NO_MEM);
/* Load and verify the index. */
670 err = got_packidx_open(&packidx, path_packidx);
/* Try to open the object via this index; the index is released after. */
675 err = open_packed_object(obj, repo, path_packdir, packidx, id);
676 got_packidx_close(packidx);
/* A failing closedir() is reported only if nothing else went wrong. */
685 if (packdir && closedir(packdir) != 0 && err == 0)
686 err = got_error_from_errno();
/*
 * Copy object data from infile to outfile in chunks.
 *
 * NOTE(review): short lines of this function (the declaration of the data
 * buffer and of n, the loop header, the size bookkeeping and the final
 * return) are not visible in this copy of the file; the comments describe
 * the visible lines only. The type parameter is not used on any visible
 * line.
 */
690 static const struct got_error *
691 dump_plain_object(FILE *infile, uint8_t type, size_t size, FILE *outfile)
/* Chunk length: the remaining size, capped at the buffer size. */
697 size_t len = MIN(size, sizeof(data));
/* Read one chunk as a single item of len bytes. */
699 n = fread(data, len, 1, infile);
701 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
/* Write the same chunk out. */
703 n = fwrite(data, len, 1, outfile);
705 return got_ferror(outfile, GOT_ERR_IO);
714 static const struct got_error *
715 dump_ref_delta_object(struct got_repository *repo, FILE *infile, uint8_t type,
716 size_t size, FILE *outfile)
718 const struct got_error *err = NULL;
719 struct got_object_id base_id;
720 struct got_object *base_obj;
723 if (size < sizeof(base_id))
724 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
726 n = fread(&base_id, sizeof(base_id), 1, infile);
728 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
730 size -= sizeof(base_id);
732 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
734 err = got_object_open(&base_obj, repo, &base_id);
738 err = got_delta_apply(repo, infile, size, base_obj, outfile);
739 got_object_close(base_obj);
743 const struct got_error *
744 got_packfile_extract_object(FILE **f, struct got_object *obj,
745 struct got_repository *repo)
747 const struct got_error *err = NULL;
748 FILE *packfile = NULL;
750 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
751 return got_error(GOT_ERR_OBJ_NOT_PACKED);
755 err = got_error(GOT_ERR_FILE_OPEN);
759 packfile = fopen(obj->path_packfile, "rb");
760 if (packfile == NULL) {
761 err = got_error_from_errno();
765 if (fseeko(packfile, obj->pack_offset, SEEK_SET) != 0) {
766 err = got_error_from_errno();
771 case GOT_OBJ_TYPE_COMMIT:
772 case GOT_OBJ_TYPE_TREE:
773 case GOT_OBJ_TYPE_BLOB:
774 err = dump_plain_object(packfile, obj->type, obj->size, *f);
776 case GOT_OBJ_TYPE_REF_DELTA:
777 err = dump_ref_delta_object(repo, packfile, obj->type,
780 case GOT_OBJ_TYPE_TAG:
781 case GOT_OBJ_TYPE_OFFSET_DELTA:
783 err = got_error(GOT_ERR_NOT_IMPL);