Blob


1 /*
2 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
21 #include <dirent.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <limits.h>
28 #include <sha1.h>
29 #include <endian.h>
30 #include <zlib.h>
32 #include "got_error.h"
33 #include "got_object.h"
34 #include "got_repository.h"
35 #include "got_sha1.h"
36 #include "pack.h"
37 #include "path.h"
38 #include "delta.h"
39 #include "object.h"
/* File name components shared by pack files and their index files. */
#define GOT_PACK_PREFIX		"pack-"
#define GOT_PACKFILE_SUFFIX	".pack"
#define GOT_PACKIDX_SUFFIX	".idx"

/*
 * Expected length, excluding the terminating NUL, of a pack file name:
 * the prefix, SHA1_DIGEST_STRING_LENGTH - 1 further characters, and the
 * pack file suffix. The operands are string literals, so sizeof() - 1
 * yields their string length.
 */
#define GOT_PACKFILE_NAMELEN	((sizeof(GOT_PACK_PREFIX) - 1) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				(sizeof(GOT_PACKFILE_SUFFIX) - 1))

/* Same as above, for the name of a pack index file. */
#define GOT_PACKIDX_NAMELEN	((sizeof(GOT_PACK_PREFIX) - 1) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				(sizeof(GOT_PACKIDX_SUFFIX) - 1))

/* Smaller of two values; note that each argument may be evaluated twice. */
#ifndef MIN
#define MIN(_x,_y) ((_x) < (_y) ? (_x) : (_y))
#endif
55 static const struct got_error *
56 verify_fanout_table(uint32_t *fanout_table)
57 {
58 int i;
60 for (i = 0; i < 0xff - 1; i++) {
61 if (be32toh(fanout_table[i]) > be32toh(fanout_table[i + 1]))
62 return got_error(GOT_ERR_BAD_PACKIDX);
63 }
65 return NULL;
66 }
68 static const struct got_error *
69 get_packfile_size(size_t *size, const char *path_idx)
70 {
71 struct stat sb;
72 char *path_pack;
73 char base_path[PATH_MAX];
74 char *dot;
76 if (strlcpy(base_path, path_idx, PATH_MAX) > PATH_MAX)
77 return got_error(GOT_ERR_NO_SPACE);
79 dot = strrchr(base_path, '.');
80 if (dot == NULL)
81 return got_error(GOT_ERR_BAD_PATH);
82 *dot = '\0';
83 if (asprintf(&path_pack, "%s.pack", base_path) == -1)
84 return got_error(GOT_ERR_NO_MEM);
86 if (stat(path_pack, &sb) != 0) {
87 free(path_pack);
88 return got_error_from_errno();
89 }
91 free(path_pack);
92 *size = sb.st_size;
93 return 0;
94 }
96 const struct got_error *
97 got_packidx_open(struct got_packidx_v2_hdr **packidx, const char *path)
98 {
99 struct got_packidx_v2_hdr *p;
100 FILE *f;
101 const struct got_error *err = NULL;
102 size_t n, nobj, packfile_size;
103 SHA1_CTX ctx;
104 uint8_t sha1[SHA1_DIGEST_LENGTH];
106 SHA1Init(&ctx);
108 f = fopen(path, "rb");
109 if (f == NULL)
110 return got_error(GOT_ERR_BAD_PATH);
112 err = get_packfile_size(&packfile_size, path);
113 if (err)
114 return err;
116 p = calloc(1, sizeof(*p));
117 if (p == NULL) {
118 err = got_error(GOT_ERR_NO_MEM);
119 goto done;
122 n = fread(&p->magic, sizeof(p->magic), 1, f);
123 if (n != 1) {
124 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
125 goto done;
128 if (betoh32(p->magic) != GOT_PACKIDX_V2_MAGIC) {
129 err = got_error(GOT_ERR_BAD_PACKIDX);
130 goto done;
133 SHA1Update(&ctx, (uint8_t *)&p->magic, sizeof(p->magic));
135 n = fread(&p->version, sizeof(p->version), 1, f);
136 if (n != 1) {
137 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
138 goto done;
141 if (betoh32(p->version) != GOT_PACKIDX_VERSION) {
142 err = got_error(GOT_ERR_BAD_PACKIDX);
143 goto done;
146 SHA1Update(&ctx, (uint8_t *)&p->version, sizeof(p->version));
148 n = fread(&p->fanout_table, sizeof(p->fanout_table), 1, f);
149 if (n != 1) {
150 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
151 goto done;
154 err = verify_fanout_table(p->fanout_table);
155 if (err)
156 goto done;
158 SHA1Update(&ctx, (uint8_t *)p->fanout_table, sizeof(p->fanout_table));
160 nobj = betoh32(p->fanout_table[0xff]);
162 p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
163 if (p->sorted_ids == NULL) {
164 err = got_error(GOT_ERR_NO_MEM);
165 goto done;
168 n = fread(p->sorted_ids, sizeof(*p->sorted_ids), nobj, f);
169 if (n != nobj) {
170 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
171 goto done;
174 SHA1Update(&ctx, (uint8_t *)p->sorted_ids,
175 nobj * sizeof(*p->sorted_ids));
177 p->crc32 = calloc(nobj, sizeof(*p->crc32));
178 if (p->crc32 == NULL) {
179 err = got_error(GOT_ERR_NO_MEM);
180 goto done;
183 n = fread(p->crc32, sizeof(*p->crc32), nobj, f);
184 if (n != nobj) {
185 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
186 goto done;
189 SHA1Update(&ctx, (uint8_t *)p->crc32, nobj * sizeof(*p->crc32));
191 p->offsets = calloc(nobj, sizeof(*p->offsets));
192 if (p->offsets == NULL) {
193 err = got_error(GOT_ERR_NO_MEM);
194 goto done;
197 n = fread(p->offsets, sizeof(*p->offsets), nobj, f);
198 if (n != nobj) {
199 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
200 goto done;
203 SHA1Update(&ctx, (uint8_t *)p->offsets, nobj * sizeof(*p->offsets));
205 /* Large file offsets are contained only in files > 2GB. */
206 if (packfile_size <= 0x80000000)
207 goto checksum;
209 p->large_offsets = calloc(nobj, sizeof(*p->large_offsets));
210 if (p->large_offsets == NULL) {
211 err = got_error(GOT_ERR_NO_MEM);
212 goto done;
215 n = fread(p->large_offsets, sizeof(*p->large_offsets), nobj, f);
216 if (n != nobj) {
217 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
218 goto done;
221 SHA1Update(&ctx, (uint8_t*)p->large_offsets,
222 nobj * sizeof(*p->large_offsets));
224 checksum:
225 n = fread(&p->trailer, sizeof(p->trailer), 1, f);
226 if (n != 1) {
227 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
228 goto done;
231 SHA1Update(&ctx, p->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
232 SHA1Final(sha1, &ctx);
233 if (memcmp(p->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
234 err = got_error(GOT_ERR_PACKIDX_CSUM);
235 done:
236 fclose(f);
237 if (err)
238 got_packidx_close(p);
239 else
240 *packidx = p;
241 return err;
244 void
245 got_packidx_close(struct got_packidx_v2_hdr *packidx)
247 free(packidx->sorted_ids);
248 free(packidx->offsets);
249 free(packidx->crc32);
250 free(packidx->large_offsets);
251 free(packidx);
254 static int
255 is_packidx_filename(const char *name, size_t len)
257 if (len != GOT_PACKIDX_NAMELEN)
258 return 0;
260 if (strncmp(name, GOT_PACK_PREFIX, strlen(GOT_PACK_PREFIX)) != 0)
261 return 0;
263 if (strcmp(name + strlen(GOT_PACK_PREFIX) +
264 SHA1_DIGEST_STRING_LENGTH - 1, GOT_PACKIDX_SUFFIX) != 0)
265 return 0;
267 return 1;
270 static off_t
271 get_object_offset(struct got_packidx_v2_hdr *packidx, int idx)
273 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
274 uint32_t offset = betoh32(packidx->offsets[idx]);
275 if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
276 uint64_t loffset;
277 idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
278 if (idx < 0 || idx > totobj || packidx->large_offsets == NULL)
279 return -1;
280 loffset = betoh64(packidx->large_offsets[idx]);
281 return (loffset > INT64_MAX ? -1 : (off_t)loffset);
283 return (off_t)(offset & GOT_PACKIDX_OFFSET_VAL_MASK);
286 static int
287 get_object_idx(struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
289 u_int8_t id0 = id->sha1[0];
290 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
291 int i = 0;
293 if (id0 > 0)
294 i = betoh32(packidx->fanout_table[id0 - 1]);
296 while (i < totobj) {
297 struct got_object_id *oid = &packidx->sorted_ids[i];
298 uint32_t offset;
299 int cmp = got_object_id_cmp(id, oid);
301 if (cmp == 0)
302 return i;
303 else if (cmp > 0)
304 break;
305 i++;
308 return -1;
311 const struct got_error *
312 read_packfile_hdr(FILE *f, struct got_packidx_v2_hdr *packidx)
314 const struct got_error *err = NULL;
315 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
316 struct got_packfile_hdr hdr;
317 size_t n;
319 n = fread(&hdr, sizeof(hdr), 1, f);
320 if (n != 1)
321 return got_ferror(f, GOT_ERR_BAD_PACKIDX);
323 if (betoh32(hdr.signature) != GOT_PACKFILE_SIGNATURE ||
324 betoh32(hdr.version) != GOT_PACKFILE_VERSION ||
325 betoh32(hdr.nobjects) != totobj)
326 err = got_error(GOT_ERR_BAD_PACKFILE);
328 return err;
331 static const struct got_error *
332 decode_type_and_size(uint8_t *type, uint64_t *size, size_t *len, FILE *packfile)
334 uint8_t t = 0;
335 uint64_t s = 0;
336 uint8_t sizeN;
337 size_t n;
338 int i = 0;
340 do {
341 /* We do not support size values which don't fit in 64 bit. */
342 if (i > 9)
343 return got_error(GOT_ERR_NO_SPACE);
345 n = fread(&sizeN, sizeof(sizeN), 1, packfile);
346 if (n != 1)
347 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
349 if (i == 0) {
350 t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >>
351 GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT;
352 s = (sizeN & GOT_PACK_OBJ_SIZE0_VAL_MASK);
353 } else {
354 size_t shift = 4 + 7 * (i - 1);
355 s |= ((sizeN & GOT_PACK_OBJ_SIZE_VAL_MASK) << shift);
357 i++;
358 } while (sizeN & GOT_PACK_OBJ_SIZE_MORE);
360 *type = t;
361 *size = s;
362 *len = i * sizeof(sizeN);
363 return NULL;
366 static const struct got_error *
367 open_plain_object(struct got_object **obj, const char *path_packfile,
368 struct got_object_id *id, uint8_t type, off_t offset, size_t size)
370 *obj = calloc(1, sizeof(**obj));
371 if (*obj == NULL)
372 return got_error(GOT_ERR_NO_MEM);
374 (*obj)->path_packfile = strdup(path_packfile);
375 if ((*obj)->path_packfile == NULL) {
376 free(*obj);
377 *obj = NULL;
378 return got_error(GOT_ERR_NO_MEM);
381 (*obj)->type = type;
382 (*obj)->flags = GOT_OBJ_FLAG_PACKED;
383 (*obj)->hdrlen = 0;
384 (*obj)->size = size;
385 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
386 (*obj)->pack_offset = offset;
388 return NULL;
391 static const struct got_error *
392 decode_negative_offset(int64_t *offset, size_t *len, FILE *packfile)
394 int64_t o = 0;
395 uint8_t offN;
396 size_t n;
397 int i = 0;
399 do {
400 /* We do not support offset values which don't fit in 64 bit. */
401 if (i > 8)
402 return got_error(GOT_ERR_NO_SPACE);
404 n = fread(&offN, sizeof(offN), 1, packfile);
405 if (n != 1)
406 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
408 if (i == 0)
409 o = (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
410 else {
411 o++;
412 o <<= 7;
413 o += (offN & GOT_PACK_OBJ_DELTA_OFF_VAL_MASK);
415 i++;
416 } while (offN & GOT_PACK_OBJ_DELTA_OFF_MORE);
418 *offset = o;
419 *len = i * sizeof(offN);
420 return NULL;
423 static const struct got_error *
424 open_offset_delta_object(struct got_object **obj, struct got_repository *repo,
425 const char *path_packfile, FILE *packfile, struct got_object_id *id,
426 off_t offset, size_t tslen, size_t size)
428 const struct got_error *err = NULL;
429 int64_t negoffset;
430 size_t negofflen;
431 off_t base_obj_offset;
432 struct got_object *base_obj;
433 struct got_object_id base_id;
434 uint8_t base_type;
435 uint64_t base_size;
436 size_t base_tslen;
438 err = decode_negative_offset(&negoffset, &negofflen, packfile);
439 if (err)
440 return err;
442 /* Compute the base object's offset (must be in the same pack file). */
443 base_obj_offset = (offset - negoffset);
444 if (base_obj_offset <= 0)
445 return got_error(GOT_ERR_BAD_PACKFILE);
447 if (fseeko(packfile, base_obj_offset, SEEK_SET) != 0)
448 return got_error_from_errno();
450 err = decode_type_and_size(&base_type, &base_size, &base_tslen,
451 packfile);
452 if (err)
453 return err;
455 /*
456 * XXX We currently only support plain objects as a delta base,
457 * i.e. deltas cannot be chained. Is this a problem?
458 * If so, we would have to resolve a plain object base type here.
459 */
460 switch (base_type) {
461 case GOT_OBJ_TYPE_COMMIT:
462 case GOT_OBJ_TYPE_TREE:
463 case GOT_OBJ_TYPE_BLOB:
464 case GOT_OBJ_TYPE_TAG:
465 break;
466 case GOT_OBJ_TYPE_OFFSET_DELTA:
467 case GOT_OBJ_TYPE_REF_DELTA:
468 default:
469 return got_error(GOT_ERR_NOT_IMPL);
472 *obj = calloc(1, sizeof(**obj));
473 if (*obj == NULL)
474 return got_error(GOT_ERR_NO_MEM);
476 (*obj)->path_packfile = strdup(path_packfile);
477 if ((*obj)->path_packfile == NULL) {
478 free(*obj);
479 return got_error(GOT_ERR_NO_MEM);
481 (*obj)->type = GOT_OBJ_TYPE_OFFSET_DELTA;
482 (*obj)->flags = GOT_OBJ_FLAG_PACKED;
483 (*obj)->hdrlen = 0;
484 (*obj)->size = size;
485 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
486 (*obj)->pack_offset = offset + tslen;
487 (*obj)->base_type = base_type;
488 (*obj)->base_size = base_size;
489 (*obj)->base_obj_offset = base_obj_offset;
491 return NULL;
494 static const struct got_error *
495 open_packed_object(struct got_object **obj, struct got_repository *repo,
496 const char *path_packdir, struct got_packidx_v2_hdr *packidx,
497 struct got_object_id *id)
499 const struct got_error *err = NULL;
500 int idx = get_object_idx(packidx, id);
501 off_t offset;
502 char hex[SHA1_DIGEST_STRING_LENGTH];
503 char *sha1str;
504 char *path_packfile;
505 FILE *packfile;
506 uint8_t type;
507 uint64_t size;
508 size_t tslen;
510 *obj = NULL;
511 if (idx == -1) /* object not found in pack index */
512 return NULL;
514 offset = get_object_offset(packidx, idx);
515 if (offset == (uint64_t)-1)
516 return got_error(GOT_ERR_BAD_PACKIDX);
518 sha1str = got_sha1_digest_to_str(packidx->trailer.packfile_sha1,
519 hex, sizeof(hex));
520 if (sha1str == NULL)
521 return got_error(GOT_ERR_PACKIDX_CSUM);
523 if (asprintf(&path_packfile, "%s/%s%s%s", path_packdir,
524 GOT_PACK_PREFIX, sha1str, GOT_PACKFILE_SUFFIX) == -1)
525 return got_error(GOT_ERR_NO_MEM);
527 packfile = fopen(path_packfile, "rb");
528 if (packfile == NULL) {
529 err = got_error_from_errno();
530 goto done;
533 err = read_packfile_hdr(packfile, packidx);
534 if (err)
535 goto done;
537 if (fseeko(packfile, offset, SEEK_SET) != 0) {
538 err = got_error_from_errno();
539 goto done;
542 err = decode_type_and_size(&type, &size, &tslen, packfile);
543 if (err)
544 goto done;
546 switch (type) {
547 case GOT_OBJ_TYPE_COMMIT:
548 case GOT_OBJ_TYPE_TREE:
549 case GOT_OBJ_TYPE_BLOB:
550 err = open_plain_object(obj, path_packfile, id, type,
551 offset + tslen, size);
552 break;
554 case GOT_OBJ_TYPE_OFFSET_DELTA:
555 err = open_offset_delta_object(obj, repo, path_packfile,
556 packfile, id, offset, tslen, size);
557 break;
559 case GOT_OBJ_TYPE_REF_DELTA:
560 case GOT_OBJ_TYPE_TAG:
561 break;
562 default:
563 err = got_error(GOT_ERR_NOT_IMPL);
564 goto done;
566 done:
567 free(path_packfile);
568 if (err)
569 free(*obj);
570 if (packfile && fclose(packfile) == -1 && err == 0)
571 err = got_error_from_errno();
572 return err;
575 const struct got_error *
576 got_packfile_open_object(struct got_object **obj, struct got_object_id *id,
577 struct got_repository *repo)
579 const struct got_error *err = NULL;
580 DIR *packdir = NULL;
581 struct dirent *dent;
582 char *path_packdir = got_repo_get_path_objects_pack(repo);
584 if (path_packdir == NULL) {
585 err = got_error(GOT_ERR_NO_MEM);
586 goto done;
589 packdir = opendir(path_packdir);
590 if (packdir == NULL) {
591 err = got_error_from_errno();
592 goto done;
595 while ((dent = readdir(packdir)) != NULL) {
596 struct got_packidx_v2_hdr *packidx;
597 char *path_packidx, *path_object;
599 if (!is_packidx_filename(dent->d_name, dent->d_namlen))
600 continue;
602 if (asprintf(&path_packidx, "%s/%s", path_packdir,
603 dent->d_name) == -1) {
604 err = got_error(GOT_ERR_NO_MEM);
605 goto done;
608 err = got_packidx_open(&packidx, path_packidx);
609 free(path_packidx);
610 if (err)
611 goto done;
613 err = open_packed_object(obj, repo, path_packdir, packidx, id);
614 got_packidx_close(packidx);
615 if (err)
616 goto done;
617 if (*obj != NULL)
618 break;
621 done:
622 free(path_packdir);
623 if (packdir && closedir(packdir) != 0 && err == 0)
624 err = got_error_from_errno();
625 return err;
628 static const struct got_error *
629 dump_plain_object(FILE *infile, uint8_t type, size_t size, FILE *outfile)
631 size_t n;
633 while (size > 0) {
634 uint8_t data[2048];
635 size_t len = MIN(size, sizeof(data));
637 n = fread(data, len, 1, infile);
638 if (n != 1)
639 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
641 n = fwrite(data, len, 1, outfile);
642 if (n != 1)
643 return got_ferror(outfile, GOT_ERR_IO);
645 size -= len;
648 rewind(outfile);
649 return NULL;
652 static const struct got_error *
653 dump_ref_delta_object(struct got_repository *repo, FILE *infile, uint8_t type,
654 size_t size, FILE *outfile)
656 const struct got_error *err = NULL;
657 struct got_object_id base_id;
658 struct got_object *base_obj;
659 size_t n;
661 if (size < sizeof(base_id))
662 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
664 n = fread(&base_id, sizeof(base_id), 1, infile);
665 if (n != 1)
666 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
668 size -= sizeof(base_id);
669 if (size <= 0)
670 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
672 err = got_object_open(&base_obj, repo, &base_id);
673 if (err)
674 return err;
676 err = got_delta_apply(repo, infile, size, base_obj, outfile);
677 got_object_close(base_obj);
678 return err;
681 const struct got_error *
682 got_packfile_extract_object(FILE **f, struct got_object *obj,
683 struct got_repository *repo)
685 const struct got_error *err = NULL;
686 FILE *packfile = NULL;
688 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
689 return got_error(GOT_ERR_OBJ_NOT_PACKED);
691 *f = got_opentemp();
692 if (*f == NULL) {
693 err = got_error(GOT_ERR_FILE_OPEN);
694 goto done;
697 packfile = fopen(obj->path_packfile, "rb");
698 if (packfile == NULL) {
699 err = got_error_from_errno();
700 goto done;
703 if (fseeko(packfile, obj->pack_offset, SEEK_SET) != 0) {
704 err = got_error_from_errno();
705 goto done;
708 switch (obj->type) {
709 case GOT_OBJ_TYPE_COMMIT:
710 case GOT_OBJ_TYPE_TREE:
711 case GOT_OBJ_TYPE_BLOB:
712 err = dump_plain_object(packfile, obj->type, obj->size, *f);
713 break;
714 case GOT_OBJ_TYPE_REF_DELTA:
715 err = dump_ref_delta_object(repo, packfile, obj->type,
716 obj->size, *f);
717 break;
718 case GOT_OBJ_TYPE_TAG:
719 case GOT_OBJ_TYPE_OFFSET_DELTA:
720 default:
721 err = got_error(GOT_ERR_NOT_IMPL);
722 goto done;
724 done:
725 if (packfile)
726 fclose(packfile);
727 if (err && *f)
728 fclose(*f);
729 return err;