Blob


/*
 * Copyright (c) 2018 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <sys/queue.h>
21 #include <dirent.h>
22 #include <errno.h>
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <limits.h>
28 #include <sha1.h>
29 #include <endian.h>
30 #include <zlib.h>
32 #include "got_error.h"
33 #include "got_object.h"
34 #include "got_repository.h"
35 #include "got_sha1.h"
36 #include "pack.h"
37 #include "path.h"
38 #include "delta.h"

/* File name components shared by pack files and their index files. */
#define GOT_PACK_PREFIX		"pack-"
#define GOT_PACKFILE_SUFFIX	".pack"
#define GOT_PACKIDX_SUFFIX	".idx"

/*
 * Length of a pack file name and of a pack index file name: prefix, then
 * SHA1_DIGEST_STRING_LENGTH - 1 characters, then the suffix.
 * The lengths of the literal parts are taken with sizeof (minus the NUL)
 * so that the whole expression is a compile-time constant instead of
 * calling strlen() on every use.
 */
#define GOT_PACKFILE_NAMELEN	((sizeof(GOT_PACK_PREFIX) - 1) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				(sizeof(GOT_PACKFILE_SUFFIX) - 1))
#define GOT_PACKIDX_NAMELEN	((sizeof(GOT_PACK_PREFIX) - 1) + \
				SHA1_DIGEST_STRING_LENGTH - 1 + \
				(sizeof(GOT_PACKIDX_SUFFIX) - 1))

/* Smaller of two values; note that each argument may be evaluated twice. */
#ifndef MIN
#define	MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b))
#endif

54 static const struct got_error *
55 verify_fanout_table(uint32_t *fanout_table)
56 {
57 int i;
59 for (i = 0; i < 0xff - 1; i++) {
60 if (be32toh(fanout_table[i]) > be32toh(fanout_table[i + 1]))
61 return got_error(GOT_ERR_BAD_PACKIDX);
62 }
64 return NULL;
65 }
67 static const struct got_error *
68 get_packfile_size(size_t *size, const char *path_idx)
69 {
70 struct stat sb;
71 char *path_pack;
72 char base_path[PATH_MAX];
73 char *dot;
75 if (strlcpy(base_path, path_idx, PATH_MAX) > PATH_MAX)
76 return got_error(GOT_ERR_NO_SPACE);
78 dot = strrchr(base_path, '.');
79 if (dot == NULL)
80 return got_error(GOT_ERR_BAD_PATH);
81 *dot = '\0';
82 if (asprintf(&path_pack, "%s.pack", base_path) == -1)
83 return got_error(GOT_ERR_NO_MEM);
85 if (stat(path_pack, &sb) != 0) {
86 free(path_pack);
87 return got_error_from_errno();
88 }
90 free(path_pack);
91 *size = sb.st_size;
92 return 0;
93 }
95 const struct got_error *
96 got_packidx_open(struct got_packidx_v2_hdr **packidx, const char *path)
97 {
98 struct got_packidx_v2_hdr *p;
99 FILE *f;
100 const struct got_error *err = NULL;
101 size_t n, nobj, packfile_size;
102 SHA1_CTX ctx;
103 uint8_t sha1[SHA1_DIGEST_LENGTH];
105 SHA1Init(&ctx);
107 f = fopen(path, "rb");
108 if (f == NULL)
109 return got_error(GOT_ERR_BAD_PATH);
111 err = get_packfile_size(&packfile_size, path);
112 if (err)
113 return err;
115 p = calloc(1, sizeof(*p));
116 if (p == NULL) {
117 err = got_error(GOT_ERR_NO_MEM);
118 goto done;
121 n = fread(&p->magic, sizeof(p->magic), 1, f);
122 if (n != 1) {
123 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
124 goto done;
127 if (betoh32(p->magic) != GOT_PACKIDX_V2_MAGIC) {
128 err = got_error(GOT_ERR_BAD_PACKIDX);
129 goto done;
132 SHA1Update(&ctx, (uint8_t *)&p->magic, sizeof(p->magic));
134 n = fread(&p->version, sizeof(p->version), 1, f);
135 if (n != 1) {
136 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
137 goto done;
140 if (betoh32(p->version) != GOT_PACKIDX_VERSION) {
141 err = got_error(GOT_ERR_BAD_PACKIDX);
142 goto done;
145 SHA1Update(&ctx, (uint8_t *)&p->version, sizeof(p->version));
147 n = fread(&p->fanout_table, sizeof(p->fanout_table), 1, f);
148 if (n != 1) {
149 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
150 goto done;
153 err = verify_fanout_table(p->fanout_table);
154 if (err)
155 goto done;
157 SHA1Update(&ctx, (uint8_t *)p->fanout_table, sizeof(p->fanout_table));
159 nobj = betoh32(p->fanout_table[0xff]);
161 p->sorted_ids = calloc(nobj, sizeof(*p->sorted_ids));
162 if (p->sorted_ids == NULL) {
163 err = got_error(GOT_ERR_NO_MEM);
164 goto done;
167 n = fread(p->sorted_ids, sizeof(*p->sorted_ids), nobj, f);
168 if (n != nobj) {
169 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
170 goto done;
173 SHA1Update(&ctx, (uint8_t *)p->sorted_ids,
174 nobj * sizeof(*p->sorted_ids));
176 p->crc32 = calloc(nobj, sizeof(*p->crc32));
177 if (p->crc32 == NULL) {
178 err = got_error(GOT_ERR_NO_MEM);
179 goto done;
182 n = fread(p->crc32, sizeof(*p->crc32), nobj, f);
183 if (n != nobj) {
184 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
185 goto done;
188 SHA1Update(&ctx, (uint8_t *)p->crc32, nobj * sizeof(*p->crc32));
190 p->offsets = calloc(nobj, sizeof(*p->offsets));
191 if (p->offsets == NULL) {
192 err = got_error(GOT_ERR_NO_MEM);
193 goto done;
196 n = fread(p->offsets, sizeof(*p->offsets), nobj, f);
197 if (n != nobj) {
198 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
199 goto done;
202 SHA1Update(&ctx, (uint8_t *)p->offsets, nobj * sizeof(*p->offsets));
204 /* Large file offsets are contained only in files > 2GB. */
205 if (packfile_size <= 0x80000000)
206 goto checksum;
208 p->large_offsets = calloc(nobj, sizeof(*p->large_offsets));
209 if (p->large_offsets == NULL) {
210 err = got_error(GOT_ERR_NO_MEM);
211 goto done;
214 n = fread(p->large_offsets, sizeof(*p->large_offsets), nobj, f);
215 if (n != nobj) {
216 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
217 goto done;
220 SHA1Update(&ctx, (uint8_t*)p->large_offsets,
221 nobj * sizeof(*p->large_offsets));
223 checksum:
224 n = fread(&p->trailer, sizeof(p->trailer), 1, f);
225 if (n != 1) {
226 err = got_ferror(f, GOT_ERR_BAD_PACKIDX);
227 goto done;
230 SHA1Update(&ctx, p->trailer.packfile_sha1, SHA1_DIGEST_LENGTH);
231 SHA1Final(sha1, &ctx);
232 if (memcmp(p->trailer.packidx_sha1, sha1, SHA1_DIGEST_LENGTH) != 0)
233 err = got_error(GOT_ERR_PACKIDX_CSUM);
234 done:
235 fclose(f);
236 if (err)
237 got_packidx_close(p);
238 else
239 *packidx = p;
240 return err;
243 void
244 got_packidx_close(struct got_packidx_v2_hdr *packidx)
246 free(packidx->sorted_ids);
247 free(packidx->offsets);
248 free(packidx->crc32);
249 free(packidx->large_offsets);
250 free(packidx);
253 static int
254 is_packidx_filename(const char *name, size_t len)
256 if (len != GOT_PACKIDX_NAMELEN)
257 return 0;
259 if (strncmp(name, GOT_PACK_PREFIX, strlen(GOT_PACK_PREFIX)) != 0)
260 return 0;
262 if (strcmp(name + strlen(GOT_PACK_PREFIX) +
263 SHA1_DIGEST_STRING_LENGTH - 1, GOT_PACKIDX_SUFFIX) != 0)
264 return 0;
266 return 1;
269 static off_t
270 get_object_offset(struct got_packidx_v2_hdr *packidx, int idx)
272 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
273 uint32_t offset = betoh32(packidx->offsets[idx]);
274 if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
275 uint64_t loffset;
276 idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
277 if (idx < 0 || idx > totobj || packidx->large_offsets == NULL)
278 return -1;
279 loffset = betoh64(packidx->large_offsets[idx]);
280 return (loffset > INT64_MAX ? -1 : (off_t)loffset);
282 return (off_t)(offset & GOT_PACKIDX_OFFSET_VAL_MASK);
285 static int
286 get_object_idx(struct got_packidx_v2_hdr *packidx, struct got_object_id *id)
288 u_int8_t id0 = id->sha1[0];
289 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
290 int i = 0;
292 if (id0 > 0)
293 i = betoh32(packidx->fanout_table[id0 - 1]);
295 while (i < totobj) {
296 struct got_object_id *oid = &packidx->sorted_ids[i];
297 uint32_t offset;
298 int cmp = got_object_id_cmp(id, oid);
300 if (cmp == 0)
301 return i;
302 else if (cmp > 0)
303 break;
304 i++;
307 return -1;
310 const struct got_error *
311 read_packfile_hdr(FILE *f, struct got_packidx_v2_hdr *packidx)
313 const struct got_error *err = NULL;
314 uint32_t totobj = betoh32(packidx->fanout_table[0xff]);
315 struct got_packfile_hdr hdr;
316 size_t n;
318 n = fread(&hdr, sizeof(hdr), 1, f);
319 if (n != 1)
320 return got_ferror(f, GOT_ERR_BAD_PACKIDX);
322 if (betoh32(hdr.signature) != GOT_PACKFILE_SIGNATURE ||
323 betoh32(hdr.version) != GOT_PACKFILE_VERSION ||
324 betoh32(hdr.nobjects) != totobj)
325 err = got_error(GOT_ERR_BAD_PACKFILE);
327 return err;
330 static const struct got_error *
331 decode_type_and_size(uint8_t *type, uint64_t *size, size_t *len, FILE *packfile)
333 uint8_t t = 0;
334 uint64_t s = 0;
335 uint8_t sizeN;
336 size_t n;
337 int i = 0;
339 do {
340 /* We do not support size values which don't fit in 64 bit. */
341 if (i > 9)
342 return got_error(GOT_ERR_NO_SPACE);
344 n = fread(&sizeN, sizeof(sizeN), 1, packfile);
345 if (n != 1)
346 return got_ferror(packfile, GOT_ERR_BAD_PACKIDX);
348 if (i == 0) {
349 t = (sizeN & GOT_PACK_OBJ_SIZE0_TYPE_MASK) >>
350 GOT_PACK_OBJ_SIZE0_TYPE_MASK_SHIFT;
351 s = (sizeN & GOT_PACK_OBJ_SIZE0_VAL_MASK);
352 } else {
353 size_t shift = 4 + 7 * (i - 1);
354 s |= ((sizeN & GOT_PACK_OBJ_SIZE_VAL_MASK) << shift);
356 i++;
357 } while (sizeN & GOT_PACK_OBJ_SIZE_MORE);
359 *type = t;
360 *size = s;
361 *len = i * sizeof(sizeN);
362 return NULL;
365 static const struct got_error *
366 open_packed_plain_object(struct got_object **obj, const char *path_packfile,
367 struct got_object_id *id, uint8_t type, off_t offset, size_t size)
369 *obj = calloc(1, sizeof(**obj));
370 if (*obj == NULL)
371 return got_error(GOT_ERR_NO_MEM);
373 (*obj)->path_packfile = strdup(path_packfile);
374 if ((*obj)->path_packfile == NULL) {
375 free(*obj);
376 *obj = NULL;
377 return got_error(GOT_ERR_NO_MEM);
380 (*obj)->type = type;
381 (*obj)->flags = GOT_OBJ_FLAG_PACKED;
382 (*obj)->hdrlen = 0;
383 (*obj)->size = size;
384 memcpy(&(*obj)->id, id, sizeof((*obj)->id));
385 (*obj)->pack_offset = offset;
387 return NULL;
390 static const struct got_error *
391 open_packed_object(struct got_object **obj, struct got_repository *repo,
392 const char *path_packdir, struct got_packidx_v2_hdr *packidx,
393 struct got_object_id *id)
395 const struct got_error *err = NULL;
396 int idx = get_object_idx(packidx, id);
397 off_t offset;
398 char hex[SHA1_DIGEST_STRING_LENGTH];
399 char *sha1str;
400 char *path_packfile;
401 FILE *packfile;
402 uint8_t type;
403 uint64_t size;
404 size_t tslen;
406 *obj = NULL;
407 if (idx == -1) /* object not found in pack index */
408 return NULL;
410 offset = get_object_offset(packidx, idx);
411 if (offset == (uint64_t)-1)
412 return got_error(GOT_ERR_BAD_PACKIDX);
414 sha1str = got_sha1_digest_to_str(packidx->trailer.packfile_sha1,
415 hex, sizeof(hex));
416 if (sha1str == NULL)
417 return got_error(GOT_ERR_PACKIDX_CSUM);
419 if (asprintf(&path_packfile, "%s/%s%s%s", path_packdir,
420 GOT_PACK_PREFIX, sha1str, GOT_PACKFILE_SUFFIX) == -1)
421 return got_error(GOT_ERR_NO_MEM);
423 packfile = fopen(path_packfile, "rb");
424 if (packfile == NULL) {
425 err = got_error_from_errno();
426 goto done;
429 err = read_packfile_hdr(packfile, packidx);
430 if (err)
431 goto done;
433 if (fseeko(packfile, offset, SEEK_SET) != 0) {
434 err = got_error_from_errno();
435 goto done;
438 err = decode_type_and_size(&type, &size, &tslen, packfile);
439 if (err)
440 goto done;
442 switch (type) {
443 case GOT_OBJ_TYPE_COMMIT:
444 case GOT_OBJ_TYPE_TREE:
445 case GOT_OBJ_TYPE_BLOB:
446 err = open_packed_plain_object(obj, path_packfile, id, type,
447 offset + tslen, size);
448 break;
450 case GOT_OBJ_TYPE_REF_DELTA:
451 case GOT_OBJ_TYPE_TAG:
452 case GOT_OBJ_TYPE_OFFSET_DELTA:
453 default:
454 err = got_error(GOT_ERR_NOT_IMPL);
455 goto done;
457 done:
458 free(path_packfile);
459 if (err)
460 free(*obj);
461 if (packfile && fclose(packfile) == -1 && err == 0)
462 err = got_error_from_errno();
463 return err;
466 const struct got_error *
467 got_packfile_open_object(struct got_object **obj, struct got_object_id *id,
468 struct got_repository *repo)
470 const struct got_error *err = NULL;
471 DIR *packdir = NULL;
472 struct dirent *dent;
473 char *path_packdir = got_repo_get_path_objects_pack(repo);
475 if (path_packdir == NULL) {
476 err = got_error(GOT_ERR_NO_MEM);
477 goto done;
480 packdir = opendir(path_packdir);
481 if (packdir == NULL) {
482 err = got_error_from_errno();
483 goto done;
486 while ((dent = readdir(packdir)) != NULL) {
487 struct got_packidx_v2_hdr *packidx;
488 char *path_packidx, *path_object;
490 if (!is_packidx_filename(dent->d_name, dent->d_namlen))
491 continue;
493 if (asprintf(&path_packidx, "%s/%s", path_packdir,
494 dent->d_name) == -1) {
495 err = got_error(GOT_ERR_NO_MEM);
496 goto done;
499 err = got_packidx_open(&packidx, path_packidx);
500 free(path_packidx);
501 if (err)
502 goto done;
504 err = open_packed_object(obj, repo, path_packdir, packidx, id);
505 got_packidx_close(packidx);
506 if (err)
507 goto done;
508 if (*obj != NULL)
509 break;
512 done:
513 free(path_packdir);
514 if (packdir && closedir(packdir) != 0 && err == 0)
515 err = got_error_from_errno();
516 return err;
519 static const struct got_error *
520 dump_plain_object(FILE *infile, uint8_t type, size_t size, FILE *outfile)
522 size_t n;
524 while (size > 0) {
525 uint8_t data[2048];
526 size_t len = MIN(size, sizeof(data));
528 n = fread(data, len, 1, infile);
529 if (n != 1)
530 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
532 n = fwrite(data, len, 1, outfile);
533 if (n != 1)
534 return got_ferror(outfile, GOT_ERR_IO);
536 size -= len;
539 rewind(outfile);
540 return NULL;
543 static const struct got_error *
544 dump_ref_delta_object(struct got_repository *repo, FILE *infile, uint8_t type,
545 size_t size, FILE *outfile)
547 const struct got_error *err = NULL;
548 struct got_object_id base_id;
549 struct got_object *base_obj;
550 int n;
552 if (size < sizeof(base_id))
553 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
555 n = fread(&base_id, sizeof(base_id), 1, infile);
556 if (n != 1)
557 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
559 size -= sizeof(base_id);
560 if (size <= 0)
561 return got_ferror(infile, GOT_ERR_BAD_PACKFILE);
563 err = got_object_open(&base_obj, repo, &base_id);
564 if (err)
565 return err;
567 err = got_delta_apply(repo, infile, size, base_obj, outfile);
568 got_object_close(base_obj);
569 return err;
572 const struct got_error *
573 got_packfile_extract_object(FILE **f, struct got_object *obj,
574 struct got_repository *repo)
576 const struct got_error *err = NULL;
577 FILE *packfile = NULL;
579 if ((obj->flags & GOT_OBJ_FLAG_PACKED) == 0)
580 return got_error(GOT_ERR_OBJ_NOT_PACKED);
582 *f = got_opentemp();
583 if (*f == NULL) {
584 err = got_error(GOT_ERR_FILE_OPEN);
585 goto done;
588 packfile = fopen(obj->path_packfile, "rb");
589 if (packfile == NULL) {
590 err = got_error_from_errno();
591 goto done;
594 if (fseeko(packfile, obj->pack_offset, SEEK_SET) != 0) {
595 err = got_error_from_errno();
596 goto done;
599 switch (obj->type) {
600 case GOT_OBJ_TYPE_COMMIT:
601 case GOT_OBJ_TYPE_TREE:
602 case GOT_OBJ_TYPE_BLOB:
603 err = dump_plain_object(packfile, obj->type, obj->size, *f);
604 break;
605 case GOT_OBJ_TYPE_REF_DELTA:
606 err = dump_ref_delta_object(repo, packfile, obj->type,
607 obj->size, *f);
608 break;
609 case GOT_OBJ_TYPE_TAG:
610 case GOT_OBJ_TYPE_OFFSET_DELTA:
611 default:
612 err = got_error(GOT_ERR_NOT_IMPL);
613 goto done;
615 done:
616 if (packfile)
617 fclose(packfile);
618 if (err && *f)
619 fclose(*f);
620 return err;