Blob


/*
 * Copyright (c) 2019 Ori Bernstein <ori@openbsd.org>
 * Copyright (c) 2020, 2022 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18 #include <sys/queue.h>
19 #include <sys/stat.h>
20 #include <sys/time.h>
21 #include <sys/types.h>
22 #include <sys/uio.h>
23 #include <sys/mman.h>
25 #include <stdint.h>
26 #include <errno.h>
27 #include <imsg.h>
28 #include <limits.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <sha1.h>
35 #include <sha2.h>
36 #include <endian.h>
37 #include <fcntl.h>
38 #include <unistd.h>
39 #include <zlib.h>
40 #include <err.h>
41 #include <assert.h>
42 #include <dirent.h>
44 #include "got_error.h"
45 #include "got_object.h"
47 #include "got_lib_hash.h"
48 #include "got_lib_delta.h"
49 #include "got_lib_inflate.h"
50 #include "got_lib_object.h"
51 #include "got_lib_object_parse.h"
52 #include "got_lib_object_idset.h"
53 #include "got_lib_privsep.h"
54 #include "got_lib_pack.h"
55 #include "got_lib_ratelimit.h"
56 #include "got_lib_pack_index.h"
57 #include "got_lib_delta_cache.h"
59 struct got_indexed_object {
60 struct got_object_id id;
62 /*
63 * Has this object been fully resolved?
64 * If so, we know its ID, otherwise we don't and 'id' is invalid.
65 */
66 int valid;
68 /* Offset of type+size field for this object in pack file. */
69 off_t off;
71 /* Type+size values parsed from pack file. */
72 uint8_t type;
73 uint64_t size;
75 /* Length of on-disk type+size data. */
76 size_t tslen;
78 /* Length of object data following type+size. */
79 size_t len;
81 uint32_t crc;
83 union {
84 struct {
85 /* For ref deltas. */
86 struct got_object_id ref_id;
87 } ref;
88 struct {
89 /* For offset deltas. */
90 off_t base_offset;
91 size_t base_offsetlen;
92 } ofs;
93 } delta;
94 };
/*
 * Store the 32-bit value 'n' into the four bytes starting at 'b' in
 * big-endian order (most significant byte first).
 */
static void
putbe32(char *b, uint32_t n)
{
	b[0] = n >> 24;
	b[1] = n >> 16;
	b[2] = n >> 8;
	b[3] = n >> 0;
}
/*
 * Read up to 'len' bytes from 'fd', feeding every byte read into the
 * running CRC at '*crc' and into the hash context 'ctx'.  Either of the
 * two may be NULL, in which case it is skipped.
 *
 * Returns an errno-based error if read(2) fails.  Hitting end-of-file
 * before 'len' bytes were consumed ends the loop and returns NULL.
 */
static const struct got_error *
read_checksum(uint32_t *crc, struct got_hash *ctx, int fd, size_t len)
{
	uint8_t chunk[8192];
	size_t remaining = len;

	while (remaining > 0) {
		size_t want = sizeof(chunk);
		ssize_t nread;

		if (remaining < want)
			want = remaining;

		nread = read(fd, chunk, want);
		if (nread == -1)
			return got_error_from_errno("read");
		if (nread == 0)
			break;	/* end of file */

		if (crc != NULL)
			*crc = crc32(*crc, chunk, nread);
		if (ctx != NULL)
			got_hash_update(ctx, chunk, nread);

		remaining -= nread;
	}

	return NULL;
}
127 static const struct got_error *
128 read_file_digest(struct got_hash *ctx, FILE *f, size_t len)
130 uint8_t buf[8192];
131 size_t n, r;
133 for (n = len; n > 0; n -= r) {
134 r = fread(buf, 1, n > sizeof(buf) ? sizeof(buf) : n, f);
135 if (r == 0) {
136 if (feof(f))
137 return NULL;
138 return got_ferror(f, GOT_ERR_IO);
140 got_hash_update(ctx, buf, r);
143 return NULL;
146 static const struct got_error *
147 read_packed_object(struct got_pack *pack, struct got_indexed_object *obj,
148 FILE *tmpfile, struct got_hash *pack_sha1_ctx)
150 const struct got_error *err = NULL;
151 struct got_hash ctx;
152 uint8_t *data = NULL;
153 size_t datalen = 0;
154 ssize_t n;
155 char *header;
156 size_t headerlen;
157 const char *obj_label;
158 size_t mapoff = obj->off;
159 struct got_inflate_checksum csum;
161 memset(&csum, 0, sizeof(csum));
162 csum.input_ctx = pack_sha1_ctx;
163 csum.input_crc = &obj->crc;
165 err = got_pack_parse_object_type_and_size(&obj->type, &obj->size,
166 &obj->tslen, pack, obj->off);
167 if (err)
168 return err;
170 if (pack->map) {
171 obj->crc = crc32(obj->crc, pack->map + mapoff, obj->tslen);
172 got_hash_update(pack_sha1_ctx, pack->map + mapoff, obj->tslen);
173 mapoff += obj->tslen;
174 } else {
175 /* XXX Seek back and get the CRC of on-disk type+size bytes. */
176 if (lseek(pack->fd, obj->off, SEEK_SET) == -1)
177 return got_error_from_errno("lseek");
178 err = read_checksum(&obj->crc, pack_sha1_ctx,
179 pack->fd, obj->tslen);
180 if (err)
181 return err;
184 switch (obj->type) {
185 case GOT_OBJ_TYPE_BLOB:
186 case GOT_OBJ_TYPE_COMMIT:
187 case GOT_OBJ_TYPE_TREE:
188 case GOT_OBJ_TYPE_TAG:
189 if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
190 if (fseek(tmpfile, 0L, SEEK_SET) == -1) {
191 err = got_error_from_errno("fseek");
192 break;
194 if (pack->map) {
195 err = got_inflate_to_file_mmap(&datalen,
196 &obj->len, &csum, pack->map, mapoff,
197 pack->filesize - mapoff, tmpfile);
198 } else {
199 err = got_inflate_to_file_fd(&datalen,
200 &obj->len, &csum, pack->fd, tmpfile);
202 } else {
203 if (pack->map) {
204 err = got_inflate_to_mem_mmap(&data, &datalen,
205 &obj->len, &csum, pack->map, mapoff,
206 pack->filesize - mapoff);
207 } else {
208 err = got_inflate_to_mem_fd(&data, &datalen,
209 &obj->len, &csum, obj->size, pack->fd);
212 if (err)
213 break;
214 got_hash_init(&ctx, GOT_HASH_SHA1);
215 err = got_object_type_label(&obj_label, obj->type);
216 if (err) {
217 free(data);
218 break;
220 if (asprintf(&header, "%s %lld", obj_label,
221 (long long)obj->size) == -1) {
222 err = got_error_from_errno("asprintf");
223 free(data);
224 break;
226 headerlen = strlen(header) + 1;
227 got_hash_update(&ctx, header, headerlen);
228 if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
229 err = read_file_digest(&ctx, tmpfile, datalen);
230 if (err) {
231 free(header);
232 free(data);
233 break;
235 } else
236 got_hash_update(&ctx, data, datalen);
237 got_hash_final_object_id(&ctx, &obj->id);
238 free(header);
239 free(data);
240 break;
241 case GOT_OBJ_TYPE_REF_DELTA:
242 memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH);
243 if (pack->map) {
244 if (mapoff + SHA1_DIGEST_LENGTH >= pack->filesize) {
245 err = got_error(GOT_ERR_BAD_PACKFILE);
246 break;
248 if (mapoff + SHA1_DIGEST_LENGTH > SIZE_MAX) {
249 err = got_error_fmt(GOT_ERR_RANGE,
250 "mapoff %lld would overflow size_t",
251 (long long)mapoff + SHA1_DIGEST_LENGTH);
252 break;
254 memcpy(obj->delta.ref.ref_id.sha1, pack->map + mapoff,
255 SHA1_DIGEST_LENGTH);
256 obj->crc = crc32(obj->crc, pack->map + mapoff,
257 SHA1_DIGEST_LENGTH);
258 got_hash_update(pack_sha1_ctx, pack->map + mapoff,
259 SHA1_DIGEST_LENGTH);
260 mapoff += SHA1_DIGEST_LENGTH;
261 err = got_inflate_to_mem_mmap(NULL, &datalen,
262 &obj->len, &csum, pack->map, mapoff,
263 pack->filesize - mapoff);
264 if (err)
265 break;
266 } else {
267 n = read(pack->fd, obj->delta.ref.ref_id.sha1,
268 SHA1_DIGEST_LENGTH);
269 if (n == -1) {
270 err = got_error_from_errno("read");
271 break;
273 if (n < sizeof(obj->id)) {
274 err = got_error(GOT_ERR_BAD_PACKFILE);
275 break;
277 obj->crc = crc32(obj->crc, obj->delta.ref.ref_id.sha1,
278 SHA1_DIGEST_LENGTH);
279 got_hash_update(pack_sha1_ctx,
280 obj->delta.ref.ref_id.sha1, SHA1_DIGEST_LENGTH);
281 err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len,
282 &csum, obj->size, pack->fd);
283 if (err)
284 break;
286 obj->len += SHA1_DIGEST_LENGTH;
287 break;
288 case GOT_OBJ_TYPE_OFFSET_DELTA:
289 memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH);
290 err = got_pack_parse_offset_delta(&obj->delta.ofs.base_offset,
291 &obj->delta.ofs.base_offsetlen, pack, obj->off,
292 obj->tslen);
293 if (err)
294 break;
296 if (pack->map) {
297 if (mapoff + obj->delta.ofs.base_offsetlen >=
298 pack->filesize) {
299 err = got_error(GOT_ERR_BAD_PACKFILE);
300 break;
303 if (mapoff + obj->delta.ofs.base_offsetlen >
304 SIZE_MAX) {
305 err = got_error_fmt(GOT_ERR_RANGE,
306 "mapoff %lld would overflow size_t",
307 (long long)mapoff
308 + obj->delta.ofs.base_offsetlen);
311 obj->crc = crc32(obj->crc, pack->map + mapoff,
312 obj->delta.ofs.base_offsetlen);
313 got_hash_update(pack_sha1_ctx, pack->map + mapoff,
314 obj->delta.ofs.base_offsetlen);
315 mapoff += obj->delta.ofs.base_offsetlen;
316 err = got_inflate_to_mem_mmap(NULL, &datalen,
317 &obj->len, &csum, pack->map, mapoff,
318 pack->filesize - mapoff);
319 if (err)
320 break;
321 } else {
322 /*
323 * XXX Seek back and get CRC and SHA1 of on-disk
324 * offset bytes.
325 */
326 if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET)
327 == -1) {
328 err = got_error_from_errno("lseek");
329 break;
331 err = read_checksum(&obj->crc, pack_sha1_ctx,
332 pack->fd, obj->delta.ofs.base_offsetlen);
333 if (err)
334 break;
336 err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len,
337 &csum, obj->size, pack->fd);
338 if (err)
339 break;
341 obj->len += obj->delta.ofs.base_offsetlen;
342 break;
343 default:
344 err = got_error(GOT_ERR_OBJ_TYPE);
345 break;
348 return err;
351 const struct got_error *
352 got_pack_hwrite(int fd, void *buf, int len, struct got_hash *ctx)
354 ssize_t w;
356 got_hash_update(ctx, buf, len);
358 w = write(fd, buf, len);
359 if (w == -1)
360 return got_error_from_errno("write");
361 if (w != len)
362 return got_error(GOT_ERR_IO);
364 return NULL;
367 static const struct got_error *
368 resolve_deltified_object(struct got_pack *pack, struct got_packidx *packidx,
369 struct got_indexed_object *obj, FILE *tmpfile, FILE *delta_base_file,
370 FILE *delta_accum_file)
372 const struct got_error *err = NULL;
373 struct got_delta_chain deltas;
374 struct got_delta *delta;
375 uint8_t *buf = NULL;
376 size_t len = 0;
377 struct got_hash ctx;
378 char *header = NULL;
379 size_t headerlen;
380 uint64_t max_size;
381 int base_obj_type;
382 const char *obj_label;
384 deltas.nentries = 0;
385 STAILQ_INIT(&deltas.entries);
387 err = got_pack_resolve_delta_chain(&deltas, packidx, pack,
388 obj->off, obj->tslen, obj->type, obj->size,
389 GOT_DELTA_CHAIN_RECURSION_MAX);
390 if (err)
391 goto done;
393 err = got_pack_get_delta_chain_max_size(&max_size, &deltas, pack);
394 if (err)
395 goto done;
396 if (max_size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
397 rewind(tmpfile);
398 rewind(delta_base_file);
399 rewind(delta_accum_file);
400 err = got_pack_dump_delta_chain_to_file(&len, &deltas,
401 pack, tmpfile, delta_base_file, delta_accum_file);
402 if (err)
403 goto done;
404 } else {
405 err = got_pack_dump_delta_chain_to_mem(&buf, &len,
406 &deltas, pack);
408 if (err)
409 goto done;
411 err = got_delta_chain_get_base_type(&base_obj_type, &deltas);
412 if (err)
413 goto done;
414 err = got_object_type_label(&obj_label, base_obj_type);
415 if (err)
416 goto done;
417 if (asprintf(&header, "%s %zd", obj_label, len) == -1) {
418 err = got_error_from_errno("asprintf");
419 goto done;
421 headerlen = strlen(header) + 1;
422 got_hash_init(&ctx, GOT_HASH_SHA1);
423 got_hash_update(&ctx, header, headerlen);
424 if (max_size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) {
425 err = read_file_digest(&ctx, tmpfile, len);
426 if (err)
427 goto done;
428 } else
429 got_hash_update(&ctx, buf, len);
430 got_hash_final_object_id(&ctx, &obj->id);
431 done:
432 free(buf);
433 free(header);
434 while (!STAILQ_EMPTY(&deltas.entries)) {
435 delta = STAILQ_FIRST(&deltas.entries);
436 STAILQ_REMOVE_HEAD(&deltas.entries, entry);
437 free(delta);
439 return err;
442 /* Determine the slot in the pack index a given object ID should use. */
443 static int
444 find_object_idx(struct got_packidx *packidx, uint8_t *sha1)
446 u_int8_t id0 = sha1[0];
447 uint32_t nindexed = be32toh(packidx->hdr.fanout_table[0xff]);
448 int left = 0, right = nindexed - 1;
449 int cmp = 0, i = 0;
451 if (id0 > 0)
452 left = be32toh(packidx->hdr.fanout_table[id0 - 1]);
454 while (left <= right) {
455 struct got_packidx_object_id *oid;
457 i = ((left + right) / 2);
458 oid = &packidx->hdr.sorted_ids[i];
460 cmp = memcmp(sha1, oid->sha1, SHA1_DIGEST_LENGTH);
461 if (cmp == 0)
462 return -1; /* object already indexed */
463 else if (cmp > 0)
464 left = i + 1;
465 else if (cmp < 0)
466 right = i - 1;
469 return left;
#if 0
/*
 * Debugging aid: dump the object IDs, object offsets, and fanout table
 * of an in-memory pack index to stderr.
 */
static void
print_packidx(struct got_packidx *packidx)
{
	uint32_t nindexed = be32toh(packidx->hdr.fanout_table[0xff]);
	uint32_t i;

	fprintf(stderr, "object IDs:\n");
	for (i = 0; i < nindexed; i++) {
		char hex[SHA1_DIGEST_STRING_LENGTH];
		got_sha1_digest_to_str(packidx->hdr.sorted_ids[i].sha1,
		    hex, sizeof(hex));
		fprintf(stderr, "%s\n", hex);
	}
	fprintf(stderr, "\n");

	fprintf(stderr, "object offsets:\n");
	for (i = 0; i < nindexed; i++) {
		uint32_t offset = be32toh(packidx->hdr.offsets[i]);
		if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
			/* The low bits index into the large offsets table. */
			int j = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
			fprintf(stderr, "%u -> %llu\n", offset,
			    (unsigned long long)be64toh(
			    packidx->hdr.large_offsets[j]));
		} else
			fprintf(stderr, "%u\n", offset);
	}
	fprintf(stderr, "\n");

	fprintf(stderr, "fanout table:");
	for (i = 0; i <= 0xff; i++)
		fprintf(stderr, " %u", be32toh(packidx->hdr.fanout_table[i]));
	fprintf(stderr, "\n");
}
#endif
507 static void
508 add_indexed_object(struct got_packidx *packidx, uint32_t idx,
509 struct got_indexed_object *obj)
511 int i;
513 memcpy(packidx->hdr.sorted_ids[idx].sha1, obj->id.sha1,
514 SHA1_DIGEST_LENGTH);
515 packidx->hdr.crc32[idx] = htobe32(obj->crc);
516 if (obj->off < GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX)
517 packidx->hdr.offsets[idx] = htobe32(obj->off);
518 else {
519 packidx->hdr.offsets[idx] = htobe32(packidx->nlargeobj |
520 GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX);
521 packidx->hdr.large_offsets[packidx->nlargeobj] =
522 htobe64(obj->off);
523 packidx->nlargeobj++;
526 for (i = obj->id.sha1[0]; i <= 0xff; i++) {
527 uint32_t n = be32toh(packidx->hdr.fanout_table[i]);
528 packidx->hdr.fanout_table[i] = htobe32(n + 1);
532 static int
533 indexed_obj_cmp(const void *pa, const void *pb)
535 struct got_indexed_object *a, *b;
537 a = (struct got_indexed_object *)pa;
538 b = (struct got_indexed_object *)pb;
539 return got_object_id_cmp(&a->id, &b->id);
542 static void
543 make_packidx(struct got_packidx *packidx, uint32_t nobj,
544 struct got_indexed_object *objects)
546 struct got_indexed_object *obj;
547 int i;
548 uint32_t idx = 0;
550 qsort(objects, nobj, sizeof(struct got_indexed_object),
551 indexed_obj_cmp);
553 memset(packidx->hdr.fanout_table, 0,
554 GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS * sizeof(uint32_t));
555 packidx->nlargeobj = 0;
557 for (i = 0; i < nobj; i++) {
558 obj = &objects[i];
559 if (obj->valid)
560 add_indexed_object(packidx, idx++, obj);
564 static void
565 update_packidx(struct got_packidx *packidx, uint32_t nobj,
566 struct got_indexed_object *obj)
568 int idx;
569 uint32_t nindexed = be32toh(packidx->hdr.fanout_table[0xff]);
571 idx = find_object_idx(packidx, obj->id.sha1);
572 if (idx == -1)
573 return; /* object already indexed */
575 memmove(&packidx->hdr.sorted_ids[idx + 1],
576 &packidx->hdr.sorted_ids[idx],
577 sizeof(struct got_packidx_object_id) * (nindexed - idx));
578 memmove(&packidx->hdr.offsets[idx + 1], &packidx->hdr.offsets[idx],
579 sizeof(uint32_t) * (nindexed - idx));
581 add_indexed_object(packidx, idx, obj);
584 static const struct got_error *
585 report_progress(uint32_t nobj_total, uint32_t nobj_indexed, uint32_t nobj_loose,
586 uint32_t nobj_resolved, struct got_ratelimit *rl,
587 got_pack_index_progress_cb progress_cb, void *progress_arg)
589 const struct got_error *err;
590 int elapsed = 0;
592 if (rl) {
593 err = got_ratelimit_check(&elapsed, rl);
594 if (err || !elapsed)
595 return err;
598 return progress_cb(progress_arg, nobj_total, nobj_indexed, nobj_loose,
599 nobj_resolved);
602 const struct got_error *
603 got_pack_index(struct got_pack *pack, int idxfd, FILE *tmpfile,
604 FILE *delta_base_file, FILE *delta_accum_file, uint8_t *pack_sha1_expected,
605 got_pack_index_progress_cb progress_cb, void *progress_arg,
606 struct got_ratelimit *rl)
608 const struct got_error *err;
609 struct got_packfile_hdr hdr;
610 struct got_packidx packidx;
611 char buf[8];
612 char pack_sha1[SHA1_DIGEST_LENGTH];
613 uint32_t nobj, nvalid, nloose, nresolved = 0, i;
614 struct got_indexed_object *objects = NULL, *obj;
615 struct got_hash ctx;
616 uint8_t packidx_hash[SHA1_DIGEST_LENGTH];
617 ssize_t r, w;
618 int pass, have_ref_deltas = 0, first_delta_idx = -1;
619 size_t mapoff = 0;
620 int p_indexed = 0, last_p_indexed = -1;
621 int p_resolved = 0, last_p_resolved = -1;
623 /* Require that pack file header and SHA1 trailer are present. */
624 if (pack->filesize < sizeof(hdr) + SHA1_DIGEST_LENGTH)
625 return got_error_msg(GOT_ERR_BAD_PACKFILE,
626 "short pack file");
628 if (pack->map) {
629 memcpy(&hdr, pack->map, sizeof(hdr));
630 mapoff += sizeof(hdr);
631 } else {
632 r = read(pack->fd, &hdr, sizeof(hdr));
633 if (r == -1)
634 return got_error_from_errno("read");
635 if (r < sizeof(hdr))
636 return got_error_msg(GOT_ERR_BAD_PACKFILE,
637 "short pack file");
640 if (hdr.signature != htobe32(GOT_PACKFILE_SIGNATURE))
641 return got_error_msg(GOT_ERR_BAD_PACKFILE,
642 "bad packfile signature");
643 if (hdr.version != htobe32(GOT_PACKFILE_VERSION))
644 return got_error_msg(GOT_ERR_BAD_PACKFILE,
645 "bad packfile version");
646 nobj = be32toh(hdr.nobjects);
647 if (nobj == 0)
648 return got_error_msg(GOT_ERR_BAD_PACKFILE,
649 "bad packfile with zero objects");
651 /* We compute the SHA1 of pack file contents and verify later on. */
652 got_hash_init(&ctx, GOT_HASH_SHA1);
653 got_hash_update(&ctx, &hdr, sizeof(hdr));
655 /*
656 * Create an in-memory pack index which will grow as objects
657 * IDs in the pack file are discovered. Only fields used to
658 * read deltified objects will be needed by the pack.c library
659 * code, so setting up just a pack index header is sufficient.
660 */
661 memset(&packidx, 0, sizeof(packidx));
662 packidx.hdr.magic = malloc(sizeof(uint32_t));
663 if (packidx.hdr.magic == NULL)
664 return got_error_from_errno("malloc");
665 *packidx.hdr.magic = htobe32(GOT_PACKIDX_V2_MAGIC);
666 packidx.hdr.version = malloc(sizeof(uint32_t));
667 if (packidx.hdr.version == NULL) {
668 err = got_error_from_errno("malloc");
669 goto done;
671 *packidx.hdr.version = htobe32(GOT_PACKIDX_VERSION);
672 packidx.hdr.fanout_table = calloc(GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS,
673 sizeof(uint32_t));
674 if (packidx.hdr.fanout_table == NULL) {
675 err = got_error_from_errno("calloc");
676 goto done;
678 packidx.hdr.sorted_ids = calloc(nobj,
679 sizeof(struct got_packidx_object_id));
680 if (packidx.hdr.sorted_ids == NULL) {
681 err = got_error_from_errno("calloc");
682 goto done;
684 packidx.hdr.crc32 = calloc(nobj, sizeof(uint32_t));
685 if (packidx.hdr.crc32 == NULL) {
686 err = got_error_from_errno("calloc");
687 goto done;
689 packidx.hdr.offsets = calloc(nobj, sizeof(uint32_t));
690 if (packidx.hdr.offsets == NULL) {
691 err = got_error_from_errno("calloc");
692 goto done;
694 /* Large offsets table is empty for pack files < 2 GB. */
695 if (pack->filesize >= GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
696 packidx.hdr.large_offsets = calloc(nobj, sizeof(uint64_t));
697 if (packidx.hdr.large_offsets == NULL) {
698 err = got_error_from_errno("calloc");
699 goto done;
703 nvalid = 0;
704 nloose = 0;
705 objects = calloc(nobj, sizeof(struct got_indexed_object));
706 if (objects == NULL)
707 return got_error_from_errno("calloc");
709 /*
710 * First pass: locate all objects and identify un-deltified objects.
712 * When this pass has completed we will know offset, type, size, and
713 * CRC information for all objects in this pack file. We won't know
714 * any of the actual object IDs of deltified objects yet since we
715 * will not yet attempt to combine deltas.
716 */
717 pass = 1;
718 for (i = 0; i < nobj; i++) {
719 /* Don't send too many progress privsep messages. */
720 p_indexed = ((i + 1) * 100) / nobj;
721 if (p_indexed != last_p_indexed) {
722 err = report_progress(nobj, i + 1, nloose, 0,
723 rl, progress_cb, progress_arg);
724 if (err)
725 goto done;
726 last_p_indexed = p_indexed;
729 obj = &objects[i];
730 obj->crc = crc32(0L, NULL, 0);
732 /* Store offset to type+size information for this object. */
733 if (pack->map) {
734 obj->off = mapoff;
735 } else {
736 obj->off = lseek(pack->fd, 0, SEEK_CUR);
737 if (obj->off == -1) {
738 err = got_error_from_errno("lseek");
739 goto done;
743 err = read_packed_object(pack, obj, tmpfile, &ctx);
744 if (err)
745 goto done;
747 if (pack->map) {
748 mapoff += obj->tslen + obj->len;
749 } else {
750 if (lseek(pack->fd, obj->off + obj->tslen + obj->len,
751 SEEK_SET) == -1) {
752 err = got_error_from_errno("lseek");
753 goto done;
757 if (obj->type == GOT_OBJ_TYPE_BLOB ||
758 obj->type == GOT_OBJ_TYPE_TREE ||
759 obj->type == GOT_OBJ_TYPE_COMMIT ||
760 obj->type == GOT_OBJ_TYPE_TAG) {
761 obj->valid = 1;
762 nloose++;
763 } else {
764 if (first_delta_idx == -1)
765 first_delta_idx = i;
766 if (obj->type == GOT_OBJ_TYPE_REF_DELTA)
767 have_ref_deltas = 1;
770 nvalid = nloose;
772 /*
773 * Having done a full pass over the pack file and can now
774 * verify its checksum.
775 */
776 got_hash_final(&ctx, pack_sha1);
778 if (memcmp(pack_sha1_expected, pack_sha1, SHA1_DIGEST_LENGTH) != 0) {
779 err = got_error(GOT_ERR_PACKFILE_CSUM);
780 goto done;
783 /* Verify the SHA1 checksum stored at the end of the pack file. */
784 if (pack->map) {
785 if (pack->filesize > SIZE_MAX) {
786 err = got_error_fmt(GOT_ERR_RANGE,
787 "filesize %lld overflows size_t",
788 (long long)pack->filesize);
789 goto done;
792 memcpy(pack_sha1_expected, pack->map +
793 pack->filesize - SHA1_DIGEST_LENGTH,
794 SHA1_DIGEST_LENGTH);
795 } else {
796 ssize_t n;
797 if (lseek(pack->fd, -SHA1_DIGEST_LENGTH, SEEK_END) == -1) {
798 err = got_error_from_errno("lseek");
799 goto done;
801 n = read(pack->fd, pack_sha1_expected, SHA1_DIGEST_LENGTH);
802 if (n == -1) {
803 err = got_error_from_errno("read");
804 goto done;
806 if (n != SHA1_DIGEST_LENGTH) {
807 err = got_error(GOT_ERR_IO);
808 goto done;
811 if (memcmp(pack_sha1, pack_sha1_expected, SHA1_DIGEST_LENGTH) != 0) {
812 err = got_error_msg(GOT_ERR_BAD_PACKFILE,
813 "bad checksum in pack file trailer");
814 goto done;
817 if (first_delta_idx == -1)
818 first_delta_idx = 0;
820 /* In order to resolve ref deltas we need an in-progress pack index. */
821 if (have_ref_deltas)
822 make_packidx(&packidx, nobj, objects);
824 /*
825 * Second pass: We can now resolve deltas to compute the IDs of
826 * objects which appear in deltified form. Because deltas can be
827 * chained this pass may require a couple of iterations until all
828 * IDs of deltified objects have been discovered.
829 */
830 pass++;
831 while (nvalid != nobj) {
832 int n = 0;
833 /*
834 * This loop will only run once unless the pack file
835 * contains ref deltas which refer to objects located
836 * later in the pack file, which is unusual.
837 * Offset deltas can always be resolved in one pass
838 * unless the packfile is corrupt.
839 */
840 for (i = first_delta_idx; i < nobj; i++) {
841 obj = &objects[i];
842 if (obj->type != GOT_OBJ_TYPE_REF_DELTA &&
843 obj->type != GOT_OBJ_TYPE_OFFSET_DELTA)
844 continue;
846 if (obj->valid)
847 continue;
849 if (pack->map == NULL && lseek(pack->fd,
850 obj->off + obj->tslen, SEEK_SET) == -1) {
851 err = got_error_from_errno("lseek");
852 goto done;
855 err = resolve_deltified_object(pack, &packidx, obj,
856 tmpfile, delta_base_file, delta_accum_file);
857 if (err) {
858 if (err->code != GOT_ERR_NO_OBJ)
859 goto done;
860 /*
861 * We cannot resolve this object yet because
862 * a delta base is unknown. Try again later.
863 */
864 continue;
867 obj->valid = 1;
868 n++;
869 if (have_ref_deltas)
870 update_packidx(&packidx, nobj, obj);
871 /* Don't send too many progress privsep messages. */
872 p_resolved = ((nresolved + n) * 100) / nobj;
873 if (p_resolved != last_p_resolved) {
874 err = report_progress(nobj, nobj,
875 nloose, nresolved + n, rl,
876 progress_cb, progress_arg);
877 if (err)
878 goto done;
879 last_p_resolved = p_resolved;
883 if (pass++ > 3 && n == 0) {
884 err = got_error_msg(GOT_ERR_BAD_PACKFILE,
885 "could not resolve any of deltas; packfile could "
886 "be corrupt");
887 goto done;
889 nresolved += n;
890 nvalid += n;
893 if (nloose + nresolved != nobj) {
894 static char msg[64];
895 snprintf(msg, sizeof(msg), "discovered only %d of %d objects",
896 nloose + nresolved, nobj);
897 err = got_error_msg(GOT_ERR_BAD_PACKFILE, msg);
898 goto done;
901 err = report_progress(nobj, nobj, nloose, nresolved, NULL,
902 progress_cb, progress_arg);
903 if (err)
904 goto done;
906 make_packidx(&packidx, nobj, objects);
908 free(objects);
909 objects = NULL;
911 got_hash_init(&ctx, GOT_HASH_SHA1);
912 putbe32(buf, GOT_PACKIDX_V2_MAGIC);
913 putbe32(buf + 4, GOT_PACKIDX_VERSION);
914 err = got_pack_hwrite(idxfd, buf, 8, &ctx);
915 if (err)
916 goto done;
917 err = got_pack_hwrite(idxfd, packidx.hdr.fanout_table,
918 GOT_PACKIDX_V2_FANOUT_TABLE_ITEMS * sizeof(uint32_t), &ctx);
919 if (err)
920 goto done;
921 err = got_pack_hwrite(idxfd, packidx.hdr.sorted_ids,
922 nobj * SHA1_DIGEST_LENGTH, &ctx);
923 if (err)
924 goto done;
925 err = got_pack_hwrite(idxfd, packidx.hdr.crc32,
926 nobj * sizeof(uint32_t), &ctx);
927 if (err)
928 goto done;
929 err = got_pack_hwrite(idxfd, packidx.hdr.offsets,
930 nobj * sizeof(uint32_t), &ctx);
931 if (err)
932 goto done;
933 if (packidx.nlargeobj > 0) {
934 err = got_pack_hwrite(idxfd, packidx.hdr.large_offsets,
935 packidx.nlargeobj * sizeof(uint64_t), &ctx);
936 if (err)
937 goto done;
939 err = got_pack_hwrite(idxfd, pack_sha1, SHA1_DIGEST_LENGTH, &ctx);
940 if (err)
941 goto done;
943 got_hash_final(&ctx, packidx_hash);
944 w = write(idxfd, packidx_hash, sizeof(packidx_hash));
945 if (w == -1) {
946 err = got_error_from_errno("write");
947 goto done;
949 if (w != sizeof(packidx_hash)) {
950 err = got_error(GOT_ERR_IO);
951 goto done;
953 done:
954 free(objects);
955 free(packidx.hdr.magic);
956 free(packidx.hdr.version);
957 free(packidx.hdr.fanout_table);
958 free(packidx.hdr.sorted_ids);
959 free(packidx.hdr.offsets);
960 free(packidx.hdr.large_offsets);
961 return err;