Blob


1 /*
2 * Copyright (c) 2020 Ori Bernstein
3 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
23 #include <sys/time.h>
24 #include <sys/mman.h>
26 #include <endian.h>
27 #include <errno.h>
28 #include <stdint.h>
29 #include <imsg.h>
30 #include <inttypes.h>
31 #include <poll.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sha1.h>
36 #include <sha2.h>
37 #include <time.h>
38 #include <unistd.h>
39 #include <limits.h>
40 #include <zlib.h>
42 #include "got_error.h"
43 #include "got_cancel.h"
44 #include "got_object.h"
45 #include "got_path.h"
46 #include "got_reference.h"
47 #include "got_repository_admin.h"
49 #include "got_lib_deltify.h"
50 #include "got_lib_delta.h"
51 #include "got_lib_object.h"
52 #include "got_lib_object_idset.h"
53 #include "got_lib_object_cache.h"
54 #include "got_lib_deflate.h"
55 #include "got_lib_ratelimit.h"
56 #include "got_lib_pack.h"
57 #include "got_lib_pack_create.h"
58 #include "got_lib_repository.h"
59 #include "got_lib_inflate.h"
60 #include "got_lib_poll.h"
62 #include "murmurhash2.h"
/* Smaller of two values; an argument may be evaluated more than once. */
#ifndef MIN
#define MIN(_x, _y)	((_x) < (_y) ? (_x) : (_y))
#endif

/* Larger of two values; an argument may be evaluated more than once. */
#ifndef MAX
#define MAX(_x, _y)	((_x) > (_y) ? (_x) : (_y))
#endif

/* Element count of a true array object (not of a pointer). */
#ifndef nitems
#define nitems(_arr)	(sizeof((_arr)) / sizeof((_arr)[0]))
#endif
76 static const struct got_error *
77 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
78 const char *path, int obj_type, time_t mtime, uint32_t seed)
79 {
80 struct got_pack_meta *m;
82 *new = NULL;
84 m = calloc(1, sizeof(*m));
85 if (m == NULL)
86 return got_error_from_errno("calloc");
88 memcpy(&m->id, id, sizeof(m->id));
90 m->path_hash = murmurhash2(path, strlen(path), seed);
91 m->obj_type = obj_type;
92 m->mtime = mtime;
93 *new = m;
94 return NULL;
95 }
97 static void
98 clear_meta(struct got_pack_meta *meta)
99 {
100 if (meta == NULL)
101 return;
102 meta->path_hash = 0;
103 free(meta->delta_buf);
104 meta->delta_buf = NULL;
105 free(meta->base_obj_id);
106 meta->base_obj_id = NULL;
107 meta->reused_delta_offset = 0;
/*
 * Clear each of the `nmeta` entries referenced by the array and then
 * free the array. The entries are only cleared via clear_meta(); they
 * are not freed by this function.
 */
static void
free_nmeta(struct got_pack_meta **meta, int nmeta)
{
	int k = 0;

	while (k < nmeta) {
		clear_meta(meta[k]);
		k++;
	}

	free(meta);
}
120 static int
121 delta_order_cmp(const void *pa, const void *pb)
123 struct got_pack_meta *a, *b;
125 a = *(struct got_pack_meta **)pa;
126 b = *(struct got_pack_meta **)pb;
128 if (a->obj_type != b->obj_type)
129 return a->obj_type - b->obj_type;
130 if (a->path_hash < b->path_hash)
131 return -1;
132 if (a->path_hash > b->path_hash)
133 return 1;
134 if (a->mtime < b->mtime)
135 return -1;
136 if (a->mtime > b->mtime)
137 return 1;
138 return got_object_id_cmp(&a->id, &b->id);
141 static off_t
142 delta_size(struct got_delta_instruction *deltas, int ndeltas)
144 int i;
145 off_t size = 32;
146 for (i = 0; i < ndeltas; i++) {
147 if (deltas[i].copy)
148 size += GOT_DELTA_SIZE_SHIFT;
149 else
150 size += deltas[i].len + 1;
152 return size;
/*
 * Append `nseg` bytes from `seg` to the growable buffer *p.
 *
 * *len is the number of bytes currently stored and *sz the buffer's
 * capacity. When the new data would not leave at least one spare byte,
 * the capacity is grown by half steps until it does and the buffer is
 * reallocated. On success *len (and *p / *sz, if the buffer grew) are
 * updated. On realloc failure an errno-based error is returned and
 * *p, *len and *sz are left untouched, so the caller can still free *p.
 */
static const struct got_error *
append(unsigned char **p, size_t *len, off_t *sz, void *seg, int nseg)
{
	if (*len + nseg >= *sz) {
		unsigned char *n;
		off_t newsz = *sz;

		while (*len + nseg >= newsz) {
			off_t grow = newsz / 2;

			/*
			 * A capacity below 2 has a growth step of zero;
			 * always advance by at least one byte so this loop
			 * is guaranteed to terminate.
			 */
			newsz += (grow > 0) ? grow : 1;
		}

		n = realloc(*p, newsz);
		if (n == NULL)
			return got_error_from_errno("realloc");
		*p = n;
		/* Only record the new capacity once the buffer really has it. */
		*sz = newsz;
	}

	memcpy(*p + *len, seg, nseg);
	*len += nseg;
	return NULL;
}
173 static const struct got_error *
174 encode_delta_in_mem(struct got_pack_meta *m, struct got_raw_object *o,
175 struct got_delta_instruction *deltas, int ndeltas,
176 off_t delta_size, off_t base_size)
178 const struct got_error *err;
179 unsigned char buf[16], *bp;
180 int i, j;
181 size_t len = 0, compressed_len;
182 off_t bufsize = delta_size;
183 off_t n;
184 struct got_delta_instruction *d;
185 uint8_t *delta_buf;
187 delta_buf = malloc(bufsize);
188 if (delta_buf == NULL)
189 return got_error_from_errno("malloc");
191 /* base object size */
192 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
193 n = base_size >> GOT_DELTA_SIZE_SHIFT;
194 for (i = 1; n > 0; i++) {
195 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
196 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
197 n >>= GOT_DELTA_SIZE_SHIFT;
199 err = append(&delta_buf, &len, &bufsize, buf, i);
200 if (err)
201 goto done;
203 /* target object size */
204 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
205 n = o->size >> GOT_DELTA_SIZE_SHIFT;
206 for (i = 1; n > 0; i++) {
207 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
208 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
209 n >>= GOT_DELTA_SIZE_SHIFT;
211 err = append(&delta_buf, &len, &bufsize, buf, i);
212 if (err)
213 goto done;
215 for (j = 0; j < ndeltas; j++) {
216 d = &deltas[j];
217 if (d->copy) {
218 n = d->offset;
219 bp = &buf[1];
220 buf[0] = GOT_DELTA_BASE_COPY;
221 for (i = 0; i < 4; i++) {
222 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
223 buf[0] |= 1 << i;
224 *bp++ = n & 0xff;
225 n >>= 8;
226 if (n == 0)
227 break;
230 n = d->len;
231 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
232 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
233 for (i = 0; i < 3 && n > 0; i++) {
234 buf[0] |= 1 << (i + 4);
235 *bp++ = n & 0xff;
236 n >>= 8;
239 err = append(&delta_buf, &len, &bufsize,
240 buf, bp - buf);
241 if (err)
242 goto done;
243 } else if (o->f == NULL) {
244 n = 0;
245 while (n != d->len) {
246 buf[0] = (d->len - n < 127) ? d->len - n : 127;
247 err = append(&delta_buf, &len, &bufsize,
248 buf, 1);
249 if (err)
250 goto done;
251 err = append(&delta_buf, &len, &bufsize,
252 o->data + o->hdrlen + d->offset + n,
253 buf[0]);
254 if (err)
255 goto done;
256 n += buf[0];
258 } else {
259 char content[128];
260 size_t r;
261 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) {
262 err = got_error_from_errno("fseeko");
263 goto done;
265 n = 0;
266 while (n != d->len) {
267 buf[0] = (d->len - n < 127) ? d->len - n : 127;
268 err = append(&delta_buf, &len, &bufsize,
269 buf, 1);
270 if (err)
271 goto done;
272 r = fread(content, 1, buf[0], o->f);
273 if (r != buf[0]) {
274 err = got_ferror(o->f, GOT_ERR_IO);
275 goto done;
277 err = append(&delta_buf, &len, &bufsize,
278 content, buf[0]);
279 if (err)
280 goto done;
281 n += buf[0];
286 err = got_deflate_to_mem_mmap(&m->delta_buf, &compressed_len,
287 NULL, NULL, delta_buf, 0, len);
288 if (err)
289 goto done;
291 m->delta_len = len;
292 m->delta_compressed_len = compressed_len;
293 done:
294 free(delta_buf);
295 return err;
298 static const struct got_error *
299 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
300 struct got_delta_instruction *deltas, int ndeltas,
301 off_t base_size, FILE *f)
303 const struct got_error *err;
304 unsigned char buf[16], *bp;
305 int i, j;
306 off_t n;
307 struct got_deflate_buf zb;
308 struct got_delta_instruction *d;
309 off_t delta_len = 0, compressed_len = 0;
311 err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
312 if (err)
313 return err;
315 /* base object size */
316 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
317 n = base_size >> GOT_DELTA_SIZE_SHIFT;
318 for (i = 1; n > 0; i++) {
319 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
320 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
321 n >>= GOT_DELTA_SIZE_SHIFT;
324 err = got_deflate_append_to_file_mmap(&zb, &compressed_len,
325 buf, 0, i, f, NULL);
326 if (err)
327 goto done;
328 delta_len += i;
330 /* target object size */
331 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
332 n = o->size >> GOT_DELTA_SIZE_SHIFT;
333 for (i = 1; n > 0; i++) {
334 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
335 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
336 n >>= GOT_DELTA_SIZE_SHIFT;
339 err = got_deflate_append_to_file_mmap(&zb, &compressed_len,
340 buf, 0, i, f, NULL);
341 if (err)
342 goto done;
343 delta_len += i;
345 for (j = 0; j < ndeltas; j++) {
346 d = &deltas[j];
347 if (d->copy) {
348 n = d->offset;
349 bp = &buf[1];
350 buf[0] = GOT_DELTA_BASE_COPY;
351 for (i = 0; i < 4; i++) {
352 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
353 buf[0] |= 1 << i;
354 *bp++ = n & 0xff;
355 n >>= 8;
356 if (n == 0)
357 break;
359 n = d->len;
360 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
361 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
362 for (i = 0; i < 3 && n > 0; i++) {
363 buf[0] |= 1 << (i + 4);
364 *bp++ = n & 0xff;
365 n >>= 8;
368 err = got_deflate_append_to_file_mmap(&zb,
369 &compressed_len, buf, 0, bp - buf, f, NULL);
370 if (err)
371 goto done;
372 delta_len += (bp - buf);
373 } else if (o->f == NULL) {
374 n = 0;
375 while (n != d->len) {
376 buf[0] = (d->len - n < 127) ? d->len - n : 127;
377 err = got_deflate_append_to_file_mmap(&zb,
378 &compressed_len, buf, 0, 1, f, NULL);
379 if (err)
380 goto done;
381 delta_len++;
382 err = got_deflate_append_to_file_mmap(&zb,
383 &compressed_len,
384 o->data + o->hdrlen + d->offset + n, 0,
385 buf[0], f, NULL);
386 if (err)
387 goto done;
388 delta_len += buf[0];
389 n += buf[0];
391 } else {
392 char content[128];
393 size_t r;
394 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) {
395 err = got_error_from_errno("fseeko");
396 goto done;
398 n = 0;
399 while (n != d->len) {
400 buf[0] = (d->len - n < 127) ? d->len - n : 127;
401 err = got_deflate_append_to_file_mmap(&zb,
402 &compressed_len, buf, 0, 1, f, NULL);
403 if (err)
404 goto done;
405 delta_len++;
406 r = fread(content, 1, buf[0], o->f);
407 if (r != buf[0]) {
408 err = got_ferror(o->f, GOT_ERR_IO);
409 goto done;
411 err = got_deflate_append_to_file_mmap(&zb,
412 &compressed_len, content, 0, buf[0], f,
413 NULL);
414 if (err)
415 goto done;
416 delta_len += buf[0];
417 n += buf[0];
422 err = got_deflate_flush(&zb, f, NULL, &compressed_len);
423 if (err)
424 goto done;
426 /* sanity check */
427 if (compressed_len != ftello(f) - m->delta_offset) {
428 err = got_error(GOT_ERR_COMPRESSION);
429 goto done;
432 m->delta_len = delta_len;
433 m->delta_compressed_len = compressed_len;
434 done:
435 got_deflate_end(&zb);
436 return err;
439 const struct got_error *
440 got_pack_report_progress(got_pack_progress_cb progress_cb, void *progress_arg,
441 struct got_ratelimit *rl, int ncolored, int nfound, int ntrees,
442 off_t packfile_size, int ncommits, int nobj_total, int obj_deltify,
443 int nobj_written)
445 const struct got_error *err;
446 int elapsed;
448 if (progress_cb == NULL)
449 return NULL;
451 err = got_ratelimit_check(&elapsed, rl);
452 if (err || !elapsed)
453 return err;
455 return progress_cb(progress_arg, ncolored, nfound, ntrees,
456 packfile_size, ncommits, nobj_total, obj_deltify, nobj_written);
459 const struct got_error *
460 got_pack_add_meta(struct got_pack_meta *m, struct got_pack_metavec *v)
462 if (v->nmeta == v->metasz){
463 size_t newsize = 2 * v->metasz;
464 struct got_pack_meta **new;
465 new = reallocarray(v->meta, newsize, sizeof(*new));
466 if (new == NULL)
467 return got_error_from_errno("reallocarray");
468 v->meta = new;
469 v->metasz = newsize;
472 v->meta[v->nmeta++] = m;
473 return NULL;
476 const struct got_error *
477 got_pack_find_pack_for_reuse(struct got_packidx **best_packidx,
478 struct got_repository *repo)
480 const struct got_error *err = NULL;
481 struct got_pathlist_entry *pe;
482 const char *best_packidx_path = NULL;
483 int nobj_max = 0;
485 *best_packidx = NULL;
487 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
488 const char *path_packidx = pe->path;
489 struct got_packidx *packidx;
490 int nobj;
492 err = got_repo_get_packidx(&packidx, path_packidx, repo);
493 if (err)
494 break;
496 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
497 if (nobj > nobj_max) {
498 best_packidx_path = path_packidx;
499 nobj_max = nobj;
503 if (best_packidx_path) {
504 err = got_repo_get_packidx(best_packidx, best_packidx_path,
505 repo);
508 return err;
511 const struct got_error *
512 got_pack_cache_pack_for_packidx(struct got_pack **pack,
513 struct got_packidx *packidx, struct got_repository *repo)
515 const struct got_error *err;
516 char *path_packfile = NULL;
518 err = got_packidx_get_packfile_path(&path_packfile,
519 packidx->path_packidx);
520 if (err)
521 return err;
523 *pack = got_repo_get_cached_pack(repo, path_packfile);
524 if (*pack == NULL) {
525 err = got_repo_cache_pack(pack, repo, path_packfile, packidx);
526 if (err)
527 goto done;
529 done:
530 free(path_packfile);
531 return err;
534 static const struct got_error *
535 pick_deltas(struct got_pack_meta **meta, int nmeta, int ncolored,
536 int nfound, int ntrees, int ncommits, int nreused, FILE *delta_cache,
537 struct got_repository *repo,
538 got_pack_progress_cb progress_cb, void *progress_arg,
539 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
541 const struct got_error *err = NULL;
542 struct got_pack_meta *m = NULL, *base = NULL;
543 struct got_raw_object *raw = NULL, *base_raw = NULL;
544 struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
545 int i, j, ndeltas, best_ndeltas;
546 off_t size, best_size;
547 const int max_base_candidates = 3;
548 size_t delta_memsize = 0;
549 const size_t max_delta_memsize = 4 * GOT_DELTA_RESULT_SIZE_CACHED_MAX;
550 int outfd = -1;
551 uint32_t delta_seed;
553 delta_seed = arc4random();
555 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
556 for (i = 0; i < nmeta; i++) {
557 if (cancel_cb) {
558 err = (*cancel_cb)(cancel_arg);
559 if (err)
560 break;
562 err = got_pack_report_progress(progress_cb, progress_arg, rl,
563 ncolored, nfound, ntrees, 0L, ncommits, nreused + nmeta,
564 nreused + i, 0);
565 if (err)
566 goto done;
567 m = meta[i];
569 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
570 m->obj_type == GOT_OBJ_TYPE_TAG)
571 continue;
573 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
574 if (err)
575 goto done;
576 m->size = raw->size;
578 if (raw->f == NULL) {
579 err = got_deltify_init_mem(&m->dtab, raw->data,
580 raw->hdrlen, raw->size + raw->hdrlen, delta_seed);
581 } else {
582 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
583 raw->size + raw->hdrlen, delta_seed);
585 if (err)
586 goto done;
588 if (i > max_base_candidates) {
589 struct got_pack_meta *n = NULL;
590 n = meta[i - (max_base_candidates + 1)];
591 got_deltify_free(n->dtab);
592 n->dtab = NULL;
595 best_size = raw->size;
596 best_ndeltas = 0;
597 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
598 if (cancel_cb) {
599 err = (*cancel_cb)(cancel_arg);
600 if (err)
601 goto done;
603 base = meta[j];
604 /* long chains make unpacking slow, avoid such bases */
605 if (base->nchain >= 128 ||
606 base->obj_type != m->obj_type)
607 continue;
609 err = got_object_raw_open(&base_raw, &outfd, repo,
610 &base->id);
611 if (err)
612 goto done;
614 if (raw->f == NULL && base_raw->f == NULL) {
615 err = got_deltify_mem_mem(&deltas, &ndeltas,
616 raw->data, raw->hdrlen,
617 raw->size + raw->hdrlen, delta_seed,
618 base->dtab, base_raw->data,
619 base_raw->hdrlen,
620 base_raw->size + base_raw->hdrlen);
621 } else if (raw->f == NULL) {
622 err = got_deltify_mem_file(&deltas, &ndeltas,
623 raw->data, raw->hdrlen,
624 raw->size + raw->hdrlen, delta_seed,
625 base->dtab, base_raw->f,
626 base_raw->hdrlen,
627 base_raw->size + base_raw->hdrlen);
628 } else if (base_raw->f == NULL) {
629 err = got_deltify_file_mem(&deltas, &ndeltas,
630 raw->f, raw->hdrlen,
631 raw->size + raw->hdrlen, delta_seed,
632 base->dtab, base_raw->data,
633 base_raw->hdrlen,
634 base_raw->size + base_raw->hdrlen);
635 } else {
636 err = got_deltify(&deltas, &ndeltas,
637 raw->f, raw->hdrlen,
638 raw->size + raw->hdrlen, delta_seed,
639 base->dtab, base_raw->f, base_raw->hdrlen,
640 base_raw->size + base_raw->hdrlen);
642 got_object_raw_close(base_raw);
643 base_raw = NULL;
644 if (err)
645 goto done;
647 size = delta_size(deltas, ndeltas);
648 if (size + 32 < best_size){
649 /*
650 * if we already picked a best delta,
651 * replace it.
652 */
653 best_size = size;
654 free(best_deltas);
655 best_deltas = deltas;
656 best_ndeltas = ndeltas;
657 deltas = NULL;
658 m->nchain = base->nchain + 1;
659 m->prev = base;
660 m->head = base->head;
661 if (m->head == NULL)
662 m->head = base;
663 } else {
664 free(deltas);
665 deltas = NULL;
666 ndeltas = 0;
670 if (best_ndeltas > 0) {
671 if (best_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX &&
672 delta_memsize + best_size <= max_delta_memsize) {
673 delta_memsize += best_size;
674 err = encode_delta_in_mem(m, raw, best_deltas,
675 best_ndeltas, best_size, m->prev->size);
676 } else {
677 m->delta_offset = ftello(delta_cache);
678 err = encode_delta(m, raw, best_deltas,
679 best_ndeltas, m->prev->size, delta_cache);
681 free(best_deltas);
682 best_deltas = NULL;
683 best_ndeltas = 0;
684 if (err)
685 goto done;
688 got_object_raw_close(raw);
689 raw = NULL;
691 done:
692 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
693 got_deltify_free(meta[i]->dtab);
694 meta[i]->dtab = NULL;
696 if (raw)
697 got_object_raw_close(raw);
698 if (base_raw)
699 got_object_raw_close(base_raw);
700 if (outfd != -1 && close(outfd) == -1 && err == NULL)
701 err = got_error_from_errno("close");
702 free(deltas);
703 free(best_deltas);
704 return err;
707 static const struct got_error *
708 search_packidx(int *found, struct got_object_id *id,
709 struct got_repository *repo)
711 const struct got_error *err = NULL;
712 struct got_packidx *packidx = NULL;
713 int idx;
715 *found = 0;
717 err = got_repo_search_packidx(&packidx, &idx, repo, id);
718 if (err == NULL)
719 *found = 1; /* object is already packed */
720 else if (err->code == GOT_ERR_NO_OBJ)
721 err = NULL;
722 return err;
725 const struct got_error *
726 got_pack_add_object(int want_meta, struct got_object_idset *idset,
727 struct got_object_id *id, const char *path, int obj_type,
728 time_t mtime, uint32_t seed, int loose_obj_only,
729 struct got_repository *repo, int *ncolored, int *nfound, int *ntrees,
730 got_pack_progress_cb progress_cb, void *progress_arg,
731 struct got_ratelimit *rl)
733 const struct got_error *err;
734 struct got_pack_meta *m = NULL;
736 if (loose_obj_only) {
737 int is_packed;
738 err = search_packidx(&is_packed, id, repo);
739 if (err)
740 return err;
741 if (is_packed && want_meta)
742 return NULL;
745 if (want_meta) {
746 err = alloc_meta(&m, id, path, obj_type, mtime, seed);
747 if (err)
748 return err;
750 (*nfound)++;
751 err = got_pack_report_progress(progress_cb, progress_arg, rl,
752 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
753 if (err) {
754 clear_meta(m);
755 free(m);
756 return err;
760 err = got_object_idset_add(idset, id, m);
761 if (err) {
762 clear_meta(m);
763 free(m);
765 return err;
768 const struct got_error *
769 got_pack_load_tree_entries(struct got_object_id_queue *ids, int want_meta,
770 struct got_object_idset *idset, struct got_object_idset *idset_exclude,
771 struct got_tree_object *tree,
772 const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo,
773 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
774 got_pack_progress_cb progress_cb, void *progress_arg,
775 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
777 const struct got_error *err;
778 char *p = NULL;
779 int i;
781 (*ntrees)++;
782 err = got_pack_report_progress(progress_cb, progress_arg, rl,
783 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
784 if (err)
785 return err;
787 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
788 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
789 struct got_object_id *id = got_tree_entry_get_id(e);
790 mode_t mode = got_tree_entry_get_mode(e);
792 if (cancel_cb) {
793 err = (*cancel_cb)(cancel_arg);
794 if (err)
795 break;
798 if (got_object_tree_entry_is_submodule(e) ||
799 got_object_idset_contains(idset, id) ||
800 got_object_idset_contains(idset_exclude, id))
801 continue;
803 /*
804 * If got-read-pack is crawling trees for us then
805 * we are only here to collect blob IDs.
806 */
807 if (ids == NULL && S_ISDIR(mode))
808 continue;
810 if (asprintf(&p, "%s%s%s", dpath,
811 got_path_is_root_dir(dpath) ? "" : "/",
812 got_tree_entry_get_name(e)) == -1) {
813 err = got_error_from_errno("asprintf");
814 break;
817 if (S_ISDIR(mode)) {
818 struct got_object_qid *qid;
819 err = got_object_qid_alloc(&qid, id);
820 if (err)
821 break;
822 qid->data = p;
823 p = NULL;
824 STAILQ_INSERT_TAIL(ids, qid, entry);
825 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
826 err = got_pack_add_object(want_meta,
827 want_meta ? idset : idset_exclude, id, p,
828 GOT_OBJ_TYPE_BLOB, mtime, seed, loose_obj_only,
829 repo, ncolored, nfound, ntrees,
830 progress_cb, progress_arg, rl);
831 if (err)
832 break;
833 free(p);
834 p = NULL;
835 } else {
836 free(p);
837 p = NULL;
841 free(p);
842 return err;
845 const struct got_error *
846 got_pack_load_tree(int want_meta, struct got_object_idset *idset,
847 struct got_object_idset *idset_exclude,
848 struct got_object_id *tree_id, const char *dpath, time_t mtime,
849 uint32_t seed, struct got_repository *repo, int loose_obj_only,
850 int *ncolored, int *nfound, int *ntrees,
851 got_pack_progress_cb progress_cb, void *progress_arg,
852 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
854 const struct got_error *err = NULL;
855 struct got_object_id_queue tree_ids;
856 struct got_object_qid *qid;
857 struct got_tree_object *tree = NULL;
859 if (got_object_idset_contains(idset, tree_id) ||
860 got_object_idset_contains(idset_exclude, tree_id))
861 return NULL;
863 err = got_object_qid_alloc(&qid, tree_id);
864 if (err)
865 return err;
866 qid->data = strdup(dpath);
867 if (qid->data == NULL) {
868 err = got_error_from_errno("strdup");
869 got_object_qid_free(qid);
870 return err;
873 STAILQ_INIT(&tree_ids);
874 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
876 while (!STAILQ_EMPTY(&tree_ids)) {
877 const char *path;
878 if (cancel_cb) {
879 err = (*cancel_cb)(cancel_arg);
880 if (err)
881 break;
884 qid = STAILQ_FIRST(&tree_ids);
885 STAILQ_REMOVE_HEAD(&tree_ids, entry);
886 path = qid->data;
888 if (got_object_idset_contains(idset, &qid->id) ||
889 got_object_idset_contains(idset_exclude, &qid->id)) {
890 free(qid->data);
891 got_object_qid_free(qid);
892 continue;
895 err = got_pack_add_object(want_meta,
896 want_meta ? idset : idset_exclude,
897 &qid->id, path, GOT_OBJ_TYPE_TREE,
898 mtime, seed, loose_obj_only, repo,
899 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
900 if (err) {
901 free(qid->data);
902 got_object_qid_free(qid);
903 break;
906 err = got_object_open_as_tree(&tree, repo, &qid->id);
907 if (err) {
908 free(qid->data);
909 got_object_qid_free(qid);
910 break;
913 err = got_pack_load_tree_entries(&tree_ids, want_meta, idset,
914 idset_exclude, tree, path, mtime, seed, repo,
915 loose_obj_only, ncolored, nfound, ntrees,
916 progress_cb, progress_arg, rl,
917 cancel_cb, cancel_arg);
918 free(qid->data);
919 got_object_qid_free(qid);
920 if (err)
921 break;
923 got_object_tree_close(tree);
924 tree = NULL;
927 STAILQ_FOREACH(qid, &tree_ids, entry)
928 free(qid->data);
929 got_object_id_queue_free(&tree_ids);
930 if (tree)
931 got_object_tree_close(tree);
932 return err;
935 static const struct got_error *
936 load_commit(int want_meta, struct got_object_idset *idset,
937 struct got_object_idset *idset_exclude,
938 struct got_object_id *id, struct got_repository *repo, uint32_t seed,
939 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
940 got_pack_progress_cb progress_cb, void *progress_arg,
941 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
943 const struct got_error *err;
944 struct got_commit_object *commit;
946 if (got_object_idset_contains(idset, id) ||
947 got_object_idset_contains(idset_exclude, id))
948 return NULL;
950 if (loose_obj_only) {
951 int is_packed;
952 err = search_packidx(&is_packed, id, repo);
953 if (err)
954 return err;
955 if (is_packed && want_meta)
956 return NULL;
959 err = got_object_open_as_commit(&commit, repo, id);
960 if (err)
961 return err;
963 err = got_pack_add_object(want_meta,
964 want_meta ? idset : idset_exclude, id, "", GOT_OBJ_TYPE_COMMIT,
965 got_object_commit_get_committer_time(commit), seed,
966 loose_obj_only, repo,
967 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
968 if (err)
969 goto done;
971 err = got_pack_load_tree(want_meta, idset, idset_exclude,
972 got_object_commit_get_tree_id(commit),
973 "", got_object_commit_get_committer_time(commit), seed,
974 repo, loose_obj_only, ncolored, nfound, ntrees,
975 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
976 done:
977 got_object_commit_close(commit);
978 return err;
981 static const struct got_error *
982 load_tag(int want_meta, struct got_object_idset *idset,
983 struct got_object_idset *idset_exclude,
984 struct got_object_id *id, struct got_repository *repo, uint32_t seed,
985 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
986 got_pack_progress_cb progress_cb, void *progress_arg,
987 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
989 const struct got_error *err;
990 struct got_tag_object *tag = NULL;
992 if (got_object_idset_contains(idset, id) ||
993 got_object_idset_contains(idset_exclude, id))
994 return NULL;
996 if (loose_obj_only) {
997 int is_packed;
998 err = search_packidx(&is_packed, id, repo);
999 if (err)
1000 return err;
1001 if (is_packed && want_meta)
1002 return NULL;
1005 err = got_object_open_as_tag(&tag, repo, id);
1006 if (err)
1007 return err;
1009 err = got_pack_add_object(want_meta,
1010 want_meta ? idset : idset_exclude, id, "", GOT_OBJ_TYPE_TAG,
1011 got_object_tag_get_tagger_time(tag), seed, loose_obj_only, repo,
1012 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1013 if (err)
1014 goto done;
1016 switch (got_object_tag_get_object_type(tag)) {
1017 case GOT_OBJ_TYPE_COMMIT:
1018 err = load_commit(want_meta, idset, idset_exclude,
1019 got_object_tag_get_object_id(tag), repo, seed,
1020 loose_obj_only, ncolored, nfound, ntrees,
1021 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1022 break;
1023 case GOT_OBJ_TYPE_TREE:
1024 err = got_pack_load_tree(want_meta, idset, idset_exclude,
1025 got_object_tag_get_object_id(tag), "",
1026 got_object_tag_get_tagger_time(tag), seed, repo,
1027 loose_obj_only, ncolored, nfound, ntrees,
1028 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1029 break;
1030 default:
1031 break;
1034 done:
1035 got_object_tag_close(tag);
1036 return err;
1039 const struct got_error *
1040 got_pack_paint_commit(struct got_object_qid *qid, intptr_t color)
1042 if (color < 0 || color >= COLOR_MAX)
1043 return got_error(GOT_ERR_RANGE);
1045 qid->data = (void *)color;
1046 return NULL;
1049 const struct got_error *
1050 got_pack_queue_commit_id(struct got_object_id_queue *ids,
1051 struct got_object_id *id, intptr_t color, struct got_repository *repo)
1053 const struct got_error *err;
1054 struct got_object_qid *qid;
1056 err = got_object_qid_alloc(&qid, id);
1057 if (err)
1058 return err;
1060 STAILQ_INSERT_TAIL(ids, qid, entry);
1061 return got_pack_paint_commit(qid, color);
/* Callback state for append_id(). */
struct append_id_arg {
	/* Output array receiving duplicated object IDs. */
	struct got_object_id **array;
	/* Index of the most recently written slot; incremented before use. */
	int idx;
	/* IDs contained in either of these sets are not appended. */
	struct got_object_idset *drop, *skip;
};
1071 static const struct got_error *
1072 append_id(struct got_object_id *id, void *data, void *arg)
1074 struct append_id_arg *a = arg;
1076 if (got_object_idset_contains(a->skip, id) ||
1077 got_object_idset_contains(a->drop, id))
1078 return NULL;
1080 a->array[++a->idx] = got_object_id_dup(id);
1081 if (a->array[a->idx] == NULL)
1082 return got_error_from_errno("got_object_id_dup");
1084 return NULL;
1087 static const struct got_error *
1088 queue_commit_or_tag_id(struct got_object_id *id, intptr_t color,
1089 struct got_object_id_queue *ids, struct got_repository *repo)
1091 const struct got_error *err;
1092 struct got_tag_object *tag = NULL;
1093 int obj_type;
1095 err = got_object_get_type(&obj_type, repo, id);
1096 if (err)
1097 return err;
1099 if (obj_type == GOT_OBJ_TYPE_TAG) {
1100 err = got_object_open_as_tag(&tag, repo, id);
1101 if (err)
1102 return err;
1103 obj_type = got_object_tag_get_object_type(tag);
1104 id = got_object_tag_get_object_id(tag);
1107 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1108 err = got_pack_queue_commit_id(ids, id, color, repo);
1109 if (err)
1110 goto done;
1112 done:
1113 if (tag)
1114 got_object_tag_close(tag);
1115 return err;
1118 const struct got_error *
1119 got_pack_find_pack_for_commit_painting(struct got_packidx **best_packidx,
1120 struct got_object_id_queue *ids, int nids, struct got_repository *repo)
1122 const struct got_error *err = NULL;
1123 struct got_pathlist_entry *pe;
1124 const char *best_packidx_path = NULL;
1125 int nobj_max = 0;
1126 int ncommits_max = 0;
1128 *best_packidx = NULL;
1131 * Find the largest pack which contains at least some of the
1132 * commits we are interested in.
1134 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
1135 const char *path_packidx = pe->path;
1136 struct got_packidx *packidx;
1137 int nobj, idx, ncommits = 0;
1138 struct got_object_qid *qid;
1140 err = got_repo_get_packidx(&packidx, path_packidx, repo);
1141 if (err)
1142 break;
1144 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
1145 if (nobj <= nobj_max)
1146 continue;
1148 STAILQ_FOREACH(qid, ids, entry) {
1149 idx = got_packidx_get_object_idx(packidx, &qid->id);
1150 if (idx != -1)
1151 ncommits++;
1153 if (ncommits > ncommits_max) {
1154 best_packidx_path = path_packidx;
1155 nobj_max = nobj;
1156 ncommits_max = ncommits;
1160 if (best_packidx_path && err == NULL) {
1161 err = got_repo_get_packidx(best_packidx, best_packidx_path,
1162 repo);
1165 return err;
1168 static const struct got_error *
1169 findtwixt(struct got_object_id ***res, int *nres, int *ncolored,
1170 struct got_object_id **head, int nhead,
1171 struct got_object_id **tail, int ntail,
1172 struct got_repository *repo,
1173 got_pack_progress_cb progress_cb, void *progress_arg,
1174 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1176 const struct got_error *err = NULL;
1177 struct got_object_id_queue ids;
1178 struct got_object_idset *keep, *drop, *skip = NULL;
1179 int i, nkeep;
1181 STAILQ_INIT(&ids);
1182 *res = NULL;
1183 *nres = 0;
1184 *ncolored = 0;
1186 keep = got_object_idset_alloc();
1187 if (keep == NULL)
1188 return got_error_from_errno("got_object_idset_alloc");
1190 drop = got_object_idset_alloc();
1191 if (drop == NULL) {
1192 err = got_error_from_errno("got_object_idset_alloc");
1193 goto done;
1196 skip = got_object_idset_alloc();
1197 if (skip == NULL) {
1198 err = got_error_from_errno("got_object_idset_alloc");
1199 goto done;
1202 for (i = 0; i < nhead; i++) {
1203 struct got_object_id *id = head[i];
1204 if (id == NULL)
1205 continue;
1206 err = queue_commit_or_tag_id(id, COLOR_KEEP, &ids, repo);
1207 if (err)
1208 goto done;
1211 for (i = 0; i < ntail; i++) {
1212 struct got_object_id *id = tail[i];
1213 if (id == NULL)
1214 continue;
1215 err = queue_commit_or_tag_id(id, COLOR_DROP, &ids, repo);
1216 if (err)
1217 goto done;
1220 err = got_pack_paint_commits(ncolored, &ids, nhead + ntail,
1221 keep, drop, skip, repo, progress_cb, progress_arg, rl,
1222 cancel_cb, cancel_arg);
1223 if (err)
1224 goto done;
1226 nkeep = got_object_idset_num_elements(keep);
1227 if (nkeep > 0) {
1228 struct append_id_arg arg;
1229 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
1230 if (arg.array == NULL) {
1231 err = got_error_from_errno("calloc");
1232 goto done;
1234 arg.idx = -1;
1235 arg.skip = skip;
1236 arg.drop = drop;
1237 err = got_object_idset_for_each(keep, append_id, &arg);
1238 if (err) {
1239 free(arg.array);
1240 goto done;
1242 *res = arg.array;
1243 *nres = arg.idx + 1;
1245 done:
1246 got_object_idset_free(keep);
1247 got_object_idset_free(drop);
1248 if (skip)
1249 got_object_idset_free(skip);
1250 got_object_id_queue_free(&ids);
1251 return err;
1254 static const struct got_error *
1255 find_pack_for_enumeration(struct got_packidx **best_packidx,
1256 struct got_object_id **ids, int nids, struct got_repository *repo)
1258 const struct got_error *err = NULL;
1259 struct got_pathlist_entry *pe;
1260 const char *best_packidx_path = NULL;
1261 int nobj_max = 0;
1262 int ncommits_max = 0;
1264 *best_packidx = NULL;
1267 * Find the largest pack which contains at least some of the
1268 * commits and tags we are interested in.
1270 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
1271 const char *path_packidx = pe->path;
1272 struct got_packidx *packidx;
1273 int nobj, i, idx, ncommits = 0;
1275 err = got_repo_get_packidx(&packidx, path_packidx, repo);
1276 if (err)
1277 break;
1279 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
1280 if (nobj <= nobj_max)
1281 continue;
1283 for (i = 0; i < nids; i++) {
1284 idx = got_packidx_get_object_idx(packidx, ids[i]);
1285 if (idx != -1)
1286 ncommits++;
1288 if (ncommits > ncommits_max) {
1289 best_packidx_path = path_packidx;
1290 nobj_max = nobj;
1291 ncommits_max = ncommits;
1295 if (best_packidx_path && err == NULL) {
1296 err = got_repo_get_packidx(best_packidx, best_packidx_path,
1297 repo);
1300 return err;
1303 static const struct got_error *
1304 load_object_ids(int *ncolored, int *nfound, int *ntrees,
1305 struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
1306 struct got_object_id **ours, int nours, struct got_repository *repo,
1307 uint32_t seed, int loose_obj_only, got_pack_progress_cb progress_cb,
1308 void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb,
1309 void *cancel_arg)
1311 const struct got_error *err = NULL;
1312 struct got_object_id **ids = NULL;
1313 struct got_packidx *packidx = NULL;
1314 int i, nobj = 0, obj_type, found_all_objects = 0;
1315 struct got_object_idset *idset_exclude;
1317 idset_exclude = got_object_idset_alloc();
1318 if (idset_exclude == NULL)
1319 return got_error_from_errno("got_object_idset_alloc");
1321 *ncolored = 0;
1322 *nfound = 0;
1323 *ntrees = 0;
1325 err = findtwixt(&ids, &nobj, ncolored, ours, nours, theirs, ntheirs,
1326 repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1327 if (err)
1328 goto done;
1330 err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo);
1331 if (err)
1332 goto done;
1333 if (packidx) {
1334 err = got_pack_load_packed_object_ids(&found_all_objects,
1335 theirs, ntheirs, NULL, 0, 0, seed, idset, idset_exclude,
1336 loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
1337 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1338 if (err)
1339 goto done;
1342 for (i = 0; i < ntheirs; i++) {
1343 struct got_object_id *id = theirs[i];
1344 if (id == NULL)
1345 continue;
1346 err = got_object_get_type(&obj_type, repo, id);
1347 if (err)
1348 return err;
1349 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1350 if (!found_all_objects) {
1351 err = load_commit(0, idset, idset_exclude,
1352 id, repo, seed, loose_obj_only,
1353 ncolored, nfound, ntrees,
1354 progress_cb, progress_arg, rl,
1355 cancel_cb, cancel_arg);
1356 if (err)
1357 goto done;
1359 } else if (obj_type == GOT_OBJ_TYPE_TAG) {
1360 err = load_tag(0, idset, idset_exclude, id, repo,
1361 seed, loose_obj_only, ncolored, nfound, ntrees,
1362 progress_cb, progress_arg, rl,
1363 cancel_cb, cancel_arg);
1364 if (err)
1365 goto done;
1369 found_all_objects = 0;
1370 err = find_pack_for_enumeration(&packidx, ids, nobj, repo);
1371 if (err)
1372 goto done;
1373 if (packidx) {
1374 err = got_pack_load_packed_object_ids(&found_all_objects, ids,
1375 nobj, theirs, ntheirs, 1, seed, idset, idset_exclude,
1376 loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
1377 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1378 if (err)
1379 goto done;
1382 if (!found_all_objects) {
1383 for (i = 0; i < nobj; i++) {
1384 err = load_commit(1, idset, idset_exclude, ids[i],
1385 repo, seed, loose_obj_only, ncolored, nfound,
1386 ntrees, progress_cb, progress_arg, rl,
1387 cancel_cb, cancel_arg);
1388 if (err)
1389 goto done;
1393 for (i = 0; i < nours; i++) {
1394 struct got_object_id *id = ours[i];
1395 struct got_pack_meta *m;
1396 if (id == NULL)
1397 continue;
1398 m = got_object_idset_get(idset, id);
1399 if (m == NULL) {
1400 err = got_object_get_type(&obj_type, repo, id);
1401 if (err)
1402 goto done;
1403 } else
1404 obj_type = m->obj_type;
1405 if (obj_type != GOT_OBJ_TYPE_TAG)
1406 continue;
1407 err = load_tag(1, idset, idset_exclude, id, repo,
1408 seed, loose_obj_only, ncolored, nfound, ntrees,
1409 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1410 if (err)
1411 goto done;
1413 done:
1414 for (i = 0; i < nobj; i++) {
1415 free(ids[i]);
1417 free(ids);
1418 got_object_idset_free(idset_exclude);
1419 return err;
1422 static const struct got_error *
1423 hwrite(int fd, const void *buf, off_t len, SHA1_CTX *ctx)
1425 SHA1Update(ctx, buf, len);
1426 return got_poll_write_full(fd, buf, len);
1429 static const struct got_error *
1430 hcopy(FILE *fsrc, int fd_dst, off_t len, SHA1_CTX *ctx)
1432 const struct got_error *err;
1433 unsigned char buf[65536];
1434 off_t remain = len;
1435 size_t n;
1437 while (remain > 0) {
1438 size_t copylen = MIN(sizeof(buf), remain);
1439 n = fread(buf, 1, copylen, fsrc);
1440 if (n != copylen)
1441 return got_ferror(fsrc, GOT_ERR_IO);
1442 SHA1Update(ctx, buf, copylen);
1443 err = got_poll_write_full(fd_dst, buf, copylen);
1444 if (err)
1445 return err;
1446 remain -= copylen;
1449 return NULL;
1452 static const struct got_error *
1453 hcopy_mmap(uint8_t *src, off_t src_offset, size_t src_size,
1454 int fd, off_t len, SHA1_CTX *ctx)
1456 if (src_offset + len > src_size)
1457 return got_error(GOT_ERR_RANGE);
1459 SHA1Update(ctx, src + src_offset, len);
1460 return got_poll_write_full(fd, src + src_offset, len);
/*
 * Store the 32-bit value 'n' into b[0..3] in big-endian byte order
 * (most significant byte first).
 */
static void
putbe32(char *b, uint32_t n)
{
	int i;

	for (i = 0; i < 4; i++)
		b[i] = n >> (8 * (3 - i));
}
1472 static int
1473 write_order_cmp(const void *pa, const void *pb)
1475 struct got_pack_meta *a, *b, *ahd, *bhd;
1477 a = *(struct got_pack_meta **)pa;
1478 b = *(struct got_pack_meta **)pb;
1479 ahd = (a->head == NULL) ? a : a->head;
1480 bhd = (b->head == NULL) ? b : b->head;
1481 if (bhd->mtime < ahd->mtime)
1482 return -1;
1483 if (bhd->mtime > ahd->mtime)
1484 return 1;
1485 if (bhd < ahd)
1486 return -1;
1487 if (bhd > ahd)
1488 return 1;
1489 if (a->nchain != b->nchain)
1490 return a->nchain - b->nchain;
1491 if (a->mtime < b->mtime)
1492 return -1;
1493 if (a->mtime > b->mtime)
1494 return 1;
1495 return got_object_id_cmp(&a->id, &b->id);
1498 static int
1499 reuse_write_order_cmp(const void *pa, const void *pb)
1501 struct got_pack_meta *a, *b;
1503 a = *(struct got_pack_meta **)pa;
1504 b = *(struct got_pack_meta **)pb;
1506 if (a->reused_delta_offset < b->reused_delta_offset)
1507 return -1;
1508 if (a->reused_delta_offset > b->reused_delta_offset)
1509 return 1;
1510 return 0;
1513 static const struct got_error *
1514 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
1516 size_t i;
1518 *hdrlen = 0;
1520 hdr[0] = obj_type << 4;
1521 hdr[0] |= len & 0xf;
1522 len >>= 4;
1523 for (i = 1; len != 0; i++){
1524 if (i >= bufsize)
1525 return got_error(GOT_ERR_NO_SPACE);
1526 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1527 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1528 len >>= GOT_DELTA_SIZE_SHIFT;
1531 *hdrlen = i;
1532 return NULL;
1535 static int
1536 packoff(char *hdr, off_t off)
1538 int i, j;
1539 char rbuf[8];
1541 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1542 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1543 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1544 GOT_DELTA_SIZE_MORE;
1547 j = 0;
1548 while (i > 0)
1549 hdr[j++] = rbuf[--i];
1550 return j;
1553 static const struct got_error *
1554 deltahdr(off_t *packfile_size, SHA1_CTX *ctx, int packfd,
1555 struct got_pack_meta *m)
1557 const struct got_error *err;
1558 char buf[32];
1559 int nh;
1561 if (m->prev->off != 0) {
1562 err = packhdr(&nh, buf, sizeof(buf),
1563 GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
1564 if (err)
1565 return err;
1566 nh += packoff(buf + nh, m->off - m->prev->off);
1567 err = hwrite(packfd, buf, nh, ctx);
1568 if (err)
1569 return err;
1570 *packfile_size += nh;
1571 } else {
1572 err = packhdr(&nh, buf, sizeof(buf),
1573 GOT_OBJ_TYPE_REF_DELTA, m->delta_len);
1574 if (err)
1575 return err;
1576 err = hwrite(packfd, buf, nh, ctx);
1577 if (err)
1578 return err;
1579 *packfile_size += nh;
1580 err = hwrite(packfd, m->prev->id.hash,
1581 sizeof(m->prev->id.hash), ctx);
1582 if (err)
1583 return err;
1584 *packfile_size += sizeof(m->prev->id.hash);
1587 return NULL;
1590 static const struct got_error *
1591 write_packed_object(off_t *packfile_size, int packfd,
1592 FILE *delta_cache, uint8_t *delta_cache_map, size_t delta_cache_size,
1593 struct got_pack_meta *m, int *outfd, SHA1_CTX *ctx,
1594 struct got_repository *repo)
1596 const struct got_error *err = NULL;
1597 struct got_deflate_checksum csum;
1598 char buf[32];
1599 int nh;
1600 struct got_raw_object *raw = NULL;
1601 off_t outlen, delta_offset;
1603 csum.output_hash = ctx;
1604 csum.output_crc = NULL;
1606 if (m->reused_delta_offset)
1607 delta_offset = m->reused_delta_offset;
1608 else
1609 delta_offset = m->delta_offset;
1611 m->off = *packfile_size;
1612 if (m->delta_len == 0) {
1613 err = got_object_raw_open(&raw, outfd, repo, &m->id);
1614 if (err)
1615 goto done;
1616 err = packhdr(&nh, buf, sizeof(buf),
1617 m->obj_type, raw->size);
1618 if (err)
1619 goto done;
1620 err = hwrite(packfd, buf, nh, ctx);
1621 if (err)
1622 goto done;
1623 *packfile_size += nh;
1624 if (raw->f == NULL) {
1625 err = got_deflate_to_fd_mmap(&outlen,
1626 raw->data + raw->hdrlen, 0, raw->size,
1627 packfd, &csum);
1628 if (err)
1629 goto done;
1630 } else {
1631 if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
1632 == -1) {
1633 err = got_error_from_errno("fseeko");
1634 goto done;
1636 err = got_deflate_to_fd(&outlen, raw->f,
1637 raw->size, packfd, &csum);
1638 if (err)
1639 goto done;
1641 *packfile_size += outlen;
1642 got_object_raw_close(raw);
1643 raw = NULL;
1644 } else if (m->delta_buf) {
1645 err = deltahdr(packfile_size, ctx, packfd, m);
1646 if (err)
1647 goto done;
1648 err = hwrite(packfd, m->delta_buf,
1649 m->delta_compressed_len, ctx);
1650 if (err)
1651 goto done;
1652 *packfile_size += m->delta_compressed_len;
1653 free(m->delta_buf);
1654 m->delta_buf = NULL;
1655 } else if (delta_cache_map) {
1656 err = deltahdr(packfile_size, ctx, packfd, m);
1657 if (err)
1658 goto done;
1659 err = hcopy_mmap(delta_cache_map, delta_offset,
1660 delta_cache_size, packfd, m->delta_compressed_len,
1661 ctx);
1662 if (err)
1663 goto done;
1664 *packfile_size += m->delta_compressed_len;
1665 } else {
1666 if (fseeko(delta_cache, delta_offset, SEEK_SET) == -1) {
1667 err = got_error_from_errno("fseeko");
1668 goto done;
1670 err = deltahdr(packfile_size, ctx, packfd, m);
1671 if (err)
1672 goto done;
1673 err = hcopy(delta_cache, packfd,
1674 m->delta_compressed_len, ctx);
1675 if (err)
1676 goto done;
1677 *packfile_size += m->delta_compressed_len;
1679 done:
1680 if (raw)
1681 got_object_raw_close(raw);
1682 return err;
1685 static const struct got_error *
1686 genpack(uint8_t *pack_hash, int packfd, struct got_pack *reuse_pack,
1687 FILE *delta_cache, struct got_pack_meta **deltify, int ndeltify,
1688 struct got_pack_meta **reuse, int nreuse,
1689 int ncolored, int nfound, int ntrees, int nours,
1690 struct got_repository *repo,
1691 got_pack_progress_cb progress_cb, void *progress_arg,
1692 struct got_ratelimit *rl,
1693 got_cancel_cb cancel_cb, void *cancel_arg)
1695 const struct got_error *err = NULL;
1696 int i;
1697 SHA1_CTX ctx;
1698 struct got_pack_meta *m;
1699 char buf[32];
1700 off_t packfile_size = 0;
1701 int outfd = -1;
1702 int delta_cache_fd = -1;
1703 uint8_t *delta_cache_map = NULL;
1704 size_t delta_cache_size = 0;
1705 FILE *packfile = NULL;
1707 SHA1Init(&ctx);
1709 #ifndef GOT_PACK_NO_MMAP
1710 delta_cache_fd = dup(fileno(delta_cache));
1711 if (delta_cache_fd != -1) {
1712 struct stat sb;
1713 if (fstat(delta_cache_fd, &sb) == -1) {
1714 err = got_error_from_errno("fstat");
1715 goto done;
1717 if (sb.st_size > 0 && sb.st_size <= SIZE_MAX) {
1718 delta_cache_map = mmap(NULL, sb.st_size,
1719 PROT_READ, MAP_PRIVATE, delta_cache_fd, 0);
1720 if (delta_cache_map == MAP_FAILED) {
1721 if (errno != ENOMEM) {
1722 err = got_error_from_errno("mmap");
1723 goto done;
1725 delta_cache_map = NULL; /* fallback on stdio */
1726 } else
1727 delta_cache_size = (size_t)sb.st_size;
1730 #endif
1731 err = hwrite(packfd, "PACK", 4, &ctx);
1732 if (err)
1733 goto done;
1734 putbe32(buf, GOT_PACKFILE_VERSION);
1735 err = hwrite(packfd, buf, 4, &ctx);
1736 if (err)
1737 goto done;
1738 putbe32(buf, ndeltify + nreuse);
1739 err = hwrite(packfd, buf, 4, &ctx);
1740 if (err)
1741 goto done;
1743 qsort(deltify, ndeltify, sizeof(struct got_pack_meta *),
1744 write_order_cmp);
1745 for (i = 0; i < ndeltify; i++) {
1746 err = got_pack_report_progress(progress_cb, progress_arg, rl,
1747 ncolored, nfound, ntrees, packfile_size, nours,
1748 ndeltify + nreuse, ndeltify + nreuse, i);
1749 if (err)
1750 goto done;
1751 m = deltify[i];
1752 err = write_packed_object(&packfile_size, packfd,
1753 delta_cache, delta_cache_map, delta_cache_size,
1754 m, &outfd, &ctx, repo);
1755 if (err)
1756 goto done;
1759 qsort(reuse, nreuse, sizeof(struct got_pack_meta *),
1760 reuse_write_order_cmp);
1761 if (nreuse > 0 && reuse_pack->map == NULL) {
1762 int fd = dup(reuse_pack->fd);
1763 if (fd == -1) {
1764 err = got_error_from_errno("dup");
1765 goto done;
1767 packfile = fdopen(fd, "r");
1768 if (packfile == NULL) {
1769 err = got_error_from_errno("fdopen");
1770 close(fd);
1771 goto done;
1774 for (i = 0; i < nreuse; i++) {
1775 err = got_pack_report_progress(progress_cb, progress_arg, rl,
1776 ncolored, nfound, ntrees, packfile_size, nours,
1777 ndeltify + nreuse, ndeltify + nreuse, ndeltify + i);
1778 if (err)
1779 goto done;
1780 m = reuse[i];
1781 err = write_packed_object(&packfile_size, packfd,
1782 packfile, reuse_pack->map, reuse_pack->filesize,
1783 m, &outfd, &ctx, repo);
1784 if (err)
1785 goto done;
1788 SHA1Final(pack_hash, &ctx);
1789 err = got_poll_write_full(packfd, pack_hash, SHA1_DIGEST_LENGTH);
1790 if (err)
1791 goto done;
1792 packfile_size += SHA1_DIGEST_LENGTH;
1793 packfile_size += sizeof(struct got_packfile_hdr);
1794 if (progress_cb) {
1795 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1796 packfile_size, nours, ndeltify + nreuse,
1797 ndeltify + nreuse, ndeltify + nreuse);
1798 if (err)
1799 goto done;
1801 done:
1802 if (outfd != -1 && close(outfd) == -1 && err == NULL)
1803 err = got_error_from_errno("close");
1804 if (delta_cache_map && munmap(delta_cache_map, delta_cache_size) == -1)
1805 err = got_error_from_errno("munmap");
1806 if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL)
1807 err = got_error_from_errno("close");
1808 if (packfile && fclose(packfile) == EOF && err == NULL)
1809 err = got_error_from_errno("fclose");
1810 return err;
1813 static const struct got_error *
1814 add_meta_idset_cb(struct got_object_id *id, void *data, void *arg)
1816 struct got_pack_meta *m = data;
1817 struct got_pack_metavec *v = arg;
1819 if (m->reused_delta_offset != 0)
1820 return NULL;
1822 return got_pack_add_meta(m, v);
1825 const struct got_error *
1826 got_pack_create(uint8_t *packhash, int packfd, FILE *delta_cache,
1827 struct got_object_id **theirs, int ntheirs,
1828 struct got_object_id **ours, int nours,
1829 struct got_repository *repo, int loose_obj_only, int allow_empty,
1830 got_pack_progress_cb progress_cb, void *progress_arg,
1831 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1833 const struct got_error *err;
1834 struct got_object_idset *idset;
1835 struct got_packidx *reuse_packidx = NULL;
1836 struct got_pack *reuse_pack = NULL;
1837 struct got_pack_metavec deltify, reuse;
1838 int ncolored = 0, nfound = 0, ntrees = 0;
1839 size_t ndeltify;
1840 uint32_t seed;
1842 seed = arc4random();
1844 memset(&deltify, 0, sizeof(deltify));
1845 memset(&reuse, 0, sizeof(reuse));
1847 idset = got_object_idset_alloc();
1848 if (idset == NULL)
1849 return got_error_from_errno("got_object_idset_alloc");
1851 err = load_object_ids(&ncolored, &nfound, &ntrees, idset, theirs,
1852 ntheirs, ours, nours, repo, seed, loose_obj_only,
1853 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1854 if (err)
1855 goto done;
1857 if (progress_cb) {
1858 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1859 0L, nours, got_object_idset_num_elements(idset), 0, 0);
1860 if (err)
1861 goto done;
1864 if (got_object_idset_num_elements(idset) == 0 && !allow_empty) {
1865 err = got_error(GOT_ERR_CANNOT_PACK);
1866 goto done;
1869 reuse.metasz = 64;
1870 reuse.meta = calloc(reuse.metasz,
1871 sizeof(struct got_pack_meta *));
1872 if (reuse.meta == NULL) {
1873 err = got_error_from_errno("calloc");
1874 goto done;
1877 err = got_pack_search_deltas(&reuse_packidx, &reuse_pack,
1878 &reuse, idset, ncolored, nfound, ntrees, nours,
1879 repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1880 if (err)
1881 goto done;
1883 if (reuse_packidx && reuse_pack) {
1884 err = got_repo_pin_pack(repo, reuse_packidx, reuse_pack);
1885 if (err)
1886 goto done;
1889 if (fseeko(delta_cache, 0L, SEEK_END) == -1) {
1890 err = got_error_from_errno("fseeko");
1891 goto done;
1894 ndeltify = got_object_idset_num_elements(idset) - reuse.nmeta;
1895 if (ndeltify > 0) {
1896 deltify.meta = calloc(ndeltify, sizeof(struct got_pack_meta *));
1897 if (deltify.meta == NULL) {
1898 err = got_error_from_errno("calloc");
1899 goto done;
1901 deltify.metasz = ndeltify;
1903 err = got_object_idset_for_each(idset, add_meta_idset_cb,
1904 &deltify);
1905 if (err)
1906 goto done;
1907 if (deltify.nmeta > 0) {
1908 err = pick_deltas(deltify.meta, deltify.nmeta,
1909 ncolored, nfound, ntrees, nours, reuse.nmeta,
1910 delta_cache, repo, progress_cb, progress_arg, rl,
1911 cancel_cb, cancel_arg);
1912 if (err)
1913 goto done;
1917 if (fflush(delta_cache) == EOF) {
1918 err = got_error_from_errno("fflush");
1919 goto done;
1922 if (progress_cb) {
1924 * Report a 1-byte packfile write to indicate we are about
1925 * to start sending packfile data. gotd(8) needs this.
1927 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1928 1 /* packfile_size */, nours,
1929 got_object_idset_num_elements(idset),
1930 deltify.nmeta + reuse.nmeta, 0);
1931 if (err)
1932 goto done;
1935 err = genpack(packhash, packfd, reuse_pack, delta_cache, deltify.meta,
1936 deltify.nmeta, reuse.meta, reuse.nmeta, ncolored, nfound, ntrees,
1937 nours, repo, progress_cb, progress_arg, rl,
1938 cancel_cb, cancel_arg);
1939 if (err)
1940 goto done;
1941 done:
1942 free_nmeta(deltify.meta, deltify.nmeta);
1943 free_nmeta(reuse.meta, reuse.nmeta);
1944 got_object_idset_free(idset);
1945 got_repo_unpin_pack(repo);
1946 return err;