/*
 * Copyright (c) 2020 Ori Bernstein
 * Copyright (c) 2021 Stefan Sperling <stsp@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18 #include <sys/types.h>
19 #include <sys/queue.h>
20 #include <sys/tree.h>
21 #include <sys/uio.h>
22 #include <sys/stat.h>
23 #include <sys/time.h>
24 #include <sys/mman.h>
26 #include <endian.h>
27 #include <errno.h>
28 #include <stdint.h>
29 #include <imsg.h>
30 #include <inttypes.h>
31 #include <poll.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sha1.h>
36 #include <sha2.h>
37 #include <time.h>
38 #include <unistd.h>
39 #include <limits.h>
40 #include <zlib.h>
42 #include "got_error.h"
43 #include "got_cancel.h"
44 #include "got_object.h"
45 #include "got_path.h"
46 #include "got_reference.h"
47 #include "got_repository_admin.h"
49 #include "got_lib_deltify.h"
50 #include "got_lib_delta.h"
51 #include "got_lib_hash.h"
52 #include "got_lib_object.h"
53 #include "got_lib_object_idset.h"
54 #include "got_lib_object_cache.h"
55 #include "got_lib_deflate.h"
56 #include "got_lib_ratelimit.h"
57 #include "got_lib_pack.h"
58 #include "got_lib_pack_create.h"
59 #include "got_lib_repository.h"
60 #include "got_lib_inflate.h"
61 #include "got_lib_poll.h"
63 #include "murmurhash2.h"
/* Smaller of two values; each argument may be evaluated more than once. */
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

/* Larger of two values; each argument may be evaluated more than once. */
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif

/* Element count of a true array (meaningless when applied to a pointer). */
#ifndef nitems
#define nitems(arr) (sizeof((arr)) / sizeof((arr)[0]))
#endif
77 static const struct got_error *
78 alloc_meta(struct got_pack_meta **new, struct got_object_id *id,
79 const char *path, int obj_type, time_t mtime, uint32_t seed)
80 {
81 struct got_pack_meta *m;
83 *new = NULL;
85 m = calloc(1, sizeof(*m));
86 if (m == NULL)
87 return got_error_from_errno("calloc");
89 memcpy(&m->id, id, sizeof(m->id));
91 m->path_hash = murmurhash2(path, strlen(path), seed);
92 m->obj_type = obj_type;
93 m->mtime = mtime;
94 *new = m;
95 return NULL;
96 }
98 static void
99 clear_meta(struct got_pack_meta *meta)
101 if (meta == NULL)
102 return;
103 meta->path_hash = 0;
104 free(meta->delta_buf);
105 meta->delta_buf = NULL;
106 free(meta->base_obj_id);
107 meta->base_obj_id = NULL;
108 meta->reused_delta_offset = 0;
/*
 * Clear each of the first 'nmeta' entries of the 'meta' array with
 * clear_meta() and then free the array itself. The entries themselves
 * are not freed here.
 */
static void
free_nmeta(struct got_pack_meta **meta, int nmeta)
{
	int n = 0;

	while (n < nmeta) {
		clear_meta(meta[n]);
		n++;
	}

	free(meta);
}
121 static int
122 delta_order_cmp(const void *pa, const void *pb)
124 struct got_pack_meta *a, *b;
126 a = *(struct got_pack_meta **)pa;
127 b = *(struct got_pack_meta **)pb;
129 if (a->obj_type != b->obj_type)
130 return a->obj_type - b->obj_type;
131 if (a->path_hash < b->path_hash)
132 return -1;
133 if (a->path_hash > b->path_hash)
134 return 1;
135 if (a->mtime < b->mtime)
136 return -1;
137 if (a->mtime > b->mtime)
138 return 1;
139 return got_object_id_cmp(&a->id, &b->id);
142 static off_t
143 delta_size(struct got_delta_instruction *deltas, int ndeltas)
145 int i;
146 off_t size = 32;
147 for (i = 0; i < ndeltas; i++) {
148 if (deltas[i].copy)
149 size += GOT_DELTA_SIZE_SHIFT;
150 else
151 size += deltas[i].len + 1;
153 return size;
/*
 * Append 'nseg' bytes from 'seg' to the growable buffer '*p'.
 *
 * '*len' is the number of bytes currently stored and '*sz' the current
 * capacity. When the new data would not leave at least one spare byte,
 * the capacity is grown in steps of 50% until it does and the buffer is
 * reallocated. On realloc failure the existing buffer is left untouched
 * (the caller still owns it) and an errno-based error is returned.
 */
static const struct got_error *
append(unsigned char **p, size_t *len, off_t *sz, void *seg, int nseg)
{
	unsigned char *n;

	if (*len + nseg >= *sz) {
		while (*len + nseg >= *sz) {
			off_t grow = *sz / 2;

			/*
			 * Always grow by at least one byte; with a capacity
			 * of 0 or 1 the 50% step is zero and the loop would
			 * otherwise never terminate.
			 */
			*sz += (grow > 0) ? grow : 1;
		}
		n = realloc(*p, *sz);
		if (n == NULL)
			return got_error_from_errno("realloc");
		*p = n;
	}

	memcpy(*p + *len, seg, nseg);
	*len += nseg;
	return NULL;
}
174 static const struct got_error *
175 encode_delta_in_mem(struct got_pack_meta *m, struct got_raw_object *o,
176 struct got_delta_instruction *deltas, int ndeltas,
177 off_t delta_size, off_t base_size)
179 const struct got_error *err;
180 unsigned char buf[16], *bp;
181 int i, j;
182 size_t len = 0, compressed_len;
183 off_t bufsize = delta_size;
184 off_t n;
185 struct got_delta_instruction *d;
186 uint8_t *delta_buf;
188 delta_buf = malloc(bufsize);
189 if (delta_buf == NULL)
190 return got_error_from_errno("malloc");
192 /* base object size */
193 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
194 n = base_size >> GOT_DELTA_SIZE_SHIFT;
195 for (i = 1; n > 0; i++) {
196 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
197 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
198 n >>= GOT_DELTA_SIZE_SHIFT;
200 err = append(&delta_buf, &len, &bufsize, buf, i);
201 if (err)
202 goto done;
204 /* target object size */
205 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
206 n = o->size >> GOT_DELTA_SIZE_SHIFT;
207 for (i = 1; n > 0; i++) {
208 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
209 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
210 n >>= GOT_DELTA_SIZE_SHIFT;
212 err = append(&delta_buf, &len, &bufsize, buf, i);
213 if (err)
214 goto done;
216 for (j = 0; j < ndeltas; j++) {
217 d = &deltas[j];
218 if (d->copy) {
219 n = d->offset;
220 bp = &buf[1];
221 buf[0] = GOT_DELTA_BASE_COPY;
222 for (i = 0; i < 4; i++) {
223 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
224 buf[0] |= 1 << i;
225 *bp++ = n & 0xff;
226 n >>= 8;
227 if (n == 0)
228 break;
231 n = d->len;
232 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
233 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
234 for (i = 0; i < 3 && n > 0; i++) {
235 buf[0] |= 1 << (i + 4);
236 *bp++ = n & 0xff;
237 n >>= 8;
240 err = append(&delta_buf, &len, &bufsize,
241 buf, bp - buf);
242 if (err)
243 goto done;
244 } else if (o->f == NULL) {
245 n = 0;
246 while (n != d->len) {
247 buf[0] = (d->len - n < 127) ? d->len - n : 127;
248 err = append(&delta_buf, &len, &bufsize,
249 buf, 1);
250 if (err)
251 goto done;
252 err = append(&delta_buf, &len, &bufsize,
253 o->data + o->hdrlen + d->offset + n,
254 buf[0]);
255 if (err)
256 goto done;
257 n += buf[0];
259 } else {
260 char content[128];
261 size_t r;
262 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) {
263 err = got_error_from_errno("fseeko");
264 goto done;
266 n = 0;
267 while (n != d->len) {
268 buf[0] = (d->len - n < 127) ? d->len - n : 127;
269 err = append(&delta_buf, &len, &bufsize,
270 buf, 1);
271 if (err)
272 goto done;
273 r = fread(content, 1, buf[0], o->f);
274 if (r != buf[0]) {
275 err = got_ferror(o->f, GOT_ERR_IO);
276 goto done;
278 err = append(&delta_buf, &len, &bufsize,
279 content, buf[0]);
280 if (err)
281 goto done;
282 n += buf[0];
287 err = got_deflate_to_mem_mmap(&m->delta_buf, &compressed_len,
288 NULL, NULL, delta_buf, 0, len);
289 if (err)
290 goto done;
292 m->delta_len = len;
293 m->delta_compressed_len = compressed_len;
294 done:
295 free(delta_buf);
296 return err;
299 static const struct got_error *
300 encode_delta(struct got_pack_meta *m, struct got_raw_object *o,
301 struct got_delta_instruction *deltas, int ndeltas,
302 off_t base_size, FILE *f)
304 const struct got_error *err;
305 unsigned char buf[16], *bp;
306 int i, j;
307 off_t n;
308 struct got_deflate_buf zb;
309 struct got_delta_instruction *d;
310 off_t delta_len = 0, compressed_len = 0;
312 err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE);
313 if (err)
314 return err;
316 /* base object size */
317 buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK;
318 n = base_size >> GOT_DELTA_SIZE_SHIFT;
319 for (i = 1; n > 0; i++) {
320 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
321 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
322 n >>= GOT_DELTA_SIZE_SHIFT;
325 err = got_deflate_append_to_file_mmap(&zb, &compressed_len,
326 buf, 0, i, f, NULL);
327 if (err)
328 goto done;
329 delta_len += i;
331 /* target object size */
332 buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK;
333 n = o->size >> GOT_DELTA_SIZE_SHIFT;
334 for (i = 1; n > 0; i++) {
335 buf[i - 1] |= GOT_DELTA_SIZE_MORE;
336 buf[i] = n & GOT_DELTA_SIZE_VAL_MASK;
337 n >>= GOT_DELTA_SIZE_SHIFT;
340 err = got_deflate_append_to_file_mmap(&zb, &compressed_len,
341 buf, 0, i, f, NULL);
342 if (err)
343 goto done;
344 delta_len += i;
346 for (j = 0; j < ndeltas; j++) {
347 d = &deltas[j];
348 if (d->copy) {
349 n = d->offset;
350 bp = &buf[1];
351 buf[0] = GOT_DELTA_BASE_COPY;
352 for (i = 0; i < 4; i++) {
353 /* DELTA_COPY_OFF1 ... DELTA_COPY_OFF4 */
354 buf[0] |= 1 << i;
355 *bp++ = n & 0xff;
356 n >>= 8;
357 if (n == 0)
358 break;
360 n = d->len;
361 if (n != GOT_DELTA_COPY_DEFAULT_LEN) {
362 /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */
363 for (i = 0; i < 3 && n > 0; i++) {
364 buf[0] |= 1 << (i + 4);
365 *bp++ = n & 0xff;
366 n >>= 8;
369 err = got_deflate_append_to_file_mmap(&zb,
370 &compressed_len, buf, 0, bp - buf, f, NULL);
371 if (err)
372 goto done;
373 delta_len += (bp - buf);
374 } else if (o->f == NULL) {
375 n = 0;
376 while (n != d->len) {
377 buf[0] = (d->len - n < 127) ? d->len - n : 127;
378 err = got_deflate_append_to_file_mmap(&zb,
379 &compressed_len, buf, 0, 1, f, NULL);
380 if (err)
381 goto done;
382 delta_len++;
383 err = got_deflate_append_to_file_mmap(&zb,
384 &compressed_len,
385 o->data + o->hdrlen + d->offset + n, 0,
386 buf[0], f, NULL);
387 if (err)
388 goto done;
389 delta_len += buf[0];
390 n += buf[0];
392 } else {
393 char content[128];
394 size_t r;
395 if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) {
396 err = got_error_from_errno("fseeko");
397 goto done;
399 n = 0;
400 while (n != d->len) {
401 buf[0] = (d->len - n < 127) ? d->len - n : 127;
402 err = got_deflate_append_to_file_mmap(&zb,
403 &compressed_len, buf, 0, 1, f, NULL);
404 if (err)
405 goto done;
406 delta_len++;
407 r = fread(content, 1, buf[0], o->f);
408 if (r != buf[0]) {
409 err = got_ferror(o->f, GOT_ERR_IO);
410 goto done;
412 err = got_deflate_append_to_file_mmap(&zb,
413 &compressed_len, content, 0, buf[0], f,
414 NULL);
415 if (err)
416 goto done;
417 delta_len += buf[0];
418 n += buf[0];
423 err = got_deflate_flush(&zb, f, NULL, &compressed_len);
424 if (err)
425 goto done;
427 /* sanity check */
428 if (compressed_len != ftello(f) - m->delta_offset) {
429 err = got_error(GOT_ERR_COMPRESSION);
430 goto done;
433 m->delta_len = delta_len;
434 m->delta_compressed_len = compressed_len;
435 done:
436 got_deflate_end(&zb);
437 return err;
440 const struct got_error *
441 got_pack_report_progress(got_pack_progress_cb progress_cb, void *progress_arg,
442 struct got_ratelimit *rl, int ncolored, int nfound, int ntrees,
443 off_t packfile_size, int ncommits, int nobj_total, int obj_deltify,
444 int nobj_written)
446 const struct got_error *err;
447 int elapsed;
449 if (progress_cb == NULL)
450 return NULL;
452 err = got_ratelimit_check(&elapsed, rl);
453 if (err || !elapsed)
454 return err;
456 return progress_cb(progress_arg, ncolored, nfound, ntrees,
457 packfile_size, ncommits, nobj_total, obj_deltify, nobj_written);
460 const struct got_error *
461 got_pack_add_meta(struct got_pack_meta *m, struct got_pack_metavec *v)
463 if (v->nmeta == v->metasz){
464 size_t newsize = 2 * v->metasz;
465 struct got_pack_meta **new;
466 new = reallocarray(v->meta, newsize, sizeof(*new));
467 if (new == NULL)
468 return got_error_from_errno("reallocarray");
469 v->meta = new;
470 v->metasz = newsize;
473 v->meta[v->nmeta++] = m;
474 return NULL;
477 const struct got_error *
478 got_pack_find_pack_for_reuse(struct got_packidx **best_packidx,
479 struct got_repository *repo)
481 const struct got_error *err = NULL;
482 struct got_pathlist_entry *pe;
483 const char *best_packidx_path = NULL;
484 int nobj_max = 0;
486 *best_packidx = NULL;
488 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
489 const char *path_packidx = pe->path;
490 struct got_packidx *packidx;
491 int nobj;
493 err = got_repo_get_packidx(&packidx, path_packidx, repo);
494 if (err)
495 break;
497 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
498 if (nobj > nobj_max) {
499 best_packidx_path = path_packidx;
500 nobj_max = nobj;
504 if (best_packidx_path) {
505 err = got_repo_get_packidx(best_packidx, best_packidx_path,
506 repo);
509 return err;
512 const struct got_error *
513 got_pack_cache_pack_for_packidx(struct got_pack **pack,
514 struct got_packidx *packidx, struct got_repository *repo)
516 const struct got_error *err;
517 char *path_packfile = NULL;
519 err = got_packidx_get_packfile_path(&path_packfile,
520 packidx->path_packidx);
521 if (err)
522 return err;
524 *pack = got_repo_get_cached_pack(repo, path_packfile);
525 if (*pack == NULL) {
526 err = got_repo_cache_pack(pack, repo, path_packfile, packidx);
527 if (err)
528 goto done;
530 done:
531 free(path_packfile);
532 return err;
535 static const struct got_error *
536 pick_deltas(struct got_pack_meta **meta, int nmeta, int ncolored,
537 int nfound, int ntrees, int ncommits, int nreused, FILE *delta_cache,
538 struct got_repository *repo,
539 got_pack_progress_cb progress_cb, void *progress_arg,
540 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
542 const struct got_error *err = NULL;
543 struct got_pack_meta *m = NULL, *base = NULL;
544 struct got_raw_object *raw = NULL, *base_raw = NULL;
545 struct got_delta_instruction *deltas = NULL, *best_deltas = NULL;
546 int i, j, ndeltas, best_ndeltas;
547 off_t size, best_size;
548 const int max_base_candidates = 3;
549 size_t delta_memsize = 0;
550 const size_t max_delta_memsize = 4 * GOT_DELTA_RESULT_SIZE_CACHED_MAX;
551 int outfd = -1;
552 uint32_t delta_seed;
554 delta_seed = arc4random();
556 qsort(meta, nmeta, sizeof(struct got_pack_meta *), delta_order_cmp);
557 for (i = 0; i < nmeta; i++) {
558 if (cancel_cb) {
559 err = (*cancel_cb)(cancel_arg);
560 if (err)
561 break;
563 err = got_pack_report_progress(progress_cb, progress_arg, rl,
564 ncolored, nfound, ntrees, 0L, ncommits, nreused + nmeta,
565 nreused + i, 0);
566 if (err)
567 goto done;
568 m = meta[i];
570 if (m->obj_type == GOT_OBJ_TYPE_COMMIT ||
571 m->obj_type == GOT_OBJ_TYPE_TAG)
572 continue;
574 err = got_object_raw_open(&raw, &outfd, repo, &m->id);
575 if (err)
576 goto done;
577 m->size = raw->size;
579 if (raw->f == NULL) {
580 err = got_deltify_init_mem(&m->dtab, raw->data,
581 raw->hdrlen, raw->size + raw->hdrlen, delta_seed);
582 } else {
583 err = got_deltify_init(&m->dtab, raw->f, raw->hdrlen,
584 raw->size + raw->hdrlen, delta_seed);
586 if (err)
587 goto done;
589 if (i > max_base_candidates) {
590 struct got_pack_meta *n = NULL;
591 n = meta[i - (max_base_candidates + 1)];
592 got_deltify_free(n->dtab);
593 n->dtab = NULL;
596 best_size = raw->size;
597 best_ndeltas = 0;
598 for (j = MAX(0, i - max_base_candidates); j < i; j++) {
599 if (cancel_cb) {
600 err = (*cancel_cb)(cancel_arg);
601 if (err)
602 goto done;
604 base = meta[j];
605 /* long chains make unpacking slow, avoid such bases */
606 if (base->nchain >= 128 ||
607 base->obj_type != m->obj_type)
608 continue;
610 err = got_object_raw_open(&base_raw, &outfd, repo,
611 &base->id);
612 if (err)
613 goto done;
615 if (raw->f == NULL && base_raw->f == NULL) {
616 err = got_deltify_mem_mem(&deltas, &ndeltas,
617 raw->data, raw->hdrlen,
618 raw->size + raw->hdrlen, delta_seed,
619 base->dtab, base_raw->data,
620 base_raw->hdrlen,
621 base_raw->size + base_raw->hdrlen);
622 } else if (raw->f == NULL) {
623 err = got_deltify_mem_file(&deltas, &ndeltas,
624 raw->data, raw->hdrlen,
625 raw->size + raw->hdrlen, delta_seed,
626 base->dtab, base_raw->f,
627 base_raw->hdrlen,
628 base_raw->size + base_raw->hdrlen);
629 } else if (base_raw->f == NULL) {
630 err = got_deltify_file_mem(&deltas, &ndeltas,
631 raw->f, raw->hdrlen,
632 raw->size + raw->hdrlen, delta_seed,
633 base->dtab, base_raw->data,
634 base_raw->hdrlen,
635 base_raw->size + base_raw->hdrlen);
636 } else {
637 err = got_deltify(&deltas, &ndeltas,
638 raw->f, raw->hdrlen,
639 raw->size + raw->hdrlen, delta_seed,
640 base->dtab, base_raw->f, base_raw->hdrlen,
641 base_raw->size + base_raw->hdrlen);
643 got_object_raw_close(base_raw);
644 base_raw = NULL;
645 if (err)
646 goto done;
648 size = delta_size(deltas, ndeltas);
649 if (size + 32 < best_size){
650 /*
651 * if we already picked a best delta,
652 * replace it.
653 */
654 best_size = size;
655 free(best_deltas);
656 best_deltas = deltas;
657 best_ndeltas = ndeltas;
658 deltas = NULL;
659 m->nchain = base->nchain + 1;
660 m->prev = base;
661 m->head = base->head;
662 if (m->head == NULL)
663 m->head = base;
664 } else {
665 free(deltas);
666 deltas = NULL;
667 ndeltas = 0;
671 if (best_ndeltas > 0) {
672 if (best_size <= GOT_DELTA_RESULT_SIZE_CACHED_MAX &&
673 delta_memsize + best_size <= max_delta_memsize) {
674 delta_memsize += best_size;
675 err = encode_delta_in_mem(m, raw, best_deltas,
676 best_ndeltas, best_size, m->prev->size);
677 } else {
678 m->delta_offset = ftello(delta_cache);
679 err = encode_delta(m, raw, best_deltas,
680 best_ndeltas, m->prev->size, delta_cache);
682 free(best_deltas);
683 best_deltas = NULL;
684 best_ndeltas = 0;
685 if (err)
686 goto done;
689 got_object_raw_close(raw);
690 raw = NULL;
692 done:
693 for (i = MAX(0, nmeta - max_base_candidates); i < nmeta; i++) {
694 got_deltify_free(meta[i]->dtab);
695 meta[i]->dtab = NULL;
697 if (raw)
698 got_object_raw_close(raw);
699 if (base_raw)
700 got_object_raw_close(base_raw);
701 if (outfd != -1 && close(outfd) == -1 && err == NULL)
702 err = got_error_from_errno("close");
703 free(deltas);
704 free(best_deltas);
705 return err;
708 static const struct got_error *
709 search_packidx(int *found, struct got_object_id *id,
710 struct got_repository *repo)
712 const struct got_error *err = NULL;
713 struct got_packidx *packidx = NULL;
714 int idx;
716 *found = 0;
718 err = got_repo_search_packidx(&packidx, &idx, repo, id);
719 if (err == NULL)
720 *found = 1; /* object is already packed */
721 else if (err->code == GOT_ERR_NO_OBJ)
722 err = NULL;
723 return err;
726 const struct got_error *
727 got_pack_add_object(int want_meta, struct got_object_idset *idset,
728 struct got_object_id *id, const char *path, int obj_type,
729 time_t mtime, uint32_t seed, int loose_obj_only,
730 struct got_repository *repo, int *ncolored, int *nfound, int *ntrees,
731 got_pack_progress_cb progress_cb, void *progress_arg,
732 struct got_ratelimit *rl)
734 const struct got_error *err;
735 struct got_pack_meta *m = NULL;
737 if (loose_obj_only) {
738 int is_packed;
739 err = search_packidx(&is_packed, id, repo);
740 if (err)
741 return err;
742 if (is_packed && want_meta)
743 return NULL;
746 if (want_meta) {
747 err = alloc_meta(&m, id, path, obj_type, mtime, seed);
748 if (err)
749 return err;
751 (*nfound)++;
752 err = got_pack_report_progress(progress_cb, progress_arg, rl,
753 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
754 if (err) {
755 clear_meta(m);
756 free(m);
757 return err;
761 err = got_object_idset_add(idset, id, m);
762 if (err) {
763 clear_meta(m);
764 free(m);
766 return err;
769 const struct got_error *
770 got_pack_load_tree_entries(struct got_object_id_queue *ids, int want_meta,
771 struct got_object_idset *idset, struct got_object_idset *idset_exclude,
772 struct got_tree_object *tree,
773 const char *dpath, time_t mtime, uint32_t seed, struct got_repository *repo,
774 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
775 got_pack_progress_cb progress_cb, void *progress_arg,
776 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
778 const struct got_error *err;
779 char *p = NULL;
780 int i;
782 (*ntrees)++;
783 err = got_pack_report_progress(progress_cb, progress_arg, rl,
784 *ncolored, *nfound, *ntrees, 0L, 0, 0, 0, 0);
785 if (err)
786 return err;
788 for (i = 0; i < got_object_tree_get_nentries(tree); i++) {
789 struct got_tree_entry *e = got_object_tree_get_entry(tree, i);
790 struct got_object_id *id = got_tree_entry_get_id(e);
791 mode_t mode = got_tree_entry_get_mode(e);
793 if (cancel_cb) {
794 err = (*cancel_cb)(cancel_arg);
795 if (err)
796 break;
799 if (got_object_tree_entry_is_submodule(e) ||
800 got_object_idset_contains(idset, id) ||
801 got_object_idset_contains(idset_exclude, id))
802 continue;
804 /*
805 * If got-read-pack is crawling trees for us then
806 * we are only here to collect blob IDs.
807 */
808 if (ids == NULL && S_ISDIR(mode))
809 continue;
811 if (asprintf(&p, "%s%s%s", dpath,
812 got_path_is_root_dir(dpath) ? "" : "/",
813 got_tree_entry_get_name(e)) == -1) {
814 err = got_error_from_errno("asprintf");
815 break;
818 if (S_ISDIR(mode)) {
819 struct got_object_qid *qid;
820 err = got_object_qid_alloc(&qid, id);
821 if (err)
822 break;
823 qid->data = p;
824 p = NULL;
825 STAILQ_INSERT_TAIL(ids, qid, entry);
826 } else if (S_ISREG(mode) || S_ISLNK(mode)) {
827 err = got_pack_add_object(want_meta,
828 want_meta ? idset : idset_exclude, id, p,
829 GOT_OBJ_TYPE_BLOB, mtime, seed, loose_obj_only,
830 repo, ncolored, nfound, ntrees,
831 progress_cb, progress_arg, rl);
832 if (err)
833 break;
834 free(p);
835 p = NULL;
836 } else {
837 free(p);
838 p = NULL;
842 free(p);
843 return err;
846 const struct got_error *
847 got_pack_load_tree(int want_meta, struct got_object_idset *idset,
848 struct got_object_idset *idset_exclude,
849 struct got_object_id *tree_id, const char *dpath, time_t mtime,
850 uint32_t seed, struct got_repository *repo, int loose_obj_only,
851 int *ncolored, int *nfound, int *ntrees,
852 got_pack_progress_cb progress_cb, void *progress_arg,
853 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
855 const struct got_error *err = NULL;
856 struct got_object_id_queue tree_ids;
857 struct got_object_qid *qid;
858 struct got_tree_object *tree = NULL;
860 if (got_object_idset_contains(idset, tree_id) ||
861 got_object_idset_contains(idset_exclude, tree_id))
862 return NULL;
864 err = got_object_qid_alloc(&qid, tree_id);
865 if (err)
866 return err;
867 qid->data = strdup(dpath);
868 if (qid->data == NULL) {
869 err = got_error_from_errno("strdup");
870 got_object_qid_free(qid);
871 return err;
874 STAILQ_INIT(&tree_ids);
875 STAILQ_INSERT_TAIL(&tree_ids, qid, entry);
877 while (!STAILQ_EMPTY(&tree_ids)) {
878 const char *path;
879 if (cancel_cb) {
880 err = (*cancel_cb)(cancel_arg);
881 if (err)
882 break;
885 qid = STAILQ_FIRST(&tree_ids);
886 STAILQ_REMOVE_HEAD(&tree_ids, entry);
887 path = qid->data;
889 if (got_object_idset_contains(idset, &qid->id) ||
890 got_object_idset_contains(idset_exclude, &qid->id)) {
891 free(qid->data);
892 got_object_qid_free(qid);
893 continue;
896 err = got_pack_add_object(want_meta,
897 want_meta ? idset : idset_exclude,
898 &qid->id, path, GOT_OBJ_TYPE_TREE,
899 mtime, seed, loose_obj_only, repo,
900 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
901 if (err) {
902 free(qid->data);
903 got_object_qid_free(qid);
904 break;
907 err = got_object_open_as_tree(&tree, repo, &qid->id);
908 if (err) {
909 free(qid->data);
910 got_object_qid_free(qid);
911 break;
914 err = got_pack_load_tree_entries(&tree_ids, want_meta, idset,
915 idset_exclude, tree, path, mtime, seed, repo,
916 loose_obj_only, ncolored, nfound, ntrees,
917 progress_cb, progress_arg, rl,
918 cancel_cb, cancel_arg);
919 free(qid->data);
920 got_object_qid_free(qid);
921 if (err)
922 break;
924 got_object_tree_close(tree);
925 tree = NULL;
928 STAILQ_FOREACH(qid, &tree_ids, entry)
929 free(qid->data);
930 got_object_id_queue_free(&tree_ids);
931 if (tree)
932 got_object_tree_close(tree);
933 return err;
936 static const struct got_error *
937 load_commit(int want_meta, struct got_object_idset *idset,
938 struct got_object_idset *idset_exclude,
939 struct got_object_id *id, struct got_repository *repo, uint32_t seed,
940 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
941 got_pack_progress_cb progress_cb, void *progress_arg,
942 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
944 const struct got_error *err;
945 struct got_commit_object *commit;
947 if (got_object_idset_contains(idset, id) ||
948 got_object_idset_contains(idset_exclude, id))
949 return NULL;
951 if (loose_obj_only) {
952 int is_packed;
953 err = search_packidx(&is_packed, id, repo);
954 if (err)
955 return err;
956 if (is_packed && want_meta)
957 return NULL;
960 err = got_object_open_as_commit(&commit, repo, id);
961 if (err)
962 return err;
964 err = got_pack_add_object(want_meta,
965 want_meta ? idset : idset_exclude, id, "", GOT_OBJ_TYPE_COMMIT,
966 got_object_commit_get_committer_time(commit), seed,
967 loose_obj_only, repo,
968 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
969 if (err)
970 goto done;
972 err = got_pack_load_tree(want_meta, idset, idset_exclude,
973 got_object_commit_get_tree_id(commit),
974 "", got_object_commit_get_committer_time(commit), seed,
975 repo, loose_obj_only, ncolored, nfound, ntrees,
976 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
977 done:
978 got_object_commit_close(commit);
979 return err;
982 static const struct got_error *
983 load_tag(int want_meta, struct got_object_idset *idset,
984 struct got_object_idset *idset_exclude,
985 struct got_object_id *id, struct got_repository *repo, uint32_t seed,
986 int loose_obj_only, int *ncolored, int *nfound, int *ntrees,
987 got_pack_progress_cb progress_cb, void *progress_arg,
988 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
990 const struct got_error *err;
991 struct got_tag_object *tag = NULL;
993 if (got_object_idset_contains(idset, id) ||
994 got_object_idset_contains(idset_exclude, id))
995 return NULL;
997 if (loose_obj_only) {
998 int is_packed;
999 err = search_packidx(&is_packed, id, repo);
1000 if (err)
1001 return err;
1002 if (is_packed && want_meta)
1003 return NULL;
1006 err = got_object_open_as_tag(&tag, repo, id);
1007 if (err)
1008 return err;
1010 err = got_pack_add_object(want_meta,
1011 want_meta ? idset : idset_exclude, id, "", GOT_OBJ_TYPE_TAG,
1012 got_object_tag_get_tagger_time(tag), seed, loose_obj_only, repo,
1013 ncolored, nfound, ntrees, progress_cb, progress_arg, rl);
1014 if (err)
1015 goto done;
1017 switch (got_object_tag_get_object_type(tag)) {
1018 case GOT_OBJ_TYPE_COMMIT:
1019 err = load_commit(want_meta, idset, idset_exclude,
1020 got_object_tag_get_object_id(tag), repo, seed,
1021 loose_obj_only, ncolored, nfound, ntrees,
1022 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1023 break;
1024 case GOT_OBJ_TYPE_TREE:
1025 err = got_pack_load_tree(want_meta, idset, idset_exclude,
1026 got_object_tag_get_object_id(tag), "",
1027 got_object_tag_get_tagger_time(tag), seed, repo,
1028 loose_obj_only, ncolored, nfound, ntrees,
1029 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1030 break;
1031 default:
1032 break;
1035 done:
1036 got_object_tag_close(tag);
1037 return err;
1040 const struct got_error *
1041 got_pack_paint_commit(struct got_object_qid *qid, intptr_t color)
1043 if (color < 0 || color >= COLOR_MAX)
1044 return got_error(GOT_ERR_RANGE);
1046 qid->data = (void *)color;
1047 return NULL;
1050 const struct got_error *
1051 got_pack_queue_commit_id(struct got_object_id_queue *ids,
1052 struct got_object_id *id, intptr_t color, struct got_repository *repo)
1054 const struct got_error *err;
1055 struct got_object_qid *qid;
1057 err = got_object_qid_alloc(&qid, id);
1058 if (err)
1059 return err;
1061 STAILQ_INSERT_TAIL(ids, qid, entry);
1062 return got_pack_paint_commit(qid, color);
/* State passed to the append_id() callback. */
struct append_id_arg {
	/* output array which receives the duplicated object IDs */
	struct got_object_id **array;
	/* index of the most recently filled slot; -1 while empty */
	int idx;
	/* IDs contained in this set are not appended */
	struct got_object_idset *drop;
	/* IDs contained in this set are not appended */
	struct got_object_idset *skip;
};
1072 static const struct got_error *
1073 append_id(struct got_object_id *id, void *data, void *arg)
1075 struct append_id_arg *a = arg;
1077 if (got_object_idset_contains(a->skip, id) ||
1078 got_object_idset_contains(a->drop, id))
1079 return NULL;
1081 a->array[++a->idx] = got_object_id_dup(id);
1082 if (a->array[a->idx] == NULL)
1083 return got_error_from_errno("got_object_id_dup");
1085 return NULL;
1088 static const struct got_error *
1089 queue_commit_or_tag_id(struct got_object_id *id, intptr_t color,
1090 struct got_object_id_queue *ids, struct got_repository *repo)
1092 const struct got_error *err;
1093 struct got_tag_object *tag = NULL;
1094 int obj_type;
1096 err = got_object_get_type(&obj_type, repo, id);
1097 if (err)
1098 return err;
1100 if (obj_type == GOT_OBJ_TYPE_TAG) {
1101 err = got_object_open_as_tag(&tag, repo, id);
1102 if (err)
1103 return err;
1104 obj_type = got_object_tag_get_object_type(tag);
1105 id = got_object_tag_get_object_id(tag);
1108 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1109 err = got_pack_queue_commit_id(ids, id, color, repo);
1110 if (err)
1111 goto done;
1113 done:
1114 if (tag)
1115 got_object_tag_close(tag);
1116 return err;
1119 const struct got_error *
1120 got_pack_find_pack_for_commit_painting(struct got_packidx **best_packidx,
1121 struct got_object_id_queue *ids, int nids, struct got_repository *repo)
1123 const struct got_error *err = NULL;
1124 struct got_pathlist_entry *pe;
1125 const char *best_packidx_path = NULL;
1126 int nobj_max = 0;
1127 int ncommits_max = 0;
1129 *best_packidx = NULL;
1132 * Find the largest pack which contains at least some of the
1133 * commits we are interested in.
1135 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
1136 const char *path_packidx = pe->path;
1137 struct got_packidx *packidx;
1138 int nobj, idx, ncommits = 0;
1139 struct got_object_qid *qid;
1141 err = got_repo_get_packidx(&packidx, path_packidx, repo);
1142 if (err)
1143 break;
1145 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
1146 if (nobj <= nobj_max)
1147 continue;
1149 STAILQ_FOREACH(qid, ids, entry) {
1150 idx = got_packidx_get_object_idx(packidx, &qid->id);
1151 if (idx != -1)
1152 ncommits++;
1154 if (ncommits > ncommits_max) {
1155 best_packidx_path = path_packidx;
1156 nobj_max = nobj;
1157 ncommits_max = ncommits;
1161 if (best_packidx_path && err == NULL) {
1162 err = got_repo_get_packidx(best_packidx, best_packidx_path,
1163 repo);
1166 return err;
1169 static const struct got_error *
1170 findtwixt(struct got_object_id ***res, int *nres, int *ncolored,
1171 struct got_object_id **head, int nhead,
1172 struct got_object_id **tail, int ntail,
1173 struct got_repository *repo,
1174 got_pack_progress_cb progress_cb, void *progress_arg,
1175 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1177 const struct got_error *err = NULL;
1178 struct got_object_id_queue ids;
1179 struct got_object_idset *keep, *drop, *skip = NULL;
1180 int i, nkeep;
1182 STAILQ_INIT(&ids);
1183 *res = NULL;
1184 *nres = 0;
1185 *ncolored = 0;
1187 keep = got_object_idset_alloc();
1188 if (keep == NULL)
1189 return got_error_from_errno("got_object_idset_alloc");
1191 drop = got_object_idset_alloc();
1192 if (drop == NULL) {
1193 err = got_error_from_errno("got_object_idset_alloc");
1194 goto done;
1197 skip = got_object_idset_alloc();
1198 if (skip == NULL) {
1199 err = got_error_from_errno("got_object_idset_alloc");
1200 goto done;
1203 for (i = 0; i < nhead; i++) {
1204 struct got_object_id *id = head[i];
1205 if (id == NULL)
1206 continue;
1207 err = queue_commit_or_tag_id(id, COLOR_KEEP, &ids, repo);
1208 if (err)
1209 goto done;
1212 for (i = 0; i < ntail; i++) {
1213 struct got_object_id *id = tail[i];
1214 if (id == NULL)
1215 continue;
1216 err = queue_commit_or_tag_id(id, COLOR_DROP, &ids, repo);
1217 if (err)
1218 goto done;
1221 err = got_pack_paint_commits(ncolored, &ids, nhead + ntail,
1222 keep, drop, skip, repo, progress_cb, progress_arg, rl,
1223 cancel_cb, cancel_arg);
1224 if (err)
1225 goto done;
1227 nkeep = got_object_idset_num_elements(keep);
1228 if (nkeep > 0) {
1229 struct append_id_arg arg;
1230 arg.array = calloc(nkeep, sizeof(struct got_object_id *));
1231 if (arg.array == NULL) {
1232 err = got_error_from_errno("calloc");
1233 goto done;
1235 arg.idx = -1;
1236 arg.skip = skip;
1237 arg.drop = drop;
1238 err = got_object_idset_for_each(keep, append_id, &arg);
1239 if (err) {
1240 free(arg.array);
1241 goto done;
1243 *res = arg.array;
1244 *nres = arg.idx + 1;
1246 done:
1247 got_object_idset_free(keep);
1248 got_object_idset_free(drop);
1249 if (skip)
1250 got_object_idset_free(skip);
1251 got_object_id_queue_free(&ids);
1252 return err;
1255 static const struct got_error *
1256 find_pack_for_enumeration(struct got_packidx **best_packidx,
1257 struct got_object_id **ids, int nids, struct got_repository *repo)
1259 const struct got_error *err = NULL;
1260 struct got_pathlist_entry *pe;
1261 const char *best_packidx_path = NULL;
1262 int nobj_max = 0;
1263 int ncommits_max = 0;
1265 *best_packidx = NULL;
1268 * Find the largest pack which contains at least some of the
1269 * commits and tags we are interested in.
1271 TAILQ_FOREACH(pe, &repo->packidx_paths, entry) {
1272 const char *path_packidx = pe->path;
1273 struct got_packidx *packidx;
1274 int nobj, i, idx, ncommits = 0;
1276 err = got_repo_get_packidx(&packidx, path_packidx, repo);
1277 if (err)
1278 break;
1280 nobj = be32toh(packidx->hdr.fanout_table[0xff]);
1281 if (nobj <= nobj_max)
1282 continue;
1284 for (i = 0; i < nids; i++) {
1285 idx = got_packidx_get_object_idx(packidx, ids[i]);
1286 if (idx != -1)
1287 ncommits++;
1289 if (ncommits > ncommits_max) {
1290 best_packidx_path = path_packidx;
1291 nobj_max = nobj;
1292 ncommits_max = ncommits;
1296 if (best_packidx_path && err == NULL) {
1297 err = got_repo_get_packidx(best_packidx, best_packidx_path,
1298 repo);
1301 return err;
1304 static const struct got_error *
1305 load_object_ids(int *ncolored, int *nfound, int *ntrees,
1306 struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs,
1307 struct got_object_id **ours, int nours, struct got_repository *repo,
1308 uint32_t seed, int loose_obj_only, got_pack_progress_cb progress_cb,
1309 void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb,
1310 void *cancel_arg)
1312 const struct got_error *err = NULL;
1313 struct got_object_id **ids = NULL;
1314 struct got_packidx *packidx = NULL;
1315 int i, nobj = 0, obj_type, found_all_objects = 0;
1316 struct got_object_idset *idset_exclude;
1318 idset_exclude = got_object_idset_alloc();
1319 if (idset_exclude == NULL)
1320 return got_error_from_errno("got_object_idset_alloc");
1322 *ncolored = 0;
1323 *nfound = 0;
1324 *ntrees = 0;
1326 err = findtwixt(&ids, &nobj, ncolored, ours, nours, theirs, ntheirs,
1327 repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1328 if (err)
1329 goto done;
1331 err = find_pack_for_enumeration(&packidx, theirs, ntheirs, repo);
1332 if (err)
1333 goto done;
1334 if (packidx) {
1335 err = got_pack_load_packed_object_ids(&found_all_objects,
1336 theirs, ntheirs, NULL, 0, 0, seed, idset, idset_exclude,
1337 loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
1338 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1339 if (err)
1340 goto done;
1343 for (i = 0; i < ntheirs; i++) {
1344 struct got_object_id *id = theirs[i];
1345 if (id == NULL)
1346 continue;
1347 err = got_object_get_type(&obj_type, repo, id);
1348 if (err)
1349 return err;
1350 if (obj_type == GOT_OBJ_TYPE_COMMIT) {
1351 if (!found_all_objects) {
1352 err = load_commit(0, idset, idset_exclude,
1353 id, repo, seed, loose_obj_only,
1354 ncolored, nfound, ntrees,
1355 progress_cb, progress_arg, rl,
1356 cancel_cb, cancel_arg);
1357 if (err)
1358 goto done;
1360 } else if (obj_type == GOT_OBJ_TYPE_TAG) {
1361 err = load_tag(0, idset, idset_exclude, id, repo,
1362 seed, loose_obj_only, ncolored, nfound, ntrees,
1363 progress_cb, progress_arg, rl,
1364 cancel_cb, cancel_arg);
1365 if (err)
1366 goto done;
1370 found_all_objects = 0;
1371 err = find_pack_for_enumeration(&packidx, ids, nobj, repo);
1372 if (err)
1373 goto done;
1374 if (packidx) {
1375 err = got_pack_load_packed_object_ids(&found_all_objects, ids,
1376 nobj, theirs, ntheirs, 1, seed, idset, idset_exclude,
1377 loose_obj_only, repo, packidx, ncolored, nfound, ntrees,
1378 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1379 if (err)
1380 goto done;
1383 if (!found_all_objects) {
1384 for (i = 0; i < nobj; i++) {
1385 err = load_commit(1, idset, idset_exclude, ids[i],
1386 repo, seed, loose_obj_only, ncolored, nfound,
1387 ntrees, progress_cb, progress_arg, rl,
1388 cancel_cb, cancel_arg);
1389 if (err)
1390 goto done;
1394 for (i = 0; i < nours; i++) {
1395 struct got_object_id *id = ours[i];
1396 struct got_pack_meta *m;
1397 if (id == NULL)
1398 continue;
1399 m = got_object_idset_get(idset, id);
1400 if (m == NULL) {
1401 err = got_object_get_type(&obj_type, repo, id);
1402 if (err)
1403 goto done;
1404 } else
1405 obj_type = m->obj_type;
1406 if (obj_type != GOT_OBJ_TYPE_TAG)
1407 continue;
1408 err = load_tag(1, idset, idset_exclude, id, repo,
1409 seed, loose_obj_only, ncolored, nfound, ntrees,
1410 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1411 if (err)
1412 goto done;
1414 done:
1415 for (i = 0; i < nobj; i++) {
1416 free(ids[i]);
1418 free(ids);
1419 got_object_idset_free(idset_exclude);
1420 return err;
/*
 * Feed 'len' bytes from 'buf' into the hash context 'ctx', then write
 * the same bytes to 'fd'. Returns the result of got_poll_write_full().
 */
static const struct got_error *
hwrite(int fd, const void *buf, off_t len, struct got_hash *ctx)
{
	const struct got_error *werr;

	/* The hash is updated before the write is attempted. */
	got_hash_update(ctx, buf, len);

	werr = got_poll_write_full(fd, buf, len);
	return werr;
}
1430 static const struct got_error *
1431 hcopy(FILE *fsrc, int fd_dst, off_t len, struct got_hash *ctx)
1433 const struct got_error *err;
1434 unsigned char buf[65536];
1435 off_t remain = len;
1436 size_t n;
1438 while (remain > 0) {
1439 size_t copylen = MIN(sizeof(buf), remain);
1440 n = fread(buf, 1, copylen, fsrc);
1441 if (n != copylen)
1442 return got_ferror(fsrc, GOT_ERR_IO);
1443 got_hash_update(ctx, buf, copylen);
1444 err = got_poll_write_full(fd_dst, buf, copylen);
1445 if (err)
1446 return err;
1447 remain -= copylen;
1450 return NULL;
1453 static const struct got_error *
1454 hcopy_mmap(uint8_t *src, off_t src_offset, size_t src_size,
1455 int fd, off_t len, struct got_hash *ctx)
1457 if (src_offset + len > src_size)
1458 return got_error(GOT_ERR_RANGE);
1460 got_hash_update(ctx, src + src_offset, len);
1461 return got_poll_write_full(fd, src + src_offset, len);
/*
 * Store the 32-bit value 'n' into the 4 bytes at 'b', most significant
 * byte first.
 */
static void
putbe32(char *b, uint32_t n)
{
	int pos, shift;

	for (pos = 0, shift = 24; pos < 4; pos++, shift -= 8)
		b[pos] = n >> shift;
}
1473 static int
1474 write_order_cmp(const void *pa, const void *pb)
1476 struct got_pack_meta *a, *b, *ahd, *bhd;
1478 a = *(struct got_pack_meta **)pa;
1479 b = *(struct got_pack_meta **)pb;
1480 ahd = (a->head == NULL) ? a : a->head;
1481 bhd = (b->head == NULL) ? b : b->head;
1482 if (bhd->mtime < ahd->mtime)
1483 return -1;
1484 if (bhd->mtime > ahd->mtime)
1485 return 1;
1486 if (bhd < ahd)
1487 return -1;
1488 if (bhd > ahd)
1489 return 1;
1490 if (a->nchain != b->nchain)
1491 return a->nchain - b->nchain;
1492 if (a->mtime < b->mtime)
1493 return -1;
1494 if (a->mtime > b->mtime)
1495 return 1;
1496 return got_object_id_cmp(&a->id, &b->id);
1499 static int
1500 reuse_write_order_cmp(const void *pa, const void *pb)
1502 struct got_pack_meta *a, *b;
1504 a = *(struct got_pack_meta **)pa;
1505 b = *(struct got_pack_meta **)pb;
1507 if (a->reused_delta_offset < b->reused_delta_offset)
1508 return -1;
1509 if (a->reused_delta_offset > b->reused_delta_offset)
1510 return 1;
1511 return 0;
1514 static const struct got_error *
1515 packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len)
1517 size_t i;
1519 *hdrlen = 0;
1521 hdr[0] = obj_type << 4;
1522 hdr[0] |= len & 0xf;
1523 len >>= 4;
1524 for (i = 1; len != 0; i++){
1525 if (i >= bufsize)
1526 return got_error(GOT_ERR_NO_SPACE);
1527 hdr[i - 1] |= GOT_DELTA_SIZE_MORE;
1528 hdr[i] = len & GOT_DELTA_SIZE_VAL_MASK;
1529 len >>= GOT_DELTA_SIZE_SHIFT;
1532 *hdrlen = i;
1533 return NULL;
1536 static int
1537 packoff(char *hdr, off_t off)
1539 int i, j;
1540 char rbuf[8];
1542 rbuf[0] = off & GOT_DELTA_SIZE_VAL_MASK;
1543 for (i = 1; (off >>= GOT_DELTA_SIZE_SHIFT) != 0; i++) {
1544 rbuf[i] = (--off & GOT_DELTA_SIZE_VAL_MASK) |
1545 GOT_DELTA_SIZE_MORE;
1548 j = 0;
1549 while (i > 0)
1550 hdr[j++] = rbuf[--i];
1551 return j;
1554 static const struct got_error *
1555 deltahdr(off_t *packfile_size, struct got_hash *ctx, int packfd,
1556 int force_refdelta, struct got_pack_meta *m)
1558 const struct got_error *err;
1559 char buf[32];
1560 int nh;
1562 if (m->prev->off != 0 && !force_refdelta) {
1563 err = packhdr(&nh, buf, sizeof(buf),
1564 GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len);
1565 if (err)
1566 return err;
1567 nh += packoff(buf + nh, m->off - m->prev->off);
1568 err = hwrite(packfd, buf, nh, ctx);
1569 if (err)
1570 return err;
1571 *packfile_size += nh;
1572 } else {
1573 err = packhdr(&nh, buf, sizeof(buf),
1574 GOT_OBJ_TYPE_REF_DELTA, m->delta_len);
1575 if (err)
1576 return err;
1577 err = hwrite(packfd, buf, nh, ctx);
1578 if (err)
1579 return err;
1580 *packfile_size += nh;
1581 err = hwrite(packfd, m->prev->id.sha1,
1582 sizeof(m->prev->id.sha1), ctx);
1583 if (err)
1584 return err;
1585 *packfile_size += sizeof(m->prev->id.sha1);
1588 return NULL;
1591 static const struct got_error *
1592 write_packed_object(off_t *packfile_size, int packfd,
1593 FILE *delta_cache, uint8_t *delta_cache_map, size_t delta_cache_size,
1594 struct got_pack_meta *m, int *outfd, struct got_hash *ctx,
1595 struct got_repository *repo, int force_refdelta)
1597 const struct got_error *err = NULL;
1598 struct got_deflate_checksum csum;
1599 char buf[32];
1600 int nh;
1601 struct got_raw_object *raw = NULL;
1602 off_t outlen, delta_offset;
1604 memset(&csum, 0, sizeof(csum));
1605 csum.output_ctx = ctx;
1607 if (m->reused_delta_offset)
1608 delta_offset = m->reused_delta_offset;
1609 else
1610 delta_offset = m->delta_offset;
1612 m->off = *packfile_size;
1613 if (m->delta_len == 0) {
1614 err = got_object_raw_open(&raw, outfd, repo, &m->id);
1615 if (err)
1616 goto done;
1617 err = packhdr(&nh, buf, sizeof(buf),
1618 m->obj_type, raw->size);
1619 if (err)
1620 goto done;
1621 err = hwrite(packfd, buf, nh, ctx);
1622 if (err)
1623 goto done;
1624 *packfile_size += nh;
1625 if (raw->f == NULL) {
1626 err = got_deflate_to_fd_mmap(&outlen,
1627 raw->data + raw->hdrlen, 0, raw->size,
1628 packfd, &csum);
1629 if (err)
1630 goto done;
1631 } else {
1632 if (fseeko(raw->f, raw->hdrlen, SEEK_SET)
1633 == -1) {
1634 err = got_error_from_errno("fseeko");
1635 goto done;
1637 err = got_deflate_to_fd(&outlen, raw->f,
1638 raw->size, packfd, &csum);
1639 if (err)
1640 goto done;
1642 *packfile_size += outlen;
1643 got_object_raw_close(raw);
1644 raw = NULL;
1645 } else if (m->delta_buf) {
1646 err = deltahdr(packfile_size, ctx, packfd, force_refdelta, m);
1647 if (err)
1648 goto done;
1649 err = hwrite(packfd, m->delta_buf,
1650 m->delta_compressed_len, ctx);
1651 if (err)
1652 goto done;
1653 *packfile_size += m->delta_compressed_len;
1654 free(m->delta_buf);
1655 m->delta_buf = NULL;
1656 } else if (delta_cache_map) {
1657 err = deltahdr(packfile_size, ctx, packfd, force_refdelta, m);
1658 if (err)
1659 goto done;
1660 err = hcopy_mmap(delta_cache_map, delta_offset,
1661 delta_cache_size, packfd, m->delta_compressed_len,
1662 ctx);
1663 if (err)
1664 goto done;
1665 *packfile_size += m->delta_compressed_len;
1666 } else {
1667 if (fseeko(delta_cache, delta_offset, SEEK_SET) == -1) {
1668 err = got_error_from_errno("fseeko");
1669 goto done;
1671 err = deltahdr(packfile_size, ctx, packfd, force_refdelta, m);
1672 if (err)
1673 goto done;
1674 err = hcopy(delta_cache, packfd,
1675 m->delta_compressed_len, ctx);
1676 if (err)
1677 goto done;
1678 *packfile_size += m->delta_compressed_len;
1680 done:
1681 if (raw)
1682 got_object_raw_close(raw);
1683 return err;
1686 static const struct got_error *
1687 genpack(uint8_t *pack_sha1, int packfd, struct got_pack *reuse_pack,
1688 FILE *delta_cache, struct got_pack_meta **deltify, int ndeltify,
1689 struct got_pack_meta **reuse, int nreuse,
1690 int ncolored, int nfound, int ntrees, int nours,
1691 struct got_repository *repo, int force_refdelta,
1692 got_pack_progress_cb progress_cb, void *progress_arg,
1693 struct got_ratelimit *rl,
1694 got_cancel_cb cancel_cb, void *cancel_arg)
1696 const struct got_error *err = NULL;
1697 int i;
1698 struct got_hash ctx;
1699 struct got_pack_meta *m;
1700 char buf[32];
1701 off_t packfile_size = 0;
1702 int outfd = -1;
1703 int delta_cache_fd = -1;
1704 uint8_t *delta_cache_map = NULL;
1705 size_t delta_cache_size = 0;
1706 FILE *packfile = NULL;
1708 got_hash_init(&ctx, GOT_HASH_SHA1);
1710 #ifndef GOT_PACK_NO_MMAP
1711 delta_cache_fd = dup(fileno(delta_cache));
1712 if (delta_cache_fd != -1) {
1713 struct stat sb;
1714 if (fstat(delta_cache_fd, &sb) == -1) {
1715 err = got_error_from_errno("fstat");
1716 goto done;
1718 if (sb.st_size > 0 && sb.st_size <= SIZE_MAX) {
1719 delta_cache_map = mmap(NULL, sb.st_size,
1720 PROT_READ, MAP_PRIVATE, delta_cache_fd, 0);
1721 if (delta_cache_map == MAP_FAILED) {
1722 if (errno != ENOMEM) {
1723 err = got_error_from_errno("mmap");
1724 goto done;
1726 delta_cache_map = NULL; /* fallback on stdio */
1727 } else
1728 delta_cache_size = (size_t)sb.st_size;
1731 #endif
1732 err = hwrite(packfd, "PACK", 4, &ctx);
1733 if (err)
1734 goto done;
1735 putbe32(buf, GOT_PACKFILE_VERSION);
1736 err = hwrite(packfd, buf, 4, &ctx);
1737 if (err)
1738 goto done;
1739 putbe32(buf, ndeltify + nreuse);
1740 err = hwrite(packfd, buf, 4, &ctx);
1741 if (err)
1742 goto done;
1744 qsort(deltify, ndeltify, sizeof(struct got_pack_meta *),
1745 write_order_cmp);
1746 for (i = 0; i < ndeltify; i++) {
1747 err = got_pack_report_progress(progress_cb, progress_arg, rl,
1748 ncolored, nfound, ntrees, packfile_size, nours,
1749 ndeltify + nreuse, ndeltify + nreuse, i);
1750 if (err)
1751 goto done;
1752 m = deltify[i];
1753 err = write_packed_object(&packfile_size, packfd,
1754 delta_cache, delta_cache_map, delta_cache_size,
1755 m, &outfd, &ctx, repo, force_refdelta);
1756 if (err)
1757 goto done;
1760 qsort(reuse, nreuse, sizeof(struct got_pack_meta *),
1761 reuse_write_order_cmp);
1762 if (nreuse > 0 && reuse_pack->map == NULL) {
1763 int fd = dup(reuse_pack->fd);
1764 if (fd == -1) {
1765 err = got_error_from_errno("dup");
1766 goto done;
1768 packfile = fdopen(fd, "r");
1769 if (packfile == NULL) {
1770 err = got_error_from_errno("fdopen");
1771 close(fd);
1772 goto done;
1775 for (i = 0; i < nreuse; i++) {
1776 err = got_pack_report_progress(progress_cb, progress_arg, rl,
1777 ncolored, nfound, ntrees, packfile_size, nours,
1778 ndeltify + nreuse, ndeltify + nreuse, ndeltify + i);
1779 if (err)
1780 goto done;
1781 m = reuse[i];
1782 err = write_packed_object(&packfile_size, packfd,
1783 packfile, reuse_pack->map, reuse_pack->filesize,
1784 m, &outfd, &ctx, repo, force_refdelta);
1785 if (err)
1786 goto done;
1789 got_hash_final(&ctx, pack_sha1);
1790 err = got_poll_write_full(packfd, pack_sha1, SHA1_DIGEST_LENGTH);
1791 if (err)
1792 goto done;
1793 packfile_size += SHA1_DIGEST_LENGTH;
1794 packfile_size += sizeof(struct got_packfile_hdr);
1795 if (progress_cb) {
1796 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1797 packfile_size, nours, ndeltify + nreuse,
1798 ndeltify + nreuse, ndeltify + nreuse);
1799 if (err)
1800 goto done;
1802 done:
1803 if (outfd != -1 && close(outfd) == -1 && err == NULL)
1804 err = got_error_from_errno("close");
1805 if (delta_cache_map && munmap(delta_cache_map, delta_cache_size) == -1)
1806 err = got_error_from_errno("munmap");
1807 if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL)
1808 err = got_error_from_errno("close");
1809 if (packfile && fclose(packfile) == EOF && err == NULL)
1810 err = got_error_from_errno("fclose");
1811 return err;
1814 static const struct got_error *
1815 add_meta_idset_cb(struct got_object_id *id, void *data, void *arg)
1817 struct got_pack_meta *m = data;
1818 struct got_pack_metavec *v = arg;
1820 if (m->reused_delta_offset != 0)
1821 return NULL;
1823 return got_pack_add_meta(m, v);
1826 const struct got_error *
1827 got_pack_create(uint8_t *packsha1, int packfd, FILE *delta_cache,
1828 struct got_object_id **theirs, int ntheirs,
1829 struct got_object_id **ours, int nours,
1830 struct got_repository *repo, int loose_obj_only, int allow_empty,
1831 int force_refdelta, got_pack_progress_cb progress_cb, void *progress_arg,
1832 struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
1834 const struct got_error *err;
1835 struct got_object_idset *idset;
1836 struct got_packidx *reuse_packidx = NULL;
1837 struct got_pack *reuse_pack = NULL;
1838 struct got_pack_metavec deltify, reuse;
1839 int ncolored = 0, nfound = 0, ntrees = 0;
1840 size_t ndeltify;
1841 uint32_t seed;
1843 seed = arc4random();
1845 memset(&deltify, 0, sizeof(deltify));
1846 memset(&reuse, 0, sizeof(reuse));
1848 idset = got_object_idset_alloc();
1849 if (idset == NULL)
1850 return got_error_from_errno("got_object_idset_alloc");
1852 err = load_object_ids(&ncolored, &nfound, &ntrees, idset, theirs,
1853 ntheirs, ours, nours, repo, seed, loose_obj_only,
1854 progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1855 if (err)
1856 goto done;
1858 if (progress_cb) {
1859 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1860 0L, nours, got_object_idset_num_elements(idset), 0, 0);
1861 if (err)
1862 goto done;
1865 if (got_object_idset_num_elements(idset) == 0 && !allow_empty) {
1866 err = got_error(GOT_ERR_CANNOT_PACK);
1867 goto done;
1870 reuse.metasz = 64;
1871 reuse.meta = calloc(reuse.metasz,
1872 sizeof(struct got_pack_meta *));
1873 if (reuse.meta == NULL) {
1874 err = got_error_from_errno("calloc");
1875 goto done;
1878 err = got_pack_search_deltas(&reuse_packidx, &reuse_pack,
1879 &reuse, idset, ncolored, nfound, ntrees, nours,
1880 repo, progress_cb, progress_arg, rl, cancel_cb, cancel_arg);
1881 if (err)
1882 goto done;
1884 if (reuse_packidx && reuse_pack) {
1885 err = got_repo_pin_pack(repo, reuse_packidx, reuse_pack);
1886 if (err)
1887 goto done;
1890 if (fseeko(delta_cache, 0L, SEEK_END) == -1) {
1891 err = got_error_from_errno("fseeko");
1892 goto done;
1895 ndeltify = got_object_idset_num_elements(idset) - reuse.nmeta;
1896 if (ndeltify > 0) {
1897 deltify.meta = calloc(ndeltify, sizeof(struct got_pack_meta *));
1898 if (deltify.meta == NULL) {
1899 err = got_error_from_errno("calloc");
1900 goto done;
1902 deltify.metasz = ndeltify;
1904 err = got_object_idset_for_each(idset, add_meta_idset_cb,
1905 &deltify);
1906 if (err)
1907 goto done;
1908 if (deltify.nmeta > 0) {
1909 err = pick_deltas(deltify.meta, deltify.nmeta,
1910 ncolored, nfound, ntrees, nours, reuse.nmeta,
1911 delta_cache, repo, progress_cb, progress_arg, rl,
1912 cancel_cb, cancel_arg);
1913 if (err)
1914 goto done;
1918 if (fflush(delta_cache) == EOF) {
1919 err = got_error_from_errno("fflush");
1920 goto done;
1923 if (progress_cb) {
1925 * Report a 1-byte packfile write to indicate we are about
1926 * to start sending packfile data. gotd(8) needs this.
1928 err = progress_cb(progress_arg, ncolored, nfound, ntrees,
1929 1 /* packfile_size */, nours,
1930 got_object_idset_num_elements(idset),
1931 deltify.nmeta + reuse.nmeta, 0);
1932 if (err)
1933 goto done;
1936 /* Pinned pack may have moved to different cache slot. */
1937 reuse_pack = got_repo_get_pinned_pack(repo);
1939 err = genpack(packsha1, packfd, reuse_pack, delta_cache, deltify.meta,
1940 deltify.nmeta, reuse.meta, reuse.nmeta, ncolored, nfound, ntrees,
1941 nours, repo, force_refdelta, progress_cb, progress_arg, rl,
1942 cancel_cb, cancel_arg);
1943 if (err)
1944 goto done;
1945 done:
1946 free_nmeta(deltify.meta, deltify.nmeta);
1947 free_nmeta(reuse.meta, reuse.nmeta);
1948 got_object_idset_free(idset);
1949 got_repo_unpin_pack(repo);
1950 return err;