commit 67fd68496517d3103b130a1085efdd4591bd7416 from: Stefan Sperling date: Sun Feb 13 00:10:25 2022 UTC reuse existing deltas when creating pack files tested by thomas, naddy, and myself commit - b593496508f3e82abafe91570d78867dcbcae871 commit + 67fd68496517d3103b130a1085efdd4591bd7416 blob - 60920514cdf4045702691ca0ad19a5d080a8893e blob + 7a4aaaaec7e976d89c7989136f80818dd4091850 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -102,6 +102,14 @@ const struct got_error *got_object_open_loose_fd(int * struct got_repository *); const struct got_error *got_object_open_packed(struct got_object **, struct got_object_id *, struct got_repository *); +struct got_pack; +struct got_packidx; +const struct got_error *got_object_open_from_packfile(struct got_object **, + struct got_object_id *, struct got_pack *, struct got_packidx *, int, + struct got_repository *); +const struct got_error *got_object_read_raw_delta(uint64_t *, uint64_t *, + off_t *, off_t *, off_t *, struct got_object_id **, int, + struct got_packidx *, int, struct got_object_id *, struct got_repository *); const struct got_error *got_object_read_header_privsep(struct got_object **, struct got_object_id *, struct got_repository *, int); const struct got_error *got_object_open(struct got_object **, blob - 750be4962021434fb27d2dcc95867ff2e333a776 blob + 5346f7f2ce1559b966b7c6e2f1bde6da40d729a4 --- lib/got_lib_object_idset.h +++ lib/got_lib_object_idset.h @@ -31,3 +31,13 @@ const struct got_error *got_object_idset_for_each(stru const struct got_error *(*cb)(struct got_object_id *, void *, void *), void *); int got_object_idset_num_elements(struct got_object_idset *); + +struct got_object_idset_element; +struct got_object_idset_element *got_object_idset_get_element( + struct got_object_idset *, struct got_object_id *); +void *got_object_idset_get_element_data(struct got_object_idset_element *); +const struct got_error *got_object_idset_for_each_element(struct got_object_idset *, + const struct got_error 
*(*cb)(struct got_object_idset_element *, void *), void *); +void got_object_idset_remove_element(struct got_object_idset *, + struct got_object_idset_element *); + blob - 2a9c135628c6fed4aeef4fb17093f23e0b5df9ae blob + e8fb373e287ee80486d50ed07964d9d39924308d --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -22,6 +22,7 @@ struct got_pack { size_t filesize; struct got_privsep_child *privsep_child; int child_has_tempfiles; + int child_has_delta_outfd; struct got_delta_cache *delta_cache; }; @@ -96,6 +97,16 @@ struct got_packidx_v2_hdr { struct got_packidx_trailer *trailer; }; +struct got_pack_offset_index { + uint32_t offset; + uint32_t idx; +}; + +struct got_pack_large_offset_index { + uint64_t offset; + uint32_t idx; +}; + /* An open pack index file. */ struct got_packidx { char *path_packidx; /* actual on-disk path */ @@ -104,6 +115,8 @@ struct got_packidx { size_t len; size_t nlargeobj; struct got_packidx_v2_hdr hdr; /* convenient pointers into map */ + struct got_pack_offset_index *sorted_offsets; + struct got_pack_large_offset_index *sorted_large_offsets; }; struct got_packfile_hdr { @@ -177,6 +190,10 @@ const struct got_error *got_packidx_close(struct got_p const struct got_error *got_packidx_get_packfile_path(char **, const char *); off_t got_packidx_get_object_offset(struct got_packidx *, int idx); int got_packidx_get_object_idx(struct got_packidx *, struct got_object_id *); +const struct got_error *got_packidx_get_offset_idx(int *, struct got_packidx *, + off_t); +const struct got_error *got_packidx_get_object_id(struct got_object_id *, + struct got_packidx *, int); const struct got_error *got_packidx_match_id_str_prefix( struct got_object_id_queue *, struct got_packidx *, const char *); @@ -194,5 +211,8 @@ const struct got_error *got_packfile_extract_object(st struct got_object *, FILE *, FILE *, FILE *); const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *, struct got_object *, struct got_pack *); +const struct got_error 
*got_packfile_extract_raw_delta(uint8_t **, size_t *, + off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *, + struct got_pack *, struct got_packidx *, int); struct got_pack *got_repo_get_cached_pack(struct got_repository *, const char *); blob - ce62cbf5e948bfee2b29b0f0524208b1a15a93e9 blob + 274e89878290befef48084afc0ae191cd5c36b16 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -174,6 +174,11 @@ enum got_imsg_type { GOT_IMSG_RAW_OBJECT_OUTFD, GOT_IMSG_PACKED_RAW_OBJECT_REQUEST, GOT_IMSG_RAW_OBJECT, + + /* Read raw delta data from pack files. */ + GOT_IMSG_RAW_DELTA_OUTFD, + GOT_IMSG_RAW_DELTA_REQUEST, + GOT_IMSG_RAW_DELTA, }; /* Structure for GOT_IMSG_ERROR. */ @@ -260,6 +265,21 @@ struct got_imsg_raw_obj { */ #define GOT_PRIVSEP_INLINE_OBJECT_DATA_MAX \ (MAX_IMSGSIZE - IMSG_HEADER_SIZE - sizeof(struct got_imsg_raw_obj)) +}; + +/* Structure for GOT_IMSG_RAW_DELTA. */ +struct got_imsg_raw_delta { + uint8_t base_id[SHA1_DIGEST_LENGTH]; + uint64_t base_size; + uint64_t result_size; + off_t delta_size; + off_t delta_offset; + off_t delta_out_offset; + + /* + * Delta data has been written at delta_out_offset to the file + * descriptor passed via the GOT_IMSG_RAW_DELTA_OUTFD imsg. + */ }; /* Structure for GOT_IMSG_TAG data. */ @@ -427,7 +447,27 @@ struct got_imsg_packed_object { uint8_t id[SHA1_DIGEST_LENGTH]; int idx; } __attribute__((__packed__)); + +/* + * Structure for GOT_IMSG_DELTA data. + */ +struct got_imsg_delta { + /* These fields are the same as in struct got_delta. */ + off_t offset; + size_t tslen; + int type; + size_t size; + off_t data_offset; +}; +/* + * Structure for GOT_IMSG_RAW_DELTA_REQUEST data. 
+ */ +struct got_imsg_raw_delta_request { + uint8_t id[SHA1_DIGEST_LENGTH]; + int idx; +}; + /* Structure for GOT_IMSG_COMMIT_TRAVERSAL_REQUEST */ struct got_imsg_commit_traversal_request { uint8_t id[SHA1_DIGEST_LENGTH]; @@ -588,4 +628,12 @@ const struct got_error *got_privsep_recv_traversed_com struct got_commit_object **, struct got_object_id **, struct got_object_id_queue *, struct imsgbuf *); +const struct got_error *got_privsep_send_raw_delta_req(struct imsgbuf *, int, + struct got_object_id *); +const struct got_error *got_privsep_send_raw_delta_outfd(struct imsgbuf *, int); +const struct got_error *got_privsep_send_raw_delta(struct imsgbuf *, uint64_t, + uint64_t, off_t, off_t, off_t, struct got_object_id *); +const struct got_error *got_privsep_recv_raw_delta(uint64_t *, uint64_t *, + off_t *, off_t *, off_t *, struct got_object_id **, struct imsgbuf *); + void got_privsep_exec_child(int[2], const char *, const char *); blob - 831cb967da0e76447d97b70ff058416fbf492de4 blob + 798762e89557feb475f650e62e549abe41eb43c4 --- lib/got_lib_repository.h +++ lib/got_lib_repository.h @@ -122,7 +122,13 @@ const struct got_error*got_repo_cache_raw_object(struc struct got_raw_object *got_repo_get_cached_raw_object(struct got_repository *, struct got_object_id *); int got_repo_is_packidx_filename(const char *, size_t); +int got_repo_check_packidx_bloom_filter(struct got_repository *, + const char *, struct got_object_id *); const struct got_error *got_repo_search_packidx(struct got_packidx **, int *, struct got_repository *, struct got_object_id *); +const struct got_error *got_repo_list_packidx(struct got_pathlist_head *, + struct got_repository *); +const struct got_error *got_repo_get_packidx(struct got_packidx **, const char *, + struct got_repository *); const struct got_error *got_repo_cache_pack(struct got_pack **, struct got_repository *, const char *, struct got_packidx *); blob - ce95f8795c1e303f174dcdf4d6eeb41563569e6f blob + 
8218b2c4c146e832c63f84d4a9f6a43f8b3283f8
--- lib/object.c
+++ lib/object.c
@@ -267,6 +267,7 @@ start_pack_privsep_child(struct got_pack *pack, struct
 		return err;
 	}
 	pack->child_has_tempfiles = 0;
+	pack->child_has_delta_outfd = 0;
 
 	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fds) == -1) {
 		err = got_error_from_errno("socketpair");
@@ -374,6 +375,72 @@ got_object_open_packed(struct got_object **obj, struct
 done:
 	free(path_packfile);
 	return err;
+}
+
+const struct got_error *
+got_object_open_from_packfile(struct got_object **obj, struct got_object_id *id,
+    struct got_pack *pack, struct got_packidx *packidx, int obj_idx,
+    struct got_repository *repo)
+{
+	return read_packed_object_privsep(obj, repo, pack, packidx,
+	    obj_idx, id);
+}
+
+const struct got_error *
+got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size,
+    off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset,
+    struct got_object_id **base_id, int delta_cache_fd,
+    struct got_packidx *packidx, int obj_idx, struct got_object_id *id,
+    struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_pack *pack = NULL;
+	char *path_packfile;
+
+	*base_size = 0;
+	*result_size = 0;
+	*delta_size = 0;
+	*delta_offset = 0;
+	*delta_out_offset = 0;
+
+	err = got_packidx_get_packfile_path(&path_packfile,
+	    packidx->path_packidx);
+	if (err)
+		return err;
+
+	pack = got_repo_get_cached_pack(repo, path_packfile);
+	if (pack == NULL)
+		err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
+	/* The path was only needed for the cache lookup; do not leak it. */
+	free(path_packfile);
+	if (err)
+		return err;
+
+	if (pack->privsep_child == NULL) {
+		err = start_pack_privsep_child(pack, packidx);
+		if (err)
+			return err;
+	}
+
+	if (!pack->child_has_delta_outfd) {
+		int outfd_child;
+		outfd_child = dup(delta_cache_fd);
+		if (outfd_child == -1)
+			return got_error_from_errno("dup");
+		err = got_privsep_send_raw_delta_outfd(
+		    pack->privsep_child->ibuf, outfd_child);
+		if (err)
+			return err;
+		pack->child_has_delta_outfd = 1;
+	}
+
+	err = got_privsep_send_raw_delta_req(pack->privsep_child->ibuf,
+	    obj_idx, id);
+	if (err)
+		return err;
+
+	return got_privsep_recv_raw_delta(base_size, result_size, delta_size,
+	    delta_offset, delta_out_offset, base_id, pack->privsep_child->ibuf);
 }
 
 static const struct got_error *
blob - 152534fe9ad8d3e39e0d2945b284bbae1f218a2f
blob + bec28e2592f77e2e43e9df547016ff61d8886b42
--- lib/object_idset.c
+++ lib/object_idset.c
@@ -190,5 +190,42 @@ got_object_idset_num_elements(struct got_object_idset
 	return set->totelem;
 }
 
+struct got_object_idset_element *
+got_object_idset_get_element(struct got_object_idset *set, struct got_object_id *id)
+{
+	return find_element(set, id);
+}
+
+void *
+got_object_idset_get_element_data(struct got_object_idset_element *entry)
+{
+	return entry->data;
+}
+
+const struct got_error *
+got_object_idset_for_each_element(struct got_object_idset *set,
+    const struct got_error *(*cb)(struct got_object_idset_element *, void *),
+    void *arg)
+{
+	const struct got_error *err;
+	struct got_object_idset_element *entry, *tmp;
+
+	RB_FOREACH_SAFE(entry, got_object_idset_tree, &set->entries, tmp) {
+		err = (*cb)(entry, arg);
+		if (err)
+			return err;
+	}
+	return NULL;
+}
+
+void
+got_object_idset_remove_element(struct got_object_idset *set,
+    struct got_object_idset_element *entry)
+{
+	RB_REMOVE(got_object_idset_tree, &set->entries, entry);
+	free(entry);
+	set->totelem--;
+}
+
 RB_GENERATE(got_object_idset_tree, got_object_idset_element, entry,
     cmp_elements);
blob - 8a01d22a8f4fe6686821dab1e44a139ea351a8a1
blob + c981da3572edb6919e64e8fa50227b8ab99e0702
--- lib/pack.c
+++ lib/pack.c
@@ -433,6 +433,8 @@ got_packidx_close(struct got_packidx *packidx)
 	}
 	if (close(packidx->fd) == -1 && err == NULL)
 		err = got_error_from_errno("close");
+	free(packidx->sorted_offsets);
+	free(packidx->sorted_large_offsets);
 	free(packidx);
 
 	return err;
@@ -506,9 +508,192 @@ got_packidx_get_object_idx(struct got_packidx *packidx
 	}
 
 	return -1;
+}
+
+static int
+offset_cmp(const void *pa, const void *pb)
+{
+	const struct got_pack_offset_index *a, *b;
+
+	a = (const struct got_pack_offset_index *)pa;
+	b = (const struct got_pack_offset_index *)pb;
+
+	if (a->offset < b->offset)
+		return -1;
+	else if (a->offset > b->offset)
+		return 1;
+
+	return 0;
 }
 
+static int
+large_offset_cmp(const void *pa, const void *pb)
+{
+	const struct got_pack_large_offset_index *a, *b;
+
+	a = (const struct got_pack_large_offset_index *)pa;
+	b = (const struct got_pack_large_offset_index *)pb;
+
+	if (a->offset < b->offset)
+		return -1;
+	else if (a->offset > b->offset)
+		return 1;
+
+	return 0;
+}
+
+static const struct got_error *
+build_offset_index(struct got_packidx *p)
+{
+	const struct got_error *err = NULL;
+	uint32_t nobj = be32toh(p->hdr.fanout_table[0xff]);
+	size_t nsmall;
+	unsigned int i, j, k;
+
+	/* A corrupt index could claim more large offsets than objects. */
+	if (p->nlargeobj > nobj)
+		return got_error(GOT_ERR_BAD_PACKIDX);
+	nsmall = nobj - p->nlargeobj;
+
+	/*
+	 * Allocate at least one element: sorted_offsets != NULL doubles as
+	 * the "index has been built" flag and calloc(0, ...) may return NULL.
+	 */
+	p->sorted_offsets = calloc(nsmall > 0 ? nsmall : 1,
+	    sizeof(p->sorted_offsets[0]));
+	if (p->sorted_offsets == NULL)
+		return got_error_from_errno("calloc");
+
+	if (p->nlargeobj > 0) {
+		p->sorted_large_offsets = calloc(p->nlargeobj,
+		    sizeof(p->sorted_large_offsets[0]));
+		if (p->sorted_large_offsets == NULL) {
+			err = got_error_from_errno("calloc");
+			goto done;
+		}
+	}
+
+	j = 0;
+	k = 0;
+	for (i = 0; i < nobj; i++) {
+		uint32_t offset = be32toh(p->hdr.offsets[i]);
+		if (offset & GOT_PACKIDX_OFFSET_VAL_IS_LARGE_IDX) {
+			uint64_t loffset;
+			uint32_t idx;
+			idx = offset & GOT_PACKIDX_OFFSET_VAL_MASK;
+			/* Bound j as well so a bad index cannot overflow. */
+			if (idx >= p->nlargeobj || j >= p->nlargeobj ||
+			    p->hdr.large_offsets == NULL) {
+				err = got_error(GOT_ERR_BAD_PACKIDX);
+				goto done;
+			}
+			loffset = be64toh(p->hdr.large_offsets[idx]);
+			p->sorted_large_offsets[j].offset = loffset;
+			p->sorted_large_offsets[j].idx = i;
+			j++;
+		} else {
+			if (k >= nsmall) {
+				err = got_error(GOT_ERR_BAD_PACKIDX);
+				goto done;
+			}
+			p->sorted_offsets[k].offset = offset;
+			p->sorted_offsets[k].idx = i;
+			k++;
+		}
+	}
+	if (j != p->nlargeobj || k != nsmall) {
+		err = got_error(GOT_ERR_BAD_PACKIDX);
+		goto done;
+	}
+
+	qsort(p->sorted_offsets, nsmall,
+	    sizeof(p->sorted_offsets[0]), offset_cmp);
+
+	if (p->sorted_large_offsets)
+		qsort(p->sorted_large_offsets, p->nlargeobj,
+		    sizeof(p->sorted_large_offsets[0]), large_offset_cmp);
+done:
+	if (err) {
+		/* Never leave a half-built index behind for later lookups. */
+		free(p->sorted_offsets);
+		p->sorted_offsets = NULL;
+		free(p->sorted_large_offsets);
+		p->sorted_large_offsets = NULL;
+	}
+	return err;
+}
+
 const struct got_error *
+got_packidx_get_offset_idx(int *idx, struct got_packidx *packidx, off_t offset)
+{
+	const struct got_error *err;
+	uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]);
+	int i, left, right;
+
+	*idx = -1;
+
+	if (offset < 0)
+		return NULL; /* no object can live at a negative offset */
+
+	if (packidx->sorted_offsets == NULL) {
+		err = build_offset_index(packidx);
+		if (err)
+			return err;
+	}
+
+	/* Offsets which fit into 31 bits are normally in the small table. */
+	if (offset <= 0x7fffffff) {
+		uint32_t o;
+		left = 0, right = (int)(totobj - packidx->nlargeobj) - 1;
+		while (left <= right) {
+			i = left + (right - left) / 2;
+			o = packidx->sorted_offsets[i].offset;
+			if (o == offset) {
+				*idx = packidx->sorted_offsets[i].idx;
+				return NULL;
+			} else if (offset > o)
+				left = i + 1;
+			else
+				right = i - 1;
+		}
+	}
+
+	/* The index format allows any offset in the large offset table. */
+	if (packidx->nlargeobj > 0) {
+		uint64_t lo;
+		left = 0, right = (int)packidx->nlargeobj - 1;
+		while (left <= right) {
+			i = left + (right - left) / 2;
+			lo = packidx->sorted_large_offsets[i].offset;
+			if (lo == (uint64_t)offset) {
+				*idx = packidx->sorted_large_offsets[i].idx;
+				break;
+			} else if ((uint64_t)offset > lo)
+				left = i + 1;
+			else
+				right = i - 1;
+		}
+	}
+
+	return NULL;
+}
+
+const struct got_error *
+got_packidx_get_object_id(struct got_object_id *id,
+    struct got_packidx *packidx, int idx)
+{
+	uint32_t totobj = be32toh(packidx->hdr.fanout_table[0xff]);
+	struct got_packidx_object_id *oid;
+
+	if (idx < 0 || idx >= totobj)
+		return got_error(GOT_ERR_NO_OBJ);
+
+	oid = &packidx->hdr.sorted_ids[idx];
+	memcpy(id->sha1, oid->sha1, SHA1_DIGEST_LENGTH);
+	return NULL;
+}
+
+const struct got_error *
 got_packidx_match_id_str_prefix(struct got_object_id_queue *matched_ids,
     struct got_packidx *packidx, const char *id_str_prefix)
 {
@@ -1452,3 +1637,84 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size
 
 	return err;
 }
+
+const struct got_error *
+got_packfile_extract_raw_delta(uint8_t **delta_buf, size_t *delta_size,
+    off_t *delta_offset, off_t *base_offset, struct got_object_id *base_id,
+    uint64_t *base_size, uint64_t
*result_size, struct got_pack *pack, + struct got_packidx *packidx, int idx) +{ + const struct got_error *err = NULL; + off_t offset; + uint8_t type; + uint64_t size; + size_t tslen, delta_hdrlen; + + *delta_buf = NULL; + *delta_size = 0; + *delta_offset = 0; + *base_offset = 0; + *base_size = 0; + *result_size = 0; + + offset = got_packidx_get_object_offset(packidx, idx); + if (offset == (uint64_t)-1) + return got_error(GOT_ERR_BAD_PACKIDX); + + if (offset >= pack->filesize) + return got_error(GOT_ERR_PACK_OFFSET); + + err = got_pack_parse_object_type_and_size(&type, &size, &tslen, + pack, offset); + if (err) + return err; + + if (tslen + size < tslen || offset + size < size || + tslen + offset < tslen) + return got_error(GOT_ERR_PACK_OFFSET); + + switch (type) { + case GOT_OBJ_TYPE_OFFSET_DELTA: + err = got_pack_parse_offset_delta(base_offset, &delta_hdrlen, + pack, offset, tslen); + if (err) + return err; + break; + case GOT_OBJ_TYPE_REF_DELTA: + err = got_pack_parse_ref_delta(base_id, pack, offset, tslen); + if (err) + return err; + delta_hdrlen = SHA1_DIGEST_LENGTH; + break; + default: + return got_error_fmt(GOT_ERR_OBJ_TYPE, + "non-delta object type %d found at offset %llu", + type, offset); + } + + if (tslen + delta_hdrlen < delta_hdrlen || + offset + delta_hdrlen < delta_hdrlen) + return got_error(GOT_ERR_BAD_DELTA); + + err = read_delta_data(delta_buf, delta_size, + offset + tslen + delta_hdrlen, pack); + if (err) + return err; + + if (*delta_size != size) { + err = got_error(GOT_ERR_BAD_DELTA); + goto done; + } + + err = got_delta_get_sizes(base_size, result_size, *delta_buf, size); + if (err) + goto done; + + *delta_offset = offset; +done: + if (err) { + free(*delta_buf); + *delta_buf = NULL; + } + return err; +} blob - f4c9f4cd1e5f2d7ec2bb640a2745e9ef11b8f29a blob + 126be35e5fb8688252b462d73222675dfe1e830d --- lib/pack_create.c +++ lib/pack_create.c @@ -74,6 +74,10 @@ struct got_pack_meta { off_t delta_len; /* encoded delta length */ int nchain; + int 
have_reused_delta;
+	off_t reused_delta_offset; /* offset of delta in reused pack file */
+	struct got_object_id *base_obj_id;
+
 	/* Only used for delta window */
 	struct got_delta_table *dtab;
 
@@ -124,6 +128,8 @@ clear_meta(struct got_pack_meta *meta)
 	meta->path = NULL;
 	free(meta->delta_buf);
 	meta->delta_buf = NULL;
+	free(meta->base_obj_id);
+	meta->base_obj_id = NULL;
 }
 
 static void
@@ -419,8 +425,229 @@ report_progress(got_pack_progress_cb progress_cb, void
 }
 
 static const struct got_error *
-pick_deltas(struct got_pack_meta **meta, int nmeta, int nours,
-	FILE *delta_cache, struct got_repository *repo,
+add_meta(struct got_pack_meta *m, struct got_pack_metavec *v)
+{
+	if (v->nmeta == v->metasz){
+		size_t newsize = 2 * v->metasz;
+		struct got_pack_meta **new;
+		new = reallocarray(v->meta, newsize, sizeof(*new));
+		if (new == NULL)
+			return got_error_from_errno("reallocarray");
+		v->meta = new;
+		v->metasz = newsize;
+	}
+
+	v->meta[v->nmeta++] = m;
+	return NULL;
+}
+
+static const struct got_error *
+reuse_delta(int idx, struct got_pack_meta *m, struct got_pack_metavec *v,
+    struct got_object_idset *idset, struct got_pack *pack,
+    struct got_packidx *packidx, int delta_cache_fd,
+    struct got_repository *repo)
+{
+	const struct got_error *err = NULL;
+	struct got_pack_meta *base = NULL;
+	struct got_object_id *base_obj_id = NULL;
+	off_t delta_len = 0, delta_offset = 0, delta_cache_offset = 0;
+	uint64_t base_size, result_size;
+
+	if (m->have_reused_delta)
+		return NULL;
+
+	err = got_object_read_raw_delta(&base_size, &result_size, &delta_len,
+	    &delta_offset, &delta_cache_offset, &base_obj_id, delta_cache_fd,
+	    packidx, idx, &m->id, repo);
+	if (err)
+		return err;
+
+	if (delta_offset + delta_len < delta_offset) {
+		/* base_obj_id was handed to us above; free it via done. */
+		err = got_error(GOT_ERR_BAD_PACKFILE);
+		goto done;
+	}
+
+	base = got_object_idset_get(idset, base_obj_id);
+	if (base == NULL)
+		goto done;
+
+	m->delta_len = delta_len;
+	m->delta_offset = delta_cache_offset;
+	m->prev = base;
+	m->size = result_size;
+	m->have_reused_delta = 1;
+	m->reused_delta_offset = delta_offset;
+	m->base_obj_id = base_obj_id;
+	base_obj_id = NULL;
+	err = add_meta(m, v);
+done:
+	free(base_obj_id);
+	return err;
+}
+
+static const struct got_error *
+find_pack_for_reuse(struct got_packidx **best_packidx,
+    struct got_repository *repo)
+{
+	const struct got_error *err;
+	struct got_pathlist_head packidx_paths;
+	struct got_pathlist_entry *pe;
+	const char *best_packidx_path = NULL;
+	int nobj_max = 0;
+
+	TAILQ_INIT(&packidx_paths);
+	*best_packidx = NULL;
+
+	err = got_repo_list_packidx(&packidx_paths, repo);
+	if (err)
+		return err;
+
+	TAILQ_FOREACH(pe, &packidx_paths, entry) {
+		const char *path_packidx = pe->path;
+		struct got_packidx *packidx;
+		int nobj;
+
+		err = got_repo_get_packidx(&packidx, path_packidx, repo);
+		if (err)
+			break;
+
+		nobj = be32toh(packidx->hdr.fanout_table[0xff]);
+		if (nobj > nobj_max) {
+			best_packidx_path = path_packidx;
+			nobj_max = nobj;
+		}
+	}
+
+	/* Do not let the lookup below mask an error from the loop. */
+	if (err == NULL && best_packidx_path) {
+		err = got_repo_get_packidx(best_packidx, best_packidx_path,
+		    repo);
+	}
+
+	TAILQ_FOREACH(pe, &packidx_paths, entry)
+		free((void *)pe->path);
+	got_pathlist_free(&packidx_paths);
+	return err;
+}
+
+struct search_deltas_arg {
+	struct got_packidx *packidx;
+	struct got_pack *pack;
+	struct got_object_idset *idset;
+	struct got_pack_metavec *v;
+	int delta_cache_fd;
+	struct got_repository *repo;
+	got_pack_progress_cb progress_cb;
+	void *progress_arg;
+	struct got_ratelimit *rl;
+	got_cancel_cb cancel_cb;
+	void *cancel_arg;
+	int ncommits;
+};
+
+static const struct got_error *
+search_delta_for_object(struct got_object_id *id, void *data, void *arg)
+{
+	const struct got_error *err;
+	struct got_pack_meta *m = data;
+	struct search_deltas_arg *a = arg;
+	int obj_idx;
+	struct got_object *obj = NULL;
+
+	if (a->cancel_cb) {
+		err = (*a->cancel_cb)(a->cancel_arg);
+		if (err)
+			return err;
+	}
+
+	if (!got_repo_check_packidx_bloom_filter(a->repo,
+	    a->packidx->path_packidx, id))
+		return NULL;
+
+	obj_idx = got_packidx_get_object_idx(a->packidx, id);
+	if (obj_idx == -1)
+		return NULL;
+
+	/* TODO:
+	 * Opening and closing an object just to check its flags
+	 * is a bit expensive. We could have an imsg which requests
+	 * plain type/size information for an object without doing
+	 * work such as traversing the object's entire delta chain
+	 * to find the base object type, and other such info which
+	 * we don't really need here.
+	 */
+	err = got_object_open_from_packfile(&obj, &m->id, a->pack,
+	    a->packidx, obj_idx, a->repo);
+	if (err)
+		return err;
+
+	if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) {
+		err = reuse_delta(obj_idx, m, a->v, a->idset, a->pack,
+		    a->packidx, a->delta_cache_fd, a->repo);
+		if (err)
+			goto done;
+		err = report_progress(a->progress_cb, a->progress_arg, a->rl,
+		    0L, a->ncommits, got_object_idset_num_elements(a->idset),
+		    a->v->nmeta, 0);
+	}
+done:
+	got_object_close(obj);
+	return err;
+}
+
+static const struct got_error *
+search_deltas(struct got_pack_metavec *v, struct got_object_idset *idset,
+    int delta_cache_fd, int ncommits, struct got_repository *repo,
+    got_pack_progress_cb progress_cb, void *progress_arg,
+    struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg)
+{
+	const struct got_error *err = NULL;
+	char *path_packfile = NULL;
+	struct got_packidx *packidx;
+	struct got_pack *pack;
+	struct search_deltas_arg sda;
+
+	err = find_pack_for_reuse(&packidx, repo);
+	if (err)
+		return err;
+
+	if (packidx == NULL)
+		return NULL;
+
+	err = got_packidx_get_packfile_path(&path_packfile,
+	    packidx->path_packidx);
+	if (err)
+		return err;
+
+	pack = got_repo_get_cached_pack(repo, path_packfile);
+	if (pack == NULL) {
+		err = got_repo_cache_pack(&pack, repo, path_packfile, packidx);
+		if (err)
+			goto done;
+	}
+
+	sda.packidx = packidx;
+	sda.pack = pack;
+	sda.idset = idset;
+	sda.v = v;
+	sda.delta_cache_fd = delta_cache_fd;
+	sda.repo = repo;
+	sda.progress_cb = progress_cb;
+	sda.progress_arg = progress_arg;
+
sda.rl = rl; + sda.cancel_cb = cancel_cb; + sda.cancel_arg = cancel_arg; + sda.ncommits = ncommits; + err = got_object_idset_for_each(idset, search_delta_for_object, &sda); +done: + free(path_packfile); + return err; +} + +static const struct got_error * +pick_deltas(struct got_pack_meta **meta, int nmeta, int ncommits, + int nreused, FILE *delta_cache, struct got_repository *repo, got_pack_progress_cb progress_cb, void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { @@ -443,7 +666,7 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in break; } err = report_progress(progress_cb, progress_arg, rl, - 0L, nours, nmeta, i, 0); + 0L, ncommits, nreused + nmeta, nreused + i, 0); if (err) goto done; m = meta[i]; @@ -492,6 +715,7 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in &base->id); if (err) goto done; + if (raw->f == NULL && base_raw->f == NULL) { err = got_deltify_mem_mem(&deltas, &ndeltas, raw->data, raw->hdrlen, @@ -556,6 +780,15 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in best_ndeltas, best_size, m->prev->size); } else { m->delta_offset = ftello(delta_cache); + /* + * TODO: + * Storing compressed delta data in the delta + * cache file would probably be more efficient + * than writing uncompressed delta data here + * and compressing it while writing the pack + * file. This would also allow for reusing + * deltas in their compressed form. 
+ */ err = encode_delta(m, raw, best_deltas, best_ndeltas, m->prev->size, delta_cache); } @@ -614,12 +847,12 @@ static const int obj_types[] = { }; static const struct got_error * -add_meta(struct got_pack_metavec *v, struct got_object_idset *idset, +add_object(int want_meta, struct got_object_idset *idset, struct got_object_id *id, const char *path, int obj_type, time_t mtime, int loose_obj_only, struct got_repository *repo) { const struct got_error *err; - struct got_pack_meta *m; + struct got_pack_meta *m = NULL; if (loose_obj_only) { int is_packed; @@ -630,40 +863,17 @@ add_meta(struct got_pack_metavec *v, struct got_object return NULL; } - err = got_object_idset_add(idset, id, (void *)&obj_types[obj_type]); - if (err) - return err; - - if (v == NULL) - return NULL; - - err = alloc_meta(&m, id, path, obj_type, mtime); - if (err) - goto done; - - if (v->nmeta == v->metasz){ - size_t newsize = 2 * v->metasz; - struct got_pack_meta **new; - new = reallocarray(v->meta, newsize, sizeof(*new)); - if (new == NULL) { - err = got_error_from_errno("reallocarray"); - goto done; - } - v->meta = new; - v->metasz = newsize; + if (want_meta) { + err = alloc_meta(&m, id, path, obj_type, mtime); + if (err) + return err; } -done: - if (err) { - clear_meta(m); - free(m); - } else - v->meta[v->nmeta++] = m; - return err; + return got_object_idset_add(idset, id, m); } static const struct got_error * -load_tree_entries(struct got_object_id_queue *ids, struct got_pack_metavec *v, +load_tree_entries(struct got_object_id_queue *ids, int want_meta, struct got_object_idset *idset, struct got_object_id *tree_id, const char *dpath, time_t mtime, struct got_repository *repo, int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg) @@ -705,8 +915,8 @@ load_tree_entries(struct got_object_id_queue *ids, str break; STAILQ_INSERT_TAIL(ids, qid, entry); } else if (S_ISREG(mode) || S_ISLNK(mode)) { - err = add_meta(v, idset, id, p, GOT_OBJ_TYPE_BLOB, - mtime, loose_obj_only, repo); + err = 
add_object(want_meta, idset, id, p, + GOT_OBJ_TYPE_BLOB, mtime, loose_obj_only, repo); if (err) break; } @@ -720,7 +930,7 @@ load_tree_entries(struct got_object_id_queue *ids, str } static const struct got_error * -load_tree(struct got_pack_metavec *v, struct got_object_idset *idset, +load_tree(int want_meta, struct got_object_idset *idset, struct got_object_id *tree_id, const char *dpath, time_t mtime, int loose_obj_only, struct got_repository *repo, got_cancel_cb cancel_cb, void *cancel_arg) @@ -754,15 +964,15 @@ load_tree(struct got_pack_metavec *v, struct got_objec continue; } - err = add_meta(v, idset, qid->id, dpath, GOT_OBJ_TYPE_TREE, - mtime, loose_obj_only, repo); + err = add_object(want_meta, idset, qid->id, dpath, + GOT_OBJ_TYPE_TREE, mtime, loose_obj_only, repo); if (err) { got_object_qid_free(qid); break; } - err = load_tree_entries(&tree_ids, v, idset, qid->id, dpath, - mtime, repo, loose_obj_only, cancel_cb, cancel_arg); + err = load_tree_entries(&tree_ids, want_meta, idset, qid->id, + dpath, mtime, repo, loose_obj_only, cancel_cb, cancel_arg); got_object_qid_free(qid); if (err) break; @@ -773,7 +983,7 @@ load_tree(struct got_pack_metavec *v, struct got_objec } static const struct got_error * -load_commit(struct got_pack_metavec *v, struct got_object_idset *idset, +load_commit(int want_meta, struct got_object_idset *idset, struct got_object_id *id, struct got_repository *repo, int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg) { @@ -796,13 +1006,13 @@ load_commit(struct got_pack_metavec *v, struct got_obj if (err) return err; - err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_COMMIT, + err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_COMMIT, got_object_commit_get_committer_time(commit), loose_obj_only, repo); if (err) goto done; - err = load_tree(v, idset, got_object_commit_get_tree_id(commit), + err = load_tree(want_meta, idset, got_object_commit_get_tree_id(commit), "", got_object_commit_get_committer_time(commit), loose_obj_only, 
repo, cancel_cb, cancel_arg); done: @@ -811,7 +1021,7 @@ done: } static const struct got_error * -load_tag(struct got_pack_metavec *v, struct got_object_idset *idset, +load_tag(int want_meta, struct got_object_idset *idset, struct got_object_id *id, struct got_repository *repo, int loose_obj_only, got_cancel_cb cancel_cb, void *cancel_arg) { @@ -834,7 +1044,7 @@ load_tag(struct got_pack_metavec *v, struct got_object if (err) return err; - err = add_meta(v, idset, id, "", GOT_OBJ_TYPE_TAG, + err = add_object(want_meta, idset, id, "", GOT_OBJ_TYPE_TAG, got_object_tag_get_tagger_time(tag), loose_obj_only, repo); if (err) @@ -842,13 +1052,14 @@ load_tag(struct got_pack_metavec *v, struct got_object switch (got_object_tag_get_object_type(tag)) { case GOT_OBJ_TYPE_COMMIT: - err = load_commit(v, idset, + err = load_commit(want_meta, idset, got_object_tag_get_object_id(tag), repo, loose_obj_only, cancel_cb, cancel_arg); break; case GOT_OBJ_TYPE_TREE: - err = load_tree(v, idset, got_object_tag_get_object_id(tag), - "", got_object_tag_get_tagger_time(tag), + err = load_tree(want_meta, idset, + got_object_tag_get_object_id(tag), "", + got_object_tag_get_tagger_time(tag), loose_obj_only, repo, cancel_cb, cancel_arg); break; default: @@ -1124,7 +1335,7 @@ done: } static const struct got_error * -read_meta(struct got_pack_meta ***meta, int *nmeta, +load_object_ids(struct got_object_idset *idset, struct got_object_id **theirs, int ntheirs, struct got_object_id **ours, int nours, struct got_repository *repo, int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg, @@ -1132,25 +1343,8 @@ read_meta(struct got_pack_meta ***meta, int *nmeta, { const struct got_error *err = NULL; struct got_object_id **ids = NULL; - struct got_object_idset *idset; int i, nobj = 0, obj_type; - struct got_pack_metavec v; - *meta = NULL; - *nmeta = 0; - - idset = got_object_idset_alloc(); - if (idset == NULL) - return got_error_from_errno("got_object_idset_alloc"); - - v.nmeta = 0; - 
v.metasz = 64; - v.meta = calloc(v.metasz, sizeof(struct got_pack_meta *)); - if (v.meta == NULL) { - err = got_error_from_errno("calloc"); - goto done; - } - err = findtwixt(&ids, &nobj, ours, nours, theirs, ntheirs, repo, cancel_cb, cancel_arg); if (err || nobj == 0) @@ -1165,79 +1359,81 @@ read_meta(struct got_pack_meta ***meta, int *nmeta, return err; if (obj_type != GOT_OBJ_TYPE_COMMIT) continue; - err = load_commit(NULL, idset, id, repo, + err = load_commit(0, idset, id, repo, loose_obj_only, cancel_cb, cancel_arg); if (err) goto done; err = report_progress(progress_cb, progress_arg, rl, - 0L, nours, v.nmeta, 0, 0); + 0L, nours, got_object_idset_num_elements(idset), + 0, 0); if (err) goto done; } for (i = 0; i < ntheirs; i++) { struct got_object_id *id = theirs[i]; - int *cached_type; + struct got_pack_meta *m; if (id == NULL) continue; - cached_type = got_object_idset_get(idset, id); - if (cached_type == NULL) { + m = got_object_idset_get(idset, id); + if (m == NULL) { err = got_object_get_type(&obj_type, repo, id); if (err) goto done; } else - obj_type = *cached_type; + obj_type = m->obj_type; if (obj_type != GOT_OBJ_TYPE_TAG) continue; - err = load_tag(NULL, idset, id, repo, + err = load_tag(0, idset, id, repo, loose_obj_only, cancel_cb, cancel_arg); if (err) goto done; err = report_progress(progress_cb, progress_arg, rl, - 0L, nours, v.nmeta, 0, 0); + 0L, nours, got_object_idset_num_elements(idset), 0, 0); if (err) goto done; } for (i = 0; i < nobj; i++) { - err = load_commit(&v, idset, ids[i], repo, + err = load_commit(1, idset, ids[i], repo, loose_obj_only, cancel_cb, cancel_arg); if (err) goto done; if (err) goto done; err = report_progress(progress_cb, progress_arg, rl, - 0L, nours, v.nmeta, 0, 0); + 0L, nours, got_object_idset_num_elements(idset), 0, 0); if (err) goto done; } for (i = 0; i < nours; i++) { struct got_object_id *id = ours[i]; - int *cached_type; + struct got_pack_meta *m; if (id == NULL) continue; - cached_type = 
got_object_idset_get(idset, id); - if (cached_type == NULL) { + m = got_object_idset_get(idset, id); + if (m == NULL) { err = got_object_get_type(&obj_type, repo, id); if (err) goto done; } else - obj_type = *cached_type; + obj_type = m->obj_type; if (obj_type != GOT_OBJ_TYPE_TAG) continue; - err = load_tag(&v, idset, id, repo, + err = load_tag(1, idset, id, repo, loose_obj_only, cancel_cb, cancel_arg); if (err) goto done; err = report_progress(progress_cb, progress_arg, rl, - 0L, nours, v.nmeta, 0, 0); + 0L, nours, got_object_idset_num_elements(idset), 0, 0); if (err) goto done; } if (progress_cb) { - err = progress_cb(progress_arg, 0L, nours, v.nmeta, 0, 0); + err = progress_cb(progress_arg, 0L, nours, + got_object_idset_num_elements(idset), 0, 0); if (err) goto done; } @@ -1246,13 +1442,6 @@ done: free(ids[i]); } free(ids); - got_object_idset_free(idset); - if (err == NULL) { - *meta = v.meta; - *nmeta = v.nmeta; - } else - free(v.meta); - return err; } @@ -1295,6 +1484,21 @@ write_order_cmp(const void *pa, const void *pb) return a->mtime - b->mtime; } +static int +reuse_write_order_cmp(const void *pa, const void *pb) +{ + struct got_pack_meta *a, *b; + + a = *(struct got_pack_meta **)pa; + b = *(struct got_pack_meta **)pb; + + if (a->reused_delta_offset < b->reused_delta_offset) + return -1; + if (a->reused_delta_offset > b->reused_delta_offset) + return 1; + return 0; +} + static const struct got_error * packhdr(int *hdrlen, char *hdr, size_t bufsize, int obj_type, size_t len) { @@ -1337,13 +1541,13 @@ packoff(char *hdr, off_t off) static const struct got_error * deltahdr(off_t *packfile_size, SHA1_CTX *ctx, FILE *packfile, - struct got_pack_meta *m, int use_offset_deltas) + struct got_pack_meta *m) { const struct got_error *err; char buf[32]; int nh; - if (use_offset_deltas && m->prev->off != 0) { + if (m->prev->off != 0) { err = packhdr(&nh, buf, sizeof(buf), GOT_OBJ_TYPE_OFFSET_DELTA, m->delta_len); if (err) @@ -1373,27 +1577,104 @@ deltahdr(off_t 
*packfile_size, SHA1_CTX *ctx, FILE *pa } static const struct got_error * +write_packed_object(off_t *packfile_size, FILE *packfile, + FILE *delta_cache, struct got_pack_meta *m, int *outfd, + SHA1_CTX *ctx, struct got_repository *repo) +{ + const struct got_error *err = NULL; + struct got_deflate_checksum csum; + char buf[32]; + int nh; + struct got_raw_object *raw = NULL; + off_t outlen; + + csum.output_sha1 = ctx; + csum.output_crc = NULL; + + m->off = ftello(packfile); + if (m->delta_len == 0) { + err = got_object_raw_open(&raw, outfd, repo, &m->id); + if (err) + goto done; + err = packhdr(&nh, buf, sizeof(buf), + m->obj_type, raw->size); + if (err) + goto done; + err = hwrite(packfile, buf, nh, ctx); + if (err) + goto done; + *packfile_size += nh; + if (raw->f == NULL) { + err = got_deflate_to_file_mmap(&outlen, + raw->data + raw->hdrlen, 0, raw->size, + packfile, &csum); + if (err) + goto done; + } else { + if (fseeko(raw->f, raw->hdrlen, SEEK_SET) + == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } + err = got_deflate_to_file(&outlen, raw->f, + raw->size, packfile, &csum); + if (err) + goto done; + } + *packfile_size += outlen; + got_object_raw_close(raw); + raw = NULL; + } else if (m->delta_buf) { + err = deltahdr(packfile_size, ctx, packfile, m); + if (err) + goto done; + err = got_deflate_to_file_mmap(&outlen, + m->delta_buf, 0, m->delta_len, packfile, &csum); + if (err) + goto done; + *packfile_size += outlen; + free(m->delta_buf); + m->delta_buf = NULL; + } else { + if (fseeko(delta_cache, m->delta_offset, SEEK_SET) + == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } + err = deltahdr(packfile_size, ctx, packfile, m); + if (err) + goto done; + err = got_deflate_to_file(&outlen, delta_cache, + m->delta_len, packfile, &csum); + if (err) + goto done; + *packfile_size += outlen; + } +done: + if (raw) + got_object_raw_close(raw); + return err; +} + +static const struct got_error * genpack(uint8_t *pack_sha1, FILE *packfile, 
FILE *delta_cache, - struct got_pack_meta **meta, int nmeta, int nours, - int use_offset_deltas, struct got_repository *repo, + struct got_pack_meta **deltify, int ndeltify, + struct got_pack_meta **reuse, int nreuse, + int nours, struct got_repository *repo, got_pack_progress_cb progress_cb, void *progress_arg, struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; - int i, nh; + int i; SHA1_CTX ctx; struct got_pack_meta *m; - struct got_raw_object *raw = NULL; char buf[32]; size_t n; - struct got_deflate_checksum csum; - off_t outlen, packfile_size = 0; + off_t packfile_size = 0; int outfd = -1; SHA1Init(&ctx); - csum.output_sha1 = &ctx; - csum.output_crc = NULL; err = hwrite(packfile, "PACK", 4, &ctx); if (err) @@ -1402,79 +1683,41 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt err = hwrite(packfile, buf, 4, &ctx); if (err) goto done; - putbe32(buf, nmeta); + putbe32(buf, ndeltify + nreuse); err = hwrite(packfile, buf, 4, &ctx); if (err) goto done; - qsort(meta, nmeta, sizeof(struct got_pack_meta *), write_order_cmp); - for (i = 0; i < nmeta; i++) { + + qsort(deltify, ndeltify, sizeof(struct got_pack_meta *), + write_order_cmp); + for (i = 0; i < ndeltify; i++) { err = report_progress(progress_cb, progress_arg, rl, - packfile_size, nours, nmeta, nmeta, i); + packfile_size, nours, ndeltify + nreuse, + ndeltify + nreuse, i); if (err) goto done; - m = meta[i]; - m->off = ftello(packfile); - if (m->delta_len == 0) { - err = got_object_raw_open(&raw, &outfd, repo, &m->id); - if (err) - goto done; - err = packhdr(&nh, buf, sizeof(buf), - m->obj_type, raw->size); - if (err) - goto done; - err = hwrite(packfile, buf, nh, &ctx); - if (err) - goto done; - packfile_size += nh; - if (raw->f == NULL) { - err = got_deflate_to_file_mmap(&outlen, - raw->data + raw->hdrlen, 0, raw->size, - packfile, &csum); - if (err) - goto done; - } else { - if (fseeko(raw->f, raw->hdrlen, SEEK_SET) - == -1) { - err = 
got_error_from_errno("fseeko"); - goto done; - } - err = got_deflate_to_file(&outlen, raw->f, - raw->size, packfile, &csum); - if (err) - goto done; - } - packfile_size += outlen; - got_object_raw_close(raw); - raw = NULL; - } else if (m->delta_buf) { - err = deltahdr(&packfile_size, &ctx, packfile, - m, use_offset_deltas); - if (err) - goto done; - err = got_deflate_to_file_mmap(&outlen, - m->delta_buf, 0, m->delta_len, packfile, &csum); - if (err) - goto done; - packfile_size += outlen; - free(m->delta_buf); - m->delta_buf = NULL; - } else { - if (fseeko(delta_cache, m->delta_offset, SEEK_SET) - == -1) { - err = got_error_from_errno("fseeko"); - goto done; - } - err = deltahdr(&packfile_size, &ctx, packfile, - m, use_offset_deltas); - if (err) - goto done; - err = got_deflate_to_file(&outlen, delta_cache, - m->delta_len, packfile, &csum); - if (err) - goto done; - packfile_size += outlen; - } + m = deltify[i]; + err = write_packed_object(&packfile_size, packfile, + delta_cache, m, &outfd, &ctx, repo); + if (err) + goto done; } + + qsort(reuse, nreuse, sizeof(struct got_pack_meta *), + reuse_write_order_cmp); + for (i = 0; i < nreuse; i++) { + err = report_progress(progress_cb, progress_arg, rl, + packfile_size, nours, ndeltify + nreuse, + ndeltify + nreuse, ndeltify + i); + if (err) + goto done; + m = reuse[i]; + err = write_packed_object(&packfile_size, packfile, + delta_cache, m, &outfd, &ctx, repo); + if (err) + goto done; + } + SHA1Final(pack_sha1, &ctx); n = fwrite(pack_sha1, 1, SHA1_DIGEST_LENGTH, packfile); if (n != SHA1_DIGEST_LENGTH) @@ -1483,18 +1726,50 @@ genpack(uint8_t *pack_sha1, FILE *packfile, FILE *delt packfile_size += sizeof(struct got_packfile_hdr); if (progress_cb) { err = progress_cb(progress_arg, packfile_size, nours, - nmeta, nmeta, nmeta); + ndeltify + nreuse, ndeltify + nreuse, + ndeltify + nreuse); if (err) goto done; } done: - if (raw) - got_object_raw_close(raw); if (outfd != -1 && close(outfd) == -1 && err == NULL) err = 
got_error_from_errno("close"); return err; } +static const struct got_error * +remove_unused_object(struct got_object_idset_element *entry, void *arg) +{ + struct got_object_idset *idset = arg; + + if (got_object_idset_get_element_data(entry) == NULL) + got_object_idset_remove_element(idset, entry); + + return NULL; +} + +static const struct got_error * +remove_reused_object(struct got_object_idset_element *entry, void *arg) +{ + struct got_object_idset *idset = arg; + struct got_pack_meta *m; + + m = got_object_idset_get_element_data(entry); + if (m->have_reused_delta) + got_object_idset_remove_element(idset, entry); + + return NULL; +} + +static const struct got_error * +add_meta_idset_cb(struct got_object_id *id, void *data, void *arg) +{ + struct got_pack_meta *m = data; + struct got_pack_metavec *v = arg; + + return add_meta(m, v); +} + const struct got_error * got_pack_create(uint8_t *packsha1, FILE *packfile, struct got_object_id **theirs, int ntheirs, @@ -1504,32 +1779,88 @@ got_pack_create(uint8_t *packsha1, FILE *packfile, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err; - struct got_pack_meta **meta; - int nmeta; + int delta_cache_fd = -1; FILE *delta_cache = NULL; + struct got_object_idset *idset; struct got_ratelimit rl; + struct got_pack_metavec deltify, reuse; + memset(&deltify, 0, sizeof(deltify)); + memset(&reuse, 0, sizeof(reuse)); + got_ratelimit_init(&rl, 0, 500); - err = read_meta(&meta, &nmeta, theirs, ntheirs, ours, nours, repo, - loose_obj_only, progress_cb, progress_arg, &rl, + idset = got_object_idset_alloc(); + if (idset == NULL) + return got_error_from_errno("got_object_idset_alloc"); + + err = load_object_ids(idset, theirs, ntheirs, ours, nours, + repo, loose_obj_only, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg); if (err) return err; - if (nmeta == 0 && !allow_empty) { + err = got_object_idset_for_each_element(idset, + remove_unused_object, idset); + if (err) + goto done; + + if 
(got_object_idset_num_elements(idset) == 0 && !allow_empty) { err = got_error(GOT_ERR_CANNOT_PACK); goto done; } - delta_cache = got_opentemp(); - if (delta_cache == NULL) { + delta_cache_fd = got_opentempfd(); + if (delta_cache_fd == -1) { err = got_error_from_errno("got_opentemp"); goto done; } - if (nmeta > 0) { - err = pick_deltas(meta, nmeta, nours, delta_cache, repo, + reuse.metasz = 64; + reuse.meta = calloc(reuse.metasz, + sizeof(struct got_pack_meta *)); + if (reuse.meta == NULL) { + err = got_error_from_errno("calloc"); + goto done; + } + + err = search_deltas(&reuse, idset, delta_cache_fd, nours, repo, + progress_cb, progress_arg, &rl, cancel_cb, cancel_arg); + if (err) + goto done; + if (reuse.nmeta > 0) { + err = got_object_idset_for_each_element(idset, + remove_reused_object, idset); + if (err) + goto done; + } + + delta_cache = fdopen(delta_cache_fd, "a+"); + if (delta_cache == NULL) { + err = got_error_from_errno("fdopen"); + goto done; + } + delta_cache_fd = -1; + + if (fseeko(delta_cache, 0L, SEEK_END) == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } + + deltify.meta = calloc(got_object_idset_num_elements(idset), + sizeof(struct got_pack_meta *)); + if (deltify.meta == NULL) { + err = got_error_from_errno("calloc"); + goto done; + } + deltify.metasz = got_object_idset_num_elements(idset); + + err = got_object_idset_for_each(idset, add_meta_idset_cb, &deltify); + if (err) + goto done; + if (deltify.nmeta > 0) { + err = pick_deltas(deltify.meta, deltify.nmeta, nours, + reuse.nmeta, delta_cache, repo, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg); if (err) goto done; @@ -1539,12 +1870,17 @@ got_pack_create(uint8_t *packsha1, FILE *packfile, } } - err = genpack(packsha1, packfile, delta_cache, meta, nmeta, nours, 1, - repo, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg); + err = genpack(packsha1, packfile, delta_cache, deltify.meta, + deltify.nmeta, reuse.meta, reuse.nmeta, nours, repo, + progress_cb, progress_arg, 
&rl, cancel_cb, cancel_arg); if (err) goto done; done: - free_nmeta(meta, nmeta); + free_nmeta(deltify.meta, deltify.nmeta); + free_nmeta(reuse.meta, reuse.nmeta); + got_object_idset_free(idset); + if (delta_cache_fd != -1 && close(delta_cache_fd) == -1 && err == NULL) + err = got_error_from_errno("close"); if (delta_cache && fclose(delta_cache) == EOF && err == NULL) err = got_error_from_errno("fclose"); return err; blob - 9efd7ae9b91bb9c96b3d3a23e15d29ff1e58f89f blob + 48e50b65411ba0b6ffccd2853fe7f30044f9f398 --- lib/privsep.c +++ lib/privsep.c @@ -2725,9 +2725,110 @@ got_privsep_recv_traversed_commits(struct got_commit_o if (err) got_object_id_queue_free(commit_ids); return err; +} + +const struct got_error * +got_privsep_send_raw_delta_req(struct imsgbuf *ibuf, int idx, + struct got_object_id *id) +{ + struct got_imsg_raw_delta_request dreq; + + dreq.idx = idx; + memcpy(dreq.id, id->sha1, SHA1_DIGEST_LENGTH); + + if (imsg_compose(ibuf, GOT_IMSG_RAW_DELTA_REQUEST, 0, 0, -1, + &dreq, sizeof(dreq)) == -1) + return got_error_from_errno("imsg_compose RAW_DELTA_REQUEST"); + + return flush_imsg(ibuf); +} + +const struct got_error * +got_privsep_send_raw_delta_outfd(struct imsgbuf *ibuf, int fd) +{ + return send_fd(ibuf, GOT_IMSG_RAW_DELTA_OUTFD, fd); +} + +const struct got_error * +got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint64_t base_size, + uint64_t result_size, off_t delta_size, off_t delta_offset, + off_t delta_out_offset, struct got_object_id *base_id) +{ + struct got_imsg_raw_delta idelta; + int ret; + + idelta.base_size = base_size; + idelta.result_size = result_size; + idelta.delta_size = delta_size; + idelta.delta_offset = delta_offset; + idelta.delta_out_offset = delta_out_offset; + memcpy(idelta.base_id, base_id->sha1, SHA1_DIGEST_LENGTH); + + ret = imsg_compose(ibuf, GOT_IMSG_RAW_DELTA, 0, 0, -1, + &idelta, sizeof(idelta)); + if (ret == -1) + return got_error_from_errno("imsg_compose RAW_DELTA"); + + return flush_imsg(ibuf); } const struct 
got_error * +got_privsep_recv_raw_delta(uint64_t *base_size, uint64_t *result_size, + off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset, + struct got_object_id **base_id, struct imsgbuf *ibuf) +{ + const struct got_error *err = NULL; + struct imsg imsg; + struct got_imsg_raw_delta *delta; + size_t datalen; + + *base_size = 0; + *result_size = 0; + *delta_size = 0; + *delta_offset = 0; + *delta_out_offset = 0; + *base_id = NULL; + + err = got_privsep_recv_imsg(&imsg, ibuf, 0); + if (err) + return err; + + datalen = imsg.hdr.len - IMSG_HEADER_SIZE; + + switch (imsg.hdr.type) { + case GOT_IMSG_RAW_DELTA: + if (datalen != sizeof(*delta)) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + break; + } + delta = imsg.data; + *base_size = delta->base_size; + *result_size = delta->result_size; + *delta_size = delta->delta_size; + *delta_offset = delta->delta_offset; + *delta_out_offset = delta->delta_out_offset; + *base_id = calloc(1, sizeof(**base_id)); + if (*base_id == NULL) { + err = got_error_from_errno("malloc"); + break; + } + memcpy((*base_id)->sha1, delta->base_id, SHA1_DIGEST_LENGTH); + break; + default: + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + + imsg_free(&imsg); + + if (err) { + free(*base_id); + *base_id = NULL; + } + return err; +} + +const struct got_error * got_privsep_unveil_exec_helpers(void) { const char *helpers[] = { blob - 40f5562db7ad3596203bf083a5084f34f1eb1b05 blob + 255d035fd6d1b3bd7f0720a14a8463799f261c61 --- lib/repository.c +++ lib/repository.c @@ -1020,8 +1020,8 @@ get_packidx_bloom_filter(struct got_repository *repo, &repo->packidx_bloom_filters, &key); } -static int -check_packidx_bloom_filter(struct got_repository *repo, +int +got_repo_check_packidx_bloom_filter(struct got_repository *repo, const char *path_packidx, struct got_object_id *id) { struct got_packidx_bloom_filter *bf; @@ -1103,7 +1103,7 @@ got_repo_search_packidx(struct got_packidx **packidx, for (i = 0; i < repo->pack_cache_size; i++) { if 
(repo->packidx_cache[i] == NULL) break; - if (!check_packidx_bloom_filter(repo, + if (!got_repo_check_packidx_bloom_filter(repo, repo->packidx_cache[i]->path_packidx, id)) continue; /* object will not be found in this index */ *idx = got_packidx_get_object_idx(repo->packidx_cache[i], id); @@ -1154,7 +1154,8 @@ got_repo_search_packidx(struct got_packidx **packidx, goto done; } - if (!check_packidx_bloom_filter(repo, path_packidx, id)) { + if (!got_repo_check_packidx_bloom_filter(repo, + path_packidx, id)) { free(path_packidx); continue; /* object will not be found in this index */ } @@ -1205,6 +1206,92 @@ done: return err; } +const struct got_error * +got_repo_list_packidx(struct got_pathlist_head *packidx_paths, + struct got_repository *repo) +{ + const struct got_error *err = NULL; + DIR *packdir = NULL; + struct dirent *dent; + char *path_packidx = NULL; + int packdir_fd; + + packdir_fd = openat(got_repo_get_fd(repo), + GOT_OBJECTS_PACK_DIR, O_DIRECTORY | O_CLOEXEC); + if (packdir_fd == -1) { + return got_error_from_errno_fmt("openat: %s/%s", + got_repo_get_path_git_dir(repo), + GOT_OBJECTS_PACK_DIR); + } + + packdir = fdopendir(packdir_fd); + if (packdir == NULL) { + err = got_error_from_errno("fdopendir"); + goto done; + } + + while ((dent = readdir(packdir)) != NULL) { + if (!got_repo_is_packidx_filename(dent->d_name, dent->d_namlen)) + continue; + + if (asprintf(&path_packidx, "%s/%s", GOT_OBJECTS_PACK_DIR, + dent->d_name) == -1) { + err = got_error_from_errno("asprintf"); + path_packidx = NULL; + break; + } + + err = got_pathlist_append(packidx_paths, path_packidx, NULL); + if (err) + break; + } +done: + if (err) + free(path_packidx); + if (packdir && closedir(packdir) != 0 && err == NULL) + err = got_error_from_errno("closedir"); + return err; +} + +const struct got_error * +got_repo_get_packidx(struct got_packidx **packidx, const char *path_packidx, + struct got_repository *repo) +{ + const struct got_error *err; + size_t i; + + *packidx = NULL; + + /* 
Search pack index cache. */ + for (i = 0; i < repo->pack_cache_size; i++) { + if (repo->packidx_cache[i] == NULL) + break; + if (strcmp(repo->packidx_cache[i]->path_packidx, + path_packidx) == 0) { + *packidx = repo->packidx_cache[i]; + return NULL; + } + } + /* No luck. Search the filesystem. */ + + err = got_packidx_open(packidx, got_repo_get_fd(repo), + path_packidx, 0); + if (err) + return err; + + err = add_packidx_bloom_filter(repo, *packidx, path_packidx); + if (err) + goto done; + + err = cache_packidx(repo, *packidx, path_packidx); +done: + if (err) { + got_packidx_close(*packidx); + *packidx = NULL; + } + return err; +} + static const struct got_error * read_packfile_hdr(int fd, struct got_packidx *packidx) { blob - f9be16f797d60019bfb883aac0e9b41c7beb846a blob + 422da924a77fc3864bcdfbc5e266f568fab8cef3 --- lib/repository_admin.c +++ lib/repository_admin.c @@ -140,7 +140,8 @@ const struct got_error * got_repo_pack_objects(FILE **packfile, struct got_object_id **pack_hash, struct got_reflist_head *include_refs, struct got_reflist_head *exclude_refs, struct got_repository *repo, - int loose_obj_only, got_pack_progress_cb progress_cb, void *progress_arg, + int loose_obj_only, + got_pack_progress_cb progress_cb, void *progress_arg, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; blob - 1ea0d617c52732faf35724fe601e28a24b03992f blob + a14d051c87c9e8f5b75dd5f4bcf67486dfde5f4c --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -289,11 +289,10 @@ done: } static const struct got_error * -receive_tempfile(FILE **basefile, FILE **accumfile, struct imsg *imsg, +receive_tempfile(FILE **f, const char *mode, struct imsg *imsg, struct imsgbuf *ibuf) { size_t datalen; - FILE **f; datalen = imsg->hdr.len - IMSG_HEADER_SIZE; if (datalen != 0) @@ -302,14 +301,7 @@ receive_tempfile(FILE **basefile, FILE **accumfile, st if (imsg->fd == -1) return got_error(GOT_ERR_PRIVSEP_NO_FD); - if (*basefile == NULL) - f = 
basefile; - else if (*accumfile == NULL) - f = accumfile; - else - return got_error(GOT_ERR_PRIVSEP_MSG); - - *f = fdopen(imsg->fd, "w+"); + *f = fdopen(imsg->fd, mode); if (*f == NULL) return got_error_from_errno("fdopen"); imsg->fd = -1; @@ -853,10 +845,81 @@ done: return err; } + +static const struct got_error * +get_base_object_id(struct got_object_id *base_id, struct got_packidx *packidx, + off_t base_offset) +{ + const struct got_error *err; + int idx; + err = got_packidx_get_offset_idx(&idx, packidx, base_offset); + if (err) + return err; + if (idx == -1) + return got_error(GOT_ERR_BAD_PACKIDX); + return got_packidx_get_object_id(base_id, packidx, idx); +} static const struct got_error * +raw_delta_request(struct imsg *imsg, struct imsgbuf *ibuf, + FILE *delta_outfile, struct got_pack *pack, + struct got_packidx *packidx) +{ + const struct got_error *err = NULL; + struct got_imsg_raw_delta_request req; + size_t datalen, delta_size; + off_t delta_offset; + uint8_t *delta_buf = NULL; + struct got_object_id id, base_id; + off_t base_offset, delta_out_offset = 0; + uint64_t base_size = 0, result_size = 0; + size_t w; + + datalen = imsg->hdr.len - IMSG_HEADER_SIZE; + if (datalen != sizeof(req)) + return got_error(GOT_ERR_PRIVSEP_LEN); + memcpy(&req, imsg->data, sizeof(req)); + memcpy(id.sha1, req.id, SHA1_DIGEST_LENGTH); + + imsg->fd = -1; + + err = got_packfile_extract_raw_delta(&delta_buf, &delta_size, + &delta_offset, &base_offset, &base_id, &base_size, &result_size, + pack, packidx, req.idx); + if (err) + goto done; + + /* + * If this is an offset delta we must determine the base + * object ID ourselves. 
+ */ + if (base_offset != 0) { + err = get_base_object_id(&base_id, packidx, base_offset); + if (err) + goto done; + } + + delta_out_offset = ftello(delta_outfile); + w = fwrite(delta_buf, 1, delta_size, delta_outfile); + if (w != delta_size) { + err = got_ferror(delta_outfile, GOT_ERR_IO); + goto done; + } + if (fflush(delta_outfile) == -1) { + err = got_error_from_errno("fflush"); + goto done; + } + + err = got_privsep_send_raw_delta(ibuf, base_size, result_size, + delta_size, delta_offset, delta_out_offset, &base_id); +done: + free(delta_buf); + return err; +} + +static const struct got_error * receive_packidx(struct got_packidx **packidx, struct imsgbuf *ibuf) { const struct got_error *err = NULL; @@ -1009,7 +1072,7 @@ main(int argc, char *argv[]) struct got_packidx *packidx = NULL; struct got_pack *pack = NULL; struct got_object_cache objcache; - FILE *basefile = NULL, *accumfile = NULL; + FILE *basefile = NULL, *accumfile = NULL, *delta_outfile = NULL; //static int attached; //while (!attached) sleep(1); @@ -1066,8 +1129,14 @@ main(int argc, char *argv[]) switch (imsg.hdr.type) { case GOT_IMSG_TMPFD: - err = receive_tempfile(&basefile, &accumfile, - &imsg, &ibuf); + if (basefile == NULL) { + err = receive_tempfile(&basefile, "w+", + &imsg, &ibuf); + } else if (accumfile == NULL) { + err = receive_tempfile(&accumfile, "w+", + &imsg, &ibuf); + } else + err = got_error(GOT_ERR_PRIVSEP_MSG); break; case GOT_IMSG_PACKED_OBJECT_REQUEST: err = object_request(&imsg, &ibuf, pack, packidx, @@ -1081,6 +1150,22 @@ main(int argc, char *argv[]) err = raw_object_request(&imsg, &ibuf, pack, packidx, &objcache, basefile, accumfile); break; + case GOT_IMSG_RAW_DELTA_OUTFD: + if (delta_outfile != NULL) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + break; + } + err = receive_tempfile(&delta_outfile, "w", + &imsg, &ibuf); + break; + case GOT_IMSG_RAW_DELTA_REQUEST: + if (delta_outfile == NULL) { + err = got_error(GOT_ERR_PRIVSEP_NO_FD); + break; + } + err = raw_delta_request(&imsg, 
&ibuf, delta_outfile, + pack, packidx); + break; case GOT_IMSG_COMMIT_REQUEST: err = commit_request(&imsg, &ibuf, pack, packidx, &objcache); @@ -1127,6 +1212,8 @@ main(int argc, char *argv[]) err = got_error_from_errno("fclose"); if (accumfile && fclose(accumfile) == EOF && err == NULL) err = got_error_from_errno("fclose"); + if (delta_outfile && fclose(delta_outfile) == EOF && err == NULL) + err = got_error_from_errno("fclose"); if (err) { if (!sigint_received && err->code != GOT_ERR_PRIVSEP_PIPE) { fprintf(stderr, "%s: %s\n", getprogname(), err->msg);