commit fae7e03842e8618973f4d4910a86a52d881ab2ab from: Stefan Sperling date: Sat May 07 11:50:56 2022 UTC run the search for deltas to reuse in got-read-pack This significantly speeds up the deltification step of packing by avoiding imsg traffic. gotadmin no longer requests individual raw deltas from got-read-pack to check whether it can reuse them. Instead, got-read-pack obtains a list of objects we want to pack, and hands back the list of all deltas in its pack file which can be reused. Messages are now batched such that imsg buffers are filled as much as possible. Another advantage is that deltas we are not going to reuse will no longer be written to the delta cache file, saving disk space. Before this patch, any raw delta candidate was written to the delta cache file by got-read-pack, and the decision whether to reuse the delta happened afterwards in the gotadmin process. Code for reading individual raw deltas is now unused and could be removed at some point. ok op@ commit - 2f8438b006e9015401b93f55cea57b36b021ce56 commit + fae7e03842e8618973f4d4910a86a52d881ab2ab blob - 4bbe44dda07c97ba6f0ef878da0caadbe6de9741 blob + 709eec07d085ee013da4bb4ebb657e8f97fa16b4 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -106,6 +106,8 @@ const struct got_error *got_object_open_from_packfile( const struct got_error *got_object_read_raw_delta(uint64_t *, uint64_t *, off_t *, off_t *, off_t *, off_t *, struct got_object_id **, int, struct got_packidx *, int, struct got_object_id *, struct got_repository *); +const struct got_error *got_object_prepare_delta_reuse(struct got_pack **, + struct got_packidx *, int, struct got_repository *); const struct got_error *got_object_read_header_privsep(struct got_object **, struct got_object_id *, struct got_repository *, int); const struct got_error *got_object_open(struct got_object **, blob - e57f4dd3f8f4d207324b69c89c54442ae78cd5bb blob + b310a5120a14c05adbe049177d4af040d61f1ab9 --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ 
-183,6 +183,15 @@ enum got_imsg_type { GOT_IMSG_RAW_DELTA_REQUEST, GOT_IMSG_RAW_DELTA, + /* Re-use deltas found in a pack file. */ + GOT_IMSG_DELTA_REUSE_REQUEST, + GOT_IMSG_REUSED_DELTAS, + GOT_IMSG_DELTA_REUSE_DONE, + + /* Transfer a list of object IDs. */ + GOT_IMSG_OBJ_ID_LIST, + GOT_IMSG_OBJ_ID_LIST_DONE, + /* Messages related to patch files. */ GOT_IMSG_PATCH_FILE, GOT_IMSG_PATCH_HUNK, @@ -281,6 +290,23 @@ struct got_imsg_raw_obj { /* Structure for GOT_IMSG_RAW_DELTA. */ struct got_imsg_raw_delta { uint8_t base_id[SHA1_DIGEST_LENGTH]; + uint64_t base_size; + uint64_t result_size; + off_t delta_size; + off_t delta_compressed_size; + off_t delta_offset; + off_t delta_out_offset; + + /* + * Delta data has been written at delta_out_offset to the file + * descriptor passed via the GOT_IMSG_RAW_DELTA_OUTFD imsg. + */ +}; + +/* Structures for GOT_IMSG_REUSED_DELTAS. */ +struct got_imsg_reused_delta { + struct got_object_id id; + struct got_object_id base_id; uint64_t base_size; uint64_t result_size; off_t delta_size; @@ -293,7 +319,19 @@ struct got_imsg_raw_delta { * descriptor passed via the GOT_IMSG_RAW_DELTA_OUTFD imsg. */ }; +struct got_imsg_reused_deltas { + size_t ndeltas; + /* + * Followed by ndeltas * struct got_imsg_reused_delta. + */ + +#define GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS \ + ((MAX_IMSGSIZE - IMSG_HEADER_SIZE - \ + sizeof(struct got_imsg_reused_deltas)) \ + / sizeof(struct got_imsg_reused_delta)) +}; + /* Structure for GOT_IMSG_TAG data. */ struct got_imsg_tag_object { uint8_t id[SHA1_DIGEST_LENGTH]; @@ -478,6 +516,24 @@ struct got_imsg_delta { struct got_imsg_raw_delta_request { uint8_t id[SHA1_DIGEST_LENGTH]; int idx; +}; + +/* + * Structure for GOT_IMSG_OBJ_ID_LIST data. + * Multiple such messages may be sent back-to-back, where each message + * contains a chunk of IDs. The entire list must be terminated with a + * GOT_IMSG_OBJ_ID_LIST_DONE message. 
+ */ +struct got_imsg_object_idlist { + size_t nids; + + /* + * Followed by nids * struct got_object_id. + */ + +#define GOT_IMSG_OBJ_ID_LIST_MAX_NIDS \ + ((MAX_IMSGSIZE - IMSG_HEADER_SIZE - \ + sizeof(struct got_imsg_object_idlist)) / sizeof(struct got_object_id)) }; /* Structure for GOT_IMSG_COMMIT_TRAVERSAL_REQUEST */ @@ -668,4 +724,17 @@ const struct got_error *got_privsep_recv_raw_delta(uin off_t *, off_t *, off_t *, off_t *, struct got_object_id **, struct imsgbuf *); +const struct got_error *got_privsep_send_object_idlist(struct imsgbuf *, + struct got_object_id **, size_t); +const struct got_error *got_privsep_send_object_idlist_done(struct imsgbuf *); +const struct got_error *got_privsep_recv_object_idlist(int *, + struct got_object_id **, size_t *, struct imsgbuf *); + +const struct got_error *got_privsep_send_delta_reuse_req(struct imsgbuf *); +const struct got_error *got_privsep_send_reused_deltas(struct imsgbuf *, + struct got_imsg_reused_delta *, size_t); +const struct got_error *got_privsep_send_reused_deltas_done(struct imsgbuf *); +const struct got_error *got_privsep_recv_reused_deltas(int *, + struct got_imsg_reused_delta *, size_t *, struct imsgbuf *); + void got_privsep_exec_child(int[2], const char *, const char *); blob - 4e5facc7f5e3c665aa540bb9caf3299f68626c2d blob + 2d612890612d7d8a8e30549c38659cd083a2e41e --- lib/object.c +++ lib/object.c @@ -443,7 +443,56 @@ got_object_read_raw_delta(uint64_t *base_size, uint64_ delta_compressed_size, delta_offset, delta_out_offset, base_id, pack->privsep_child->ibuf); } + +/* + * XXX This function does not really belong in object.c. It is only here + * because it needs start_pack_privsep_child(); relevant code should + * probably be moved to pack.c/pack_create.c. 
+ */ +const struct got_error * +got_object_prepare_delta_reuse(struct got_pack **pack, + struct got_packidx *packidx, int delta_outfd, struct got_repository *repo) +{ + const struct got_error *err = NULL; + char *path_packfile = NULL; + + err = got_packidx_get_packfile_path(&path_packfile, + packidx->path_packidx); + if (err) + return err; + *pack = got_repo_get_cached_pack(repo, path_packfile); + if (*pack == NULL) { + err = got_repo_cache_pack(pack, repo, path_packfile, packidx); + if (err) + goto done; + } + if ((*pack)->privsep_child == NULL) { + err = start_pack_privsep_child(*pack, packidx); + if (err) + goto done; + } + + if (!(*pack)->child_has_delta_outfd) { + int outfd_child; + outfd_child = dup(delta_outfd); + if (outfd_child == -1) { + err = got_error_from_errno("dup"); + goto done; + } + err = got_privsep_send_raw_delta_outfd( + (*pack)->privsep_child->ibuf, outfd_child); + if (err) + goto done; + (*pack)->child_has_delta_outfd = 1; + } + + err = got_privsep_send_delta_reuse_req((*pack)->privsep_child->ibuf); +done: + free(path_packfile); + return err; +} + static const struct got_error * request_object(struct got_object **obj, struct got_object_id *id, struct got_repository *repo, int fd) blob - 696da4ba5a2fbbd6c9d585af41c372b9e5769666 blob + c35a034dafda33eb7652d7a5aa5627ebf4996d1c --- lib/pack_create.c +++ lib/pack_create.c @@ -507,50 +507,6 @@ add_meta(struct got_pack_meta *m, struct got_pack_meta } static const struct got_error * -reuse_delta(int idx, struct got_pack_meta *m, struct got_pack_metavec *v, - struct got_object_idset *idset, struct got_pack *pack, - struct got_packidx *packidx, int delta_cache_fd, - struct got_repository *repo) -{ - const struct got_error *err = NULL; - struct got_pack_meta *base = NULL; - struct got_object_id *base_obj_id = NULL; - off_t delta_len = 0, delta_compressed_len = 0; - off_t delta_offset = 0, delta_cache_offset = 0; - uint64_t base_size, result_size; - - if (m->have_reused_delta) - return NULL; - - err = 
got_object_read_raw_delta(&base_size, &result_size, &delta_len, - &delta_compressed_len, &delta_offset, &delta_cache_offset, - &base_obj_id, delta_cache_fd, packidx, idx, &m->id, repo); - if (err) - return err; - - if (delta_offset + delta_len < delta_offset) - return got_error(GOT_ERR_BAD_PACKFILE); - - base = got_object_idset_get(idset, base_obj_id); - if (base == NULL) - goto done; - - m->delta_len = delta_len; - m->delta_compressed_len = delta_compressed_len; - m->delta_offset = delta_cache_offset; - m->prev = base; - m->size = result_size; - m->have_reused_delta = 1; - m->reused_delta_offset = delta_offset; - m->base_obj_id = base_obj_id; - base_obj_id = NULL; - err = add_meta(m, v); -done: - free(base_obj_id); - return err; -} - -static const struct got_error * find_pack_for_reuse(struct got_packidx **best_packidx, struct got_repository *repo) { @@ -585,72 +541,61 @@ find_pack_for_reuse(struct got_packidx **best_packidx, return err; } -struct search_deltas_arg { - struct got_packidx *packidx; - struct got_pack *pack; - struct got_object_idset *idset; - struct got_pack_metavec *v; - int delta_cache_fd; - struct got_repository *repo; - got_pack_progress_cb progress_cb; - void *progress_arg; - struct got_ratelimit *rl; - got_cancel_cb cancel_cb; - void *cancel_arg; - int ncolored; - int nfound; - int ntrees; - int ncommits; +struct send_id_arg { + struct imsgbuf *ibuf; + struct got_object_id *ids[GOT_IMSG_OBJ_ID_LIST_MAX_NIDS]; + size_t nids; }; static const struct got_error * -search_delta_for_object(struct got_object_id *id, void *data, void *arg) +send_id(struct got_object_id *id, void *data, void *arg) { - const struct got_error *err; - struct got_pack_meta *m = data; - struct search_deltas_arg *a = arg; - int obj_idx; - struct got_object *obj = NULL; + const struct got_error *err = NULL; + struct send_id_arg *a = arg; - if (a->cancel_cb) { - err = (*a->cancel_cb)(a->cancel_arg); + a->ids[a->nids++] = id; + + if (a->nids >= GOT_IMSG_OBJ_ID_LIST_MAX_NIDS) { + 
err = got_privsep_send_object_idlist(a->ibuf, a->ids, a->nids); if (err) return err; + a->nids = 0; } - if (!got_repo_check_packidx_bloom_filter(a->repo, - a->packidx->path_packidx, id)) - return NULL; + return NULL; +} - obj_idx = got_packidx_get_object_idx(a->packidx, id); - if (obj_idx == -1) - return NULL; +static const struct got_error * +recv_reused_delta(struct got_imsg_reused_delta *delta, + struct got_object_idset *idset, struct got_pack_metavec *v) +{ + struct got_pack_meta *m, *base; - /* TODO: - * Opening and closing an object just to check its flags - * is a bit expensive. We could have an imsg which requests - * plain type/size information for an object without doing - * work such as traversing the object's entire delta chain - * to find the base object type, and other such info which - * we don't really need here. - */ - err = got_object_open_from_packfile(&obj, &m->id, a->pack, - a->packidx, obj_idx, a->repo); - if (err) - return err; + if (delta->delta_offset + delta->delta_size < delta->delta_offset || + delta->delta_offset + + delta->delta_compressed_size < delta->delta_offset) + return got_error(GOT_ERR_BAD_PACKFILE); - if (obj->flags & GOT_OBJ_FLAG_DELTIFIED) { - reuse_delta(obj_idx, m, a->v, a->idset, a->pack, a->packidx, - a->delta_cache_fd, a->repo); - if (err) - goto done; - err = report_progress(a->progress_cb, a->progress_arg, a->rl, - a->ncolored, a->nfound, a->ntrees, 0L, a->ncommits, - got_object_idset_num_elements(a->idset), a->v->nmeta, 0); - } -done: - got_object_close(obj); - return err; + m = got_object_idset_get(idset, &delta->id); + if (m == NULL) + return got_error(GOT_ERR_NO_OBJ); + + base = got_object_idset_get(idset, &delta->base_id); + if (base == NULL) + return got_error(GOT_ERR_NO_OBJ); + + m->delta_len = delta->delta_size; + m->delta_compressed_len = delta->delta_compressed_size; + m->delta_offset = delta->delta_out_offset; + m->prev = base; + m->size = delta->result_size; + m->have_reused_delta = 1; + 
m->reused_delta_offset = delta->delta_offset; + m->base_obj_id = got_object_id_dup(&delta->base_id); + if (m->base_obj_id == NULL) + return got_error_from_errno("got_object_id_dup"); + + return add_meta(m, v); } static const struct got_error * @@ -661,10 +606,11 @@ search_deltas(struct got_pack_metavec *v, struct got_o struct got_ratelimit *rl, got_cancel_cb cancel_cb, void *cancel_arg) { const struct got_error *err = NULL; - char *path_packfile = NULL; struct got_packidx *packidx; struct got_pack *pack; - struct search_deltas_arg sda; + struct send_id_arg sia; + struct got_imsg_reused_delta deltas[GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS]; + size_t ndeltas, i; err = find_pack_for_reuse(&packidx, repo); if (err) @@ -673,36 +619,54 @@ search_deltas(struct got_pack_metavec *v, struct got_o if (packidx == NULL) return NULL; - err = got_packidx_get_packfile_path(&path_packfile, - packidx->path_packidx); + err = got_object_prepare_delta_reuse(&pack, packidx, + delta_cache_fd, repo); if (err) return err; - pack = got_repo_get_cached_pack(repo, path_packfile); - if (pack == NULL) { - err = got_repo_cache_pack(&pack, repo, path_packfile, packidx); - if (err) - goto done; + memset(&sia, 0, sizeof(sia)); + sia.ibuf = pack->privsep_child->ibuf; + err = got_object_idset_for_each(idset, send_id, &sia); + if (err) + return err; + if (sia.nids > 0) { + err = got_privsep_send_object_idlist(pack->privsep_child->ibuf, + sia.ids, sia.nids); + if (err) + return err; } + err = got_privsep_send_object_idlist_done(pack->privsep_child->ibuf); + if (err) + return err; - sda.packidx = packidx; - sda.pack = pack; - sda.idset = idset; - sda.v = v; - sda.delta_cache_fd = delta_cache_fd; - sda.repo = repo; - sda.progress_cb = progress_cb; - sda.progress_arg = progress_arg; - sda.rl = rl; - sda.cancel_cb = cancel_cb; - sda.cancel_arg = cancel_arg; - sda.ncolored = ncolored; - sda.nfound = nfound; - sda.ntrees = ntrees; - sda.ncommits = ncommits; - err = got_object_idset_for_each(idset, 
search_delta_for_object, &sda);
+	for (;;) {
+		int done = 0;
+
+		if (cancel_cb) {
+			err = (*cancel_cb)(cancel_arg);
+			if (err)
+				break;
+		}
+
+		err = got_privsep_recv_reused_deltas(&done, deltas, &ndeltas,
+		    pack->privsep_child->ibuf);
+		if (err || done)
+			break;
+
+		for (i = 0; i < ndeltas; i++) {
+			struct got_imsg_reused_delta *delta = &deltas[i];
+			err = recv_reused_delta(delta, idset, v);
+			if (err)
+				goto done;
+		}
+
+		err = report_progress(progress_cb, progress_arg, rl,
+		    ncolored, nfound, ntrees, 0L, ncommits,
+		    got_object_idset_num_elements(idset), v->nmeta, 0);
+		if (err)
+			break;
+	}
 done:
-	free(path_packfile);
 	return err;
 }
blob - 037c96037a94e944e0990944101d87c2ae0c88bc
blob + b8e60e5a73e4d935fbc621706294df4e664a14d3
--- lib/privsep.c
+++ lib/privsep.c
@@ -2829,7 +2829,223 @@ got_privsep_recv_raw_delta(uint64_t *base_size, uint64
 	if (err) {
 		free(*base_id);
 		*base_id = NULL;
+	}
+	return err;
+}
+
+/*
+ * Send one chunk of an object ID list as a single GOT_IMSG_OBJ_ID_LIST
+ * message and flush it.
+ *
+ * ids points to nids object ID pointers. At most
+ * GOT_IMSG_OBJ_ID_LIST_MAX_NIDS IDs fit into one message; a larger
+ * count is rejected with GOT_ERR_NO_SPACE before anything is sent.
+ */
+const struct got_error *
+got_privsep_send_object_idlist(struct imsgbuf *ibuf,
+    struct got_object_id **ids, size_t nids)
+{
+	const struct got_error *err = NULL;
+	struct got_imsg_object_idlist idlist;
+	struct ibuf *wbuf;
+	size_t i;
+
+	if (nids > GOT_IMSG_OBJ_ID_LIST_MAX_NIDS)
+		return got_error(GOT_ERR_NO_SPACE);
+
+	wbuf = imsg_create(ibuf, GOT_IMSG_OBJ_ID_LIST, 0, 0,
+	    sizeof(idlist) + nids * sizeof(**ids));
+	if (wbuf == NULL) {
+		err = got_error_from_errno("imsg_create OBJ_ID_LIST");
+		return err;
+	}
+
+	/*
+	 * imsg_add() frees wbuf itself when it fails, so the error
+	 * paths below must not call ibuf_free() again (double free).
+	 */
+	idlist.nids = nids;
+	if (imsg_add(wbuf, &idlist, sizeof(idlist)) == -1)
+		return got_error_from_errno("imsg_add OBJ_ID_LIST");
+
+	for (i = 0; i < nids; i++) {
+		struct got_object_id *id = ids[i];
+		if (imsg_add(wbuf, id, sizeof(*id)) == -1)
+			return got_error_from_errno("imsg_add OBJ_ID_LIST");
+	}
+
+	wbuf->fd = -1;
+	imsg_close(ibuf, wbuf);
+
+	return flush_imsg(ibuf);
+}
+
+const struct got_error *
+got_privsep_send_object_idlist_done(struct imsgbuf *ibuf)
+{
+	if
(imsg_compose(ibuf, GOT_IMSG_OBJ_ID_LIST_DONE, 0, 0, -1, NULL, 0)
+	    == -1)
+		return got_error_from_errno("imsg_compose OBJ_ID_LIST_DONE");
+
+	return flush_imsg(ibuf);
+}
+
+/*
+ * Receive one message of an object ID list transfer.
+ *
+ * GOT_IMSG_OBJ_ID_LIST: *ids is set to a calloc()ed array holding the
+ * *nids object IDs copied out of the message. This function never frees
+ * the array; it is handed to the caller.
+ * GOT_IMSG_OBJ_ID_LIST_DONE: *done is set to 1; *ids stays NULL and
+ * *nids stays 0.
+ *
+ * Any other message type yields GOT_ERR_PRIVSEP_MSG. A message whose
+ * payload is shorter than its header struct, or shorter than the number
+ * of IDs it claims to carry, yields GOT_ERR_PRIVSEP_LEN.
+ */
+const struct got_error *
+got_privsep_recv_object_idlist(int *done, struct got_object_id **ids,
+    size_t *nids, struct imsgbuf *ibuf)
+{
+	const struct got_error *err = NULL;
+	struct imsg imsg;
+	struct got_imsg_object_idlist *idlist;
+	size_t datalen;
+
+	*ids = NULL;
+	*done = 0;
+	*nids = 0;
+
+	err = got_privsep_recv_imsg(&imsg, ibuf, 0);
+	if (err)
+		return err;
+
+	datalen = imsg.hdr.len - IMSG_HEADER_SIZE;
+	switch (imsg.hdr.type) {
+	case GOT_IMSG_OBJ_ID_LIST:
+		if (datalen < sizeof(*idlist)) {
+			err = got_error(GOT_ERR_PRIVSEP_LEN);
+			break;
+		}
+		idlist = imsg.data;
+		/*
+		 * Reject a count which exceeds the per-message limit or
+		 * claims more IDs than the payload carries; otherwise the
+		 * memcpy() below would read past the end of the message.
+		 * The multiplication cannot overflow once the first test
+		 * has bounded nids.
+		 */
+		if (idlist->nids > GOT_IMSG_OBJ_ID_LIST_MAX_NIDS ||
+		    idlist->nids * sizeof(**ids) >
+		    datalen - sizeof(*idlist)) {
+			err = got_error(GOT_ERR_PRIVSEP_LEN);
+			break;
+		}
+		*nids = idlist->nids;
+		*ids = calloc(*nids, sizeof(**ids));
+		if (*ids == NULL) {
+			err = got_error_from_errno("calloc");
+			*nids = 0;
+			break;
+		}
+		/*
+		 * The IDs follow the header struct. Use the size of the
+		 * struct, not the size of the pointer to it.
+		 */
+		memcpy(*ids, (uint8_t *)imsg.data + sizeof(*idlist),
+		    *nids * sizeof(**ids));
+		break;
+	case GOT_IMSG_OBJ_ID_LIST_DONE:
+		*done = 1;
+		break;
+	default:
+		err = got_error(GOT_ERR_PRIVSEP_MSG);
+		break;
+	}
+
+	imsg_free(&imsg);
+
+	return err;
+}
+
+const struct got_error *
+got_privsep_send_delta_reuse_req(struct imsgbuf *ibuf)
+{
+	if (imsg_compose(ibuf, GOT_IMSG_DELTA_REUSE_REQUEST, 0, 0, -1, NULL, 0)
+	    == -1)
+		return got_error_from_errno("imsg_compose DELTA_REUSE_REQUEST");
+
+	return flush_imsg(ibuf);
+}
+
+const struct got_error *
+got_privsep_send_reused_deltas(struct imsgbuf *ibuf,
+    struct got_imsg_reused_delta *deltas, size_t ndeltas)
+{
+	const struct got_error *err = NULL;
+	struct ibuf *wbuf;
+	struct got_imsg_reused_deltas ideltas;
+	size_t i;
+
+	if (ndeltas > GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS)
+		return got_error(GOT_ERR_NO_SPACE);
+
+	wbuf = imsg_create(ibuf, GOT_IMSG_REUSED_DELTAS, 0, 0,
+	    sizeof(ideltas) + ndeltas * sizeof(*deltas));
+	if (wbuf == NULL) {
+
err = got_error_from_errno("imsg_create REUSED_DELTAS");
+		return err;
 	}
+
+	/*
+	 * imsg_add() frees wbuf itself when it fails, so the error
+	 * paths below must not call ibuf_free() again (double free).
+	 */
+	ideltas.ndeltas = ndeltas;
+	if (imsg_add(wbuf, &ideltas, sizeof(ideltas)) == -1)
+		return got_error_from_errno("imsg_add REUSED_DELTAS");
+
+	for (i = 0; i < ndeltas; i++) {
+		struct got_imsg_reused_delta *delta = &deltas[i];
+		if (imsg_add(wbuf, delta, sizeof(*delta)) == -1)
+			return got_error_from_errno("imsg_add REUSED_DELTAS");
+	}
+
+	wbuf->fd = -1;
+	imsg_close(ibuf, wbuf);
+
+	return flush_imsg(ibuf);
+}
+
+const struct got_error *
+got_privsep_send_reused_deltas_done(struct imsgbuf *ibuf)
+{
+	if (imsg_compose(ibuf, GOT_IMSG_DELTA_REUSE_DONE, 0, 0, -1, NULL, 0)
+	    == -1)
+		return got_error_from_errno("imsg_compose DELTA_REUSE_DONE");
+
+	return flush_imsg(ibuf);
+}
+
+/*
+ * Receive one message of a reused-deltas transfer.
+ *
+ * GOT_IMSG_REUSED_DELTAS: the delta records carried by the message are
+ * copied into deltas and their count is stored in *ndeltas. The deltas
+ * array is supplied by the caller and must have room for
+ * GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS entries.
+ * GOT_IMSG_DELTA_REUSE_DONE: *done is set to 1 and *ndeltas stays 0.
+ *
+ * Any other message type yields GOT_ERR_PRIVSEP_MSG. A message whose
+ * payload is shorter than its header struct, or shorter than the number
+ * of deltas it claims to carry, yields GOT_ERR_PRIVSEP_LEN.
+ */
+const struct got_error *
+got_privsep_recv_reused_deltas(int *done, struct got_imsg_reused_delta *deltas,
+    size_t *ndeltas, struct imsgbuf *ibuf)
+{
+	const struct got_error *err = NULL;
+	struct imsg imsg;
+	struct got_imsg_reused_deltas *ideltas;
+	size_t datalen;
+
+	*done = 0;
+	*ndeltas = 0;
+
+	err = got_privsep_recv_imsg(&imsg, ibuf, 0);
+	if (err)
+		return err;
+
+	datalen = imsg.hdr.len - IMSG_HEADER_SIZE;
+	switch (imsg.hdr.type) {
+	case GOT_IMSG_REUSED_DELTAS:
+		if (datalen < sizeof(*ideltas)) {
+			err = got_error(GOT_ERR_PRIVSEP_LEN);
+			break;
+		}
+		ideltas = imsg.data;
+		/*
+		 * Bound the count by the limit for THIS message type.
+		 * The object ID list limit is larger, because an ID is
+		 * smaller than a delta record, and would let a bogus
+		 * count overflow the caller's deltas array. Also make
+		 * sure the payload really carries that many records so
+		 * the memcpy() below cannot read past the message.
+		 */
+		if (ideltas->ndeltas > GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS ||
+		    ideltas->ndeltas * sizeof(*deltas) >
+		    datalen - sizeof(*ideltas)) {
+			err = got_error(GOT_ERR_PRIVSEP_LEN);
+			break;
+		}
+		*ndeltas = ideltas->ndeltas;
+		/*
+		 * The records follow the header struct. Use the size of
+		 * the struct, not the size of the pointer to it.
+		 */
+		memcpy(deltas, (uint8_t *)imsg.data + sizeof(*ideltas),
+		    *ndeltas * sizeof(*deltas));
+		break;
+	case GOT_IMSG_DELTA_REUSE_DONE:
+		*done = 1;
+		break;
+	default:
+		err = got_error(GOT_ERR_PRIVSEP_MSG);
+		break;
+	}
+
+	imsg_free(&imsg);
+
 	return err;
 }
blob - 0b2b5ee521c3a402299aefd172c600c2e4f24c43
blob + 20a67c9701816c2474c4d9e546a0c6f4aeb8d0ae
--- libexec/got-read-pack/got-read-pack.c
+++
libexec/got-read-pack/got-read-pack.c @@ -40,6 +40,7 @@ #include "got_lib_object.h" #include "got_lib_object_cache.h" #include "got_lib_object_parse.h" +#include "got_lib_object_idset.h" #include "got_lib_privsep.h" #include "got_lib_pack.h" @@ -919,8 +920,160 @@ done: free(delta_buf); return err; } + +struct search_deltas_arg { + struct imsgbuf *ibuf; + struct got_packidx *packidx; + struct got_pack *pack; + struct got_object_idset *idset; + FILE *delta_outfile; + struct got_imsg_reused_delta deltas[GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS]; + size_t ndeltas; +}; static const struct got_error * +search_delta_for_object(struct got_object_id *id, void *data, void *arg) +{ + const struct got_error *err; + struct search_deltas_arg *a = arg; + int obj_idx; + uint8_t *delta_buf = NULL; + uint64_t base_size, result_size; + size_t delta_size, delta_compressed_size; + off_t delta_offset, base_offset; + struct got_object_id base_id; + + if (sigint_received) + return got_error(GOT_ERR_CANCELLED); + + obj_idx = got_packidx_get_object_idx(a->packidx, id); + if (obj_idx == -1) + return NULL; /* object not present in our pack file */ + + err = got_packfile_extract_raw_delta(&delta_buf, &delta_size, + &delta_compressed_size, &delta_offset, &base_offset, &base_id, + &base_size, &result_size, a->pack, a->packidx, obj_idx); + if (err) { + if (err->code == GOT_ERR_OBJ_TYPE) + return NULL; /* object not stored as a delta */ + return err; + } + + /* + * If this is an offset delta we must determine the base + * object ID ourselves. 
+	 */
+	if (base_offset != 0) {
+		err = get_base_object_id(&base_id, a->packidx, base_offset);
+		if (err)
+			goto done;
+	}
+
+	/* A delta is only reusable if its base will be packed as well. */
+	if (got_object_idset_contains(a->idset, &base_id)) {
+		struct got_imsg_reused_delta *delta;
+		off_t delta_out_offset = ftello(a->delta_outfile);
+		size_t w;
+
+		/*
+		 * ftello() reports failure as -1. Do not hand that back
+		 * as the position of the delta in the output file.
+		 */
+		if (delta_out_offset == -1) {
+			err = got_error_from_errno("ftello");
+			goto done;
+		}
+
+		w = fwrite(delta_buf, 1, delta_compressed_size,
+		    a->delta_outfile);
+		if (w != delta_compressed_size) {
+			err = got_ferror(a->delta_outfile, GOT_ERR_IO);
+			goto done;
+		}
+
+		delta = &a->deltas[a->ndeltas++];
+		memcpy(&delta->id, id, sizeof(delta->id));
+		memcpy(&delta->base_id, &base_id, sizeof(delta->base_id));
+		delta->base_size = base_size;
+		delta->result_size = result_size;
+		delta->delta_size = delta_size;
+		delta->delta_compressed_size = delta_compressed_size;
+		delta->delta_offset = delta_offset;
+		delta->delta_out_offset = delta_out_offset;
+
+		/* Send the batch once it fills an entire imsg. */
+		if (a->ndeltas >= GOT_IMSG_REUSED_DELTAS_MAX_NDELTAS) {
+			err = got_privsep_send_reused_deltas(a->ibuf,
+			    a->deltas, a->ndeltas);
+			if (err)
+				goto done;
+			a->ndeltas = 0;
+		}
+	}
+done:
+	free(delta_buf);
+	return err;
+}
+
+/*
+ * Read object ID list messages from ibuf until the list is terminated
+ * and add every received ID to idset. Each received chunk is freed
+ * after its IDs have been added.
+ */
+static const struct got_error *
+recv_object_ids(struct got_object_idset *idset, struct imsgbuf *ibuf)
+{
+	const struct got_error *err = NULL;
+	int done = 0;
+	struct got_object_id *ids;
+	size_t nids, i;
+
+	for (;;) {
+		err = got_privsep_recv_object_idlist(&done, &ids, &nids, ibuf);
+		if (err || done)
+			break;
+		for (i = 0; i < nids; i++) {
+			err = got_object_idset_add(idset, &ids[i], NULL);
+			if (err) {
+				free(ids);
+				return err;
+			}
+		}
+		free(ids);
+	}
+
+	return err;
+}
+
+/*
+ * Handle a delta reuse request: collect the object IDs sent over ibuf
+ * into a set, look each of them up in the pack file, and report back
+ * the deltas found. The imsg argument is not used by this function.
+ */
+static const struct got_error *
+delta_reuse_request(struct imsg *imsg, struct imsgbuf *ibuf,
+    FILE *delta_outfile, struct got_pack *pack, struct got_packidx *packidx)
+{
+	const struct got_error *err = NULL;
+	struct got_object_idset *idset;
+	struct search_deltas_arg sda;
+
+	idset = got_object_idset_alloc();
+	if (idset == NULL)
+		return got_error_from_errno("got_object_idset_alloc");
+
+	err = recv_object_ids(idset, ibuf);
+	if
(err)
+		goto done;	/* idset must be freed on this path too */
+
+	memset(&sda, 0, sizeof(sda));
+	sda.ibuf = ibuf;
+	sda.idset = idset;
+	sda.pack = pack;
+	sda.packidx = packidx;
+	sda.delta_outfile = delta_outfile;
+	err = got_object_idset_for_each(idset, search_delta_for_object, &sda);
+	if (err)
+		goto done;
+
+	if (sda.ndeltas > 0) {	/* send the final, partially filled batch */
+		err = got_privsep_send_reused_deltas(ibuf, sda.deltas,
+		    sda.ndeltas);
+		if (err)
+			goto done;
+	}
+
+	if (fflush(delta_outfile) != 0) {	/* fflush() fails with EOF */
+		err = got_error_from_errno("fflush");
+		goto done;
+	}
+
+	err = got_privsep_send_reused_deltas_done(ibuf);
+done:
+	got_object_idset_free(idset);
+	return err;
+}
+
+static const struct got_error *
 receive_packidx(struct got_packidx **packidx, struct imsgbuf *ibuf)
 {
 	const struct got_error *err = NULL;
@@ -1167,6 +1320,14 @@ main(int argc, char *argv[])
 			err = raw_delta_request(&imsg, &ibuf, delta_outfile,
 			    pack, packidx);
 			break;
+		case GOT_IMSG_DELTA_REUSE_REQUEST:
+			if (delta_outfile == NULL) {
+				err = got_error(GOT_ERR_PRIVSEP_NO_FD);
+				break;
+			}
+			err = delta_reuse_request(&imsg, &ibuf,
+			    delta_outfile, pack, packidx);
+			break;
 		case GOT_IMSG_COMMIT_REQUEST:
 			err = commit_request(&imsg, &ibuf, pack, packidx,
 			    &objcache);