commit 4788f1cebb9b14790dd4899b6f9007c721eec61e from: Stefan Sperling date: Wed Mar 18 16:13:46 2020 UTC extract large objects to a temporary file in got-index-pack commit - 892ac3b6b6161b845ed54e2ccb7d1eee44a6e410 commit + 4788f1cebb9b14790dd4899b6f9007c721eec61e blob - db72516d800d6c47c8ba69883202bc5b761f6ecc blob + 325ffecbca4bd6e77ea0ea745d8bbd0fc8490b65 --- lib/fetch.c +++ lib/fetch.c @@ -346,6 +346,7 @@ got_fetch_pack(struct got_object_id **pack_hash, struc { int imsg_fetchfds[2], imsg_idxfds[2]; int packfd = -1, npackfd = -1, idxfd = -1, nidxfd = -1, nfetchfd = -1; + int tmpfd = -1; int fetchstatus, idxstatus, done = 0; const struct got_error *err; struct imsgbuf fetchibuf, idxibuf; @@ -391,6 +392,12 @@ got_fetch_pack(struct got_object_id **pack_hash, struc goto done; } + tmpfd = got_opentempfd(); + if (tmpfd == -1) { + err = got_error_from_errno("got_opentempfd"); + goto done; + } + if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, imsg_fetchfds) == -1) { err = got_error_from_errno("socketpair"); goto done; @@ -506,6 +513,10 @@ got_fetch_pack(struct got_object_id **pack_hash, struc if (err != NULL) goto done; nidxfd = -1; + err = got_privsep_send_tmpfd(&idxibuf, tmpfd); + if (err != NULL) + goto done; + tmpfd = -1; done = 0; while (!done) { int nobj_total, nobj_indexed, nobj_loose, nobj_resolved; @@ -566,6 +577,8 @@ done: err = got_error_from_errno("close"); if (idxfd != -1 && close(idxfd) == -1 && err == NULL) err = got_error_from_errno("close"); + if (tmpfd != -1 && close(tmpfd) == -1 && err == NULL) + err = got_error_from_errno("close"); free(tmppackpath); free(tmpidxpath); free(idxpath); blob - 7a614213a52014c1c45ce8749b9340e9c1ad8d26 blob + 48821e99ab74a4c96cf039cf1b592301c97eda74 --- lib/got_lib_inflate.h +++ lib/got_lib_inflate.h @@ -44,7 +44,8 @@ const struct got_error *got_inflate_to_mem_fd(uint8_t const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, size_t *, uint32_t *, uint8_t *, size_t, size_t); const struct got_error *got_inflate_to_file(size_t *, FILE *, FILE *); -const struct got_error *got_inflate_to_file_fd(size_t *, int, FILE *); +const struct got_error *got_inflate_to_file_fd(size_t *, size_t *, uint32_t *, + int, FILE *); const struct got_error *got_inflate_to_fd(size_t *, FILE *, int); -const struct got_error *got_inflate_to_file_mmap(size_t *, uint8_t *, size_t, - size_t, FILE *); +const struct got_error *got_inflate_to_file_mmap(size_t *, size_t *, + uint32_t *, uint8_t *, size_t, size_t, FILE *); blob - 15e9a291534e2394802c47c2b9fec415bf7625ef blob + f95085e6eb708f154a28d9d554aa102e69b4f939 --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -179,6 +179,8 @@ const struct got_error *got_packfile_open_object(struc struct got_pack *, struct got_packidx *, int, struct got_object_id *); const struct got_error *got_pack_get_max_delta_object_size(uint64_t *, struct got_object *, struct got_pack *); +const struct got_error *got_pack_dump_delta_chain_to_file(size_t *, + struct got_delta_chain *, struct got_pack *, FILE *, FILE *, FILE *); const struct got_error *got_pack_dump_delta_chain_to_mem(uint8_t **, size_t *, struct got_delta_chain *, struct got_pack *); const struct got_error *got_packfile_extract_object(struct got_pack *, blob - e94e2239ba047d0bcd52d285db28e19f3db75458 blob + 50d5e7725fec453a7a1d9c9401bf8576478b267e --- lib/inflate.c +++ lib/inflate.c @@ -506,20 +506,22 @@ done: } const struct got_error * -got_inflate_to_file_fd(size_t *outlen, int infd, FILE *outfile) +got_inflate_to_file_fd(size_t *outlen, size_t *consumed_total, + uint32_t *input_crc, int infd, FILE *outfile) { const struct got_error *err; - size_t avail; + size_t avail, consumed; struct got_inflate_buf zb; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, input_crc); if (err) goto done; *outlen = 0; - + if (consumed_total) + *consumed_total = 0; do { - err = got_inflate_read_fd(&zb, infd, &avail, NULL); + err = got_inflate_read_fd(&zb, infd, &avail, &consumed); if (err) goto done; if (avail > 0) { @@ -530,6 +532,8 @@ got_inflate_to_file_fd(size_t *outlen, int infd, FILE goto done; } *outlen += avail; + if (consumed_total) + *consumed_total += consumed; } } while (zb.flags & GOT_INFLATE_F_HAVE_MORE); @@ -541,26 +545,29 @@ done: } const struct got_error * -got_inflate_to_file_mmap(size_t *outlen, uint8_t *map, size_t offset, - size_t len, FILE *outfile) +got_inflate_to_file_mmap(size_t *outlen, size_t *consumed_total, + uint32_t *input_crc, uint8_t *map, size_t offset, size_t len, + FILE *outfile) { const struct got_error *err; - size_t avail; + size_t avail, consumed; struct got_inflate_buf zb; - size_t consumed; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, input_crc); if (err) goto done; *outlen = 0; - + if (consumed_total) + *consumed_total = 0; do { err = got_inflate_read_mmap(&zb, map, offset, len, &avail, &consumed); if (err) goto done; offset += consumed; + if (consumed_total) + *consumed_total += consumed; len -= consumed; if (avail > 0) { size_t n; blob - 8375932b1a549d1b7a8bd3d63a7da71b3c051bd5 blob + 9dc1a833c99e6bd6033482c75b22b9472806ff2d --- lib/pack.c +++ lib/pack.c @@ -1030,8 +1030,9 @@ got_pack_get_max_delta_object_size(uint64_t *size, str } const struct got_error * -dump_delta_chain_to_file(size_t *result_size, struct got_delta_chain *deltas, - struct got_pack *pack, FILE *outfile, FILE *base_file, FILE *accum_file) +got_pack_dump_delta_chain_to_file(size_t *result_size, + struct got_delta_chain *deltas, struct got_pack *pack, FILE *outfile, + FILE *base_file, FILE *accum_file) { const struct got_error *err = NULL; struct got_delta *delta; @@ -1089,11 +1090,13 @@ dump_delta_chain_to_file(size_t *result_size, struct g if (pack->map) { mapoff = (size_t)delta_data_offset; err = got_inflate_to_file_mmap( - &base_bufsz, pack->map, mapoff, - pack->filesize - mapoff, base_file); + &base_bufsz, NULL, NULL, pack->map, + mapoff, pack->filesize - mapoff, + base_file); } else err = got_inflate_to_file_fd( - &base_bufsz, pack->fd, base_file); + &base_bufsz, NULL, NULL, pack->fd, + base_file); } else { if (pack->map) { mapoff = (size_t)delta_data_offset; @@ -1338,17 +1341,18 @@ got_packfile_extract_object(struct got_pack *pack, str if (pack->map) { size_t mapoff = (size_t)obj->pack_offset; - err = got_inflate_to_file_mmap(&obj->size, pack->map, - mapoff, pack->filesize - mapoff, outfile); + err = got_inflate_to_file_mmap(&obj->size, NULL, NULL, + pack->map, mapoff, pack->filesize - mapoff, + outfile); } else { if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); - err = got_inflate_to_file_fd(&obj->size, pack->fd, - outfile); + err = got_inflate_to_file_fd(&obj->size, NULL, NULL, + pack->fd, outfile); } } else - err = dump_delta_chain_to_file(&obj->size, &obj->deltas, pack, - outfile, base_file, accum_file); + err = got_pack_dump_delta_chain_to_file(&obj->size, + &obj->deltas, pack, outfile, base_file, accum_file); return err; } blob - 4adf115d06b110647587d375b33aa21f27495d47 blob + 650dd1e206e344aa1713de7d98dc4dd57085dcc8 --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -145,11 +145,31 @@ read_crc(uint32_t *crc, int fd, size_t len) } static const struct got_error * -read_packed_object(struct got_pack *pack, struct got_indexed_object *obj) +read_file_sha1(SHA1_CTX *ctx, FILE *f) { + uint8_t buf[8192]; + size_t r; + + for (;;) { + r = fread(buf, 1, sizeof(buf), f); + if (r == 0) { + if (feof(f)) + return NULL; + return got_ferror(f, GOT_ERR_IO); + } + SHA1Update(ctx, buf, r); + } + + return NULL; +} + +static const struct got_error * +read_packed_object(struct got_pack *pack, struct got_indexed_object *obj, + FILE *tmpfile) +{ const struct got_error *err = NULL; SHA1_CTX ctx; - uint8_t *data; + uint8_t *data = NULL; size_t datalen; ssize_t n; char *header; @@ -179,14 +199,28 @@ read_packed_object(struct got_pack *pack, struct got_i case GOT_OBJ_TYPE_COMMIT: case GOT_OBJ_TYPE_TREE: case GOT_OBJ_TYPE_TAG: - /* XXX TODO reading large objects into memory is bad! */ - if (pack->map) { - err = got_inflate_to_mem_mmap(&data, &datalen, - &obj->len, &obj->crc, pack->map, mapoff, - pack->filesize - mapoff); + if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + if (fseek(tmpfile, 0L, SEEK_SET) == -1) { + err = got_error_from_errno("fseek"); + break; + } + if (pack->map) { + err = got_inflate_to_file_mmap(&datalen, + &obj->len, &obj->crc, pack->map, mapoff, + pack->filesize - mapoff, tmpfile); + } else { + err = got_inflate_to_file_fd(&datalen, + &obj->len, &obj->crc, pack->fd, tmpfile); + } } else { - err = got_inflate_to_mem_fd(&data, &datalen, - &obj->len, &obj->crc, obj->size, pack->fd); + if (pack->map) { + err = got_inflate_to_mem_mmap(&data, &datalen, + &obj->len, &obj->crc, pack->map, mapoff, + pack->filesize - mapoff); + } else { + err = got_inflate_to_mem_fd(&data, &datalen, + &obj->len, &obj->crc, obj->size, pack->fd); + } } if (err) break; @@ -203,7 +237,12 @@ read_packed_object(struct got_pack *pack, struct got_i } headerlen = strlen(header) + 1; SHA1Update(&ctx, header, headerlen); - SHA1Update(&ctx, data, datalen); + if (obj->size > GOT_DELTA_RESULT_SIZE_CACHED_MAX) { + err = read_file_sha1(&ctx, tmpfile); + if (err) + break; + } else + SHA1Update(&ctx, data, datalen); SHA1Final(obj->id.sha1, &ctx); free(header); free(data); @@ -509,8 +548,8 @@ update_packidx(struct got_packidx *packidx, int nobj, } static const struct got_error * -index_pack(struct got_pack *pack, int idxfd, uint8_t *pack_hash, - struct imsgbuf *ibuf) +index_pack(struct got_pack *pack, int idxfd, FILE *tmpfile, + uint8_t *pack_hash, struct imsgbuf *ibuf) { const struct got_error *err; struct got_packfile_hdr hdr; @@ -629,7 +668,7 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t * } } - err = read_packed_object(pack, obj); + err = read_packed_object(pack, obj, tmpfile); if (err) goto done; @@ -786,7 +825,8 @@ main(int argc, char **argv) const struct got_error *err = NULL, *close_err; struct imsgbuf ibuf; struct imsg imsg; - int idxfd = -1; + int idxfd = -1, tmpfd = -1; + FILE *tmpfile = NULL; struct got_pack pack; uint8_t pack_hash[SHA1_DIGEST_LENGTH]; off_t packfile_size; @@ -844,6 +884,27 @@ main(int argc, char **argv) goto done; } idxfd = imsg.fd; + + err = got_privsep_recv_imsg(&imsg, &ibuf, 0); + if (err) + goto done; + if (imsg.hdr.type == GOT_IMSG_STOP) + goto done; + if (imsg.hdr.type != GOT_IMSG_TMPFD) { + err = got_error(GOT_ERR_PRIVSEP_MSG); + goto done; + } + if (imsg.hdr.len - IMSG_HEADER_SIZE != 0) { + err = got_error(GOT_ERR_PRIVSEP_LEN); + goto done; + } + tmpfd = imsg.fd; + tmpfile = fdopen(tmpfd, "w+"); + if (tmpfile == NULL) { + err = got_error_from_errno("fdopen"); + goto done; + } + tmpfd = -1; if (lseek(pack.fd, 0, SEEK_END) == -1) { err = got_error_from_errno("lseek"); @@ -867,13 +928,15 @@ main(int argc, char **argv) if (pack.map == MAP_FAILED) pack.map = NULL; /* fall back to read(2) */ #endif - err = index_pack(&pack, idxfd, pack_hash, &ibuf); + err = index_pack(&pack, idxfd, tmpfile, pack_hash, &ibuf); done: close_err = got_pack_close(&pack); if (close_err && err == NULL) err = close_err; if (idxfd != -1 && close(idxfd) == -1 && err == NULL) err = got_error_from_errno("close"); + if (tmpfd != -1 && close(tmpfd) == -1 && err == NULL) + err = got_error_from_errno("close"); if (err == NULL) err = got_privsep_send_index_pack_done(&ibuf);