commit 2e5a6faddc98266ead5e12a17ceefe689cf9192e from: Stefan Sperling date: Wed Mar 18 16:13:44 2020 UTC add mmap support to got-index-pack commit - f63dc90df113ce7a79e0b477d5cbea9b61b5a460 commit + 2e5a6faddc98266ead5e12a17ceefe689cf9192e blob - 1d8212ee1d7b2dff406fc259b64350befec9ebca blob + 7a614213a52014c1c45ce8749b9340e9c1ad8d26 --- lib/got_lib_inflate.h +++ lib/got_lib_inflate.h @@ -41,8 +41,8 @@ const struct got_error *got_inflate_to_mem(uint8_t **, FILE *); const struct got_error *got_inflate_to_mem_fd(uint8_t **, size_t *, size_t *, uint32_t *, size_t, int); -const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, uint8_t *, - size_t, size_t); +const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, size_t *, + uint32_t *, uint8_t *, size_t, size_t); const struct got_error *got_inflate_to_file(size_t *, FILE *, FILE *); const struct got_error *got_inflate_to_file_fd(size_t *, int, FILE *); const struct got_error *got_inflate_to_fd(size_t *, FILE *, int); blob - e1e7cd506205b1d5951f680a97acc6c747d2fa59 blob + e94e2239ba047d0bcd52d285db28e19f3db75458 --- lib/inflate.c +++ lib/inflate.c @@ -370,8 +370,9 @@ done: } const struct got_error * -got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outlen, uint8_t *map, - size_t offset, size_t len) +got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outlen, + size_t *consumed_total, uint32_t *input_crc, uint8_t *map, size_t offset, + size_t len) { const struct got_error *err; size_t avail, consumed; @@ -379,29 +380,40 @@ got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outl void *newbuf; int nbuf = 1; - *outbuf = malloc(GOT_INFLATE_BUFSIZE); - if (*outbuf == NULL) - return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, NULL); - if (err) { - free(*outbuf); - *outbuf = NULL; - return err; + if (outbuf) { + *outbuf = malloc(GOT_INFLATE_BUFSIZE); + if (*outbuf == NULL) + return got_error_from_errno("malloc"); + err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, + input_crc); + if (err) { + free(*outbuf); + *outbuf = NULL; + return err; + } + } else { + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, + input_crc); } *outlen = 0; - + if (consumed_total) + *consumed_total = 0; do { err = got_inflate_read_mmap(&zb, map, offset, len, &avail, &consumed); if (err) goto done; offset += consumed; + if (consumed_total) + *consumed_total += consumed; len -= consumed; *outlen += avail; if (len == 0) break; if (zb.flags & GOT_INFLATE_F_HAVE_MORE) { + if (outbuf == NULL) + continue; newbuf = reallocarray(*outbuf, ++nbuf, GOT_INFLATE_BUFSIZE); if (newbuf == NULL) { blob - e5817bcdb4d07c0921e5917a0ca6502ae3d96c55 blob + 8375932b1a549d1b7a8bd3d63a7da71b3c051bd5 --- lib/pack.c +++ lib/pack.c @@ -714,8 +714,9 @@ read_delta_data(uint8_t **delta_buf, size_t *delta_len if (pack->map) { if (delta_data_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); - err = got_inflate_to_mem_mmap(delta_buf, delta_len, pack->map, - delta_data_offset, pack->filesize - delta_data_offset); + err = got_inflate_to_mem_mmap(delta_buf, delta_len, + NULL, NULL, pack->map, delta_data_offset, + pack->filesize - delta_data_offset); } else { if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); @@ -1097,7 +1098,8 @@ dump_delta_chain_to_file(size_t *result_size, struct g if (pack->map) { mapoff = (size_t)delta_data_offset; err = got_inflate_to_mem_mmap(&base_buf, - &base_bufsz, pack->map, mapoff, + &base_bufsz, NULL, NULL, + pack->map, mapoff, pack->filesize - mapoff); } else err = got_inflate_to_mem_fd(&base_buf, @@ -1240,8 +1242,8 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz if (pack->map) { size_t mapoff = (size_t)delta_data_offset; err = got_inflate_to_mem_mmap(&base_buf, - &base_bufsz, pack->map, mapoff, - pack->filesize - mapoff); + &base_bufsz, NULL, NULL, pack->map, + mapoff, pack->filesize - mapoff); } else { if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) { @@ -1365,8 +1367,8 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size return got_error(GOT_ERR_PACK_OFFSET); if (pack->map) { size_t mapoff = (size_t)obj->pack_offset; - err = got_inflate_to_mem_mmap(buf, len, pack->map, - mapoff, pack->filesize - mapoff); + err = got_inflate_to_mem_mmap(buf, len, NULL, NULL, + pack->map, mapoff, pack->filesize - mapoff); } else { if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); blob - 53e4b08d5ce99323c387425fc8a84c9d714f547e blob + 05e55c20f5d2a0e9c7ddd22c6d4921640d54aadc --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -165,18 +166,24 @@ read_packed_object(struct got_pack *pack, struct got_i char *header; size_t headerlen; const char *obj_label; + size_t mapoff = obj->off; err = got_pack_parse_object_type_and_size(&obj->type, &obj->size, &obj->tslen, pack, obj->off); if (err) return err; - /* XXX Seek back and get the CRC of on-disk type+size bytes. */ - if (lseek(pack->fd, obj->off, SEEK_SET) == -1) - return got_error_from_errno("lseek"); - err = read_crc(&obj->crc, pack->fd, obj->tslen); - if (err) - return err; + if (pack->map) { + obj->crc = crc32(obj->crc, pack->map + mapoff, obj->tslen); + mapoff += obj->tslen; + } else { + /* XXX Seek back and get the CRC of on-disk type+size bytes. */ + if (lseek(pack->fd, obj->off, SEEK_SET) == -1) + return got_error_from_errno("lseek"); + err = read_crc(&obj->crc, pack->fd, obj->tslen); + if (err) + return err; + } switch (obj->type) { case GOT_OBJ_TYPE_BLOB: @@ -184,8 +191,14 @@ read_packed_object(struct got_pack *pack, struct got_i case GOT_OBJ_TYPE_TREE: case GOT_OBJ_TYPE_TAG: /* XXX TODO reading large objects into memory is bad! */ - err = got_inflate_to_mem_fd(&data, &datalen, &obj->len, - &obj->crc, obj->size, pack->fd); + if (pack->map) { + err = got_inflate_to_mem_mmap(&data, &datalen, + &obj->len, &obj->crc, pack->map, mapoff, + pack->filesize - mapoff); + } else { + err = got_inflate_to_mem_fd(&data, &datalen, + &obj->len, &obj->crc, obj->size, pack->fd); + } if (err) break; SHA1Init(&ctx); @@ -208,22 +221,39 @@ read_packed_object(struct got_pack *pack, struct got_i break; case GOT_OBJ_TYPE_REF_DELTA: memset(obj->id.sha1, 0xff, SHA1_DIGEST_LENGTH); - n = read(pack->fd, obj->delta.ref.ref_id.sha1, - SHA1_DIGEST_LENGTH); - if (n == -1) { - err = got_error_from_errno("read"); - break; - } - if (n < sizeof(obj->id)) { - err = got_error(GOT_ERR_BAD_PACKFILE); - break; + if (pack->map) { + if (mapoff + SHA1_DIGEST_LENGTH >= pack->filesize) { + err = got_error(GOT_ERR_BAD_PACKFILE); + break; + } + memcpy(obj->delta.ref.ref_id.sha1, pack->map + mapoff, + SHA1_DIGEST_LENGTH); + obj->crc = crc32(obj->crc, pack->map + mapoff, + SHA1_DIGEST_LENGTH); + mapoff += SHA1_DIGEST_LENGTH; + err = got_inflate_to_mem_mmap(NULL, &datalen, + &obj->len, &obj->crc, pack->map, mapoff, + pack->filesize - mapoff); + if (err) + break; + } else { + n = read(pack->fd, obj->delta.ref.ref_id.sha1, + SHA1_DIGEST_LENGTH); + if (n == -1) { + err = got_error_from_errno("read"); + break; + } + if (n < sizeof(obj->id)) { + err = got_error(GOT_ERR_BAD_PACKFILE); + break; + } + obj->crc = crc32(obj->crc, obj->delta.ref.ref_id.sha1, + SHA1_DIGEST_LENGTH); + err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, + &obj->crc, obj->size, pack->fd); + if (err) + break; } - obj->crc = crc32(obj->crc, obj->delta.ref.ref_id.sha1, - SHA1_DIGEST_LENGTH); - err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, - &obj->crc, obj->size, pack->fd); - if (err) - break; obj->len += SHA1_DIGEST_LENGTH; break; case GOT_OBJ_TYPE_OFFSET_DELTA: @@ -234,20 +264,35 @@ read_packed_object(struct got_pack *pack, struct got_i if (err) break; - /* XXX Seek back and get the CRC of on-disk offset bytes. */ - if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) == -1) { - err = got_error_from_errno("lseek"); - break; - } - err = read_crc(&obj->crc, pack->fd, - obj->delta.ofs.base_offsetlen); - if (err) - break; + if (pack->map) { + obj->crc = crc32(obj->crc, pack->map + mapoff, + obj->delta.ofs.base_offsetlen); + mapoff += obj->delta.ofs.base_offsetlen; + err = got_inflate_to_mem_mmap(NULL, &datalen, + &obj->len, &obj->crc, pack->map, mapoff, + pack->filesize - mapoff); + if (err) + break; + } else { + /* + * XXX Seek back and get the CRC of on-disk + * offset bytes. + */ + if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) + == -1) { + err = got_error_from_errno("lseek"); + break; + } + err = read_crc(&obj->crc, pack->fd, + obj->delta.ofs.base_offsetlen); + if (err) + break; - err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, - &obj->crc, obj->size, pack->fd); - if (err) - break; + err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, + &obj->crc, obj->size, pack->fd); + if (err) + break; + } obj->len += obj->delta.ofs.base_offsetlen; break; default: @@ -488,14 +533,23 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t * uint8_t packidx_hash[SHA1_DIGEST_LENGTH]; ssize_t r, w; int pass, have_ref_deltas = 0; + size_t mapoff = 0; /* Check pack file header. */ - r = read(pack->fd, &hdr, sizeof(hdr)); - if (r == -1) - return got_error_from_errno("read"); - if (r < sizeof(hdr)) - return got_error_msg(GOT_ERR_BAD_PACKFILE, - "short packfile header"); + if (pack->map) { + if (pack->filesize < sizeof(hdr)) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "short packfile header"); + memcpy(&hdr, pack->map, sizeof(hdr)); + mapoff += sizeof(hdr); + } else { + r = read(pack->fd, &hdr, sizeof(hdr)); + if (r == -1) + return got_error_from_errno("read"); + if (r < sizeof(hdr)) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "short packfile header"); + } if (hdr.signature != htobe32(GOT_PACKFILE_SIGNATURE)) return got_error_msg(GOT_ERR_BAD_PACKFILE, @@ -576,20 +630,28 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t * obj->crc = crc32(0L, NULL, 0); /* Store offset to type+size information for this object. */ - obj->off = lseek(pack->fd, 0, SEEK_CUR); - if (obj->off == -1) { - err = got_error_from_errno("lseek"); - goto done; + if (pack->map) { + obj->off = mapoff; + } else { + obj->off = lseek(pack->fd, 0, SEEK_CUR); + if (obj->off == -1) { + err = got_error_from_errno("lseek"); + goto done; + } } err = read_packed_object(pack, obj); if (err) goto done; - if (lseek(pack->fd, obj->off + obj->tslen + obj->len, - SEEK_SET) == -1) { - err = got_error_from_errno("lseek"); - goto done; + if (pack->map) { + mapoff += obj->tslen + obj->len; + } else { + if (lseek(pack->fd, obj->off + obj->tslen + obj->len, + SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } } if (obj->type == GOT_OBJ_TYPE_BLOB || @@ -625,10 +687,11 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t * if (obj->valid) continue; - if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) + if (pack->map == NULL && + lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) == -1) { - err = got_error_from_errno("lseek"); - goto done; + err = got_error_from_errno("lseek"); + goto done; } err = resolve_deltified_object(pack, &packidx, obj); @@ -817,6 +880,12 @@ main(int argc, char **argv) goto done; } +#ifndef GOT_PACK_NO_MMAP + pack.map = mmap(NULL, pack.filesize, PROT_READ, MAP_PRIVATE, + pack.fd, 0); + if (pack.map == MAP_FAILED) + pack.map = NULL; /* fall back to read(2) */ +#endif err = index_pack(&pack, idxfd, pack_hash, &ibuf); done: close_err = got_pack_close(&pack);