commit 1e87a3c39e7647b4dba91b8b1c77da25d677294a from: Stefan Sperling date: Wed Mar 18 16:13:42 2020 UTC avoid re-reading the entirety of an object's data to calculate the CRC commit - 7bad153797cc0aad421bbca3534ad21e75e84907 commit + 1e87a3c39e7647b4dba91b8b1c77da25d677294a blob - d8d1ad0afb7092c9dbd63eb106bf5bdf1aed6b19 blob + e97951c3fe40ff57563fcd8b71cac2022417eae9 --- lib/got_lib_inflate.h +++ lib/got_lib_inflate.h @@ -23,12 +23,13 @@ struct got_inflate_buf { int flags; #define GOT_INFLATE_F_HAVE_MORE 0x01 #define GOT_INFLATE_F_OWN_OUTBUF 0x02 + uint32_t *input_crc; }; #define GOT_INFLATE_BUFSIZE 32768 const struct got_error *got_inflate_init(struct got_inflate_buf *, uint8_t *, - size_t); + size_t, uint32_t *); const struct got_error *got_inflate_read(struct got_inflate_buf *, FILE *, size_t *, size_t *); const struct got_error *got_inflate_read_fd(struct got_inflate_buf *, int, @@ -39,7 +40,7 @@ void got_inflate_end(struct got_inflate_buf *); const struct got_error *got_inflate_to_mem(uint8_t **, size_t *, size_t *, FILE *); const struct got_error *got_inflate_to_mem_fd(uint8_t **, size_t *, size_t *, - int); + uint32_t *, int); const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, uint8_t *, size_t, size_t); const struct got_error *got_inflate_to_file(size_t *, FILE *, FILE *); blob - d093fd10dca947cf39e062dd85b6888dcb7ae8eb blob + 75d3b40de1a47788c753781ed4e70deaaf8ef0f1 --- lib/inflate.c +++ lib/inflate.c @@ -35,7 +35,8 @@ #endif const struct got_error * -got_inflate_init(struct got_inflate_buf *zb, uint8_t *outbuf, size_t bufsize) +got_inflate_init(struct got_inflate_buf *zb, uint8_t *outbuf, size_t bufsize, + uint32_t *input_crc) { const struct got_error *err = NULL; int zerr; @@ -74,6 +75,7 @@ got_inflate_init(struct got_inflate_buf *zb, uint8_t * } else zb->outbuf = outbuf; + zb->input_crc = input_crc; done: if (err) got_inflate_end(zb); @@ -96,6 +98,9 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, if (consumed) *consumed = 0; do { + char *crc_in = NULL; + size_t crc_avail = 0; + if (z->avail_in == 0) { size_t n = fread(zb->inbuf, 1, zb->inlen, f); if (n == 0) { @@ -108,7 +113,15 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, z->next_in = zb->inbuf; z->avail_in = n; } + if (zb->input_crc) { + crc_in = z->next_in; + crc_avail = z->avail_in; + } ret = inflate(z, Z_SYNC_FLUSH); + if (zb->input_crc) { + *zb->input_crc = crc32(*zb->input_crc, + crc_in, crc_avail - z->avail_in); + } } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK || ret == Z_BUF_ERROR) { @@ -141,6 +154,9 @@ got_inflate_read_fd(struct got_inflate_buf *zb, int fd if (consumed) *consumed = 0; do { + char *crc_in = NULL; + size_t crc_avail = 0; + if (z->avail_in == 0) { ssize_t n = read(fd, zb->inbuf, zb->inlen); if (n < 0) @@ -153,7 +169,15 @@ got_inflate_read_fd(struct got_inflate_buf *zb, int fd z->next_in = zb->inbuf; z->avail_in = n; } + if (zb->input_crc) { + crc_in = z->next_in; + crc_avail = z->avail_in; + } ret = inflate(z, Z_SYNC_FLUSH); + if (zb->input_crc) { + *zb->input_crc = crc32(*zb->input_crc, + crc_in, crc_avail - z->avail_in); + } } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK || ret == Z_BUF_ERROR) { @@ -185,7 +209,10 @@ got_inflate_read_mmap(struct got_inflate_buf *zb, uint *consumed = 0; do { + char *crc_in = NULL; + size_t crc_avail = 0; size_t last_total_in = zb->z.total_in; + if (z->avail_in == 0) { if (len == 0) { /* EOF */ @@ -195,7 +222,15 @@ got_inflate_read_mmap(struct got_inflate_buf *zb, uint z->next_in = map + offset + *consumed; z->avail_in = len - *consumed; } + if (zb->input_crc) { + crc_in = z->next_in; + crc_avail = z->avail_in; + } ret = inflate(z, Z_SYNC_FLUSH); + if (zb->input_crc) { + *zb->input_crc = crc32(*zb->input_crc, + crc_in, crc_avail - z->avail_in); + } *consumed += z->total_in - last_total_in; } while (ret == Z_OK && z->avail_out > 0); @@ -234,9 +269,9 @@ got_inflate_to_mem(uint8_t **outbuf, size_t *outlen, *outbuf = malloc(GOT_INFLATE_BUFSIZE); if (*outbuf == NULL) return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, NULL); } else - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); if (err) return err; @@ -276,7 +311,7 @@ done: const struct got_error * got_inflate_to_mem_fd(uint8_t **outbuf, size_t *outlen, - size_t *consumed_total, int infd) + size_t *consumed_total, uint32_t *input_crc, int infd) { const struct got_error *err; size_t avail, consumed; @@ -288,9 +323,11 @@ got_inflate_to_mem_fd(uint8_t **outbuf, size_t *outlen *outbuf = malloc(GOT_INFLATE_BUFSIZE); if (*outbuf == NULL) return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, + input_crc); } else - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, + input_crc); if (err) goto done; @@ -341,7 +378,7 @@ got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outl *outbuf = malloc(GOT_INFLATE_BUFSIZE); if (*outbuf == NULL) return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, NULL); if (err) { free(*outbuf); *outbuf = NULL; @@ -387,7 +424,7 @@ got_inflate_to_fd(size_t *outlen, FILE *infile, int ou size_t avail; struct got_inflate_buf zb; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); if (err) goto done; @@ -424,7 +461,7 @@ got_inflate_to_file(size_t *outlen, FILE *infile, FILE size_t avail; struct got_inflate_buf zb; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); if (err) goto done; @@ -459,7 +496,7 @@ got_inflate_to_file_fd(size_t *outlen, int infd, FILE size_t avail; struct got_inflate_buf zb; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); if (err) goto done; @@ -496,7 +533,7 @@ got_inflate_to_file_mmap(size_t *outlen, uint8_t *map, struct got_inflate_buf zb; size_t consumed; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, NULL); if (err) goto done; blob - a4a82ed4280ba51077e164cd2fed73eeb6bba742 blob + 032e9030441206d1d8bfd8ec7621c1fa28019a05 --- lib/object_parse.c +++ lib/object_parse.c @@ -221,7 +221,7 @@ got_object_read_header(struct got_object **obj, int fd if (buf == NULL) return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, buf, zbsize); + err = got_inflate_init(&zb, buf, zbsize, NULL); if (err) return err; blob - 7f264cdcbb1fb07ab69610ba8ef70883ac16cfc2 blob + 28dd5296a74ec449099ea0735ace1978b81b2567 --- lib/pack.c +++ lib/pack.c @@ -720,7 +720,7 @@ read_delta_data(uint8_t **delta_buf, size_t *delta_len if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); err = got_inflate_to_mem_fd(delta_buf, delta_len, NULL, - pack->fd); + NULL, pack->fd); } return err; } @@ -1101,7 +1101,7 @@ dump_delta_chain_to_file(size_t *result_size, struct g pack->filesize - mapoff); } else err = got_inflate_to_mem_fd(&base_buf, - &base_bufsz, NULL, pack->fd); + &base_bufsz, NULL, NULL, pack->fd); } if (err) goto done; @@ -1248,7 +1248,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz goto done; } err = got_inflate_to_mem_fd(&base_buf, - &base_bufsz, NULL, pack->fd); + &base_bufsz, NULL, NULL, pack->fd); } if (err) goto done; @@ -1368,7 +1368,8 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size } else { if (lseek(pack->fd, obj->pack_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); - err = got_inflate_to_mem_fd(buf, len, NULL, pack->fd); + err = got_inflate_to_mem_fd(buf, len, NULL, NULL, + pack->fd); } } else err = got_pack_dump_delta_chain_to_mem(buf, len, &obj->deltas, blob - 580ee4306f0ef90163538a0e25a2ee46e54c5a27 blob + 45930556cd110e5ed911da0e3c9cce993446a6d4 --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -130,7 +130,25 @@ get_obj_type_label(const char **label, int obj_type) return err; } +static const struct got_error * +read_crc(uint32_t *crc, int fd, size_t len) +{ + uint8_t buf[8192]; + size_t n; + ssize_t r; + for (n = len; n > 0; n -= r){ + r = read(fd, buf, n > sizeof(buf) ? sizeof(buf) : n); + if (r == -1) + return got_error_from_errno("read"); + if (r == 0) + break; + *crc = crc32(*crc, buf, r); + } + + return NULL; +} + static const struct got_error * read_packed_object(struct got_pack *pack, struct got_indexed_object *obj) { @@ -148,13 +166,21 @@ read_packed_object(struct got_pack *pack, struct got_i if (err) return err; + /* XXX Seek back and get the CRC of on-disk type+size bytes. */ + if (lseek(pack->fd, obj->off, SEEK_SET) == -1) + return got_error_from_errno("lseek"); + err = read_crc(&obj->crc, pack->fd, obj->tslen); + if (err) + return err; + switch (obj->type) { case GOT_OBJ_TYPE_BLOB: case GOT_OBJ_TYPE_COMMIT: case GOT_OBJ_TYPE_TREE: case GOT_OBJ_TYPE_TAG: /* XXX TODO reading large objects into memory is bad! */ - err = got_inflate_to_mem_fd(&data, &datalen, &obj->len, pack->fd); + err = got_inflate_to_mem_fd(&data, &datalen, &obj->len, + &obj->crc, pack->fd); if (err) break; SHA1Init(&ctx); @@ -184,7 +210,10 @@ read_packed_object(struct got_pack *pack, struct got_i err = got_error(GOT_ERR_BAD_PACKFILE); break; } - err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, pack->fd); + obj->crc = crc32(obj->crc, obj->ref_id.sha1, + SHA1_DIGEST_LENGTH); + err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, + &obj->crc, pack->fd); if (err) break; obj->len += SHA1_DIGEST_LENGTH; @@ -195,9 +224,20 @@ read_packed_object(struct got_pack *pack, struct got_i &obj->base_offsetlen, pack, obj->off, obj->tslen); if (err) break; - err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, pack->fd); + + /* XXX Seek back and get the CRC of on-disk offset bytes. */ + if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); + break; + } + err = read_crc(&obj->crc, pack->fd, obj->base_offsetlen); if (err) break; + + err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, + &obj->crc, pack->fd); + if (err) + break; obj->len += obj->base_offsetlen; break; default: @@ -222,28 +262,6 @@ hwrite(int fd, void *buf, int len, SHA1_CTX *ctx) return got_error(GOT_ERR_IO); return NULL; -} - -static const struct got_error * -object_crc(int packfd, struct got_indexed_object *obj) -{ - char buf[8096]; - size_t n; - ssize_t r; - - if (lseek(packfd, obj->off, SEEK_SET) == -1) - return got_error_from_errno("lseek"); - - obj->crc = crc32(0L, NULL, 0); - for (n = obj->tslen + obj->len; n > 0; n -= r){ - r = read(packfd, buf, n > sizeof(buf) ? sizeof(buf) : n); - if (r == -1) - return got_error_from_errno("read"); - if (r == 0) - return NULL; - obj->crc = crc32(obj->crc, buf, r); - } - return 0; } static const struct got_error * @@ -517,6 +535,7 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t * err = got_error_from_errno("calloc"); goto done; } + obj->crc = crc32(0L, NULL, 0); /* Store offset to type+size information for this object. */ obj->off = lseek(pack->fd, 0, SEEK_CUR); @@ -531,9 +550,11 @@ index_pack(struct got_pack *pack, int idxfd, uint8_t * objects[i] = obj; - err = object_crc(pack->fd, obj); - if (err) + if (lseek(pack->fd, obj->off + obj->tslen + obj->len, + SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); goto done; + } if (obj->type == GOT_OBJ_TYPE_BLOB || obj->type == GOT_OBJ_TYPE_TREE ||