commit 6ad68bce3639912fcf7725dbb589febec015788a from: Stefan Sperling date: Tue Mar 24 15:15:51 2020 UTC make got-index-pack compute and verify the pack file's SHA1 checksum commit - 1d72a2a0ee2d5c4546c0471e054c0d81e407bc72 commit + 6ad68bce3639912fcf7725dbb589febec015788a blob - 48821e99ab74a4c96cf039cf1b592301c97eda74 blob + 0ccf42c214a51c167c3c22051afc9fa855ec2c19 --- lib/got_lib_inflate.h +++ lib/got_lib_inflate.h @@ -14,6 +14,14 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +struct got_inflate_checksum { + /* If not NULL, mix input bytes into this CRC checksum. */ + uint32_t *input_crc; + + /* If not NULL, mix input bytes into this SHA1 context. */ + SHA1_CTX *input_sha1; +}; + struct got_inflate_buf { z_stream z; char *inbuf; @@ -23,13 +31,13 @@ struct got_inflate_buf { int flags; #define GOT_INFLATE_F_HAVE_MORE 0x01 #define GOT_INFLATE_F_OWN_OUTBUF 0x02 - uint32_t *input_crc; + struct got_inflate_checksum *csum; }; #define GOT_INFLATE_BUFSIZE 32768 const struct got_error *got_inflate_init(struct got_inflate_buf *, uint8_t *, - size_t, uint32_t *); + size_t, struct got_inflate_checksum *); const struct got_error *got_inflate_read(struct got_inflate_buf *, FILE *, size_t *, size_t *); const struct got_error *got_inflate_read_fd(struct got_inflate_buf *, int, @@ -40,12 +48,12 @@ void got_inflate_end(struct got_inflate_buf *); const struct got_error *got_inflate_to_mem(uint8_t **, size_t *, size_t *, FILE *); const struct got_error *got_inflate_to_mem_fd(uint8_t **, size_t *, size_t *, - uint32_t *, size_t, int); + struct got_inflate_checksum *, size_t, int); const struct got_error *got_inflate_to_mem_mmap(uint8_t **, size_t *, size_t *, - uint32_t *, uint8_t *, size_t, size_t); + struct got_inflate_checksum *, uint8_t *, size_t, size_t); const struct got_error *got_inflate_to_file(size_t *, FILE *, FILE *); -const struct got_error *got_inflate_to_file_fd(size_t *, size_t *, uint32_t *, - int, FILE *); +const struct got_error 
*got_inflate_to_file_fd(size_t *, size_t *, + struct got_inflate_checksum *, int, FILE *); const struct got_error *got_inflate_to_fd(size_t *, FILE *, int); const struct got_error *got_inflate_to_file_mmap(size_t *, size_t *, - uint32_t *, uint8_t *, size_t, size_t, FILE *); + struct got_inflate_checksum *, uint8_t *, size_t, size_t, FILE *); blob - 50d5e7725fec453a7a1d9c9401bf8576478b267e blob + fbef58ae984ad17c5cf64c92059811d717395226 --- lib/inflate.c +++ lib/inflate.c @@ -36,7 +36,7 @@ const struct got_error * got_inflate_init(struct got_inflate_buf *zb, uint8_t *outbuf, size_t bufsize, - uint32_t *input_crc) + struct got_inflate_checksum *csum) { const struct got_error *err = NULL; int zerr; @@ -75,13 +75,23 @@ got_inflate_init(struct got_inflate_buf *zb, uint8_t * } else zb->outbuf = outbuf; - zb->input_crc = input_crc; + zb->csum = csum; done: if (err) got_inflate_end(zb); return err; } +static void +csum_input(struct got_inflate_checksum *csum, const char *buf, size_t len) +{ + if (csum->input_crc) + *csum->input_crc = crc32(*csum->input_crc, buf, len); + + if (csum->input_sha1) + SHA1Update(csum->input_sha1, buf, len); +} + const struct got_error * got_inflate_read(struct got_inflate_buf *zb, FILE *f, size_t *outlenp, size_t *consumed) @@ -98,8 +108,8 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, if (consumed) *consumed = 0; do { - char *crc_in = NULL; - size_t crc_avail = 0; + char *csum_in = NULL; + size_t csum_avail = 0; if (z->avail_in == 0) { size_t n = fread(zb->inbuf, 1, zb->inlen, f); @@ -113,15 +123,13 @@ got_inflate_read(struct got_inflate_buf *zb, FILE *f, z->next_in = zb->inbuf; z->avail_in = n; } - if (zb->input_crc) { - crc_in = z->next_in; - crc_avail = z->avail_in; + if (zb->csum) { + csum_in = z->next_in; + csum_avail = z->avail_in; } ret = inflate(z, Z_SYNC_FLUSH); - if (zb->input_crc) { - *zb->input_crc = crc32(*zb->input_crc, - crc_in, crc_avail - z->avail_in); - } + if (zb->csum) + csum_input(zb->csum, csum_in, csum_avail - 
z->avail_in); } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK || ret == Z_BUF_ERROR) { @@ -154,8 +162,8 @@ got_inflate_read_fd(struct got_inflate_buf *zb, int fd if (consumed) *consumed = 0; do { - char *crc_in = NULL; - size_t crc_avail = 0; + char *csum_in = NULL; + size_t csum_avail = 0; if (z->avail_in == 0) { ssize_t n = read(fd, zb->inbuf, zb->inlen); @@ -169,15 +177,13 @@ got_inflate_read_fd(struct got_inflate_buf *zb, int fd z->next_in = zb->inbuf; z->avail_in = n; } - if (zb->input_crc) { - crc_in = z->next_in; - crc_avail = z->avail_in; + if (zb->csum) { + csum_in = z->next_in; + csum_avail = z->avail_in; } ret = inflate(z, Z_SYNC_FLUSH); - if (zb->input_crc) { - *zb->input_crc = crc32(*zb->input_crc, - crc_in, crc_avail - z->avail_in); - } + if (zb->csum) + csum_input(zb->csum, csum_in, csum_avail - z->avail_in); } while (ret == Z_OK && z->avail_out > 0); if (ret == Z_OK || ret == Z_BUF_ERROR) { @@ -209,8 +215,8 @@ got_inflate_read_mmap(struct got_inflate_buf *zb, uint *consumed = 0; do { - char *crc_in = NULL; - size_t crc_avail = 0; + char *csum_in = NULL; + size_t csum_avail = 0; size_t last_total_in = zb->z.total_in; if (z->avail_in == 0) { @@ -222,15 +228,13 @@ got_inflate_read_mmap(struct got_inflate_buf *zb, uint z->next_in = map + offset + *consumed; z->avail_in = len - *consumed; } - if (zb->input_crc) { - crc_in = z->next_in; - crc_avail = z->avail_in; + if (zb->csum) { + csum_in = z->next_in; + csum_avail = z->avail_in; } ret = inflate(z, Z_SYNC_FLUSH); - if (zb->input_crc) { - *zb->input_crc = crc32(*zb->input_crc, - crc_in, crc_avail - z->avail_in); - } + if (zb->csum) + csum_input(zb->csum, csum_in, csum_avail - z->avail_in); *consumed += z->total_in - last_total_in; } while (ret == Z_OK && z->avail_out > 0); @@ -311,7 +315,8 @@ done: const struct got_error * got_inflate_to_mem_fd(uint8_t **outbuf, size_t *outlen, - size_t *consumed_total, uint32_t *input_crc, size_t expected_size, int infd) + size_t *consumed_total, struct 
got_inflate_checksum *csum, + size_t expected_size, int infd) { const struct got_error *err; size_t avail, consumed; @@ -328,10 +333,9 @@ got_inflate_to_mem_fd(uint8_t **outbuf, size_t *outlen *outbuf = malloc(bufsize); if (*outbuf == NULL) return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, - input_crc); + err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, csum); } else - err = got_inflate_init(&zb, NULL, bufsize, input_crc); + err = got_inflate_init(&zb, NULL, bufsize, csum); if (err) goto done; @@ -371,8 +375,8 @@ done: const struct got_error * got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outlen, - size_t *consumed_total, uint32_t *input_crc, uint8_t *map, size_t offset, - size_t len) + size_t *consumed_total, struct got_inflate_checksum *csum, uint8_t *map, + size_t offset, size_t len) { const struct got_error *err; size_t avail, consumed; @@ -384,16 +388,14 @@ got_inflate_to_mem_mmap(uint8_t **outbuf, size_t *outl *outbuf = malloc(GOT_INFLATE_BUFSIZE); if (*outbuf == NULL) return got_error_from_errno("malloc"); - err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, - input_crc); + err = got_inflate_init(&zb, *outbuf, GOT_INFLATE_BUFSIZE, csum); if (err) { free(*outbuf); *outbuf = NULL; return err; } } else { - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, - input_crc); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, csum); } *outlen = 0; @@ -507,13 +509,13 @@ done: const struct got_error * got_inflate_to_file_fd(size_t *outlen, size_t *consumed_total, - uint32_t *input_crc, int infd, FILE *outfile) + struct got_inflate_checksum *csum, int infd, FILE *outfile) { const struct got_error *err; size_t avail, consumed; struct got_inflate_buf zb; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, input_crc); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, csum); if (err) goto done; @@ -546,14 +548,14 @@ done: const struct got_error * got_inflate_to_file_mmap(size_t 
*outlen, size_t *consumed_total, - uint32_t *input_crc, uint8_t *map, size_t offset, size_t len, - FILE *outfile) + struct got_inflate_checksum *csum, uint8_t *map, size_t offset, + size_t len, FILE *outfile) { const struct got_error *err; size_t avail, consumed; struct got_inflate_buf zb; - err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, input_crc); + err = got_inflate_init(&zb, NULL, GOT_INFLATE_BUFSIZE, csum); if (err) goto done; blob - bfc9107d2db18c2536c48ab9fad45c73b2153910 blob + 90e1671dd36a9e6ac6007310645d997084c9c5db --- libexec/got-index-pack/got-index-pack.c +++ libexec/got-index-pack/got-index-pack.c @@ -130,7 +130,7 @@ get_obj_type_label(const char **label, int obj_type) } static const struct got_error * -read_crc(uint32_t *crc, int fd, size_t len) +read_checksum(uint32_t *crc, SHA1_CTX *sha1_ctx, int fd, size_t len) { uint8_t buf[8192]; size_t n; @@ -142,7 +142,10 @@ read_crc(uint32_t *crc, int fd, size_t len) return got_error_from_errno("read"); if (r == 0) break; - *crc = crc32(*crc, buf, r); + if (crc) + *crc = crc32(*crc, buf, r); + if (sha1_ctx) + SHA1Update(sha1_ctx, buf, r); } return NULL; @@ -169,7 +172,7 @@ read_file_sha1(SHA1_CTX *ctx, FILE *f, size_t len) static const struct got_error * read_packed_object(struct got_pack *pack, struct got_indexed_object *obj, - FILE *tmpfile) + FILE *tmpfile, SHA1_CTX *pack_sha1_ctx) { const struct got_error *err = NULL; SHA1_CTX ctx; @@ -180,7 +183,11 @@ read_packed_object(struct got_pack *pack, struct got_i size_t headerlen; const char *obj_label; size_t mapoff = obj->off; + struct got_inflate_checksum csum; + csum.input_sha1 = pack_sha1_ctx; + csum.input_crc = &obj->crc; + err = got_pack_parse_object_type_and_size(&obj->type, &obj->size, &obj->tslen, pack, obj->off); if (err) @@ -188,12 +195,14 @@ read_packed_object(struct got_pack *pack, struct got_i if (pack->map) { obj->crc = crc32(obj->crc, pack->map + mapoff, obj->tslen); + SHA1Update(pack_sha1_ctx, pack->map + mapoff, obj->tslen); mapoff += 
obj->tslen; } else { /* XXX Seek back and get the CRC of on-disk type+size bytes. */ if (lseek(pack->fd, obj->off, SEEK_SET) == -1) return got_error_from_errno("lseek"); - err = read_crc(&obj->crc, pack->fd, obj->tslen); + err = read_checksum(&obj->crc, pack_sha1_ctx, + pack->fd, obj->tslen); if (err) return err; } @@ -210,20 +219,20 @@ read_packed_object(struct got_pack *pack, struct got_i } if (pack->map) { err = got_inflate_to_file_mmap(&datalen, - &obj->len, &obj->crc, pack->map, mapoff, + &obj->len, &csum, pack->map, mapoff, pack->filesize - mapoff, tmpfile); } else { err = got_inflate_to_file_fd(&datalen, - &obj->len, &obj->crc, pack->fd, tmpfile); + &obj->len, &csum, pack->fd, tmpfile); } } else { if (pack->map) { err = got_inflate_to_mem_mmap(&data, &datalen, - &obj->len, &obj->crc, pack->map, mapoff, + &obj->len, &csum, pack->map, mapoff, pack->filesize - mapoff); } else { err = got_inflate_to_mem_fd(&data, &datalen, - &obj->len, &obj->crc, obj->size, pack->fd); + &obj->len, &csum, obj->size, pack->fd); } } if (err) @@ -262,9 +271,11 @@ read_packed_object(struct got_pack *pack, struct got_i SHA1_DIGEST_LENGTH); obj->crc = crc32(obj->crc, pack->map + mapoff, SHA1_DIGEST_LENGTH); + SHA1Update(pack_sha1_ctx, pack->map + mapoff, + SHA1_DIGEST_LENGTH); mapoff += SHA1_DIGEST_LENGTH; err = got_inflate_to_mem_mmap(NULL, &datalen, - &obj->len, &obj->crc, pack->map, mapoff, + &obj->len, &csum, pack->map, mapoff, pack->filesize - mapoff); if (err) break; @@ -281,8 +292,10 @@ read_packed_object(struct got_pack *pack, struct got_i } obj->crc = crc32(obj->crc, obj->delta.ref.ref_id.sha1, SHA1_DIGEST_LENGTH); + SHA1Update(pack_sha1_ctx, obj->delta.ref.ref_id.sha1, + SHA1_DIGEST_LENGTH); err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, - &obj->crc, obj->size, pack->fd); + &csum, obj->size, pack->fd); if (err) break; } @@ -299,15 +312,17 @@ read_packed_object(struct got_pack *pack, struct got_i if (pack->map) { obj->crc = crc32(obj->crc, pack->map + mapoff, 
obj->delta.ofs.base_offsetlen); + SHA1Update(pack_sha1_ctx, pack->map + mapoff, + obj->delta.ofs.base_offsetlen); mapoff += obj->delta.ofs.base_offsetlen; err = got_inflate_to_mem_mmap(NULL, &datalen, - &obj->len, &obj->crc, pack->map, mapoff, + &obj->len, &csum, pack->map, mapoff, pack->filesize - mapoff); if (err) break; } else { /* - * XXX Seek back and get the CRC of on-disk + * XXX Seek back and get CRC and SHA1 of on-disk * offset bytes. */ if (lseek(pack->fd, obj->off + obj->tslen, SEEK_SET) @@ -315,13 +330,13 @@ read_packed_object(struct got_pack *pack, struct got_i err = got_error_from_errno("lseek"); break; } - err = read_crc(&obj->crc, pack->fd, - obj->delta.ofs.base_offsetlen); + err = read_checksum(&obj->crc, pack_sha1_ctx, + pack->fd, obj->delta.ofs.base_offsetlen); if (err) break; err = got_inflate_to_mem_fd(NULL, &datalen, &obj->len, - &obj->crc, obj->size, pack->fd); + &csum, obj->size, pack->fd); if (err) break; } @@ -607,6 +622,8 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp struct got_packfile_hdr hdr; struct got_packidx packidx; char buf[8]; + char pack_sha1[SHA1_DIGEST_LENGTH]; + char pack_sha1_expected[SHA1_DIGEST_LENGTH]; int nobj, nvalid, nloose, nresolved = 0, i; struct got_indexed_object *objects = NULL, *obj; SHA1_CTX ctx; @@ -617,11 +634,12 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp int p_indexed = 0, last_p_indexed = -1; int p_resolved = 0, last_p_resolved = -1; - /* Check pack file header. */ + /* Require that pack file header and SHA1 trailer are present. 
*/ + if (pack->filesize < sizeof(hdr) + SHA1_DIGEST_LENGTH) + return got_error_msg(GOT_ERR_BAD_PACKFILE, + "short pack file"); + if (pack->map) { - if (pack->filesize < sizeof(hdr)) - return got_error_msg(GOT_ERR_BAD_PACKFILE, - "short packfile header"); memcpy(&hdr, pack->map, sizeof(hdr)); mapoff += sizeof(hdr); } else { @@ -630,7 +648,7 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp return got_error_from_errno("read"); if (r < sizeof(hdr)) return got_error_msg(GOT_ERR_BAD_PACKFILE, - "short packfile header"); + "short pack file"); } if (hdr.signature != htobe32(GOT_PACKFILE_SIGNATURE)) @@ -644,6 +662,10 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp return got_error_msg(GOT_ERR_BAD_PACKFILE, "bad packfile with zero objects"); + /* We compute the SHA1 of pack file contents and verify later on. */ + SHA1Init(&ctx); + SHA1Update(&ctx, (void *)&hdr, sizeof(hdr)); + /* * Create an in-memory pack index which will grow as objects * IDs in the pack file are discovered. Only fields used to @@ -732,7 +754,7 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp } } - err = read_packed_object(pack, obj, tmpfile); + err = read_packed_object(pack, obj, tmpfile, &ctx); if (err) goto done; @@ -761,6 +783,37 @@ index_pack(struct got_pack *pack, int idxfd, FILE *tmp } nvalid = nloose; + /* + * Having done a full pass over the pack file, we can now + * verify its checksum. 
+ */ + SHA1Final(pack_sha1, &ctx); + if (pack->map) { + memcpy(pack_sha1_expected, pack->map + + pack->filesize - SHA1_DIGEST_LENGTH, + SHA1_DIGEST_LENGTH); + } else { + ssize_t n; + if (lseek(pack->fd, -SHA1_DIGEST_LENGTH, SEEK_END) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + n = read(pack->fd, pack_sha1_expected, SHA1_DIGEST_LENGTH); + if (n == -1) { + err = got_error_from_errno("read"); + goto done; + } + if (n != SHA1_DIGEST_LENGTH) { + err = got_error(GOT_ERR_IO); + goto done; + } + } + if (memcmp(pack_sha1, pack_sha1_expected, SHA1_DIGEST_LENGTH) != 0) { + err = got_error_msg(GOT_ERR_BAD_PACKFILE, + "pack file checksum mismatch"); + goto done; + } + if (first_delta_idx == -1) first_delta_idx = 0;