commit 2d9e6abf243a0a1895786fa9002b28d69a0f6fea from: Stefan Sperling date: Wed May 04 13:43:24 2022 UTC store deltas in compressed form while packing, both in memory and cache file This reduces memory and disk space consumption during packing. with tweaks + memleak on error fix from op@ ok op@ commit - 6b7665acf3ac9dd7d0c30372df5a4fa09b1b47fa commit + 2d9e6abf243a0a1895786fa9002b28d69a0f6fea blob - 3c97a77f77b3da0548ab67dbcb2685723456ee39 blob + d4180f6aedf54820d7406dc3029ad2247929e580 --- lib/deflate.c +++ lib/deflate.c @@ -136,9 +136,9 @@ got_deflate_read(struct got_deflate_buf *zb, FILE *f, return NULL; } -const struct got_error * -got_deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset, - size_t len, size_t *outlenp, size_t *consumed) +static const struct got_error * +deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset, + size_t len, size_t *outlenp, size_t *consumed, int flush_on_eof) { z_stream *z = &zb->z; size_t last_total_out = z->total_out; @@ -159,7 +159,8 @@ got_deflate_read_mmap(struct got_deflate_buf *zb, uint z->avail_in = len - *consumed; if (z->avail_in == 0) { /* EOF */ - ret = deflate(z, Z_FINISH); + if (flush_on_eof) + ret = deflate(z, Z_FINISH); break; } } @@ -176,6 +177,51 @@ got_deflate_read_mmap(struct got_deflate_buf *zb, uint } *outlenp = z->total_out - last_total_out; + return NULL; +} + +const struct got_error * +got_deflate_read_mmap(struct got_deflate_buf *zb, uint8_t *map, size_t offset, + size_t len, size_t *outlenp, size_t *consumed) +{ + return deflate_read_mmap(zb, map, offset, len, outlenp, consumed, 1); +} + +const struct got_error * +got_deflate_flush(struct got_deflate_buf *zb, FILE *outfile, + struct got_deflate_checksum *csum, off_t *outlenp) +{ + int ret; + size_t n; + z_stream *z = &zb->z; + + if (z->avail_in != 0) + return got_error_msg(GOT_ERR_COMPRESSION, + "cannot flush zb with pending input data"); + + do { + size_t avail, last_total_out = zb->z.total_out; + + z->next_out = zb->outbuf; + z->avail_out = zb->outlen; + + ret = deflate(z, Z_FINISH); + if (ret != Z_STREAM_END && ret != Z_OK) + return got_error(GOT_ERR_COMPRESSION); + + avail = z->total_out - last_total_out; + if (avail > 0) { + n = fwrite(zb->outbuf, avail, 1, outfile); + if (n != 1) + return got_ferror(outfile, GOT_ERR_IO); + if (csum) + csum_output(csum, zb->outbuf, avail); + if (outlenp) + *outlenp += avail; + } + } while (ret != Z_STREAM_END); + + zb->flags &= ~GOT_DEFLATE_F_HAVE_MORE; return NULL; } @@ -258,7 +304,101 @@ got_deflate_to_file_mmap(off_t *outlen, uint8_t *map, *outlen += avail; } } while (zb.flags & GOT_DEFLATE_F_HAVE_MORE); + +done: + got_deflate_end(&zb); + return err; +} + +const struct got_error * +got_deflate_append_to_file_mmap(struct got_deflate_buf *zb, off_t *outlen, + uint8_t *map, size_t offset, size_t len, FILE *outfile, + struct got_deflate_checksum *csum) +{ + const struct got_error *err; + size_t avail, consumed; + + do { + err = deflate_read_mmap(zb, map, offset, len, &avail, + &consumed, 0); + if (err) + break; + offset += consumed; + len -= consumed; + if (avail > 0) { + size_t n; + n = fwrite(zb->outbuf, avail, 1, outfile); + if (n != 1) { + err = got_ferror(outfile, GOT_ERR_IO); + break; + } + if (csum) + csum_output(csum, zb->outbuf, avail); + if (outlen) + *outlen += avail; + } + } while ((zb->flags & GOT_DEFLATE_F_HAVE_MORE) && len > 0); + return err; +} + +const struct got_error * +got_deflate_to_mem_mmap(uint8_t **outbuf, size_t *outlen, + size_t *consumed_total, struct got_deflate_checksum *csum, uint8_t *map, + size_t offset, size_t len) +{ + const struct got_error *err; + size_t avail, consumed; + struct got_deflate_buf zb; + void *newbuf; + size_t nbuf = 1; + + if (outbuf) { + *outbuf = malloc(GOT_DEFLATE_BUFSIZE); + if (*outbuf == NULL) + return got_error_from_errno("malloc"); + err = got_deflate_init(&zb, *outbuf, GOT_DEFLATE_BUFSIZE); + if (err) { + free(*outbuf); + *outbuf = NULL; + return err; + } + } else { + err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); + if (err) + return err; + } + + *outlen = 0; + if (consumed_total) + *consumed_total = 0; + do { + err = got_deflate_read_mmap(&zb, map, offset, len, &avail, + &consumed); + if (err) + goto done; + offset += consumed; + if (consumed_total) + *consumed_total += consumed; + len -= consumed; + if (avail > 0 && csum) + csum_output(csum, zb.outbuf, avail); + *outlen += avail; + if ((zb.flags & GOT_DEFLATE_F_HAVE_MORE) && outbuf != NULL) { + newbuf = reallocarray(*outbuf, ++nbuf, + GOT_DEFLATE_BUFSIZE); + if (newbuf == NULL) { + err = got_error_from_errno("reallocarray"); + free(*outbuf); + *outbuf = NULL; + *outlen = 0; + goto done; + } + *outbuf = newbuf; + zb.outbuf = newbuf + *outlen; + zb.outlen = (nbuf * GOT_DEFLATE_BUFSIZE) - *outlen; + } + } while (zb.flags & GOT_DEFLATE_F_HAVE_MORE); done: got_deflate_end(&zb); return err; blob - 1c429af85a27229451e05798e9511d0a8f474968 blob + 09a8755cf062db2ffd1f7e2c26deef470dbba250 --- lib/got_lib_deflate.h +++ lib/got_lib_deflate.h @@ -39,8 +39,17 @@ const struct got_error *got_deflate_init(struct got_de size_t); const struct got_error *got_deflate_read(struct got_deflate_buf *, FILE *, off_t, size_t *, off_t *); +const struct got_error *got_deflate_read_mmap(struct got_deflate_buf *, + uint8_t *, size_t, size_t, size_t *, size_t *); void got_deflate_end(struct got_deflate_buf *); const struct got_error *got_deflate_to_file(off_t *, FILE *, off_t, FILE *, struct got_deflate_checksum *); const struct got_error *got_deflate_to_file_mmap(off_t *, uint8_t *, size_t, size_t, FILE *, struct got_deflate_checksum *); +const struct got_error *got_deflate_flush(struct got_deflate_buf *, FILE *, + struct got_deflate_checksum *, off_t *); +const struct got_error *got_deflate_append_to_file_mmap( + struct got_deflate_buf *, off_t *, uint8_t *, size_t, size_t, FILE *, + struct got_deflate_checksum *); +const struct got_error *got_deflate_to_mem_mmap(uint8_t **, size_t *, size_t *, + struct got_deflate_checksum *, uint8_t *, size_t, size_t); blob - 6af8d574c7b345c52d3e0c19759bf3ae6bd62b20 blob + 4bbe44dda07c97ba6f0ef878da0caadbe6de9741 --- lib/got_lib_object.h +++ lib/got_lib_object.h @@ -104,7 +104,7 @@ const struct got_error *got_object_open_from_packfile( struct got_object_id *, struct got_pack *, struct got_packidx *, int, struct got_repository *); const struct got_error *got_object_read_raw_delta(uint64_t *, uint64_t *, - off_t *, off_t *, off_t *, struct got_object_id **, int, + off_t *, off_t *, off_t *, off_t *, struct got_object_id **, int, struct got_packidx *, int, struct got_object_id *, struct got_repository *); const struct got_error *got_object_read_header_privsep(struct got_object **, struct got_object_id *, struct got_repository *, int); blob - e8fb373e287ee80486d50ed07964d9d39924308d blob + 6a3d3981c9afd96d48ef7746b2d0b1d78793a7ca --- lib/got_lib_pack.h +++ lib/got_lib_pack.h @@ -212,7 +212,7 @@ const struct got_error *got_packfile_extract_object(st const struct got_error *got_packfile_extract_object_to_mem(uint8_t **, size_t *, struct got_object *, struct got_pack *); const struct got_error *got_packfile_extract_raw_delta(uint8_t **, size_t *, - off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *, + size_t *, off_t *, off_t *, struct got_object_id *, uint64_t *, uint64_t *, struct got_pack *, struct got_packidx *, int); struct got_pack *got_repo_get_cached_pack(struct got_repository *, const char *); blob - 110fe049d86c1a33fb3b33e4fe74ffa8a3dbbfa8 blob + e57f4dd3f8f4d207324b69c89c54442ae78cd5bb --- lib/got_lib_privsep.h +++ lib/got_lib_privsep.h @@ -284,6 +284,7 @@ struct got_imsg_raw_delta { uint64_t base_size; uint64_t result_size; off_t delta_size; + off_t delta_compressed_size; off_t delta_offset; off_t delta_out_offset; @@ -662,8 +663,9 @@ const struct got_error *got_privsep_send_raw_delta_req struct got_object_id *); const struct got_error *got_privsep_send_raw_delta_outfd(struct imsgbuf *, int); const struct got_error *got_privsep_send_raw_delta(struct imsgbuf *, uint64_t, - uint64_t, off_t, off_t, off_t, struct got_object_id *); + uint64_t, off_t, off_t, off_t, off_t, struct got_object_id *); const struct got_error *got_privsep_recv_raw_delta(uint64_t *, uint64_t *, - off_t *, off_t *, off_t *, struct got_object_id **, struct imsgbuf *); + off_t *, off_t *, off_t *, off_t *, struct got_object_id **, + struct imsgbuf *); void got_privsep_exec_child(int[2], const char *, const char *); blob - b87e6eecb828ef98889452c3dd9b205e5eaf3c33 blob + 4e5facc7f5e3c665aa540bb9caf3299f68626c2d --- lib/object.c +++ lib/object.c @@ -388,8 +388,8 @@ got_object_open_from_packfile(struct got_object **obj, const struct got_error * got_object_read_raw_delta(uint64_t *base_size, uint64_t *result_size, - off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset, - struct got_object_id **base_id, int delta_cache_fd, + off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset, + off_t *delta_out_offset, struct got_object_id **base_id, int delta_cache_fd, struct got_packidx *packidx, int obj_idx, struct got_object_id *id, struct got_repository *repo) { @@ -400,6 +400,7 @@ got_object_read_raw_delta(uint64_t *base_size, uint64_ *base_size = 0; *result_size = 0; *delta_size = 0; + *delta_compressed_size = 0; *delta_offset = 0; *delta_out_offset = 0; @@ -439,7 +440,8 @@ got_object_read_raw_delta(uint64_t *base_size, uint64_ return err; return got_privsep_recv_raw_delta(base_size, result_size, delta_size, - delta_offset, delta_out_offset, base_id, pack->privsep_child->ibuf); + delta_compressed_size, delta_offset, delta_out_offset, base_id, + pack->privsep_child->ibuf); } static const struct got_error * blob - d875046e25b7f0b4172baa4dbd064445a73f18c4 blob + 4bac59b80e15c0c64099a61c98b1dd60cd3e01a4 --- lib/pack.c +++ lib/pack.c @@ -902,23 +902,33 @@ got_pack_parse_offset_delta(off_t *base_offset, size_t static const struct got_error * read_delta_data(uint8_t **delta_buf, size_t *delta_len, - size_t delta_data_offset, struct got_pack *pack) + size_t *delta_compressed_len, size_t delta_data_offset, + struct got_pack *pack) { const struct got_error *err = NULL; + size_t consumed = 0; if (pack->map) { if (delta_data_offset >= pack->filesize) return got_error(GOT_ERR_PACK_OFFSET); err = got_inflate_to_mem_mmap(delta_buf, delta_len, - NULL, NULL, pack->map, delta_data_offset, + &consumed, NULL, pack->map, delta_data_offset, pack->filesize - delta_data_offset); + if (err) + return err; } else { if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) return got_error_from_errno("lseek"); - err = got_inflate_to_mem_fd(delta_buf, delta_len, NULL, - NULL, 0, pack->fd); + err = got_inflate_to_mem_fd(delta_buf, delta_len, + &consumed, NULL, 0, pack->fd); + if (err) + return err; } - return err; + + if (delta_compressed_len) + *delta_compressed_len = consumed; + + return NULL; } static const struct got_error * @@ -1200,7 +1210,7 @@ got_pack_get_delta_chain_max_size(uint64_t *max_size, if (delta_buf == NULL) { cached = 0; err = read_delta_data(&delta_buf, &delta_len, - delta->data_offset, pack); + NULL, delta->data_offset, pack); if (err) return err; err = got_delta_cache_add(pack->delta_cache, @@ -1336,7 +1346,7 @@ got_pack_dump_delta_chain_to_file(size_t *result_size, pack->delta_cache, delta->data_offset); if (delta_buf == NULL) { cached = 0; - err = read_delta_data(&delta_buf, &delta_len, + err = read_delta_data(&delta_buf, &delta_len, NULL, delta->data_offset, pack); if (err) goto done; @@ -1482,7 +1492,7 @@ got_pack_dump_delta_chain_to_mem(uint8_t **outbuf, siz pack->delta_cache, delta->data_offset); if (delta_buf == NULL) { cached = 0; - err = read_delta_data(&delta_buf, &delta_len, + err = read_delta_data(&delta_buf, &delta_len, NULL, delta->data_offset, pack); if (err) goto done; @@ -1601,20 +1611,80 @@ got_packfile_extract_object_to_mem(uint8_t **buf, size return err; } +static const struct got_error * +read_raw_delta_data(uint8_t **delta_buf, size_t *delta_len, + size_t *delta_len_compressed, uint64_t *base_size, uint64_t *result_size, + off_t delta_data_offset, struct got_pack *pack, struct got_packidx *packidx) +{ + const struct got_error *err = NULL; + + /* Validate decompression and obtain the decompressed size. */ + err = read_delta_data(delta_buf, delta_len, delta_len_compressed, + delta_data_offset, pack); + if (err) + return err; + + /* Read delta base/result sizes from head of delta stream. */ + err = got_delta_get_sizes(base_size, result_size, + *delta_buf, *delta_len); + if (err) + goto done; + + /* Discard decompressed delta and read it again in compressed form. */ + free(*delta_buf); + *delta_buf = malloc(*delta_len_compressed); + if (*delta_buf == NULL) { + err = got_error_from_errno("malloc"); + goto done; + } + if (pack->map) { + if (delta_data_offset >= pack->filesize) + err = got_error(GOT_ERR_PACK_OFFSET); + memcpy(*delta_buf, pack->map + delta_data_offset, + *delta_len_compressed); + } else { + ssize_t n; + if (lseek(pack->fd, delta_data_offset, SEEK_SET) == -1) { + err = got_error_from_errno("lseek"); + goto done; + } + n = read(pack->fd, *delta_buf, *delta_len_compressed); + if (n < 0) { + err = got_error_from_errno("read"); + goto done; + } else if (n != *delta_len_compressed) { + err = got_error(GOT_ERR_IO); + goto done; + } + } +done: + if (err) { + free(*delta_buf); + *delta_buf = NULL; + *delta_len = 0; + *delta_len_compressed = 0; + *base_size = 0; + *result_size = 0; + } + return err; +} + const struct got_error * got_packfile_extract_raw_delta(uint8_t **delta_buf, size_t *delta_size, - off_t *delta_offset, off_t *base_offset, struct got_object_id *base_id, - uint64_t *base_size, uint64_t *result_size, struct got_pack *pack, - struct got_packidx *packidx, int idx) + size_t *delta_compressed_size, off_t *delta_offset, off_t *base_offset, + struct got_object_id *base_id, uint64_t *base_size, uint64_t *result_size, + struct got_pack *pack, struct got_packidx *packidx, int idx) { const struct got_error *err = NULL; off_t offset; uint8_t type; uint64_t size; size_t tslen, delta_hdrlen; + off_t delta_data_offset; *delta_buf = NULL; *delta_size = 0; + *delta_compressed_size = 0; *delta_offset = 0; *base_offset = 0; *base_size = 0; @@ -1659,8 +1729,9 @@ got_packfile_extract_raw_delta(uint8_t **delta_buf, si offset + delta_hdrlen < delta_hdrlen) return got_error(GOT_ERR_BAD_DELTA); - err = read_delta_data(delta_buf, delta_size, - offset + tslen + delta_hdrlen, pack); + delta_data_offset = offset + tslen + delta_hdrlen; + err = read_raw_delta_data(delta_buf, delta_size, delta_compressed_size, + base_size, result_size, delta_data_offset, pack, packidx); if (err) return err; @@ -1669,15 +1740,17 @@ got_packfile_extract_raw_delta(uint8_t **delta_buf, si goto done; } - err = got_delta_get_sizes(base_size, result_size, *delta_buf, size); - if (err) - goto done; - *delta_offset = offset; done: if (err) { free(*delta_buf); *delta_buf = NULL; + *delta_size = 0; + *delta_compressed_size = 0; + *delta_offset = 0; + *base_offset = 0; + *base_size = 0; + *result_size = 0; } return err; } blob - c4297f6ad6a288e1c8b13eba926caea877ea2bf9 blob + fb1e3545cc2cf36d2fb0c5ebec569c5b2302472d --- lib/pack_create.c +++ lib/pack_create.c @@ -52,6 +52,7 @@ #include "got_lib_privsep.h" #include "got_lib_repository.h" #include "got_lib_ratelimit.h" +#include "got_lib_inflate.h" #ifndef MIN #define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b)) @@ -75,9 +76,10 @@ struct got_pack_meta { /* The best delta we picked */ struct got_pack_meta *head; struct got_pack_meta *prev; - unsigned char *delta_buf; /* if not encoded in delta cache file */ - off_t delta_offset; /* offset in delta cache file */ + unsigned char *delta_buf; /* if encoded in memory (compressed) */ + off_t delta_offset; /* offset in delta cache file (compressed) */ off_t delta_len; /* encoded delta length */ + off_t delta_compressed_len; /* encoded+compressed delta length */ int nchain; int have_reused_delta; @@ -209,13 +211,15 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go const struct got_error *err; unsigned char buf[16], *bp; int i, j; - size_t len = 0; + size_t len = 0, compressed_len; + off_t bufsize = delta_size; off_t n; struct got_delta_instruction *d; + uint8_t *delta_buf; - m->delta_buf = malloc(delta_size); - if (m->delta_buf == NULL) - return got_error_from_errno("calloc"); + delta_buf = malloc(bufsize); + if (delta_buf == NULL) + return got_error_from_errno("malloc"); /* base object size */ buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK; @@ -225,9 +229,9 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - err = append(&m->delta_buf, &len, &delta_size, buf, i); + err = append(&delta_buf, &len, &bufsize, buf, i); if (err) - return err; + goto done; /* target object size */ buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK; @@ -237,9 +241,9 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - err = append(&m->delta_buf, &len, &delta_size, buf, i); + err = append(&delta_buf, &len, &bufsize, buf, i); if (err) - return err; + goto done; for (j = 0; j < ndeltas; j++) { d = &deltas[j]; @@ -265,51 +269,63 @@ encode_delta_in_mem(struct got_pack_meta *m, struct go n >>= 8; } } - err = append(&m->delta_buf, &len, &delta_size, + err = append(&delta_buf, &len, &bufsize, buf, bp - buf); if (err) - return err; + goto done; } else if (o->f == NULL) { n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - err = append(&m->delta_buf, &len, &delta_size, + err = append(&delta_buf, &len, &bufsize, buf, 1); if (err) - return err; - err = append(&m->delta_buf, &len, &delta_size, + goto done; + err = append(&delta_buf, &len, &bufsize, o->data + o->hdrlen + d->offset + n, buf[0]); if (err) - return err; + goto done; n += buf[0]; } } else { char content[128]; size_t r; - if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) - return got_error_from_errno("fseeko"); + if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - err = append(&m->delta_buf, &len, &delta_size, + err = append(&delta_buf, &len, &bufsize, buf, 1); if (err) - return err; + goto done; r = fread(content, 1, buf[0], o->f); - if (r != buf[0]) - return got_ferror(o->f, GOT_ERR_IO); - err = append(&m->delta_buf, &len, &delta_size, + if (r != buf[0]) { + err = got_ferror(o->f, GOT_ERR_IO); + goto done; + } + err = append(&delta_buf, &len, &bufsize, content, buf[0]); if (err) - return err; + goto done; n += buf[0]; } } } + err = got_deflate_to_mem_mmap(&m->delta_buf, &compressed_len, + NULL, NULL, delta_buf, 0, len); + if (err) + goto done; + m->delta_len = len; - return NULL; + m->delta_compressed_len = compressed_len; +done: + free(delta_buf); + return err; } static const struct got_error * @@ -317,12 +333,18 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o struct got_delta_instruction *deltas, int ndeltas, off_t base_size, FILE *f) { + const struct got_error *err; unsigned char buf[16], *bp; int i, j; off_t n; - size_t w; + struct got_deflate_buf zb; struct got_delta_instruction *d; + off_t delta_len = 0, compressed_len = 0; + err = got_deflate_init(&zb, NULL, GOT_DEFLATE_BUFSIZE); + if (err) + return err; + /* base object size */ buf[0] = base_size & GOT_DELTA_SIZE_VAL_MASK; n = base_size >> GOT_DELTA_SIZE_SHIFT; @@ -331,10 +353,13 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - w = fwrite(buf, 1, i, f); - if (w != i) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, &compressed_len, + buf, 0, i, f, NULL); + if (err) + goto done; + delta_len += i; + /* target object size */ buf[0] = o->size & GOT_DELTA_SIZE_VAL_MASK; n = o->size >> GOT_DELTA_SIZE_SHIFT; @@ -343,10 +368,13 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o buf[i] = n & GOT_DELTA_SIZE_VAL_MASK; n >>= GOT_DELTA_SIZE_SHIFT; } - w = fwrite(buf, 1, i, f); - if (w != i) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, &compressed_len, + buf, 0, i, f, NULL); + if (err) + goto done; + delta_len += i; + for (j = 0; j < ndeltas; j++) { d = &deltas[j]; if (d->copy) { @@ -361,7 +389,6 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o if (n == 0) break; } - n = d->len; if (n != GOT_DELTA_COPY_DEFAULT_LEN) { /* DELTA_COPY_LEN1 ... DELTA_COPY_LEN3 */ @@ -371,46 +398,75 @@ encode_delta(struct got_pack_meta *m, struct got_raw_o n >>= 8; } } - w = fwrite(buf, 1, bp - buf, f); - if (w != bp - buf) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, buf, 0, bp - buf, f, NULL); + if (err) + goto done; + delta_len += (bp - buf); } else if (o->f == NULL) { n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - w = fwrite(buf, 1, 1, f); - if (w != 1) - return got_ferror(f, GOT_ERR_IO); - w = fwrite(o->data + o->hdrlen + d->offset + n, - 1, buf[0], f); - if (w != buf[0]) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, buf, 0, 1, f, NULL); + if (err) + goto done; + delta_len++; + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, + o->data + o->hdrlen + d->offset + n, 0, + buf[0], f, NULL); + if (err) + goto done; + delta_len += buf[0]; n += buf[0]; } } else { char content[128]; size_t r; - if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) - return got_error_from_errno("fseeko"); + if (fseeko(o->f, o->hdrlen + d->offset, SEEK_SET) == -1) { + err = got_error_from_errno("fseeko"); + goto done; + } n = 0; while (n != d->len) { buf[0] = (d->len - n < 127) ? d->len - n : 127; - w = fwrite(buf, 1, 1, f); - if (w != 1) - return got_ferror(f, GOT_ERR_IO); + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, buf, 0, 1, f, NULL); + if (err) + goto done; + delta_len++; r = fread(content, 1, buf[0], o->f); - if (r != buf[0]) - return got_ferror(o->f, GOT_ERR_IO); - w = fwrite(content, 1, buf[0], f); - if (w != buf[0]) - return got_ferror(f, GOT_ERR_IO); + if (r != buf[0]) { + err = got_ferror(o->f, GOT_ERR_IO); + goto done; + } + err = got_deflate_append_to_file_mmap(&zb, + &compressed_len, content, 0, buf[0], f, + NULL); + if (err) + goto done; + delta_len += buf[0]; n += buf[0]; } } } - m->delta_len = ftello(f) - m->delta_offset; - return NULL; + err = got_deflate_flush(&zb, f, NULL, &compressed_len); + if (err) + goto done; + + /* sanity check */ + if (compressed_len != ftello(f) - m->delta_offset) { + err = got_error(GOT_ERR_COMPRESSION); + goto done; + } + + m->delta_len = delta_len; + m->delta_compressed_len = compressed_len; +done: + got_deflate_end(&zb); + return err; } static const struct got_error * @@ -459,15 +515,16 @@ reuse_delta(int idx, struct got_pack_meta *m, struct g const struct got_error *err = NULL; struct got_pack_meta *base = NULL; struct got_object_id *base_obj_id = NULL; - off_t delta_len = 0, delta_offset = 0, delta_cache_offset = 0; + off_t delta_len = 0, delta_compressed_len = 0; + off_t delta_offset = 0, delta_cache_offset = 0; uint64_t base_size, result_size; if (m->have_reused_delta) return NULL; err = got_object_read_raw_delta(&base_size, &result_size, &delta_len, - &delta_offset, &delta_cache_offset, &base_obj_id, delta_cache_fd, - packidx, idx, &m->id, repo); + &delta_compressed_len, &delta_offset, &delta_cache_offset, + &base_obj_id, delta_cache_fd, packidx, idx, &m->id, repo); if (err) return err; @@ -479,6 +536,7 @@ reuse_delta(int idx, struct got_pack_meta *m, struct g goto done; m->delta_len = delta_len; + m->delta_compressed_len = delta_compressed_len; m->delta_offset = delta_cache_offset; m->prev = base; m->size = result_size; @@ -789,15 +847,6 @@ pick_deltas(struct got_pack_meta **meta, int nmeta, in best_ndeltas, best_size, m->prev->size); } else { m->delta_offset = ftello(delta_cache); - /* - * TODO: - * Storing compressed delta data in the delta - * cache file would probably be more efficient - * than writing uncompressed delta data here - * and compressing it while writing the pack - * file. This would also allow for reusing - * deltas in their compressed form. - */ err = encode_delta(m, raw, best_deltas, best_ndeltas, m->prev->size, delta_cache); } @@ -1487,7 +1536,7 @@ done: } const struct got_error * -hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx) +hwrite(FILE *f, void *buf, off_t len, SHA1_CTX *ctx) { size_t n; @@ -1495,6 +1544,28 @@ hwrite(FILE *f, void *buf, int len, SHA1_CTX *ctx) n = fwrite(buf, 1, len, f); if (n != len) return got_ferror(f, GOT_ERR_IO); + return NULL; +} + +const struct got_error * +hcopy(FILE *fsrc, FILE *fdst, off_t len, SHA1_CTX *ctx) +{ + unsigned char buf[65536]; + off_t remain = len; + size_t n; + + while (remain > 0) { + size_t copylen = MIN(sizeof(buf), remain); + n = fread(buf, 1, copylen, fsrc); + if (n != copylen) + return got_ferror(fsrc, GOT_ERR_IO); + SHA1Update(ctx, buf, copylen); + n = fwrite(buf, 1, copylen, fdst); + if (n != copylen) + return got_ferror(fdst, GOT_ERR_IO); + remain -= copylen; + } + return NULL; } @@ -1677,11 +1748,11 @@ write_packed_object(off_t *packfile_size, FILE *packfi err = deltahdr(packfile_size, ctx, packfile, m); if (err) goto done; - err = got_deflate_to_file_mmap(&outlen, - m->delta_buf, 0, m->delta_len, packfile, &csum); + err = hwrite(packfile, m->delta_buf, + m->delta_compressed_len, ctx); if (err) goto done; - *packfile_size += outlen; + *packfile_size += m->delta_compressed_len; free(m->delta_buf); m->delta_buf = NULL; } else { @@ -1693,11 +1764,11 @@ write_packed_object(off_t *packfile_size, FILE *packfi err = deltahdr(packfile_size, ctx, packfile, m); if (err) goto done; - err = got_deflate_to_file(&outlen, delta_cache, - m->delta_len, packfile, &csum); + err = hcopy(delta_cache, packfile, + m->delta_compressed_len, ctx); if (err) goto done; - *packfile_size += outlen; + *packfile_size += m->delta_compressed_len; } done: if (raw) @@ -1921,12 +1992,12 @@ got_pack_create(uint8_t *packsha1, FILE *packfile, progress_cb, progress_arg, &rl, cancel_cb, cancel_arg); if (err) goto done; - if (fseeko(delta_cache, 0L, SEEK_SET) == -1) { - err = got_error_from_errno("fseeko"); - goto done; - } } + if (fflush(delta_cache) == EOF) { + err = got_error_from_errno("fflush"); + goto done; + } err = genpack(packsha1, packfile, delta_cache, deltify.meta, deltify.nmeta, reuse.meta, reuse.nmeta, ncolored, nfound, ntrees, nours, repo, progress_cb, progress_arg, &rl, blob - de7f8e9eb12711a08545a3396b393be498822f4c blob + 037c96037a94e944e0990944101d87c2ae0c88bc --- lib/privsep.c +++ lib/privsep.c @@ -2753,8 +2753,8 @@ got_privsep_send_raw_delta_outfd(struct imsgbuf *ibuf, const struct got_error * got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint64_t base_size, - uint64_t result_size, off_t delta_size, off_t delta_offset, - off_t delta_out_offset, struct got_object_id *base_id) + uint64_t result_size, off_t delta_size, off_t delta_compressed_size, + off_t delta_offset, off_t delta_out_offset, struct got_object_id *base_id) { struct got_imsg_raw_delta idelta; int ret; @@ -2762,6 +2762,7 @@ got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint6 idelta.base_size = base_size; idelta.result_size = result_size; idelta.delta_size = delta_size; + idelta.delta_compressed_size = delta_compressed_size; idelta.delta_offset = delta_offset; idelta.delta_out_offset = delta_out_offset; memcpy(idelta.base_id, base_id->sha1, SHA1_DIGEST_LENGTH); @@ -2776,8 +2777,8 @@ got_privsep_send_raw_delta(struct imsgbuf *ibuf, uint6 const struct got_error * got_privsep_recv_raw_delta(uint64_t *base_size, uint64_t *result_size, - off_t *delta_size, off_t *delta_offset, off_t *delta_out_offset, - struct got_object_id **base_id, struct imsgbuf *ibuf) + off_t *delta_size, off_t *delta_compressed_size, off_t *delta_offset, + off_t *delta_out_offset, struct got_object_id **base_id, struct imsgbuf *ibuf) { const struct got_error *err = NULL; struct imsg imsg; @@ -2787,6 +2788,7 @@ got_privsep_recv_raw_delta(uint64_t *base_size, uint64 *base_size = 0; *result_size = 0; *delta_size = 0; + *delta_compressed_size = 0; *delta_offset = 0; *delta_out_offset = 0; *base_id = NULL; @@ -2807,6 +2809,7 @@ got_privsep_recv_raw_delta(uint64_t *base_size, uint64 *base_size = delta->base_size; *result_size = delta->result_size; *delta_size = delta->delta_size; + *delta_compressed_size = delta->delta_compressed_size; *delta_offset = delta->delta_offset; *delta_out_offset = delta->delta_out_offset; *base_id = calloc(1, sizeof(**base_id)); blob - b014128a336645e26ebddf6b06cecbdc652fc1b3 blob + 0b2b5ee521c3a402299aefd172c600c2e4f24c43 --- libexec/got-read-pack/got-read-pack.c +++ libexec/got-read-pack/got-read-pack.c @@ -869,7 +869,7 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i { const struct got_error *err = NULL; struct got_imsg_raw_delta_request req; - size_t datalen, delta_size; + size_t datalen, delta_size, delta_compressed_size; off_t delta_offset; uint8_t *delta_buf = NULL; struct got_object_id id, base_id; @@ -886,8 +886,8 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i imsg->fd = -1; err = got_packfile_extract_raw_delta(&delta_buf, &delta_size, - &delta_offset, &base_offset, &base_id, &base_size, &result_size, - pack, packidx, req.idx); + &delta_compressed_size, &delta_offset, &base_offset, &base_id, + &base_size, &result_size, pack, packidx, req.idx); if (err) goto done; @@ -902,8 +902,8 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i } delta_out_offset = ftello(delta_outfile); - w = fwrite(delta_buf, 1, delta_size, delta_outfile); - if (w != delta_size) { + w = fwrite(delta_buf, 1, delta_compressed_size, delta_outfile); + if (w != delta_compressed_size) { err = got_ferror(delta_outfile, GOT_ERR_IO); goto done; } @@ -913,7 +913,8 @@ raw_delta_request(struct imsg *imsg, struct imsgbuf *i } err = got_privsep_send_raw_delta(ibuf, base_size, result_size, - delta_size, delta_offset, delta_out_offset, &base_id); + delta_size, delta_compressed_size, delta_offset, delta_out_offset, + &base_id); done: free(delta_buf); return err;