commit 885d3e0206a04e896704eb67f94c5f39b6976d33 from: Stefan Sperling date: Sat Jan 27 00:05:56 2018 UTC implement delta combiner and a small test suite for it commit - 4ca7b755cdcdddd626e7de549b567b22e096c116 commit + 885d3e0206a04e896704eb67f94c5f39b6976d33 blob - c25f0bf68b076236a834cec3f33f2b61af12ca14 blob + 5543f39b3fadb56aa44361f6af19d4e979ec45a5 --- include/got_error.h +++ include/got_error.h @@ -36,6 +36,8 @@ #define GOT_ERR_NOT_IMPL 18 #define GOT_ERR_OBJ_NOT_PACKED 19 #define GOT_ERR_BAD_DELTA_CHAIN 20 +#define GOT_ERR_BAD_DELTA 21 +#define GOT_ERR_COMPRESSION 22 static const struct got_error { int code; @@ -62,6 +64,8 @@ static const struct got_error { { GOT_ERR_NOT_IMPL, "feature not implemented" }, { GOT_ERR_OBJ_NOT_PACKED,"object is not packed" }, { GOT_ERR_BAD_DELTA_CHAIN,"bad delta chain" }, + { GOT_ERR_BAD_DELTA ,"bad delta" }, + { GOT_ERR_COMPRESSION,"compression failed" }, }; const struct got_error * got_error(int code); blob - 70c6704d0313d8059c362e51ff46094f89488561 blob + 48f534b64d3f4b9c1cb7a82e3cf5ebc14ed5e407 --- lib/delta.c +++ lib/delta.c @@ -27,7 +27,12 @@ #include "got_object.h" #include "delta.h" +#include "zb.h" +#ifndef MIN +#define MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b)) +#endif + struct got_delta * got_delta_open(const char *path_packfile, int type, off_t offset, size_t size) @@ -75,8 +80,227 @@ got_delta_chain_get_base_type(int *type, struct got_de return got_error(GOT_ERR_BAD_DELTA_CHAIN); } -const struct got_error *got_delta_apply(struct got_delta *delta, - FILE *base_file, FILE *delta_file, FILE *outfile) +/* Fetch another (required) byte from the delta stream. */ +static const struct got_error * +next_delta_byte(const uint8_t **p, size_t *remain) { - return got_error(GOT_ERR_NOT_IMPL); + if (--(*remain) == 0) + return got_error(GOT_ERR_BAD_DELTA); + (*p)++; + return NULL; } + +static const struct got_error * +parse_size(uint64_t *size, const uint8_t **p, size_t *remain) +{ + const struct got_error *err = NULL; + int i = 0; + + *size = 0; + do { + /* We do not support size values which don't fit in 64 bit. */ + if (i > 9) + return got_error(GOT_ERR_NO_SPACE); + + if (i == 0) + *size = ((**p) & GOT_DELTA_SIZE_VAL_MASK); + else { + size_t shift = GOT_DELTA_SIZE_SHIFT * i; + *size |= (((**p) & GOT_DELTA_SIZE_VAL_MASK) << shift); + } + + if (((**p) & GOT_DELTA_SIZE_MORE) == 0) + break; + i++; + err = next_delta_byte(p, remain); + } while (err == NULL); + + return err; +} + +static const struct got_error * +parse_opcode(off_t *offset, size_t *len, const uint8_t **p, size_t *remain) +{ + const struct got_error *err = NULL; + off_t o = 0; + size_t l = 0; + uint8_t opcode = **p; + + if (opcode & GOT_DELTA_COPY_OFF1) { + err = next_delta_byte(p, remain); + if (err) + return err; + o = (off_t)(**p); + } + if (opcode & GOT_DELTA_COPY_OFF2) { + err = next_delta_byte(p, remain); + if (err) + return err; + o |= ((off_t)(**p)) << 8; + } + if (opcode & GOT_DELTA_COPY_OFF3) { + err = next_delta_byte(p, remain); + if (err) + return err; + o |= ((off_t)(**p)) << 16; + } + if (opcode & GOT_DELTA_COPY_OFF4) { + err = next_delta_byte(p, remain); + if (err) + return err; + o |= ((off_t)(**p)) << 24; + } + + if (opcode & GOT_DELTA_COPY_LEN1) { + err = next_delta_byte(p, remain); + if (err) + return err; + l = (off_t)(**p); + } + if (opcode & GOT_DELTA_COPY_LEN2) { + err = next_delta_byte(p, remain); + if (err) + return err; + l |= ((off_t)(**p)) << 8; + } + if (opcode & GOT_DELTA_COPY_LEN3) { + err = next_delta_byte(p, remain); + if (err) + return err; + l |= ((off_t)(**p)) << 16; + } + + if (o == 0) + o = GOT_DELTA_COPY_DEFAULT_OFF; + if (l == 0) + l = GOT_DELTA_COPY_DEFAULT_LEN; + + *offset = o; + *len = l; + return NULL; +} + +static const struct got_error * +copy_from_base(FILE *base_file, off_t offset, size_t size, FILE *outfile) +{ + if (fseeko(base_file, offset, SEEK_SET) != 0) + return got_error_from_errno(); + + while (size > 0) { + uint8_t data[2048]; + size_t len = MIN(size, sizeof(data)); + size_t n; + + n = fread(data, len, 1, base_file); + if (n != 1) + return got_ferror(base_file, GOT_ERR_IO); + + n = fwrite(data, len, 1, outfile); + if (n != 1) + return got_ferror(outfile, GOT_ERR_IO); + + size -= len; + } + + return NULL; +} + +static const struct got_error * +copy_from_delta(const uint8_t **p, size_t *remain, size_t len, FILE *outfile) +{ + size_t n; + + if (*remain < len) + return got_error(GOT_ERR_BAD_DELTA); + + n = fwrite(*p, len, 1, outfile); + if (n != 1) + return got_ferror(outfile, GOT_ERR_IO); + + *p += len; + *remain -= len; + return NULL; +} + +const struct got_error * +got_delta_apply(FILE *base_compressed, const uint8_t *delta_buf, + size_t delta_len, FILE *outfile) +{ + const struct got_error *err = NULL; + FILE *base_file = NULL; + uint64_t base_size, result_size; + size_t remain, outsize = 0; + const uint8_t *p; + + if (delta_len < GOT_DELTA_STREAM_LENGTH_MIN) + return got_error(GOT_ERR_BAD_DELTA); + + p = delta_buf; + remain = delta_len; + + /* Read the two size fields at the beginning of the stream. */ + err = parse_size(&base_size, &p, &remain); + if (err) + return err; + err = next_delta_byte(&p, &remain); + if (err) + return err; + err = parse_size(&result_size, &p, &remain); + if (err) + return err; + + /* Decode and execute copy instructions from the delta stream. */ + err = next_delta_byte(&p, &remain); + while (err == NULL) { + if (*p & GOT_DELTA_COPY_OPCODE) { + off_t offset = 0; + size_t len = 0; + err = parse_opcode(&offset, &len, &p, &remain); + if (err) + break; + if (base_file == NULL) { + size_t inflated_size; + err = got_inflate_to_tempfile(&base_file, + &inflated_size, base_compressed); + if (err) + break; + if (inflated_size != base_size) { + err = got_error(GOT_ERR_BAD_DELTA); + break; + } + } + err = copy_from_base(base_file, offset, len, outfile); + if (err == NULL) + outsize += len; + } else { + size_t len = (size_t)*p; + if (len == 0) { + err = got_error(GOT_ERR_BAD_DELTA); + break; + } + err = next_delta_byte(&p, &remain); + if (err) + return err; + err = copy_from_delta(&p, &remain, len, outfile); + if (err == NULL) + outsize += len; + } + + if (err == NULL) { + if (remain == 0) + break; + /* Fetch the next instruction. */ + p++; + remain--; + } + } + + if (outsize != result_size) + err = got_error(GOT_ERR_BAD_DELTA); + + if (base_file) + fclose(base_file); + if (err == NULL) + rewind(outfile); + return err; +} blob - ebdebab3fc9d4c856f0e8b58963f92726a6439a5 blob + 75cfc705330f66fe8da903edd8d787e0474dc7c5 --- lib/delta.h +++ lib/delta.h @@ -31,7 +31,7 @@ struct got_delta *got_delta_open(const char *, int, of void got_delta_close(struct got_delta *); const struct got_error *got_delta_chain_get_base_type(int *, struct got_delta_chain *); -const struct got_error *got_delta_apply(struct got_delta *, FILE *, FILE *, +const struct got_error *got_delta_apply(FILE *, const uint8_t *, size_t, FILE *); /* blob - 630811b41d88d95732deb486b2e19d69aac4f813 blob + 7660d7a105dfbcf014a9314ab80be0b2621e4ee0 --- lib/object.c +++ lib/object.c @@ -157,7 +157,7 @@ read_object_header(struct got_object **obj, struct got i = 0; totlen = 0; do { - err = got_inflate_read(&zb, f, &outlen); + err = got_inflate_read(&zb, f, NULL, &outlen); if (err) goto done; if (strchr(zb.outbuf, '\0') == NULL) { @@ -508,7 +508,7 @@ read_commit_object(struct got_commit_object **commit, return err; do { - err = got_inflate_read(&zb, f, &len); + err = got_inflate_read(&zb, f, NULL, &len); if (err || len == 0) break; } while (len < obj->hdrlen + obj->size); @@ -577,7 +577,7 @@ read_tree_object(struct got_tree_object **tree, return err; do { - err = got_inflate_read(&zb, f, &len); + err = got_inflate_read(&zb, f, NULL, &len); if (err || len == 0) break; } while (len < obj->hdrlen + obj->size); @@ -674,5 +674,5 @@ got_object_blob_close(struct got_blob_object *blob) const struct got_error * got_object_blob_read_block(struct got_blob_object *blob, size_t *outlenp) { - return got_inflate_read(&blob->zb, blob->f, outlenp); + return got_inflate_read(&blob->zb, blob->f, NULL, outlenp); } blob - f5d5be05e26c6af56f7af5d8c2daf82708434e9d blob + e169c217ee8f3cba3a5ec9f6d91e63691772e8ab --- lib/pack.c +++ lib/pack.c @@ -37,6 +37,7 @@ #include "path.h" #include "delta.h" #include "object.h" +#include "zb.h" #define GOT_PACK_PREFIX "pack-" #define GOT_PACKFILE_SUFFIX ".pack" @@ -845,7 +846,11 @@ dump_delta_chain(struct got_delta_chain *deltas, FILE /* Deltas are ordered in ascending order. */ SIMPLEQ_FOREACH(delta, &deltas->entries, entry) { - FILE *delta_file = fopen(delta->path_packfile, "rb"); + uint8_t *delta_buf = NULL; + size_t delta_len = 0; + FILE *delta_file; + + delta_file = fopen(delta->path_packfile, "rb"); if (delta_file == NULL) { err = got_error_from_errno(); goto done; @@ -857,10 +862,18 @@ dump_delta_chain(struct got_delta_chain *deltas, FILE goto done; } - err = got_delta_apply(delta, base_file, delta_file, + /* Delta streams should always fit in memory. */ + err = got_inflate_to_mem(&delta_buf, &delta_len, delta_file, + delta->size); + if (err) + return err; + + fclose(delta_file); + + err = got_delta_apply(base_file, delta_buf, delta_len, /* Final delta application writes to the output file. */ ++n < deltas->nentries ? accum_file : outfile); - fclose(delta_file); + free(delta_buf); if (err) goto done; blob - 27a5186187c4c25687da18825582c01b29cd0092 blob + fa57ae67369e2d3a5d678d12618c28d6ddd8787d --- lib/zb.c +++ lib/zb.c @@ -25,6 +25,7 @@ #include "got_error.h" #include "got_object.h" +#include "path.h" #include "zb.h" const struct got_error * @@ -62,19 +63,21 @@ done: } const struct got_error * -got_inflate_read(struct got_zstream_buf *zb, FILE *f, size_t *outlenp) +got_inflate_read(struct got_zstream_buf *zb, FILE *f, size_t *inlenp, + size_t *outlenp) { size_t last_total_out = zb->z.total_out; z_stream *z = &zb->z; - int n, ret; + int ret; z->next_out = zb->outbuf; z->avail_out = zb->outlen; + if (inlenp) + *inlenp = 0; do { if (z->avail_in == 0) { - int i; - n = fread(zb->inbuf, 1, zb->inlen, f); + size_t n = fread(zb->inbuf, 1, zb->inlen, f); if (n == 0) { if (ferror(f)) return got_ferror(f, GOT_ERR_IO); @@ -83,6 +86,8 @@ got_inflate_read(struct got_zstream_buf *zb, FILE *f, } z->next_in = zb->inbuf; z->avail_in = n; + if (inlenp) + *inlenp += n; } ret = inflate(z, Z_SYNC_FLUSH); } while (ret == Z_OK && z->avail_out > 0); @@ -104,3 +109,90 @@ got_inflate_end(struct got_zstream_buf *zb) free(zb->outbuf); inflateEnd(&zb->z); } + +const struct got_error * +got_inflate_to_mem(uint8_t **outbuf, size_t *outlen, FILE *f, size_t insize) +{ + const struct got_error *err; + size_t inbytes, consumed, avail; + struct got_zstream_buf zb; + void *newbuf; + + err = got_inflate_init(&zb, 8192); + if (err) + return err; + + *outbuf = NULL; + *outlen = 0; + inbytes = 0; + + do { + err = got_inflate_read(&zb, f, &consumed, &avail); + if (err) + return err; + inbytes += consumed; + if (avail == 0) { + if (inbytes < insize) + err = got_error(GOT_ERR_BAD_DELTA); + break; + } + newbuf = reallocarray(*outbuf, 1, *outlen + avail); + if (newbuf == NULL) { + free(*outbuf); + *outbuf = NULL; + *outlen = 0; + err = got_error(GOT_ERR_NO_MEM); + goto done; + } + memcpy(newbuf + *outlen, zb.outbuf, avail); + *outbuf = newbuf; + *outlen += avail; + } while (inbytes < insize); + +done: + got_inflate_end(&zb); + return err; +} + +const struct got_error * +got_inflate_to_tempfile(FILE **outfile, size_t *outlen, FILE *f) +{ + const struct got_error *err; + size_t avail; + struct got_zstream_buf zb; + void *newbuf; + + *outfile = got_opentemp(); + if (*outfile == NULL) + return got_error_from_errno(); + + err = got_inflate_init(&zb, 8192); + if (err) + goto done; + + *outlen = 0; + + do { + err = got_inflate_read(&zb, f, NULL, &avail); + if (err) + return err; + if (avail > 0) { + size_t n; + n = fwrite(zb.outbuf, avail, 1, *outfile); + if (n != 1) { + err = got_ferror(*outfile, GOT_ERR_IO); + goto done; + } + *outlen += avail; + } + } while (avail > 0); + +done: + if (err) { + fclose(*outfile); + *outfile = NULL; + } else + rewind(*outfile); + got_inflate_end(&zb); + return err; +} blob - f5e21a1104d34f2659a176038e2d7147e1e90a0d blob + 0fc27e548da6a124cd236a741baca6b7ca9d51ed --- lib/zb.h +++ lib/zb.h @@ -16,5 +16,8 @@ const struct got_error *got_inflate_init(struct got_zstream_buf *, size_t); const struct got_error *got_inflate_read(struct got_zstream_buf *, FILE *, - size_t *); + size_t *, size_t *); void got_inflate_end(struct got_zstream_buf *); +const struct got_error *got_inflate_to_mem(uint8_t **, size_t *, FILE *, + size_t); +const struct got_error *got_inflate_to_tempfile(FILE **, size_t *, FILE *); blob - d29f5e886a4cb76ed9292d940cc10abf3141cea0 blob + 50c015abeb22bec57c9b6c433abd755912f97d8b --- regress/delta/Makefile +++ regress/delta/Makefile @@ -1,7 +1,7 @@ .PATH:${.CURDIR}/../../lib PROG = delta_test -SRCS = delta.c error.c delta_test.c +SRCS = delta.c error.c path.c zb.c delta_test.c CPPFLAGS = -I${.CURDIR}/../../include -I${.CURDIR}/../../lib LDADD = -lz blob - 4bd6cac860f5d1a2785889f4b433226a5cdc89ee blob + ca34b8eb00423922715b9ae6251d21f32951ffee --- regress/delta/delta_test.c +++ regress/delta/delta_test.c @@ -18,15 +18,116 @@ #include #include +#include +#include #include "got_error.h" #include "delta.h" +#include "path.h" +#ifndef nitems +#define nitems(_a) (sizeof(_a) / sizeof((_a)[0])) +#endif + +struct delta_test { + const char *base; + const char *delta; + size_t delta_len; + const char *expected; +} delta_tests[] = { + /* base len 0, target len 4, append 4 'x' */ + { "", "\x00\x04\x04xxxx", 7, "xxxx" }, + /* copy 4 bytes at offset 0 from base, append 4 'x' */ + { "aabbccdd", "\x08\x08\x90\x04\x04xxxx", 9, "aabbxxxx" }, + /* copy 4 bytes at offset 4 from base, append 4 'x' */ + { "aabbccdd", "\x08\x08\x91\x04\x04\x04xxxx", 10, "ccddxxxx" }, +}; + +static const struct got_error * +compress_to_file(FILE **outfile, const char *input, size_t inlen) +{ + const struct got_error *err = NULL; + z_stream z; + char buf[2048]; + char *inbuf; + int ret; + size_t n; + + memset(&z, 0, sizeof(z)); + z.zalloc = Z_NULL; + z.zfree = Z_NULL; + + if (deflateInit(&z, 8) != Z_OK) + return got_error(GOT_ERR_IO); + + *outfile = got_opentemp(); + if (*outfile == NULL) + return got_error_from_errno(); + + z.next_in = (Bytef *)input; + z.avail_in = inlen; + z.next_out = buf; + z.avail_out = sizeof(buf); + /* Our output buffer is large enough so one round should be enough. */ + ret = deflate(&z, Z_FINISH); + if (ret != Z_STREAM_END || z.avail_out == 0) { + err = got_error(GOT_ERR_COMPRESSION); + goto done; + } + + deflateEnd(&z); + + n = fwrite(buf, 1, z.avail_out, *outfile); + if (n != z.avail_out) + err = got_ferror(*outfile, GOT_ERR_IO); +done: + if (err) { + fclose(*outfile); + *outfile = NULL; + } else + rewind(*outfile); + return err; +} + static int delta_combine() { - return 1; + const struct got_error *err = NULL; + int i; + FILE *result_file; + + result_file = got_opentemp(); + if (result_file == NULL) + return 1; + + for (i = 0; i < nitems(delta_tests); i++) { + struct delta_test *dt = &delta_tests[i]; + FILE *base_file; + char buf[1024]; + size_t n, len, result_len; + + len = strlen(dt->base); + err = compress_to_file(&base_file, dt->base, len); + if (err) + break; + + err = got_delta_apply(base_file, dt->delta, dt->delta_len, + result_file); + fclose(base_file); + if (err) + break; + result_len = strlen(dt->expected); + n = fread(buf, result_len, 1, result_file); + if (n != 1 || strncmp(buf, dt->expected, result_len) != 0) { + err = got_ferror(result_file, GOT_ERR_BAD_DELTA); + break; + } + rewind(result_file); + } + + fclose(result_file); + return (err == NULL); } #define RUN_TEST(expr, name) \