commit 5391aec51faf860600b91bc2a3e253d119896de1 from: Omar Polo date: Sat Aug 21 15:27:52 2021 UTC initial commit commit - /dev/null commit + 5391aec51faf860600b91bc2a3e253d119896de1 blob - /dev/null blob + 0a58a0c2a8130893bf70250fd1b46bff2b4b37a1 (mode 644) --- /dev/null +++ README.md @@ -0,0 +1,32 @@ +# zip utils + +This is the code for the series of posts about zip files on my blog. + +To compile the first program: + + cc zipls.c -o zipls + +To compile the second program: + + cc zipview.c -o zipview -lz + + +## License + +This is free and unencumbered software released into the public +domain; see the UNLICENSE file. + + +## Posts + +Posts (English): + + - [Inspecting zip + files](https://www.omarpolo.com/post/inspecting-zips.html) + - [Extracting files from + zips](https://www.omarpolo.com/post/extracting-from-zips.html) + +Posts (Italian, available only over Gemini): + + - [Elencare i file di uno + zip](gemini://it.omarpolo.com/articoli/elencare-i-file-di-uno-zip.gmi) blob - /dev/null blob + 68a49daad8ff7e35068f2b7a97d643aab440eaec (mode 644) --- /dev/null +++ UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to blob - /dev/null blob + 460636c9864cbd26ebdce1e738325a8d95f1720e (mode 644) --- /dev/null +++ zipls.c @@ -0,0 +1,133 @@ +/* + * This is free and unencumbered software released into the public domain. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +void * +find_central_directory(uint8_t *addr, size_t len) +{ + uint32_t offset; + uint16_t clen; + uint8_t *p, *end; + + /* + * At -22 bytes from the end there is the end of the central + * directory assuming an empty comment. It's a sensible place + * from which start. + */ + if (len < 22) + return NULL; + end = addr + len; + p = end - 22; + +again: + for (; p > addr; --p) + if (memcmp(p, "\x50\x4b\x05\x06", 4) == 0) + break; + + if (p == addr) + return NULL; + + /* read comment length */ + memcpy(&clen, p + 20, sizeof(clen)); + clen = le16toh(clen); + + /* false signature inside a comment? 
*/ + if (clen + 22 != end - p) { + p--; + goto again; + } + + /* read the offset for the central directory */ + memcpy(&offset, p + 16, sizeof(offset)); + offset = le32toh(offset); + + if (addr + offset > p) + return NULL; + + return addr + offset; +} + +void +ls(uint8_t *zip, size_t len, uint8_t *cd) +{ + uint16_t flen, xlen, clen; + uint8_t *end; + char filename[PATH_MAX]; + + end = zip + len; + while (cd < end - 46 && memcmp(cd, "\x50\x4b\x01\x02", 4) == 0) { + memcpy(&flen, cd + 28, sizeof(flen)); + memcpy(&xlen, cd + 28 + 2, sizeof(xlen)); + memcpy(&clen, cd + 28 + 2 + 2, sizeof(clen)); + + flen = le16toh(flen); + xlen = le16toh(xlen); + clen = le16toh(clen); + + memset(filename, 0, sizeof(filename)); + memcpy(filename, cd + 46, MIN(sizeof(filename)-1, flen)); + + printf("%s\n", filename); + + cd += 46 + flen + xlen + clen; + } +} + +void * +map_file(int fd, size_t *len) +{ + off_t jump; + void *addr; + + if ((jump = lseek(fd, 0, SEEK_END)) == -1) + err(1, "lseek"); + + if (lseek(fd, 0, SEEK_SET) == -1) + err(1, "lseek"); + + if ((addr = mmap(NULL, jump, PROT_READ, MAP_PRIVATE, fd, 0)) + == MAP_FAILED) + err(1, "mmap"); + + *len = jump; + return addr; +} + +int +main(int argc, char **argv) +{ + int fd; + void *zip, *cd; + size_t len; + + if (argc != 2) + errx(1, "missing file to inspect"); + + if ((fd = open(argv[1], O_RDONLY)) == -1) + err(1, "can't open %s", argv[1]); + + zip = map_file(fd, &len); + if ((cd = find_central_directory(zip, len)) == NULL) + errx(1, "can't find the central directory"); + + ls(zip, len, cd); + + munmap(zip, len); + close(fd); + + return 0; +} blob - /dev/null blob + 74a7ae855184cc21722d0a385df995f4da76ab7b (mode 644) --- /dev/null +++ zipview.c @@ -0,0 +1,305 @@ +/* + * This is free and unencumbered software released into the public domain. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define COMPRESSION_NONE 0x00 +#define COMPRESSION_DEFLATE 0x08 + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +void * +find_central_directory(uint8_t *addr, size_t len) +{ + uint32_t offset; + uint16_t clen; + uint8_t *p, *end; + + /* + * At -22 bytes from the end there is the end of the central + * directory assuming an empty comment. It's a sensible place + * from which start. + */ + if (len < 22) + return NULL; + end = addr + len; + p = end - 22; + +again: + for (; p > addr; --p) + if (memcmp(p, "\x50\x4b\x05\x06", 4) == 0) + break; + + if (p == addr) + return NULL; + + /* read comment length */ + memcpy(&clen, p + 20, sizeof(clen)); + clen = le16toh(clen); + + /* false signature inside a comment? */ + if (clen + 22 != end - p) { + p--; + goto again; + } + + /* read the offset for the central directory */ + memcpy(&offset, p + 16, sizeof(offset)); + offset = le32toh(offset); + + if (addr + offset > p) + return NULL; + + return addr + offset; +} + +void +unzip_none(uint8_t *data, size_t size, unsigned long ocrc) +{ + unsigned long crc = 0; + + fwrite(data, 1, size, stdout); + + crc = crc32(0, data, size); + if (crc != ocrc) + errx(1, "CRC mismatch"); +} + +void +unzip_deflate(uint8_t *data, size_t size, unsigned long ocrc) +{ + z_stream stream; + size_t have; + unsigned long crc = 0; + char buf[BUFSIZ]; + + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; + stream.next_in = data; + stream.avail_in = size; + stream.next_out = Z_NULL; + stream.avail_out = 0; + if (inflateInit2(&stream, -15) != Z_OK) + err(1, "inflateInit failed"); + + do { + stream.next_out = buf; + stream.avail_out = sizeof(buf); + + switch (inflate(&stream, Z_BLOCK)) { + case Z_STREAM_ERROR: + errx(1, "stream error"); + case Z_NEED_DICT: + errx(1, "need dict"); + case Z_DATA_ERROR: + errx(1, "data error: %s", stream.msg); + case Z_MEM_ERROR: + errx(1, "memory 
error"); + } + + have = sizeof(buf) - stream.avail_out; + fwrite(buf, 1, have, stdout); + crc = crc32(crc, buf, have); + } while (stream.avail_out == 0); + + inflateEnd(&stream); + + if (crc != ocrc) + errx(1, "CRC mismatch"); +} + +void +unzip(uint8_t *zip, size_t len, uint8_t *entry) +{ + uint32_t size, osize, crc, off; + uint16_t flags, compression; + uint16_t flen, xlen; + uint8_t *data, *offset; + + /* read the offset of the file record */ + memcpy(&off, entry + 42, sizeof(off)); + offset = zip + le32toh(off); + + if (offset > zip + len - 4 || + memcmp(offset, "\x50\x4b\x03\x04", 4) != 0) + errx(1, "invalid offset or file header signature"); + + memcpy(&flags, offset + 6, sizeof(flags)); + memcpy(&compression, offset + 8, sizeof(compression)); + + flags = le16toh(flags); + compression = le16toh(compression); + + memcpy(&crc, entry + 16, sizeof(crc)); + memcpy(&size, entry + 20, sizeof(size)); + memcpy(&osize, entry + 24, sizeof(osize)); + + crc = le32toh(crc); + size = le32toh(size); + osize = le32toh(osize); + + memcpy(&flen, offset + 26, sizeof(flen)); + memcpy(&xlen, offset + 28, sizeof(xlen)); + + flen = le16toh(flen); + xlen = le16toh(xlen); + + data = offset + 30 + flen + xlen; + if (data + size > zip + len) + errx(1, "corrupted zip, offset out of file"); + + switch (compression) { + case COMPRESSION_NONE: + unzip_none(data, size, crc); + break; + case COMPRESSION_DEFLATE: + unzip_deflate(data, size, crc); + break; + default: + errx(1, "unknown compression method 0x%02x", + compression); + } +} + +void * +next(uint8_t *zip, size_t len, uint8_t *entry) +{ + uint16_t flen, xlen, clen; + uint8_t *next, *end; + + memcpy(&flen, entry + 28, sizeof(flen)); + memcpy(&xlen, entry + 28 + 2, sizeof(xlen)); + memcpy(&clen, entry + 28 + 2 + 2, sizeof(xlen)); + + flen = le16toh(flen); + xlen = le16toh(xlen); + clen = le16toh(clen); + + next = entry + 46 + flen + xlen + clen; + end = zip + len; + if (next >= end - 46 || + memcmp(next, "\x50\x4b\x01\x02", 4) != 0) + 
return NULL; + return next; +} + +void +filename(uint8_t *zip, size_t len, uint8_t *entry, char *buf, + size_t size) +{ + uint16_t flen; + size_t s; + + memcpy(&flen, entry + 28, sizeof(flen)); + flen = le16toh(flen); + + s = MIN(size-1, flen); + memcpy(buf, entry + 46, s); + buf[s] = '\0'; +} + +void +ls(uint8_t *zip, size_t len, uint8_t *cd) +{ + char name[PATH_MAX]; + + do { + filename(zip, len, cd, name, sizeof(name)); + printf("%s\n", name); + } while ((cd = next(zip, len, cd)) != NULL); +} + +void * +find_file(uint8_t *zip, size_t len, uint8_t *cd, const char *target) +{ + char name[PATH_MAX]; + + do { + filename(zip, len, cd, name, sizeof(name)); + if (!strcmp(name, target)) + return cd; + } while ((cd = next(zip, len, cd)) != NULL); + + return NULL; +} + +int +extract_file(uint8_t *zip, size_t len, uint8_t *cd, const char *target) +{ + if ((cd = find_file(zip, len, cd, target)) == NULL) + return -1; + + unzip(zip, len, cd); + return 0; +} + +void * +map_file(int fd, size_t *len) +{ + off_t jump; + void *addr; + + if ((jump = lseek(fd, 0, SEEK_END)) == -1) + err(1, "lseek"); + + if (lseek(fd, 0, SEEK_SET) == -1) + err(1, "lseek"); + + if ((addr = mmap(NULL, jump, PROT_READ, MAP_PRIVATE, fd, 0)) + == MAP_FAILED) + err(1, "mmap"); + + *len = jump; + return addr; +} + +int +main(int argc, char **argv) +{ + int i, fd; + void *zip, *cd; + size_t len; + + if (argc < 2) { + fprintf(stderr, "Usage: %s archive.zip [files...]", + *argv); + return 1; + } + + if ((fd = open(argv[1], O_RDONLY)) == -1) + err(1, "can't open %s", argv[1]); + + zip = map_file(fd, &len); + +#ifdef __OpenBSD__ + if (pledge("stdio", NULL) == -1) + err(1, "pledge"); +#endif + + if ((cd = find_central_directory(zip, len)) == NULL) + errx(1, "can't find the central directory"); + + if (argc == 2) + ls(zip, len, cd); + else { + for (i = 2; i < argc; ++i) + extract_file(zip, len, cd, argv[i]); + } + + munmap(zip, len); + close(fd); + + return 0; +}