Commit Diff


commit - /dev/null
commit + 5391aec51faf860600b91bc2a3e253d119896de1
blob - /dev/null
blob + 0a58a0c2a8130893bf70250fd1b46bff2b4b37a1 (mode 644)
--- /dev/null
+++ README.md
@@ -0,0 +1,32 @@
+# zip utils
+
+This is the code for the series of posts about zip files in my blog.
+
+To compile the first program:
+
+	cc zipls.c -o zipls
+
+To compile the second program:
+
+	cc zipview.c -o zipview -lz
+
+
+## License
+
+This is free and unencumbered software released into the public
+domain; see the UNLICENSE file.
+
+
+## Posts
+
+Posts (english):
+
+ - [Inspecting zip
+   files](https://www.omarpolo.com/post/inspecting-zips.html)
+ - [Extracting files from
+   zips](https://www.omarpolo.com/post/extracting-from-zips.html)
+
+Posts (italian and only over gemini):
+
+ - [Elencare i file di uno
+   zip](gemini://it.omarpolo.com/articoli/elencare-i-file-di-uno-zip.gmi)
blob - /dev/null
blob + 68a49daad8ff7e35068f2b7a97d643aab440eaec (mode 644)
--- /dev/null
+++ UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
blob - /dev/null
blob + 460636c9864cbd26ebdce1e738325a8d95f1720e (mode 644)
--- /dev/null
+++ zipls.c
@@ -0,0 +1,133 @@
+/*
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <sys/mman.h>
+
+#include <endian.h>
+#include <err.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <zlib.h>
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+void *
+find_central_directory(uint8_t *addr, size_t len)
+{
+	uint32_t	 offset;
+	uint16_t	 clen;
+	uint8_t		*p, *end;
+
+	/*
+	 * At -22 bytes from the end there is the end of the central
+	 * directory assuming an empty comment.  It's a sensible place
+	 * from which start.
+	 */
+	if (len < 22)
+		return NULL;
+	end = addr + len;
+	p = end - 22;
+
+again:
+	for (; p > addr; --p)
+		if (memcmp(p, "\x50\x4b\x05\x06", 4) == 0)
+			break;
+
+	if (p == addr)
+		return NULL;
+
+	/* read comment length */
+	memcpy(&clen, p + 20, sizeof(clen));
+	clen = le16toh(clen);
+
+	/* false signature inside a comment? */
+	if (clen + 22 != end - p) {
+		p--;
+		goto again;
+	}
+
+	/* read the offset for the central directory */
+	memcpy(&offset, p + 16, sizeof(offset));
+	offset = le32toh(offset);
+
+	if (addr + offset > p)
+		return NULL;
+
+	return addr + offset;
+}
+
+void
+ls(uint8_t *zip, size_t len, uint8_t *cd)
+{
+	uint16_t	 flen, xlen, clen;
+	uint8_t		*end;
+	char		 filename[PATH_MAX];
+
+	end = zip + len;
+	while (cd < end - 46 && memcmp(cd, "\x50\x4b\x01\x02", 4) == 0) {
+		memcpy(&flen, cd + 28, sizeof(flen));
+		memcpy(&xlen, cd + 28 + 2, sizeof(xlen));
+		memcpy(&clen, cd + 28 + 2 + 2, sizeof(clen));
+
+		flen = le16toh(flen);
+		xlen = le16toh(xlen);
+		clen = le16toh(clen);
+
+		memset(filename, 0, sizeof(filename));
+		memcpy(filename, cd + 46, MIN(sizeof(filename)-1, flen));
+
+		printf("%s\n", filename);
+
+		cd += 46 + flen + xlen + clen;
+	}
+}
+
+void *
+map_file(int fd, size_t *len)
+{
+	off_t	 jump;
+	void	*addr;
+
+	if ((jump = lseek(fd, 0, SEEK_END)) == -1)
+		err(1, "lseek");
+
+	if (lseek(fd, 0, SEEK_SET) == -1)
+		err(1, "lseek");
+
+	if ((addr = mmap(NULL, jump, PROT_READ, MAP_PRIVATE, fd, 0))
+	    == MAP_FAILED)
+                err(1, "mmap");
+
+	*len = jump;
+	return addr;
+}
+
+int
+main(int argc, char **argv)
+{
+	int	 fd;
+	void	*zip, *cd;
+	size_t	 len;
+
+	if (argc != 2)
+		errx(1, "missing file to inspect");
+
+	if ((fd = open(argv[1], O_RDONLY)) == -1)
+		err(1, "can't open %s", argv[1]);
+
+	zip = map_file(fd, &len);
+	if ((cd = find_central_directory(zip, len)) == NULL)
+		errx(1, "can't find the central directory");
+
+	ls(zip, len, cd);
+
+	munmap(zip, len);
+	close(fd);
+
+	return 0;
+}
blob - /dev/null
blob + 74a7ae855184cc21722d0a385df995f4da76ab7b (mode 644)
--- /dev/null
+++ zipview.c
@@ -0,0 +1,305 @@
+/*
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <sys/mman.h>
+
+#include <endian.h>
+#include <err.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <zlib.h>
+
+#define COMPRESSION_NONE	0x00
+#define COMPRESSION_DEFLATE	0x08
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+void *
+find_central_directory(uint8_t *addr, size_t len)
+{
+	uint32_t	 offset;
+	uint16_t	 clen;
+	uint8_t		*p, *end;
+
+	/*
+	 * At -22 bytes from the end there is the end of the central
+	 * directory assuming an empty comment.  It's a sensible place
+	 * from which start.
+	 */
+	if (len < 22)
+		return NULL;
+	end = addr + len;
+	p = end - 22;
+
+again:
+	for (; p > addr; --p)
+		if (memcmp(p, "\x50\x4b\x05\x06", 4) == 0)
+			break;
+
+	if (p == addr)
+		return NULL;
+
+	/* read comment length */
+	memcpy(&clen, p + 20, sizeof(clen));
+	clen = le16toh(clen);
+
+	/* false signature inside a comment? */
+	if (clen + 22 != end - p) {
+		p--;
+		goto again;
+	}
+
+	/* read the offset for the central directory */
+	memcpy(&offset, p + 16, sizeof(offset));
+	offset = le32toh(offset);
+
+	if (addr + offset > p)
+		return NULL;
+
+	return addr + offset;
+}
+
+void
+unzip_none(uint8_t *data, size_t size, unsigned long ocrc)
+{
+	unsigned long crc = 0;
+
+	fwrite(data, 1, size, stdout);
+
+	crc = crc32(0, data, size);
+	if (crc != ocrc)
+		errx(1, "CRC mismatch");
+}
+
+void
+unzip_deflate(uint8_t *data, size_t size, unsigned long ocrc)
+{
+	z_stream	stream;
+	size_t		have;
+	unsigned long	crc = 0;
+	char		buf[BUFSIZ];
+
+	stream.zalloc = Z_NULL;
+	stream.zfree = Z_NULL;
+	stream.opaque = Z_NULL;
+	stream.next_in = data;
+	stream.avail_in = size;
+	stream.next_out = Z_NULL;
+	stream.avail_out = 0;
+	if (inflateInit2(&stream, -15) != Z_OK)
+		err(1, "inflateInit failed");
+
+	do {
+		stream.next_out = buf;
+		stream.avail_out = sizeof(buf);
+
+		switch (inflate(&stream, Z_BLOCK)) {
+		case Z_STREAM_ERROR:
+			errx(1, "stream error");
+		case Z_NEED_DICT:
+			errx(1, "need dict");
+		case Z_DATA_ERROR:
+			errx(1, "data error: %s", stream.msg);
+		case Z_MEM_ERROR:
+			errx(1, "memory error");
+		}
+
+		have = sizeof(buf) - stream.avail_out;
+		fwrite(buf, 1, have, stdout);
+		crc = crc32(crc, buf, have);
+	} while (stream.avail_out == 0);
+
+	inflateEnd(&stream);
+
+	if (crc != ocrc)
+		errx(1, "CRC mismatch");
+}
+
+void
+unzip(uint8_t *zip, size_t len, uint8_t *entry)
+{
+	uint32_t	 size, osize, crc, off;
+	uint16_t	 flags, compression;
+	uint16_t	 flen, xlen;
+	uint8_t		*data, *offset;
+
+	/* read the offset of the file record */
+	memcpy(&off, entry + 42, sizeof(off));
+	offset = zip + le32toh(off);
+
+	if (offset > zip + len - 4 ||
+	    memcmp(offset, "\x50\x4b\x03\x04", 4) != 0)
+		errx(1, "invalid offset or file header signature");
+
+	memcpy(&flags, offset + 6, sizeof(flags));
+	memcpy(&compression, offset + 8, sizeof(compression));
+
+	flags = le16toh(flags);
+	compression = le16toh(compression);
+
+	memcpy(&crc, entry + 16, sizeof(crc));
+	memcpy(&size, entry + 20, sizeof(size));
+	memcpy(&osize, entry + 24, sizeof(osize));
+
+	crc = le32toh(crc);
+	size = le32toh(size);
+	osize = le32toh(osize);
+
+	memcpy(&flen, offset + 26, sizeof(flen));
+	memcpy(&xlen, offset + 28, sizeof(xlen));
+
+	flen = le16toh(flen);
+	xlen = le16toh(xlen);
+
+	data = offset + 30 + flen + xlen;
+	if (data + size > zip + len)
+		errx(1, "corrupted zip, offset out of file");
+
+	switch (compression) {
+	case COMPRESSION_NONE:
+                unzip_none(data, size, crc);
+		break;
+	case COMPRESSION_DEFLATE:
+                unzip_deflate(data, size, crc);
+		break;
+	default:
+		errx(1, "unknown compression method 0x%02x",
+		    compression);
+	}
+}
+
+void *
+next(uint8_t *zip, size_t len, uint8_t *entry)
+{
+	uint16_t	 flen, xlen, clen;
+	uint8_t		*next, *end;
+
+	memcpy(&flen, entry + 28, sizeof(flen));
+	memcpy(&xlen, entry + 28 + 2, sizeof(xlen));
+	memcpy(&clen, entry + 28 + 2 + 2, sizeof(xlen));
+
+	flen = le16toh(flen);
+	xlen = le16toh(xlen);
+	clen = le16toh(clen);
+
+	next = entry + 46 + flen + xlen + clen;
+	end = zip + len;
+	if (next >= end - 46 ||
+	    memcmp(next, "\x50\x4b\x01\x02", 4) != 0)
+		return NULL;
+	return next;
+}
+
+void
+filename(uint8_t *zip, size_t len, uint8_t *entry, char *buf,
+    size_t size)
+{
+	uint16_t	flen;
+	size_t		s;
+
+	memcpy(&flen, entry + 28, sizeof(flen));
+	flen = le16toh(flen);
+
+        s = MIN(size-1, flen);
+	memcpy(buf, entry + 46, s);
+	buf[s] = '\0';
+}
+
+void
+ls(uint8_t *zip, size_t len, uint8_t *cd)
+{
+	char	name[PATH_MAX];
+
+	do {
+		filename(zip, len, cd, name, sizeof(name));
+		printf("%s\n", name);
+	} while ((cd = next(zip, len, cd)) != NULL);
+}
+
+void *
+find_file(uint8_t *zip, size_t len, uint8_t *cd, const char *target)
+{
+	char	name[PATH_MAX];
+
+	do {
+		filename(zip, len, cd, name, sizeof(name));
+		if (!strcmp(name, target))
+			return cd;
+	} while ((cd = next(zip, len, cd)) != NULL);
+
+	return NULL;
+}
+
+int
+extract_file(uint8_t *zip, size_t len, uint8_t *cd, const char *target)
+{
+	if ((cd = find_file(zip, len, cd, target)) == NULL)
+		return -1;
+
+	unzip(zip, len, cd);
+	return 0;
+}
+
+void *
+map_file(int fd, size_t *len)
+{
+	off_t	 jump;
+	void	*addr;
+
+	if ((jump = lseek(fd, 0, SEEK_END)) == -1)
+		err(1, "lseek");
+
+	if (lseek(fd, 0, SEEK_SET) == -1)
+		err(1, "lseek");
+
+	if ((addr = mmap(NULL, jump, PROT_READ, MAP_PRIVATE, fd, 0))
+	    == MAP_FAILED)
+                err(1, "mmap");
+
+	*len = jump;
+	return addr;
+}
+
+int
+main(int argc, char **argv)
+{
+	int	 i, fd;
+	void	*zip, *cd;
+	size_t	 len;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s archive.zip [files...]",
+		    *argv);
+		return 1;
+	}
+
+	if ((fd = open(argv[1], O_RDONLY)) == -1)
+		err(1, "can't open %s", argv[1]);
+
+	zip = map_file(fd, &len);
+
+#ifdef __OpenBSD__
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+#endif
+
+	if ((cd = find_central_directory(zip, len)) == NULL)
+		errx(1, "can't find the central directory");
+
+        if (argc == 2)
+		ls(zip, len, cd);
+        else {
+                for (i = 2; i < argc; ++i)
+			extract_file(zip, len, cd, argv[i]);
+	}
+
+	munmap(zip, len);
+	close(fd);
+
+	return 0;
+}