2 * tar - `tape archiver', actually usable on any medium.
3 * POSIX "ustar" compliant when extracting, and by default when creating.
4 * this tar attempts to read and write multiple Tblock-byte blocks
5 * at once to and from the filesystem, and does not copy blocks
11 #include <fcall.h> /* for %M */
12 #include <libString.h>
15 * modified versions of those in libc.h; scans only the first arg for
16 * keyletters and options.
19 if (!argv0) argv0 = *argv; \
26 while(*_args && (_args += chartorune(&_argc, _args)))\
28 #define TARGEND SET(_argt); USED(_argt);USED(_argc);USED(_args); \
30 USED(argv); USED(argc); }
31 #define TARGC() (_argc)
/*
 * ROUNDUP(n, unit): how many unit-sized pieces are needed to hold n,
 * i.e. n/unit rounded up to the next integer.  note that this is a
 * ceiling division, not n rounded up to a multiple of unit.
 * unit is evaluated twice.
 */
#define ROUNDUP(n, unit)	(((n) + (unit) - 1)/(unit))
/* number of Tblock-byte tar blocks needed to hold nbytes bytes */
#define BYTES2TBLKS(nbytes)	ROUNDUP(nbytes, Tblock)
/*
 * read big-endian binary integers; args must be (uchar *).
 *
 * a uchar operand promotes to (signed) int, so shifting a byte >= 0x80
 * left by 24 overflows int, and widening the result afterwards would
 * sign-extend it.  G4BEBYTE therefore converts each byte to u32int
 * before shifting and yields a u32int; G8BEBYTE assembles the two
 * halves as uvlong and converts to vlong only at the very end.
 * G2BEBYTE and G3BEBYTE shift by at most 16 bits, which cannot reach
 * the sign bit of a 32-bit int, and remain int-valued.
 * each macro evaluates its argument several times.
 */
#define G2BEBYTE(x)	(((x)[0]<<8) | (x)[1])
#define G3BEBYTE(x)	(((x)[0]<<16) | ((x)[1]<<8) | (x)[2])
#define G4BEBYTE(x)	(((u32int)(x)[0]<<24) | ((u32int)(x)[1]<<16) | \
			 ((u32int)(x)[2]<<8) | (u32int)(x)[3])
#define G8BEBYTE(x)	((vlong)(((uvlong)G4BEBYTE(x)<<32) | G4BEBYTE((x)+4)))
43 typedef char *(*Refill)(int ar, char *bufs, int justhdr);
enum { Stdin = 0, Stdout = 1, Stderr = 2 };	/* standard file descriptors */
enum { Rd = 0, Wr = 1 };			/* pipe fd-array indices */
enum { Output = 0, Input = 1 };			/* `input' argument of push() */
enum { None = 0, Toc = 1, Xtract = 2, Replace = 3 };	/* operations; cf. verb == Xtract */
enum { Alldata = 0, Justnxthdr = 1 };		/* `justhdr' argument of getblk*() and refill() */
53 Maxpfx = 155, /* from POSIX */
54 Maxname = Namsiz + 1 + Maxpfx,
55 Binsize = 0x80, /* flag in size[0], from gnu: positive binary size */
56 Binnegsz = 0xff, /* flag in size[0]: negative binary size */
58 Nblock = 40, /* maximum blocksize */
59 Dblock = 20, /* default blocksize */
63 /* POSIX link flags */
69 LF_SYMLINK2 = 's', /* 4BSD used this */
75 /* 'A' - 'Z' are reserved for custom implementations */
/*
 * classify a header's link flag.
 * isreallink: the flag is LF_LINK.
 * issymlink: the flag is either symbolic-link code (LF_SYMLINK1 or
 *	LF_SYMLINK2).
 * islink: either of the above.
 * issymlink and islink may evaluate their argument more than once.
 */
#define isreallink(flag)	((flag) == LF_LINK)
#define issymlink(flag)		((flag) == LF_SYMLINK1 || (flag) == LF_SYMLINK2)
#define islink(flag)		(isreallink(flag) || issymlink(flag))
91 char linkname[Namsiz];
93 /* rest are defined by POSIX's ustar format; see p1003.2b */
94 char magic[6]; /* "ustar" */
100 char prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
105 typedef struct Compress {
111 static Compress comps[] = {
112 "gzip", "gunzip", { ".tar.gz", ".tgz" }, /* default */
113 "compress", "uncompress", { ".tar.Z", ".tz" },
114 "bzip2", "bunzip2", { ".tar.bz", ".tbz",
115 ".tar.bz2",".tbz2" },
118 typedef struct Pushstate {
120 int fd; /* original fd */
121 int rfd; /* replacement fd */
/*
 * the opposite end of a pipe: OTHER(Rd) is Wr; any other value yields Rd.
 * the argument is parenthesized so that an expression argument (one
 * using ?:, |, & and the like) is compared against Rd as a whole rather
 * than being split by the higher precedence of ==.
 */
#define OTHER(rdwr)	((rdwr) == Rd? Wr: Rd)
128 /* static int debug; */
130 static int posix = 1;
134 static int relative = 1;
137 static int docompress;
138 static int keepexisting;
139 static Off blkoff; /* offset of the current archive block (not Tblock) */
142 static int nblock = Dblock;
143 static char *usefile;
144 static char origdir[Maxname*2];
145 static Hdr *tpblk, *endblk;
151 fprint(2, "usage: %s {crtx}[PRTfgkmpuvz] [archive] file1 file2...\n",
159 compmethod(char *name)
161 int i, nmlen = strlen(name), sfxlen;
164 for (cp = comps; cp < comps + nelem(comps); cp++)
165 for (i = 0; i < nelem(cp->sfx) && cp->sfx[i]; i++) {
166 sfxlen = strlen(cp->sfx[i]);
167 if (nmlen > sfxlen &&
168 strcmp(cp->sfx[i], name + nmlen - sfxlen) == 0)
171 return docompress? comps: nil;
175 * push a filter, cmd, onto fd. if input, it's an input descriptor.
176 * returns a descriptor to replace fd, or -1 on error.
179 push(int fd, char *cmd, int input, Pushstate *ps)
186 if (fd < 0 || pipe(pifds) < 0)
194 dup(pifds[Wr], Stdout);
196 dup(pifds[Rd], Stdin);
197 close(pifds[input? Rd: Wr]);
198 dup(fd, (input? Stdin: Stdout));
199 execl(cmd, cmd, nil);
200 sysfatal("can't exec %s: %r", cmd);
202 nfd = pifds[input? Rd: Wr];
203 close(pifds[input? Wr: Rd]);
212 pushclose(Pushstate *ps)
216 if (ps->fd < 0 || ps->rfd < 0 || !ps->open)
221 while ((wm = wait()) != nil && wm->pid != ps->kid)
223 return wm? wm->msg: nil;
227 * block-buffer management
234 tpblk = malloc(Tblock * nblock);
235 assert(tpblk != nil);
236 endblk = tpblk + nblock;
240 * (re)fill block buffers from archive. `justhdr' means we don't care
241 * about the data before the next header block.
244 refill(int ar, char *bufs, int justhdr)
247 unsigned bytes = Tblock * nblock;
248 static int done, first = 1, seekable;
254 seekable = seek(ar, 0, 1) >= 0;
255 blkoff = seek(ar, 0, 1); /* note position for `tar r' */
256 /* try to size non-pipe input at first read */
257 if (first && usefile) {
258 n = read(ar, bufs, bytes);
260 sysfatal("error reading archive: %r");
262 if (i % Tblock != 0) {
263 fprint(2, "%s: archive block size (%d) error\n",
270 fprint(2, "%s: blocking = %d\n", argv0, nblock);
271 endblk = (Hdr *)bufs + nblock;
274 } else if (justhdr && seekable && nexthdr - seek(ar, 0, 1) >= bytes) {
275 /* optimisation for huge archive members on seekable media */
276 if (seek(ar, bytes, 1) < 0)
277 sysfatal("can't seek on archive: %r");
280 n = readn(ar, bufs, bytes);
284 sysfatal("unexpected EOF reading archive");
286 sysfatal("error reading archive: %r");
287 else if (n%Tblock != 0)
288 sysfatal("partial block read from archive");
291 memset(bufs + n, 0, bytes - n);
297 getblk(int ar, Refill rfp, int justhdr)
299 if (curblk == nil || curblk >= endblk) { /* input block exhausted? */
300 if (rfp != nil && (*rfp)(ar, (char *)tpblk, justhdr) == nil)
308 getblkrd(int ar, int justhdr)
310 return getblk(ar, refill, justhdr);
316 return getblk(ar, nil, Alldata);
322 Hdr *hp = getblke(ar);
325 memset(hp, 0, Tblock);
330 * how many block buffers are available, starting at the address
331 * just returned by getblk*?
336 int n = endblk - (curblk - 1);
338 return n > max? max: n;
342 * indicate that one is done with the last block obtained from getblke
343 * and it is now available to be written into the archive.
348 unsigned bytes = Tblock * nblock;
350 /* if writing end-of-archive, aid compression (good hygiene too) */
352 memset(curblk, 0, (char *)endblk - (char *)curblk);
353 if (write(ar, tpblk, bytes) != bytes)
354 sysfatal("error writing archive: %r");
360 if (curblk >= endblk)
372 putreadblks(int ar, int blks)
379 putblkmany(int ar, int blks)
390 * modifies hp->chksum but restores it; important for the last block of the
391 * old archive when updating with `tar rf archive'
398 uchar *cp = (uchar*)hp;
399 char oldsum[sizeof hp->chksum];
401 memmove(oldsum, hp->chksum, sizeof oldsum);
402 memset(hp->chksum, ' ', sizeof hp->chksum);
405 memmove(hp->chksum, oldsum, sizeof oldsum);
412 return strcmp(hp->magic, "ustar") == 0;
416 * s is at most n bytes long, but need not be NUL-terminated.
417 * if shorter than n bytes, all bytes after the first NUL must also
421 tar_sstrnlen(char *s, int n)
423 return s[n - 1] != '\0'? n: strlen(s);
426 /* set fullname from header */
431 static char fullnamebuf[2 + Maxname + 1]; /* 2 at beginning for ./ on relative names */
434 fullname = fullnamebuf+2;
435 namlen = tar_sstrnlen(hp->name, sizeof hp->name);
436 if (hp->prefix[0] == '\0' || !isustar(hp)) { /* old-style name? */
437 memmove(fullname, hp->name, namlen);
438 fullname[namlen] = '\0';
442 /* name is in two pieces */
443 pfxlen = tar_sstrnlen(hp->prefix, sizeof hp->prefix);
444 memmove(fullname, hp->prefix, pfxlen);
445 fullname[pfxlen] = '/';
446 memmove(fullname + pfxlen + 1, hp->name, namlen);
447 fullname[pfxlen + 1 + namlen] = '\0';
454 /* the mode test is ugly but sometimes necessary */
455 return hp->linkflag == LF_DIR ||
456 strrchr(name(hp), '\0')[-1] == '/' ||
457 (strtoul(hp->mode, nil, 8)&0170000) == 040000;
463 return name(hp)[0] == '\0';
468 getbe(uchar *src, int size)
481 putbe(uchar *dest, uvlong vl, int size)
483 for (dest += size; size-- > 0; vl >>= 8)
488 * return the nominal size from the header block, which is not always the
489 * size in the archive (the archive size may be zero for some file types
490 * regardless of the nominal size).
492 * gnu and freebsd tars are now recording vlongs as big-endian binary
493 * with a flag in byte 0 to indicate this, which permits file sizes up to
494 * 2^64-1 (actually 2^80-1 but our file sizes are vlongs) rather than 2^33-1.
501 if((uchar)hp->size[0] == Binnegsz) {
502 fprint(2, "%s: %s: negative length, which is insane\n",
505 } else if((uchar)hp->size[0] == Binsize) {
506 p = (uchar *)hp->size + sizeof hp->size - 1 -
507 sizeof(vlong); /* -1 for terminating space */
510 return strtoull(hp->size, nil, 8);
514 * return the number of bytes recorded in the archive.
519 if(isdir(hp) || islink(hp->linkflag))
530 hp = getblkrd(ar, Alldata);
532 sysfatal("unexpected EOF instead of archive header");
533 if (eotar(hp)) /* end-of-archive block? */
535 hdrcksum = strtoul(hp->chksum, nil, 8);
536 if (chksum(hp) != hdrcksum)
537 sysfatal("bad archive header checksum: name %.64s... %ld %ld",
538 hp->name, chksum(hp), hdrcksum);
539 nexthdr += Tblock*(1 + BYTES2TBLKS(arsize(hp)));
548 * if name is longer than Namsiz bytes, try to split it at a slash and fit the
549 * pieces into hp->prefix and hp->name.
552 putfullname(Hdr *hp, char *name)
556 String *slname = nil;
560 s_append(slname, name);
561 s_append(slname, "/"); /* posix requires this */
562 name = s_to_c(slname);
565 namlen = strlen(name);
566 if (namlen <= Namsiz) {
567 strncpy(hp->name, name, Namsiz);
568 hp->prefix[0] = '\0'; /* ustar paranoia */
572 if (!posix || namlen > Maxname) {
573 fprint(2, "%s: name too long for tar header: %s\n",
578 * try various splits until one results in pieces that fit into the
579 * appropriate fields of the header. look for slashes from right
580 * to left, in the hopes of putting the largest part of the name into
581 * hp->prefix, which is larger than hp->name.
583 sl = strrchr(name, '/');
586 if (pfxlen <= sizeof hp->prefix && namlen-1 - pfxlen <= Namsiz)
590 sl = strrchr(name, '/');
594 fprint(2, "%s: name can't be split to fit tar header: %s\n",
599 strncpy(hp->prefix, name, sizeof hp->prefix);
601 strncpy(hp->name, sl, sizeof hp->name);
608 mkhdr(Hdr *hp, Dir *dir, char *file)
611 * these fields run together, so we format them in order and don't use
614 sprint(hp->mode, "%6lo ", dir->mode & 0777);
615 sprint(hp->uid, "%6o ", aruid);
616 sprint(hp->gid, "%6o ", argid);
617 if (dir->length >= (Off)1<<32) {
622 fprint(2, "%s: storing large sizes in \"base 256\"\n", argv0);
624 hp->size[0] = (char)Binsize;
625 /* emit so-called `base 256' representation of size */
626 putbe((uchar *)hp->size+1, dir->length, sizeof hp->size - 2);
627 hp->size[sizeof hp->size - 1] = ' ';
629 sprint(hp->size, "%11lluo ", dir->length);
630 sprint(hp->mtime, "%11luo ", dir->mtime);
631 hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
632 putfullname(hp, file);
634 strncpy(hp->magic, "ustar", sizeof hp->magic);
635 strncpy(hp->version, "00", sizeof hp->version);
636 strncpy(hp->uname, dir->uid, sizeof hp->uname);
637 strncpy(hp->gname, dir->gid, sizeof hp->gname);
639 sprint(hp->chksum, "%6luo", chksum(hp));
643 static void addtoar(int ar, char *file, char *shortf);
646 addtreetoar(int ar, char *file, char *shortf, int fd)
650 String *name = s_new();
652 n = dirreadall(fd, &dirents);
657 if (chdir(shortf) < 0)
658 sysfatal("chdir %s: %r", file);
660 fprint(2, "chdir %s\t# %s\n", shortf, file);
662 for (dent = dirents; dent < dirents + n; dent++) {
664 s_append(name, file);
666 s_append(name, dent->name);
667 addtoar(ar, s_to_c(name), dent->name);
673 * this assumes that shortf is just one component, which is true
674 * during directory descent, but not necessarily true of command-line
675 * arguments. Our caller (or addtoar's) must reset the working
676 * directory if necessary.
679 sysfatal("chdir %s/..: %r", file);
681 fprint(2, "chdir ..\n");
685 addtoar(int ar, char *file, char *shortf)
689 ulong blksleft, blksread;
694 if (shortf[0] == '#') {
696 s_append(name, "./");
697 s_append(name, shortf);
698 shortf = s_to_c(name);
701 fd = open(shortf, OREAD);
703 fprint(2, "%s: can't open %s: %r\n", argv0, file);
710 sysfatal("can't fstat %s: %r", file);
713 isdir = !!(dir->qid.type&QTDIR);
714 if (mkhdr(hbp, dir, file) < 0) {
724 blksleft = BYTES2TBLKS(dir->length);
728 addtreetoar(ar, file, shortf, fd);
730 for (; blksleft > 0; blksleft -= blksread) {
732 blksread = gothowmany(blksleft);
733 bytes = blksread * Tblock;
734 n = readn(fd, hbp, bytes);
736 sysfatal("error reading %s: %r", file);
738 * ignore EOF. zero any partial block to aid
739 * compression and emergency recovery of data.
742 memset((uchar*)hbp + n, 0, bytes - n);
743 putblkmany(ar, blksread);
747 fprint(2, "%s\n", file);
757 ulong blksleft, blksread;
760 Compress *comp = nil;
763 if (usefile && docreate) {
764 ar = create(usefile, OWRITE, 0666);
766 comp = compmethod(usefile);
768 ar = open(usefile, ORDWR);
772 ar = push(ar, comp->comp, Output, &ps);
774 sysfatal("can't open archive %s: %r", usefile);
776 if (usefile && !docreate) {
777 /* skip quickly to the end */
778 while ((hp = readhdr(ar)) != nil) {
780 for (blksleft = BYTES2TBLKS(bytes);
781 blksleft > 0 && getblkrd(ar, Justnxthdr) != nil;
782 blksleft -= blksread) {
783 blksread = gothowmany(blksleft);
784 putreadblks(ar, blksread);
788 * we have just read the end-of-archive Tblock.
789 * now seek back over the (big) archive block containing it,
790 * and back up curblk ptr over end-of-archive Tblock in memory.
792 if (seek(ar, blkoff, 0) < 0)
793 sysfatal("can't seek back over end-of-archive: %r");
797 for (i = 0; argv[i] != nil; i++) {
798 addtoar(ar, argv[i], argv[i]);
799 chdir(origdir); /* for correctness & profiling */
802 /* write end-of-archive marker */
809 return pushclose(&ps);
819 /* is pfx a file-name prefix of name? */
821 prefix(char *name, char *pfx)
823 int pfxlen = strlen(pfx);
824 char clpfx[Maxname+1];
826 if (pfxlen > Maxname)
830 return strncmp(pfx, name, pfxlen) == 0 &&
831 (name[pfxlen] == '\0' || name[pfxlen] == '/');
835 match(char *name, char **argv)
838 char clname[Maxname+1];
842 strcpy(clname, name);
844 for (i = 0; argv[i] != nil; i++)
845 if (prefix(clname, argv[i]))
851 cantcreate(char *s, int mode)
857 * Always print about files. Only print about directories
858 * we haven't printed about. (Assumes archive is ordered
863 /* already printed this directory */
864 if(strcmp(s, last) == 0)
866 /* printed a higher directory, so printed this one */
868 if(memcmp(s, last, len) == 0 && last[len] == '/')
875 fprint(2, "%s: can't create %s: %r\n", argv0, s);
883 if (access(s, AEXIST) == 0)
885 f = create(s, OREAD, DMDIR | 0777);
889 cantcreate(s, DMDIR);
901 while (!err && (p = strchr(p+1, '/')) != nil) {
903 err = (access(s, AEXIST) < 0 && makedir(s) < 0);
909 /* Call access but preserve the error string. */
911 xaccess(char *name, int mode)
917 errstr(err, sizeof err);
918 rv = access(name, mode);
919 errstr(err, sizeof err);
923 /* copy a file from the archive into the filesystem */
924 /* fname is result of name(), so has two extra bytes at beginning */
926 extract1(int ar, Hdr *hp, char *fname)
928 int wrbytes, fd = -1, dir = 0;
929 long mtime = strtol(hp->mtime, nil, 8);
930 ulong mode = strtoul(hp->mode, nil, 8) & 0777;
931 Off bytes = hdrsize(hp); /* for printing */
932 ulong blksread, blksleft = BYTES2TBLKS(arsize(hp));
939 switch (hp->linkflag) {
950 else if(fname[0] == '#'){
955 if (verb == Xtract) {
957 switch (hp->linkflag) {
961 fprint(2, "%s: can't make (sym)link %s\n",
965 fprint(2, "%s: can't make fifo %s\n", argv0, fname);
968 if (!keepexisting || access(fname, AEXIST) < 0) {
969 int rw = (dir? OREAD: OWRITE);
971 fd = create(fname, rw, mode);
974 fd = create(fname, rw, mode);
977 (!dir || xaccess(fname, AEXIST) < 0))
978 cantcreate(fname, mode);
980 if (fd >= 0 && verbose)
981 fprint(2, "%s\n", fname);
984 } else if (verbose) {
985 char *cp = ctime(mtime);
987 print("%M %8lld %-12.12s %-4.4s %s\n",
988 mode, bytes, cp+4, cp+24, fname);
990 print("%s\n", fname);
994 for (; blksleft > 0; blksleft -= blksread) {
995 hbp = getblkrd(ar, (fd >= 0? Alldata: Justnxthdr));
997 sysfatal("unexpected EOF on archive extracting %s",
999 blksread = gothowmany(blksleft);
1001 fprint(2, "%s: got %ld blocks reading %s!\n",
1002 argv0, blksread, fname);
1003 wrbytes = Tblock*blksread;
1006 if (fd >= 0 && write(fd, hbp, wrbytes) != wrbytes)
1007 sysfatal("write error on %s: %r", fname);
1008 putreadblks(ar, blksread);
1013 "%s: %lld bytes uncopied at eof; %s not fully extracted\n",
1014 argv0, bytes, fname);
1017 * directories should be wstated after we're done
1018 * creating files in them.
1037 skip(int ar, Hdr *hp, char *fname)
1039 ulong blksleft, blksread;
1042 for (blksleft = BYTES2TBLKS(arsize(hp)); blksleft > 0;
1043 blksleft -= blksread) {
1044 hbp = getblkrd(ar, Justnxthdr);
1046 sysfatal("unexpected EOF on archive extracting %s",
1048 blksread = gothowmany(blksleft);
1049 putreadblks(ar, blksread);
1054 extract(char **argv)
1059 Compress *comp = nil;
1063 ar = open(usefile, OREAD);
1064 comp = compmethod(usefile);
1068 ar = push(ar, comp->decomp, Input, &ps);
1070 sysfatal("can't open archive %s: %r", usefile);
1072 while ((hp = readhdr(ar)) != nil) {
1073 longname = name(hp);
1074 if (match(longname, argv))
1075 extract1(ar, hp, longname);
1077 skip(ar, hp, longname);
1081 return pushclose(&ps);
1088 main(int argc, char *argv[])
1093 fmtinstall('M', dirmodefmt);
1095 if(sizeof(Hdr) != Tblock)
1096 sysfatal("padding in hdr should be %d", Tblock-sizeof(Hdr)+sizeof(curblk->pad));
1103 usefile = EARGF(usage());
1106 argid = strtoul(EARGF(usage()), 0, 0);
1111 case 'm': /* compatibility */
1133 aruid = strtoul(EARGF(usage()), 0, 0);
1147 fprint(2, "tar: unknown letter %C\n", TARGC());
1152 if (argc < 0 || errflg)
1159 ret = extract(argv);
1162 if (getwd(origdir, sizeof origdir) == nil)
1163 strcpy(origdir, "/tmp");
1164 ret = replace(argv);