Blob


/*
 * tar - `tape archiver', actually usable on any medium.
 *	POSIX "ustar" compliant when extracting, and by default when creating.
 *	this tar attempts to read and write multiple Tblock-byte blocks
 *	at once to and from the filesystem, and does not copy blocks
 *	around internally.
 */
#include <u.h>
#include <libc.h>
#include <fcall.h>		/* for %M */
#include <libString.h>
/*
 * every later use of the identifier `creat' in this file (the `c'
 * keyletter flag) is really tar_creat.
 * NOTE(review): presumably this avoids a clash with a library creat();
 * confirm against the headers in use.
 */
#define creat tar_creat
/*
 * modified versions of those in libc.h; scans only the first arg for
 * keyletters and options.
 *
 * TARGBEGIN opens a block, records argv0 if unset, and steps past
 * argv[0].  if a first argument exists, it is decoded one rune at a
 * time into _argc and each rune is dispatched through switch(_argc);
 * the caller supplies the switch body between TARGBEGIN and TARGEND.
 * TARGEND steps past the keyletter argument and closes the block, so
 * afterwards argv points at the remaining (file) arguments.
 */
#define	TARGBEGIN {\
	__fixargv0(); if(!argv0) argv0=*argv; \
	argv++, argc--; \
	if (argv[0]) {\
		char *_args, *_argt;\
		Rune _argc;\
		_args = &argv[0][0];\
		_argc = 0;\
		while(*_args && (_args += chartorune(&_argc, _args)))\
			switch(_argc)
#define	TARGEND	SET(_argt); USED(_argt); USED(_argc); USED(_args); argc--, argv++; } \
	USED(argv);USED(argc); }
/* how many b-sized units are needed to hold a (integer division, rounded up) */
#define ROUNDUP(a, b)	(((a) + (b) - 1)/(b))
/* how many Tblock-byte tar blocks are needed to hold `bytes' bytes */
#define BYTES2TBLKS(bytes) ROUNDUP(bytes, Tblock)
36 typedef vlong Off;
37 typedef char *(*Refill)(int ar, char *bufs);
/* standard file descriptors */
enum { Stdin, Stdout, Stderr };
/* values of `verb': the operation selected by the keyletters */
enum { None, Toc, Xtract, Replace };
/* sizes of blocks and of header name fields */
enum {
	Tblock	= 512,		/* bytes in one tar block */
	Nblock	= 40,		/* maximum blocksize */
	Dblock	= 20,		/* default blocksize */
	Namsiz	= 100,		/* size of a header's name field */
	Maxpfx	= 155,		/* from POSIX */
	Maxname	= Namsiz + 1 + Maxpfx,	/* prefix "/" name */
	DEBUG	= 0,		/* non-zero enables chdir tracing */
};
/* POSIX link flags */
enum {
	LF_PLAIN1	= '\0',
	LF_PLAIN2	= '0',
	LF_LINK		= '1',
	LF_SYMLINK1	= '2',
	LF_SYMLINK2	= 's',
	LF_CHR		= '3',
	LF_BLK		= '4',
	LF_DIR		= '5',
	LF_FIFO		= '6',
	LF_CONTIG	= '7',
	/* 'A' - 'Z' are reserved for custom implementations */
};

/* classify a header's link flag: hard link, symbolic link, or either */
#define islink(lf)	(isreallink(lf) || issymlink(lf))
#define isreallink(lf)	((lf) == LF_LINK)
#define issymlink(lf)	((lf) == LF_SYMLINK1 || (lf) == LF_SYMLINK2)
70 typedef union {
71 char name[Namsiz];
72 char mode[8];
73 char uid[8];
74 char gid[8];
75 char size[12];
76 char mtime[12];
77 char chksum[8];
78 char linkflag;
79 char linkname[Namsiz];
81 /* rest are defined by POSIX's ustar format; see p1003.2b */
82 char magic[6]; /* "ustar" */
83 char version[2];
84 char uname[32];
85 char gname[32];
86 char devmajor[8];
87 char devminor[8];
88 char prefix[Maxpfx]; /* if non-null, path= prefix "/" name */
89 } Hdr;
91 int debug;
92 static int verb;
93 static int posix = 1;
94 static int creat;
95 static int aruid;
96 static int argid;
97 static int relative;
98 static int settime;
99 static int verbose;
101 static int nblock = Dblock;
102 static char *usefile;
103 static char origdir[Maxname*2];
104 static Hdr *tpblk, *endblk;
105 static Hdr *curblk;
107 static void
108 usage(void)
110 fprint(2, "usage: %s {txrc}[pPvRTugf] [archive] file1 file2...\n",
111 argv0);
112 exits("usage");
115 /*
116 * block-buffer management
117 */
119 static void
120 initblks(void)
122 free(tpblk);
123 tpblk = malloc(Tblock * nblock);
124 assert(tpblk != nil);
125 endblk = tpblk + nblock;
128 static char *
129 refill(int ar, char *bufs)
131 int i, n;
132 unsigned bytes = Tblock * nblock;
133 static int done, first = 1;
135 if (done)
136 return nil;
138 /* try to size non-pipe input at first read */
139 if (first && usefile) {
140 first = 0;
141 n = read(ar, bufs, bytes);
142 if (n <= 0)
143 sysfatal("error reading archive: %r");
144 i = n;
145 if (i % Tblock != 0) {
146 fprint(2, "%s: archive block size (%d) error\n",
147 argv0, i);
148 exits("blocksize");
150 i /= Tblock;
151 if (i != nblock) {
152 nblock = i;
153 fprint(2, "%s: blocking = %d\n", argv0, nblock);
154 endblk = (Hdr *)bufs + nblock;
155 bytes = n;
157 } else
158 n = readn(ar, bufs, bytes);
159 if (n == 0)
160 sysfatal("unexpected EOF reading archive");
161 else if (n < 0)
162 sysfatal("error reading archive: %r");
163 else if (n%Tblock != 0)
164 sysfatal("partial block read from archive");
165 if (n != bytes) {
166 done = 1;
167 memset(bufs + n, 0, bytes - n);
169 return bufs;
172 static Hdr *
173 getblk(int ar, Refill rfp)
175 if (curblk == nil || curblk >= endblk) { /* input block exhausted? */
176 if (rfp != nil && (*rfp)(ar, (char *)tpblk) == nil)
177 return nil;
178 curblk = tpblk;
180 return curblk++;
183 static Hdr *
184 getblkrd(int ar)
186 return getblk(ar, refill);
189 static Hdr *
190 getblke(int ar)
192 return getblk(ar, nil);
195 static Hdr *
196 getblkz(int ar)
198 Hdr *hp = getblke(ar);
200 if (hp != nil)
201 memset(hp, 0, Tblock);
202 return hp;
205 /*
206 * how many block buffers are available, starting at the address
207 * just returned by getblk*?
208 */
209 static int
210 gothowmany(int max)
212 int n = endblk - (curblk - 1);
214 return n > max? max: n;
217 /*
218 * indicate that one is done with the last block obtained from getblke
219 * and it is now available to be written into the archive.
220 */
221 static void
222 putlastblk(int ar)
224 unsigned bytes = Tblock * nblock;
226 /* if writing end-of-archive, aid compression (good hygiene too) */
227 if (curblk < endblk)
228 memset(curblk, 0, (char *)endblk - (char *)curblk);
229 if (write(ar, tpblk, bytes) != bytes)
230 sysfatal("error writing archive: %r");
233 static void
234 putblk(int ar)
236 if (curblk >= endblk)
237 putlastblk(ar);
240 static void
241 putbackblk(int ar)
243 curblk--;
244 USED(ar);
247 static void
248 putreadblks(int ar, int blks)
250 curblk += blks - 1;
251 USED(ar);
254 static void
255 putblkmany(int ar, int blks)
257 curblk += blks - 1;
258 putblk(ar);
261 /*
262 * common routines
263 */
265 /* modifies hp->chksum */
266 long
267 chksum(Hdr *hp)
269 int n = Tblock;
270 long i = 0;
271 uchar *cp = (uchar*)hp;
273 memset(hp->chksum, ' ', sizeof hp->chksum);
274 while (n-- > 0)
275 i += *cp++;
276 return i;
279 static int
280 isustar(Hdr *hp)
282 return strcmp(hp->magic, "ustar") == 0;
/*
 * s is at most n bytes long, but need not be NUL-terminated.
 * returns the number of bytes before the first NUL within the first
 * n bytes of s, or n if there is none; never looks outside those n
 * bytes.  makes no assumption about the bytes after the first NUL,
 * so garbage following it in a header field cannot inflate the result.
 */
static int
strnlen(char *s, int n)
{
	char *nul;

	if (n <= 0)
		return 0;
	nul = memchr(s, '\0', n);
	return nul? nul - s: n;
}
296 /* set fullname from header */
297 static char *
298 name(Hdr *hp)
300 int pfxlen, namlen;
301 static char fullname[Maxname + 1];
303 namlen = strnlen(hp->name, sizeof hp->name);
304 if (hp->prefix[0] == '\0' || !isustar(hp)) { /* old-style name? */
305 memmove(fullname, hp->name, namlen);
306 fullname[namlen] = '\0';
307 return fullname;
310 /* name is in two pieces */
311 pfxlen = strnlen(hp->prefix, sizeof hp->prefix);
312 memmove(fullname, hp->prefix, pfxlen);
313 fullname[pfxlen] = '/';
314 memmove(fullname + pfxlen + 1, hp->name, namlen);
315 fullname[pfxlen + 1 + namlen] = '\0';
316 return fullname;
319 static int
320 isdir(Hdr *hp)
322 /* the mode test is ugly but sometimes necessary */
323 return hp->linkflag == LF_DIR ||
324 strrchr(name(hp), '\0')[-1] == '/' ||
325 (strtoul(hp->mode, nil, 8)&0170000) == 040000;
328 static int
329 eotar(Hdr *hp)
331 return name(hp)[0] == '\0';
334 static Hdr *
335 readhdr(int ar)
337 long hdrcksum;
338 Hdr *hp;
340 hp = getblkrd(ar);
341 if (hp == nil)
342 sysfatal("unexpected EOF instead of archive header");
343 if (eotar(hp)) /* end-of-archive block? */
344 return nil;
345 hdrcksum = strtoul(hp->chksum, nil, 8);
346 if (chksum(hp) != hdrcksum)
347 sysfatal("bad archive header checksum: name %.64s...",
348 hp->name);
349 return hp;
352 /*
353 * tar r[c]
354 */
356 /*
357 * if name is longer than Namsiz bytes, try to split it at a slash and fit the
358 * pieces into hp->prefix and hp->name.
359 */
360 static int
361 putfullname(Hdr *hp, char *name)
363 int namlen, pfxlen;
364 char *sl, *osl;
365 String *slname = nil;
367 if (isdir(hp)) {
368 slname = s_new();
369 s_append(slname, name);
370 s_append(slname, "/"); /* posix requires this */
371 name = s_to_c(slname);
374 namlen = strlen(name);
375 if (namlen <= Namsiz) {
376 strncpy(hp->name, name, Namsiz);
377 hp->prefix[0] = '\0'; /* ustar paranoia */
378 return 0;
381 if (!posix || namlen > Maxname) {
382 fprint(2, "%s: name too long for tar header: %s\n",
383 argv0, name);
384 return -1;
386 /*
387 * try various splits until one results in pieces that fit into the
388 * appropriate fields of the header. look for slashes from right
389 * to left, in the hopes of putting the largest part of the name into
390 * hp->prefix, which is larger than hp->name.
391 */
392 sl = strrchr(name, '/');
393 while (sl != nil) {
394 pfxlen = sl - name;
395 if (pfxlen <= sizeof hp->prefix && namlen-1 - pfxlen <= Namsiz)
396 break;
397 osl = sl;
398 *osl = '\0';
399 sl = strrchr(name, '/');
400 *osl = '/';
402 if (sl == nil) {
403 fprint(2, "%s: name can't be split to fit tar header: %s\n",
404 argv0, name);
405 return -1;
407 *sl = '\0';
408 strncpy(hp->prefix, name, sizeof hp->prefix);
409 *sl++ = '/';
410 strncpy(hp->name, sl, sizeof hp->name);
411 if (slname)
412 s_free(slname);
413 return 0;
416 static int
417 mkhdr(Hdr *hp, Dir *dir, char *file)
419 /*
420 * these fields run together, so we format them in order and don't use
421 * snprint.
422 */
423 sprint(hp->mode, "%6lo ", dir->mode & 0777);
424 sprint(hp->uid, "%6o ", aruid);
425 sprint(hp->gid, "%6o ", argid);
426 /*
427 * files > 2⁳⁳ bytes can't be described
428 * (unless we resort to xustar or exustar formats).
429 */
430 if (dir->length >= (Off)1<<33) {
431 fprint(2, "%s: %s: too large for tar header format\n",
432 argv0, file);
433 return -1;
435 sprint(hp->size, "%11lluo ", dir->length);
436 sprint(hp->mtime, "%11luo ", dir->mtime);
437 hp->linkflag = (dir->mode&DMDIR? LF_DIR: LF_PLAIN1);
438 putfullname(hp, file);
439 if (posix) {
440 strncpy(hp->magic, "ustar", sizeof hp->magic);
441 strncpy(hp->version, "00", sizeof hp->version);
442 strncpy(hp->uname, dir->uid, sizeof hp->uname);
443 strncpy(hp->gname, dir->gid, sizeof hp->gname);
445 sprint(hp->chksum, "%6luo", chksum(hp));
446 return 0;
449 static void addtoar(int ar, char *file, char *shortf);
451 static void
452 addtreetoar(int ar, char *file, char *shortf, int fd)
454 int n;
455 Dir *dent, *dirents;
456 String *name = s_new();
458 n = dirreadall(fd, &dirents);
459 close(fd);
460 if (n == 0)
461 return;
463 if (chdir(shortf) < 0)
464 sysfatal("chdir %s: %r", file);
465 if (DEBUG)
466 fprint(2, "chdir %s\t# %s\n", shortf, file);
468 for (dent = dirents; dent < dirents + n; dent++) {
469 s_reset(name);
470 s_append(name, file);
471 s_append(name, "/");
472 s_append(name, dent->name);
473 addtoar(ar, s_to_c(name), dent->name);
475 s_free(name);
476 free(dirents);
478 if (chdir("..") < 0)
479 sysfatal("chdir %s: %r", file);
480 if (DEBUG)
481 fprint(2, "chdir ..\n");
484 static void
485 addtoar(int ar, char *file, char *shortf)
487 int n, fd, isdir, r;
488 long bytes;
489 ulong blksleft, blksread;
490 Hdr *hbp;
491 Dir *dir;
493 fd = open(shortf, OREAD);
494 if (fd < 0) {
495 fprint(2, "%s: can't open %s: %r\n", argv0, file);
496 return;
498 dir = dirfstat(fd);
499 if (dir == nil)
500 sysfatal("can't fstat %s: %r", file);
502 hbp = getblkz(ar);
503 isdir = !!(dir->qid.type&QTDIR);
504 r = mkhdr(hbp, dir, file);
505 if (r < 0) {
506 putbackblk(ar);
507 free(dir);
508 close(fd);
509 return;
511 putblk(ar);
513 blksleft = BYTES2TBLKS(dir->length);
514 free(dir);
516 if (isdir)
517 addtreetoar(ar, file, shortf, fd);
518 else {
519 for (; blksleft > 0; blksleft -= blksread) {
520 hbp = getblke(ar);
521 blksread = gothowmany(blksleft);
522 bytes = blksread * Tblock;
523 n = read(fd, hbp, bytes);
524 if (n < 0)
525 sysfatal("error reading %s: %r", file);
526 /*
527 * ignore EOF. zero any partial block to aid
528 * compression and emergency recovery of data.
529 */
530 if (n < Tblock)
531 memset((char*)hbp + n, 0, bytes - n);
532 putblkmany(ar, blksread);
534 close(fd);
535 if (verbose)
536 fprint(2, "%s: a %s\n", argv0, file);
540 static void
541 replace(char **argv)
543 int i, ar;
544 ulong blksleft, blksread;
545 Off bytes;
546 Hdr *hp;
548 if (usefile && creat)
549 ar = create(usefile, ORDWR, 0666);
550 else if (usefile)
551 ar = open(usefile, ORDWR);
552 else
553 ar = Stdout;
554 if (ar < 0)
555 sysfatal("can't open archive %s: %r", usefile);
557 if (usefile && !creat) {
558 /* skip quickly to the end */
559 while ((hp = readhdr(ar)) != nil) {
560 bytes = (isdir(hp)? 0: strtoull(hp->size, nil, 8));
561 for (blksleft = BYTES2TBLKS(bytes);
562 blksleft > 0 && getblkrd(ar) != nil;
563 blksleft -= blksread) {
564 blksread = gothowmany(blksleft);
565 putreadblks(ar, blksread);
568 /*
569 * we have just read the end-of-archive Tblock.
570 * now seek back over the (big) archive block containing it,
571 * and back up curblk ptr over end-of-archive Tblock in memory.
572 */
573 if (seek(ar, -Tblock*nblock, 1) < 0)
574 sysfatal("can't seek back over end-of-archive: %r");
575 curblk--;
578 for (i = 0; argv[i] != nil; i++)
579 addtoar(ar, argv[i], argv[i]);
581 /* write end-of-archive marker */
582 getblkz(ar);
583 putblk(ar);
584 getblkz(ar);
585 putlastblk(ar);
587 if (ar > Stderr)
588 close(ar);
591 /*
592 * tar [xt]
593 */
595 /* is pfx a file-name prefix of name? */
596 static int
597 prefix(char *name, char *pfx)
599 int pfxlen = strlen(pfx);
600 char clpfx[Maxname+1];
602 if (pfxlen > Maxname)
603 return 0;
604 strcpy(clpfx, pfx);
605 cleanname(clpfx);
606 return strncmp(pfx, name, pfxlen) == 0 &&
607 (name[pfxlen] == '\0' || name[pfxlen] == '/');
610 static int
611 match(char *name, char **argv)
613 int i;
614 char clname[Maxname+1];
616 if (argv[0] == nil)
617 return 1;
618 strcpy(clname, name);
619 cleanname(clname);
620 for (i = 0; argv[i] != nil; i++)
621 if (prefix(clname, argv[i]))
622 return 1;
623 return 0;
626 static int
627 makedir(char *s)
629 int f;
631 if (access(s, AEXIST) == 0)
632 return -1;
633 f = create(s, OREAD, DMDIR | 0777);
634 if (f >= 0)
635 close(f);
636 return f;
639 static void
640 mkpdirs(char *s)
642 int done = 0;
643 char *p = s;
645 while (!done && (p = strchr(p + 1, '/')) != nil) {
646 *p = '\0';
647 done = (access(s, AEXIST) < 0 && makedir(s) < 0);
648 *p = '/';
652 /* copy a file from the archive into the filesystem */
653 static void
654 extract1(int ar, Hdr *hp, char *fname)
656 int wrbytes, fd = -1, dir = 0, okcreate = 1;
657 long mtime = strtol(hp->mtime, nil, 8);
658 ulong mode = strtoul(hp->mode, nil, 8) & 0777;
659 Off bytes = strtoll(hp->size, nil, 8);
660 ulong blksread, blksleft = BYTES2TBLKS(bytes);
661 Hdr *hbp;
663 if (isdir(hp)) {
664 mode |= DMDIR|0700;
665 blksleft = 0;
666 dir = 1;
668 switch (hp->linkflag) {
669 case LF_LINK:
670 case LF_SYMLINK1:
671 case LF_SYMLINK2:
672 case LF_FIFO:
673 blksleft = okcreate = 0;
674 break;
676 if (relative && fname[0] == '/')
677 fname++;
678 if (verb == Xtract) {
679 cleanname(fname);
680 switch (hp->linkflag) {
681 case LF_LINK:
682 case LF_SYMLINK1:
683 case LF_SYMLINK2:
684 fprint(2, "%s: can't make (sym)link %s\n",
685 argv0, fname);
686 break;
687 case LF_FIFO:
688 fprint(2, "%s: can't make fifo %s\n", argv0, fname);
689 break;
691 if (okcreate)
692 fd = create(fname, (dir? OREAD: OWRITE), mode);
693 if (fd < 0) {
694 mkpdirs(fname);
695 fd = create(fname, (dir? OREAD: OWRITE), mode);
696 if (fd < 0 && (!dir || access(fname, AEXIST) < 0))
697 fprint(2, "%s: can't create %s: %r\n",
698 argv0, fname);
700 if (fd >= 0 && verbose)
701 fprint(2, "%s: x %s\n", argv0, fname);
702 } else if (verbose) {
703 char *cp = ctime(mtime);
705 print("%M %8lld %-12.12s %-4.4s %s\n",
706 mode, bytes, cp+4, cp+24, fname);
707 } else
708 print("%s\n", fname);
710 for (; blksleft > 0; blksleft -= blksread) {
711 hbp = getblkrd(ar);
712 if (hbp == nil)
713 sysfatal("unexpected EOF on archive extracting %s",
714 fname);
715 blksread = gothowmany(blksleft);
716 wrbytes = (bytes >= Tblock*blksread? Tblock*blksread: bytes);
717 if (fd >= 0 && write(fd, (char*)hbp, wrbytes) != wrbytes)
718 sysfatal("write error on %s: %r", fname);
719 putreadblks(ar, blksread);
720 bytes -= wrbytes;
722 if (fd >= 0) {
723 /*
724 * directories should be wstated after we're done
725 * creating files in them.
726 */
727 if (settime) {
728 Dir nd;
730 nulldir(&nd);
731 nd.mtime = mtime;
732 if (isustar(hp))
733 nd.gid = hp->gname;
734 dirfwstat(fd, &nd);
736 close(fd);
740 static void
741 skip(int ar, Hdr *hp, char *fname)
743 Off bytes;
744 ulong blksleft, blksread;
745 Hdr *hbp;
747 if (isdir(hp))
748 return;
749 bytes = strtoull(hp->size, nil, 8);
750 blksleft = BYTES2TBLKS(bytes);
751 for (; blksleft > 0; blksleft -= blksread) {
752 hbp = getblkrd(ar);
753 if (hbp == nil)
754 sysfatal("unexpected EOF on archive extracting %s",
755 fname);
756 blksread = gothowmany(blksleft);
757 putreadblks(ar, blksread);
761 static void
762 extract(char **argv)
764 int ar;
765 char *longname;
766 Hdr *hp;
768 if (usefile)
769 ar = open(usefile, OREAD);
770 else
771 ar = Stdin;
772 if (ar < 0)
773 sysfatal("can't open archive %s: %r", usefile);
774 while ((hp = readhdr(ar)) != nil) {
775 longname = name(hp);
776 if (match(longname, argv))
777 extract1(ar, hp, longname);
778 else
779 skip(ar, hp, longname);
781 if (ar > Stderr)
782 close(ar);
785 void
786 main(int argc, char *argv[])
788 int errflg = 0;
789 char *ret = nil;
791 quotefmtinstall();
792 fmtinstall('M', dirmodefmt);
794 TARGBEGIN {
795 case 'c':
796 creat++;
797 verb = Replace;
798 break;
799 case 'f':
800 usefile = EARGF(usage());
801 break;
802 case 'g':
803 argid = strtoul(EARGF(usage()), 0, 0);
804 break;
805 case 'p':
806 posix++;
807 break;
808 case 'P':
809 posix = 0;
810 break;
811 case 'r':
812 verb = Replace;
813 break;
814 case 'R':
815 relative++;
816 break;
817 case 't':
818 verb = Toc;
819 break;
820 case 'T':
821 settime++;
822 break;
823 case 'u':
824 aruid = strtoul(EARGF(usage()), 0, 0);
825 break;
826 case 'v':
827 verbose++;
828 break;
829 case 'x':
830 verb = Xtract;
831 break;
832 case '-':
833 break;
834 default:
835 errflg++;
836 break;
837 } TARGEND
839 if (argc < 0 || errflg)
840 usage();
842 initblks();
843 switch (verb) {
844 case Toc:
845 case Xtract:
846 extract(argv);
847 break;
848 case Replace:
849 if (getwd(origdir, sizeof origdir) == nil)
850 strcpy(origdir, "/tmp");
851 replace(argv);
852 chdir(origdir); /* for profiling */
853 break;
854 default:
855 usage();
856 break;
858 exits(ret);