Commit Diff


commit - 8477327a46979f9cfb839174a87ce51a085c3abd
commit + c15ce40c4456d3bb6d44092bb22097ad93768c3a
blob - /dev/null
blob + f09977b1a54d355b9b1d2d849dfac05e34f35590 (mode 644)
--- /dev/null
+++ src/cmd/file.c
@@ -0,0 +1,1252 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include <mach.h>
+
+/*
+ * file - determine type of file
+ */
+/*
+ * Assemble the four bytes at p into one value, p[0] least significant.
+ * Each byte is widened to ulong before it is shifted, so a top byte
+ * of 0x80 or more is never shifted into the sign bit of an int and
+ * the result is never sign-extended.
+ */
+#define	LENDIAN(p)	((ulong)(p)[0] | ((ulong)(p)[1]<<8) | ((ulong)(p)[2]<<16) | ((ulong)(p)[3]<<24))
+
+uchar	buf[6001];	/* leading bytes of the file; filetype() stores a NUL at buf[nbuf] */
+short	cfreq[140];	/* character histogram: indexed by ASCII code or by a C* class code */
+short	wfreq[50];	/* dictionary-word histogram: indexed by the word class from dict[] */
+int	nbuf;		/* number of bytes read into buf by filetype() */
+Dir*	mbuf;		/* dirfstat() result for the file being classified */
+int	fd;		/* descriptor opened by type(); long0() and istar() seek/read on it */
+char 	*fname;		/* name of the file being classified, set by type() */
+char	*slash;		/* rightmost '/' in that name (or 0), set by type() */
+
+/*
+ * Histogram index codes.
+ * Cword..I3 are the word classes stored in dict[] and counted in wfreq[].
+ * Clatin..Cutf are character classes counted in cfreq[]; they start at
+ * 128 so they sit above the slots filetype() indexes by 7-bit ASCII code.
+ */
+enum
+{
+	Cword,		/* counted by isc() as a C keyword */
+	Fword,		/* "common", "dimension", "subroutine", ... (presumably Fortran) */
+	Aword,		/* counted by isas() */
+	Alword,		/* makes isc() report "alef program" */
+	Lword,		/* counted by islimbo() */
+	I1,		/* the word "include" */
+	I2,		/* header base names: "bio", "libc", "stdio", "u" */
+	I3,		/* the word "h" */
+	Clatin	= 128,	/* rune in 0xA0..0xFF */
+	Cbinary,	/* rune in 0x81..0x9F */
+	Cnull,		/* NUL */
+	Ceascii,	/* ASCII that is neither printable nor white space */
+	Cutf,		/* rune 0x80 or any rune above 0xFF */
+};
+/*
+ * Keywords that hint at the language of a text file, each tagged with
+ * the word class that wordfreq() counts in wfreq[].
+ *
+ * wordfreq() looks words up by binary search using strcmp(), so the
+ * entries MUST stay sorted in strcmp (byte) order: upper case before
+ * lower case, then alphabetical.  A single misplaced entry hides not
+ * only itself but every entry the search would reach through it.
+ */
+struct
+{
+	char*	word;
+	int	class;
+} dict[] =
+{
+	"PATH",		Lword,
+	"TEXT",		Aword,
+	"adt",		Alword,
+	"aggr",		Alword,
+	"alef",		Alword,
+	"array",	Lword,
+	"bio",		I2,
+	"block",	Fword,
+	"chan",		Alword,
+	"char",		Cword,
+	"common",	Fword,
+	"con",		Lword,
+	"data",		Fword,
+	"dimension",	Fword,
+	"double",	Cword,
+	"extern",	Cword,
+	"float",	Cword,
+	"fn",		Lword,
+	"function",	Fword,
+	"h",		I3,
+	"implement",	Lword,
+	"import",	Lword,
+	"include",	I1,
+	"int",		Cword,
+	"integer",	Fword,
+	"iota",		Lword,
+	"libc",		I2,
+	"long",		Cword,
+	"module",	Lword,
+	"real",		Fword,
+	"ref",		Lword,
+	"register",	Cword,
+	"self",		Lword,
+	"short",	Cword,
+	"static",	Cword,
+	"stdio",	I2,
+	"struct",	Cword,
+	"subroutine",	Fword,
+	"u",		I2,
+	"void",		Cword,
+};
+
+/*
+ * codes for 'mode' field in language structure;
+ * utf_count() and print_utf() use them to report a language once
+ * even when it occupies several table entries
+ */
+enum	{
+		Normal	= 0,	/* language described by a single entry */
+		First,		/* first entry for language spanning several ranges */
+		Multi,		/* later entries for a language spanning several ranges */
+		Shared,		/* codes used in several languages */
+	};
+
+/*
+ * Rune ranges and the language each one suggests.
+ * bump_utf_count() binary-searches this table on low/high, so the
+ * entries must stay in ascending, non-overlapping order.  The entry
+ * with a nil name ends the table for the linear scans and is kept
+ * out of the binary search.
+ */
+struct
+{
+	int	mode;		/* see enum above */
+	int 	count;		/* runes seen in this range; reset by filetype() for each file */
+	int	low;		/* first rune of the range */
+	int	high;		/* last rune of the range (inclusive) */
+	char	*name;		/* language name printed by print_utf() */
+	
+} language[] =
+{
+	Normal, 0,	0x0080, 0x0080,	"Extended Latin",
+	Normal,	0,	0x0100,	0x01FF,	"Extended Latin",
+	Normal,	0,	0x0370,	0x03FF,	"Greek",
+	Normal,	0,	0x0400,	0x04FF,	"Cyrillic",
+	Normal,	0,	0x0530,	0x058F,	"Armenian",
+	Normal,	0,	0x0590,	0x05FF,	"Hebrew",
+	Normal,	0,	0x0600,	0x06FF,	"Arabic",
+	Normal,	0,	0x0900,	0x097F,	"Devanagari",
+	Normal,	0,	0x0980,	0x09FF,	"Bengali",
+	Normal,	0,	0x0A00,	0x0A7F,	"Gurmukhi",
+	Normal,	0,	0x0A80,	0x0AFF,	"Gujarati",
+	Normal,	0,	0x0B00,	0x0B7F,	"Oriya",
+	Normal,	0,	0x0B80,	0x0BFF,	"Tamil",
+	Normal,	0,	0x0C00,	0x0C7F,	"Telugu",
+	Normal,	0,	0x0C80,	0x0CFF,	"Kannada",
+	Normal,	0,	0x0D00,	0x0D7F,	"Malayalam",
+	Normal,	0,	0x0E00,	0x0E7F,	"Thai",
+	Normal,	0,	0x0E80,	0x0EFF,	"Lao",
+	Normal,	0,	0x1000,	0x105F,	"Tibetan",
+	Normal,	0,	0x10A0,	0x10FF,	"Georgian",
+	Normal,	0,	0x3040,	0x30FF,	"Japanese",
+	Normal,	0,	0x3100,	0x312F,	"Chinese",
+	First,	0,	0x3130,	0x318F,	"Korean",
+	Multi,	0,	0x3400,	0x3D2F,	"Korean",
+	Shared,	0,	0x4e00,	0x9fff,	"CJK",
+	Normal,	0,	0,	0,	0,		/* terminal entry */
+};
+	
+	
+/*
+ * Gross classification of the buffer, chosen by filetype() from the
+ * character histogram before the individual recognizers run.
+ */
+enum
+{
+	Fascii,		/* printable ascii */
+	Flatin,		/* latin 1 */
+	Futf,		/* UTF character set */
+	Fbinary,	/* binary */
+	Feascii,	/* ASCII with control chars */
+	Fnull,		/* NULL in file */
+} guess;
+
+void	bump_utf_count(Rune);
+int	cistrncmp(char*, char*, int);
+void	filetype(int);
+int	getfontnum(uchar*, uchar**);
+int	isas(void);
+int	isc(void);
+int	isenglish(void);
+int	ishp(void);
+int	ishtml(void);
+int	isrfc822(void);
+int	ismbox(void);
+int	islimbo(void);
+int	ismung(void);
+int	isp9bit(void);
+int	isp9font(void);
+int	isrtf(void);
+int	ismsdos(void);
+int	iself(void);
+int	istring(void);
+int	iff(void);
+int	long0(void);
+int	istar(void);
+int	p9bitnum(uchar*);
+int	p9subfont(uchar*);
+void	print_utf(void);
+void	type(char*, int);
+int	utf_count(void);
+void	wordfreq(void);
+
+/*
+ * Recognizers, tried by filetype() in this order.  Each one prints
+ * its verdict and returns non-zero when it recognizes the buffer;
+ * the first that does so ends the search.  A nil entry ends the table.
+ */
+int	(*call[])(void) =
+{
+	long0,		/* recognizable by first 4 bytes */
+	istring,	/* recognizable by first string */
+	iff,		/* interchange file format (strings) */
+	isrfc822,	/* email file */
+	ismbox,		/* mail box */
+	istar,		/* recognizable by tar checksum */
+	ishtml,		/* html keywords */
+/*	iscint,		/* compiler/assembler intermediate */
+	islimbo,	/* limbo source */
+	isc,		/* c & alef compiler key words */
+	isas,		/* assembler key words */
+	ismung,		/* entropy compressed/encrypted */
+	isp9font,	/* plan 9 font */
+	isp9bit,	/* plan 9 image (as from /dev/window) */
+	isenglish,	/* char frequency English */
+	isrtf,		/* rich text format */
+	ismsdos,	/* msdos exe (virus file attachment) */
+	iself,		/* ELF (foreign) executable */
+	0
+};
+
+/* set by the -m option: print MIME types instead of descriptions */
+int mime;
+
+/* default MIME answers; both carry their own trailing newline */
+#define OCTET	"application/octet-stream\n"
+#define PLAIN	"text/plain\n"
+
+/*
+ * usage: file [-m] [file...]
+ * With no file arguments, classify standard input; otherwise classify
+ * each named file in turn.
+ */
+void
+main(int argc, char *argv[])
+{
+	int i, j, maxlen;
+	char *cp;
+	Rune r;
+
+	ARGBEGIN{
+	case 'm':
+		mime = 1;
+		break;
+	default:
+		fprint(2, "usage: file [-m] [file...]\n");
+		exits("usage");
+	}ARGEND;
+
+	/*
+	 * maxlen is the length in runes of the longest file name; type()
+	 * uses it to line up its output.  It stays 0, which makes type()
+	 * print no name at all, for -m with at most one file.
+	 */
+	maxlen = 0;
+	if(mime == 0 || argc > 1){
+		for(i = 0; i < argc; i++) {
+			for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
+					;
+			if(j > maxlen)
+				maxlen = j;
+		}
+	}
+	if (argc <= 0) {
+		if(!mime)
+			print ("stdin: ");
+		filetype(0);
+	}
+	else {
+		for(i = 0; i < argc; i++)
+			type(argv[i], maxlen);
+	}
+	exits(0);
+}
+
+/*
+ * Classify one named file.
+ * file is the path to open; nlen is the length in runes of the longest
+ * name being listed.  When nlen is positive the name is printed first,
+ * followed by ':' and enough blanks to line up the descriptions; when
+ * it is 0 (main passes 0 for -m with a single file) no name is printed.
+ * Sets the globals fname, slash and fd for the recognizers.
+ */
+void
+type(char *file, int nlen)
+{
+	Rune r;
+	int i;
+	char *p;
+
+	/*
+	 * Locate the rightmost slash even when no name is printed:
+	 * isp9font() needs it to resolve subfont names relative to the
+	 * directory of this file.
+	 */
+	slash = 0;
+	for (i = 0, p = file; *p; i++) {
+		if (*p == '/')			/* find rightmost slash */
+			slash = p;
+		p += chartorune(&r, p);		/* count runes */
+	}
+	if(nlen > 0)
+		print("%s:%*s", file, nlen-i+1, "");
+	fname = file;
+	if ((fd = open(file, OREAD)) < 0) {
+		print("cannot open\n");
+		return;
+	}
+	filetype(fd);
+	close(fd);
+}
+
+/*
+ * Classify the file open on fd and print the verdict.
+ * Reads up to sizeof(buf)-1 bytes into buf, builds the character and
+ * word histograms, then tries each recognizer in call[]; if none
+ * claims the file, prints the gross classification.
+ * NOTE: the parameter hides the global fd; recognizers that seek or
+ * re-read (long0, istar) use the global, which type() sets.
+ */
+void
+filetype(int fd)
+{
+	Rune r;
+	int i, f, n;
+	char *p, *eob;
+
+	free(mbuf);
+	mbuf = dirfstat(fd);
+	if(mbuf == nil){
+		print("cannot stat: %r\n");
+		return;
+	}
+	if(mbuf->mode & DMDIR) {
+		print(mime ? "text/directory\n" : "directory\n");
+		return;
+	}
+	/* anything whose device type is neither 'M' nor '|' is reported as special */
+	if(mbuf->type != 'M' && mbuf->type != '|') {
+		print(mime ? OCTET : "special file #%c/%s\n",
+			mbuf->type, mbuf->name);
+		return;
+	}
+	nbuf = read(fd, buf, sizeof(buf)-1);
+
+	if(nbuf < 0) {
+		print("cannot read\n");
+		return;
+	}
+	if(nbuf == 0) {
+		print(mime ? PLAIN : "empty file\n");
+		return;
+	}
+	/* terminate so the string routines used by the recognizers stop here */
+	buf[nbuf] = 0;
+
+	/*
+	 * build histogram table
+	 */
+	memset(cfreq, 0, sizeof(cfreq));
+	for (i = 0; language[i].name; i++)
+		language[i].count = 0;
+	eob = (char *)buf+nbuf;
+	/* n counts the runes decoded */
+	for(n = 0, p = (char *)buf; p < eob; n++) {
+		/* stop at a rune that is cut off by the end of the buffer */
+		if (!fullrune(p, eob-p) && eob-p < UTFmax)
+			break;
+		p += chartorune(&r, p);
+		if (r == 0)
+			f = Cnull;
+		else if (r <= 0x7f) {
+			if (!isprint(r) && !isspace(r))
+				f = Ceascii;	/* ASCII control char */
+			else f = r;
+		} else if (r == 0x080) {
+			bump_utf_count(r);
+			f = Cutf;
+		} else if (r < 0xA0)
+				f = Cbinary;	/* Invalid Runes */
+		else if (r <= 0xff)
+				f = Clatin;	/* Latin 1 */
+		else {
+			bump_utf_count(r);
+			f = Cutf;		/* UTF extension */
+		}
+		cfreq[f]++;			/* ASCII chars peg directly */
+	}
+	/*
+	 * gross classify
+	 */
+	if (cfreq[Cbinary])
+		guess = Fbinary;
+	else if (cfreq[Cutf])
+		guess = Futf;
+	else if (cfreq[Clatin])
+		guess = Flatin;
+	else if (cfreq[Ceascii])
+		guess = Feascii;
+	else if (cfreq[Cnull] == n) {
+		print(mime ? OCTET : "first block all null bytes\n");
+		return;
+	}
+	else guess = Fascii;
+	/*
+	 * lookup dictionary words
+	 */
+	memset(wfreq, 0, sizeof(wfreq));
+	if(guess == Fascii || guess == Flatin || guess == Futf) 
+		wordfreq();
+	/*
+	 * call individual classify routines
+	 */
+	for(i=0; call[i]; i++)
+		if((*call[i])())
+			return;
+
+	/*
+	 * if all else fails,
+	 * print out gross classification
+	 */
+	/* mime is known to be 0 inside this test, so only "short " can be printed */
+	if (nbuf < 100 && !mime)
+		print(mime ? PLAIN : "short ");
+	if (guess == Fascii)
+		print(mime ? PLAIN : "Ascii\n");
+	else if (guess == Feascii)
+		print(mime ? PLAIN : "extended ascii\n");
+	else if (guess == Flatin)
+		print(mime ? PLAIN : "latin ascii\n");
+	else if (guess == Futf && utf_count() < 4)
+		print_utf();
+	else print(mime ? OCTET : "binary\n");
+}
+
+/*
+ * Credit rune r to the language whose range contains it, if any.
+ * Binary search over language[], leaving out the terminal entry.
+ */
+void
+bump_utf_count(Rune r)
+{
+	int lo, hi, m;
+
+	lo = 0;
+	hi = sizeof(language)/sizeof(language[0])-1;
+	while (lo < hi) {
+		m = (lo+hi)/2;
+		if (r < language[m].low)
+			hi = m;
+		else if (r > language[m].high)
+			lo = m+1;
+		else {
+			language[m].count++;
+			return;
+		}
+	}
+}
+
+/*
+ * Return the number of distinct languages seen in the buffer.
+ * Only Normal and First entries are counted, so the later ranges of
+ * a multi-range language and the shared ranges add nothing.
+ */
+int
+utf_count(void)
+{
+	int idx, total;
+
+	total = 0;
+	for (idx = 0; language[idx].name; idx++) {
+		if (language[idx].count <= 0)
+			continue;
+		if (language[idx].mode == Normal || language[idx].mode == First)
+			total++;
+	}
+	return total;
+}
+
+/*
+ * Report whether the buffer contained any ASCII letter.
+ * Returns 1 if cfreq[] holds a non-zero count for any of a-z or A-Z,
+ * 0 otherwise.
+ */
+int
+chkascii(void)
+{
+	int i;
+
+	/* both bounds are inclusive: 'z' and 'Z' are letters too */
+	for (i = 'a'; i <= 'z'; i++)
+		if (cfreq[i])
+			return 1;
+	for (i = 'A'; i <= 'Z'; i++)
+		if (cfreq[i])
+			return 1;
+	return 0;
+}
+
+/*
+ * Return the index of the entry of mode First whose language name is
+ * name, or -1 if there is none.
+ */
+int
+find_first(char *name)
+{
+	int k;
+
+	for (k = 0; language[k].name != 0; k++) {
+		if (language[k].mode != First)
+			continue;
+		if (strcmp(language[k].name, name) == 0)
+			return k;
+	}
+	return -1;
+}
+
+/*
+ * Describe a UTF text file by the languages found in it, for example
+ * "Ascii & Greek text".  With -m only "text/plain" is printed.
+ */
+void
+print_utf(void)
+{
+	int i, printed, j;
+
+	if(mime){
+		print(PLAIN);
+		return;
+	}
+	/* printed records whether a " & " separator is needed before the next name */
+	if (chkascii()) {
+		printed = 1;
+		print("Ascii");
+	} else
+		printed = 0;
+	for (i = 0; language[i].name; i++)
+		if (language[i].count) {
+			switch(language[i].mode) {
+			case Multi:
+				/*
+				 * a later range of a language is named only when
+				 * its First range saw nothing, so the name is
+				 * not printed twice
+				 */
+				j = find_first(language[i].name);
+				if (j < 0)
+					break;
+				if (language[j].count > 0)
+					break;
+				/* Fall through */
+			case Normal:
+			case First:
+				if (printed)
+					print(" & ");
+				else printed = 1;
+				print("%s", language[i].name);
+				break;
+			case Shared:
+			default:
+				break;
+			}
+		}
+	if(!printed)
+		print("UTF");
+	print(" text\n");
+}
+
+/*
+ * Count dictionary words in buf.
+ * Every maximal run of alphabetic bytes is looked up in dict[] by
+ * binary search; a hit increments wfreq[] for that word's class.
+ */
+void
+wordfreq(void)
+{
+	int low, high, mid, r;
+	uchar *p, *p2, c;
+
+	p = buf;
+	for(;;) {
+		/* skip to the start of the next word */
+		while (p < buf+nbuf && !isalpha(*p))
+			p++;
+		if (p >= buf+nbuf)
+			return;
+		p2 = p;
+		while(p < buf+nbuf && isalpha(*p))
+			p++;
+		/* terminate the word in place for strcmp; the byte is restored below */
+		c = *p;
+		*p = 0;
+		high = sizeof(dict)/sizeof(dict[0]);
+		for(low = 0;low < high;) {
+			mid = (low+high)/2;
+			r = strcmp(dict[mid].word, (char*)p2);
+			if(r == 0) {
+				wfreq[dict[mid].class]++;
+				break;
+			}
+			if(r < 0)
+				low = mid+1;
+			else
+				high = mid;
+		}
+		*p++ = c;
+	}
+}
+
+/*
+ * One magic-number rule for filemagic(): a value matches when its
+ * bits selected by mask equal x.
+ */
+typedef struct Filemagic Filemagic;
+struct Filemagic {
+	ulong x;	/* expected value of the masked bits */
+	ulong mask;	/* bits of the value that take part in the comparison */
+	char *desc;	/* text printed in normal mode */
+	char *mime;	/* text printed in -m mode */
+};
+
+/*
+ * Files recognizable from their first four bytes, taken as a
+ * little-endian value (see long0).
+ * filemagic() prints desc or mime exactly as stored, so every string
+ * here must end in a newline.
+ *
+ * NOTE(review): the pac4 value has bits set in the byte its mask
+ * clears, and the mp3 value looks as if it was written in big-endian
+ * byte order (a leading FF FA/FB would be 0x0000FAFF under mask
+ * 0x0000FEFF) - confirm both against sample files.
+ */
+Filemagic long0tab[] = {
+	0xF16DF16D,	0xFFFFFFFF,	"pac1 audio file\n",	OCTET,
+	0x31636170,	0xFFFFFFFF,	"pac3 audio file\n",	OCTET,
+	0x32636170,	0xFFFF00FF,	"pac4 audio file\n",	OCTET,
+	0xBA010000,	0xFFFFFFFF,	"mpeg system stream\n",	OCTET,
+	0x30800CC0,	0xFFFFFFFF,	"inferno .dis executable\n", OCTET,
+	0x04034B50,	0xFFFFFFFF,	"zip archive\n", "application/zip\n",
+	070707,		0xFFFF,		"cpio archive\n", OCTET,
+	0x2F7,		0xFFFF,		"tex dvi\n", "application/dvi\n",
+	0xfffa0000,	0xfffe0000,	"mp3 audio\n",	"audio/mpeg\n",
+};
+
+/*
+ * Look x up in the ntab rules of tab.
+ * On the first rule that matches, print its MIME type (with -m) or
+ * its description and return 1; return 0 if no rule matches.
+ */
+int
+filemagic(Filemagic *tab, int ntab, ulong x)
+{
+	int i;
+
+	for(i=0; i<ntab; i++){
+		/*
+		 * mask both sides: a rule whose value has bits set outside
+		 * its own mask could otherwise never match
+		 */
+		if((x&tab[i].mask) != (tab[i].x&tab[i].mask))
+			continue;
+		/* table text is data, not a format */
+		print("%s", mime ? tab[i].mime : tab[i].desc);
+		return 1;
+	}
+	return 0;
+}
+	
+/*
+ * Recognize a file by the magic number in its first four bytes.
+ * Returns 1 (after printing) when long0tab has a matching rule,
+ * 0 otherwise.
+ */
+int
+long0(void)
+{
+	ulong x;
+
+	seek(fd, 0, 0);		/* reposition to start of file */
+/*
+	executable header recognition is disabled in this port:
+	if(crackhdr(fd, &f)) {
+		print(mime ? OCTET : "%s\n", f.name);
+		return 1;
+	}
+*/
+	/* with fewer than four bytes read, the rest of buf is left over from an earlier file */
+	if(nbuf < 4)
+		return 0;
+	/* keep exactly the low 32 bits, whatever the width of ulong */
+	x = (ulong)LENDIAN(buf) & 0xFFFFFFFFUL;
+	if(filemagic(long0tab, nelem(long0tab), x))
+		return 1;
+	return 0;
+}
+
+/* from tar.c */
+enum { NAMSIZ = 100, TBLOCK = 512 };
+
+/*
+ * One tar header block, viewed either as TBLOCK raw bytes (dummy, used
+ * by checksum) or as the header fields (dbuf, used by istar).
+ */
+union	hblock
+{
+	char	dummy[TBLOCK];
+	struct	header
+	{
+		char	name[NAMSIZ];
+		char	mode[8];
+		char	uid[8];
+		char	gid[8];
+		char	size[12];
+		char	mtime[12];
+		char	chksum[8];	/* parsed by istar as an octal number */
+		char	linkflag;
+		char	linkname[NAMSIZ];
+		/* rest are defined by POSIX's ustar format; see p1003.2b */
+		char	magic[6];	/* "ustar" */
+		char	version[2];
+		char	uname[32];
+		char	gname[32];
+		char	devmajor[8];
+		char	devminor[8];
+		char	prefix[155];  /* if non-null, path = prefix "/" name */
+	} dbuf;
+};
+
+/*
+ * Compute the tar checksum of a header block: the sum of all TBLOCK
+ * bytes, each taken as unsigned, with the chksum field counted as
+ * blanks.  The chksum field of *hp is overwritten with blanks.
+ */
+int
+checksum(union hblock *hp)
+{
+	int sum, k;
+
+	memset(hp->dbuf.chksum, ' ', sizeof hp->dbuf.chksum);
+	sum = 0;
+	for (k = 0; k < TBLOCK; k++)
+		sum += hp->dummy[k] & 0xff;
+	return sum;
+}
+
+/*
+ * Recognize a tar archive: re-read the first TBLOCK bytes of the file
+ * and accept when the header names a file and its stored checksum
+ * equals the computed one.
+ */
+int
+istar(void)
+{
+	char tblock[TBLOCK];
+	union hblock *hp;
+	struct header *hdr;
+	int want;
+
+	hp = (union hblock *)tblock;
+	hdr = &hp->dbuf;
+
+	seek(fd, 0, 0);		/* reposition to start of file */
+	if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
+		return 0;
+	/* read the stored value before checksum() blanks the field */
+	want = strtol(hdr->chksum, 0, 8);
+	if (hdr->name[0] == '\0' || checksum(hp) != want)
+		return 0;
+
+	if (strcmp(hdr->magic, "ustar") != 0)
+		print(mime? "application/x-tar\n": "tar archive\n");
+	else
+		print(mime? "application/x-ustar\n":
+			"posix tar archive\n");
+	return 1;
+}
+
+/*
+ * initial words to classify file
+ *
+ * istring() compares the first `length' bytes of the buffer with key
+ * and uses the first entry that matches, so a longer key must come
+ * before any shorter key that is its prefix.  length is explicit
+ * because some keys contain NUL bytes.  istring() supplies the
+ * newline, so filetype and mime carry none.
+ */
+struct	FILE_STRING
+{
+	char 	*key;		/* leading bytes to look for */
+	char	*filetype;	/* description printed in normal mode */
+	int	length;		/* number of bytes of key compared */
+	char	*mime;		/* type printed in -m mode */
+} file_string[] =
+{
+	"!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream",
+	"!<arch>\n",		"archive",			8,	"application/octet-stream",
+	"070707",		"cpio archive - ascii header",	6,	"application/octet-stream",
+	"#!/bin/rc",		"rc executable file",		9,	"text/plain",
+	"#!/bin/sh",		"sh executable file",		9,	"text/plain",
+	"%!",			"postscript",			2,	"application/postscript",
+	"\004%!",		"postscript",			3,	"application/postscript",
+	"x T post",		"troff output for post",	8,	"application/troff",
+	"x T Latin1",		"troff output for Latin1",	10,	"application/troff",
+	"x T utf",		"troff output for UTF",		7,	"application/troff",
+	"x T 202",		"troff output for 202",		7,	"application/troff",
+	"x T aps",		"troff output for aps",		7,	"application/troff",
+	"GIF",			"GIF image", 			3,	"image/gif",
+	"\0PC Research, Inc\0",	"ghostscript fax file",		18,	"application/ghostscript",
+	"%PDF",			"PDF",				4,	"application/pdf",
+	"<html>\n",		"HTML file",			7,	"text/html",
+	"<HTML>\n",		"HTML file",			7,	"text/html",
+	"compressed\n",		"Compressed image or subfont",	11,	"application/octet-stream",
+	"\111\111\052\000",	"tiff",				4,	"image/tiff",
+	"\115\115\000\052",	"tiff",				4,	"image/tiff",
+	"\377\330\377\340",	"jpeg",				4,	"image/jpeg",
+	"\377\330\377\341",	"jpeg",				4,	"image/jpeg",
+	"\377\330\377\333",	"jpeg",				4,	"image/jpeg",
+	"BM",			"bmp",				2,	"image/bmp",
+	"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/octet-stream",
+	"<MakerFile ",		"FrameMaker file",		11,	"application/framemaker",
+	"\033%-12345X",	"HPJCL file",		9,	"application/hpjcl",
+	"ID3",			"mp3 audio with id3",	3,	"audio/mpeg",
+	0,0,0,0
+};
+
+/*
+ * Recognize a file by the string it starts with: either an entry of
+ * file_string[], or a "TYPE=" line, in which case the rest of that
+ * line is printed followed by " picture".
+ */
+int
+istring(void)
+{
+	struct FILE_STRING *fs;
+	int eol;
+
+	for(fs = file_string; fs->key; fs++) {
+		if(nbuf < fs->length || memcmp(buf, fs->key, fs->length) != 0)
+			continue;
+		print("%s\n", mime ? fs->mime : fs->filetype);
+		return 1;
+	}
+
+	if(strncmp((char*)buf, "TYPE=", 5) != 0)
+		return 0;
+
+	/* td: find the end of the first line */
+	eol = 5;
+	while(eol < nbuf && buf[eol] != '\n')
+		eol++;
+	if(mime)
+		print(OCTET);
+	else
+		print("%.*s picture\n", utfnlen((char*)buf+5, eol-5), (char*)buf+5);
+	return 1;
+}
+
+/*
+ * Recognize AIFF audio: "FORM" at offset 0 and "AIFF" at offset 8.
+ */
+int
+iff(void)
+{
+	/*
+	 * the second tag ends at offset 12; bytes past nbuf are left
+	 * over from an earlier file and must not be compared
+	 */
+	if (nbuf < 12)
+		return 0;
+	if (strncmp((char*)buf, "FORM", 4) == 0 &&
+	    strncmp((char*)buf+8, "AIFF", 4) == 0) {
+		print("%s\n", mime? "audio/x-aiff": "aiff audio");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Tag names that ishtml() counts as evidence of HTML; the comparison
+ * is case-insensitive.  A nil entry ends the list.
+ */
+char*	html_string[] =
+{
+	"title",
+	"body",
+	"head",
+	"strong",
+	"h1",
+	"h2",
+	"h3",
+	"h4",
+	"h5",
+	"h6",
+	"ul",
+	"li",
+	"dl",
+	"br",
+	"em",
+	0,
+};
+
+/*
+ * Recognize HTML by counting known tags.
+ * The text between each '<' (with an optional '/') and the next '>'
+ * is compared, over its own length and ignoring case, with the names
+ * in html_string[]; the file is HTML once six such tags are seen.
+ */
+int
+ishtml(void)
+{
+	uchar *p, *q;
+	int i, count;
+
+		/* compare strings between '<' and '>' to html table */
+	count = 0;
+	p = buf;
+	for(;;) {
+		while (p < buf+nbuf && *p != '<')
+			p++;
+		p++;
+		if (p >= buf+nbuf)
+			break;
+		if(*p == '/')
+			p++;
+		q = p;
+		while(p < buf+nbuf && *p != '>')
+			p++;
+		if (p >= buf+nbuf)
+			break;
+		/*
+		 * "<>" and "</>" name no tag.  They must be skipped: a
+		 * zero-length comparison succeeds against any keyword, which
+		 * would let every "<>" operator count as HTML.
+		 */
+		for(i = 0; p > q && html_string[i]; i++) {
+			if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
+				if(count++ > 4) {
+					print(mime ? "text/html\n" : "HTML file\n");
+					return 1;
+				}
+				break;
+			}
+		}
+		p++;
+	}
+	return 0;
+}
+
+/*
+ * Header field names, colon included, that isrfc822() counts as
+ * evidence of a mail message; the comparison is case-insensitive.
+ * A nil entry ends the list.
+ */
+char*	rfc822_string[] =
+{
+	"from:",
+	"date:",
+	"to:",
+	"subject:",
+	"received:",
+	"reply-to:",	/* the field is spelled with a hyphen, not a blank */
+	"sender:",
+	0,
+};
+
+/*
+ * Recognize a mail message.
+ * Scans the buffer line by line and counts lines that begin with a
+ * name from rfc822_string[], plus a leading "From ... remote from ..."
+ * line.  Scanning stops at the first line that is neither indented
+ * nor contains a ':'.  Three counted lines make an email file.
+ */
+int
+isrfc822(void)
+{
+
+	char *p, *q, *r;
+	int i, count;
+
+	count = 0;
+	p = (char*)buf;
+	for(;;) {
+		q = strchr(p, '\n');
+		if(q == nil)
+			break;
+		/* cut the line off temporarily so strstr stays within it */
+		*q = 0;
+		if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
+			count++;
+			*q = '\n';
+			p = q+1;
+			continue;
+		}
+		*q = '\n';
+		/* lines starting with white space are skipped without being counted */
+		if(*p != '\t' && *p != ' '){
+			r = strchr(p, ':');
+			if(r == 0 || r > q)
+				break;
+			for(i = 0; rfc822_string[i]; i++) {
+				if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
+					count++;
+					break;
+				}
+			}
+		}
+		p = q+1;
+	}
+	if(count >= 3){
+		print(mime ? "message/rfc822\n" : "email file\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Recognize a mail box: the first line starts with "From " and does
+ * not contain " remote from ".
+ */
+int
+ismbox(void)
+{
+	char *line, *nl;
+
+	line = (char*)buf;
+	nl = strchr(line, '\n');
+	if(nl == nil)
+		return 0;
+
+	/* cut the first line off so strstr stays within it */
+	*nl = 0;
+	if(strncmp(line, "From ", 5) != 0 || strstr(line, " remote from ") != nil){
+		*nl = '\n';
+		return 0;
+	}
+	print(mime ? "text/plain\n" : "mail box\n");
+	return 1;
+}
+
+/*
+ * Recognize C or Alef source from the word and character histograms.
+ * Any one of four patterns is enough: #include lines naming known
+ * headers, #include lines together with Alef words, at least five C
+ * keywords and five semicolons, or many ';' and '=' with at least
+ * one C keyword.
+ */
+int
+isc(void)
+{
+	int ninc, hit;
+
+	ninc = wfreq[I1];
+	hit = 0;
+	/* includes */
+	if(ninc >= 2 && wfreq[I2] >= ninc && wfreq[I3] >= ninc && cfreq['.'] >= ninc)
+		hit = 1;
+	else if(ninc >= 1 && wfreq[Alword] >= ninc && wfreq[I3] >= ninc && cfreq['.'] >= ninc)
+		hit = 1;
+	/* declarations */
+	else if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
+		hit = 1;
+	/* assignments */
+	else if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
+		hit = 1;
+	if(!hit)
+		return 0;
+
+	if(mime)
+		print(PLAIN);
+	else if(wfreq[Alword] > 0)
+		print("alef program\n");
+	else
+		print("c program\n");
+	return 1;
+}
+
+/*
+ * Recognize Limbo source: at least four words of class Lword.
+ */
+int
+islimbo(void)
+{
+	if(wfreq[Lword] >= 4){
+		print(mime ? PLAIN : "limbo program\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Recognize assembler source: at least two words of class Aword.
+ */
+int
+isas(void)
+{
+	if(wfreq[Aword] >= 2){
+		print(mime ? PLAIN : "as program\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * an even spread of byte values (high entropy) means compressed
+ * or encrypted
+ *
+ * The first 64 bytes are sorted into 8 buckets by their top three
+ * bits; perfectly even data would put 8 in each.  cs is the mean
+ * squared deviation from 8, and a small cs means an even spread.
+ */
+int
+ismung(void)
+{
+	int i, bucket[8];
+	float cs;
+
+	if(nbuf < 64)
+		return 0;
+	memset(bucket, 0, sizeof(bucket));
+	for(i=0; i<64; i++)
+		bucket[(buf[i]>>5)&07] += 1;
+
+	cs = 0.;
+	for(i=0; i<8; i++)
+		cs += (bucket[i]-8)*(bucket[i]-8);
+	cs /= 8.;
+	if(cs <= 24.322) {
+		/* NOTE(review): 1f 8b and 1f 9d look like the gzip and compress magic numbers - confirm */
+		if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d))
+			print(mime ? OCTET : "compressed\n");
+		else
+			print(mime ? OCTET : "encrypted\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * sum of the histogram counts for the upper- and lower-case forms
+ * of every letter in set (set is given in upper case)
+ */
+static int
+letterfreq(char *set)
+{
+	int total;
+
+	total = 0;
+	for(; *set; set++){
+		total += cfreq[*set];
+		total += cfreq[tolower(*set)];
+	}
+	return total;
+}
+
+/*
+ * Recognize English prose from punctuation habits and letter
+ * frequencies.  Applies to ASCII buffers only.  The text must mostly
+ * follow punctuation with a blank or newline, must not be dominated
+ * by '<', '>', '/' or ';', must be rich in vowels, and must use the
+ * letters ETAION at least ten times as often as VJKQXZ.
+ */
+int
+isenglish(void)
+{
+	char *s, *last;
+	int npunct, nbad, vowels, common, rare;
+
+	if(guess != Fascii && guess != Feascii)
+		return 0;
+
+	npunct = 0;
+	nbad = 0;
+	last = (char *)buf+nbuf-1;
+	for(s = (char *)buf; s < last; s++){
+		/* a NUL would match strchr's terminator, so rule it out first */
+		if(*s == 0 || strchr(".,)%;:?", *s) == nil)
+			continue;
+		npunct++;
+		if(s[1] != ' ' && s[1] != '\n')
+			nbad++;
+	}
+	if(nbad*5 > npunct)
+		return 0;
+	if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e'])	/* shell file test */
+		return 0;
+	if(2*cfreq[';'] > cfreq['e'])
+		return 0;
+
+	vowels = letterfreq("AEIOU");
+	common = letterfreq("ETAION");
+	rare = letterfreq("VJKQXZ");
+	if(vowels*5 < nbuf-cfreq[' '] || common < 10*rare)
+		return 0;
+	print(mime ? PLAIN : "English text\n");
+	return 1;
+}
+
+/*
+ * pick up a number with
+ * syntax _*[0-9]+_
+ *
+ * The number occupies a field of P9BITLEN bytes starting at bp:
+ * leading blanks, then digits, then exactly one blank in the last
+ * byte.  Returns the value, or -1 if the field is malformed.
+ */
+#define	P9BITLEN	12
+int
+p9bitnum(uchar *bp)
+{
+	int n, c, len;
+
+	len = P9BITLEN;
+	/* len counts the bytes of the field not yet consumed */
+	while(*bp == ' ') {
+		bp++;
+		len--;
+		if(len <= 0)
+			return -1;
+	}
+	n = 0;
+	/* every remaining byte but the last must be a digit */
+	while(len > 1) {
+		c = *bp++;
+		if(!isdigit(c))
+			return -1;
+		n = n*10 + c-'0';
+		len--;
+	}
+	if(*bp != ' ')
+		return -1;
+	return n;
+}
+
+/*
+ * Parse the first field of an image header and return the pixel
+ * depth in bits, or -1 if the field is not acceptable.
+ * A field that starts with a digit is in the old format and holds
+ * the log2 of the depth; otherwise it is in the new format, a list of
+ * letter-number pairs whose numbers are added up.
+ * *newp is set to 1 for the new format, 0 for the old.
+ */
+int
+depthof(char *s, int *newp)
+{
+	char *es;
+	int d;
+
+	*newp = 0;
+	es = s+12;
+	while(s<es && *s==' ')
+		s++;
+	if(s == es)
+		return -1;
+	if('0'<=*s && *s<='9'){
+		/*
+		 * bound the shift count: 1<<5 is 32 bits, the deepest pixel
+		 * the new format accepts below, and a larger count would
+		 * overflow the shift
+		 */
+		d = atoi(s);
+		if(d < 0 || d > 5)
+			return -1;
+		return 1<<d;
+	}
+
+	*newp = 1;
+	d = 0;
+	while(s<es && *s!=' '){
+		s++;	/* skip letter */
+		d += strtoul(s, &s, 10);
+	}
+	
+	switch(d){
+	case 32:
+	case 24:
+	case 16:
+	case 8:
+		return d;
+	}
+	return -1;
+}
+
+/*
+ * Recognize a Plan 9 image or subfont.
+ * The buffer must start with five P9BITLEN-wide fields: depth, then
+ * the rectangle min.x, min.y, max.x, max.y.  The image size computed
+ * from them is compared with the file length to tell an image from a
+ * subfont.  With -m both are reported as application/octet-stream,
+ * like the other binary formats.
+ */
+int
+isp9bit(void)
+{
+	int dep, lox, loy, hix, hiy, px, new;
+	long len;
+	char *newlabel;
+
+	/* with a short read, the header fields would hold bytes of an earlier file */
+	if(nbuf < 5*P9BITLEN)
+		return 0;
+
+	newlabel = "old ";
+
+	dep = depthof((char*)buf + 0*P9BITLEN, &new);
+	if(new)
+		newlabel = "";
+	lox = p9bitnum(buf + 1*P9BITLEN);
+	loy = p9bitnum(buf + 2*P9BITLEN);
+	hix = p9bitnum(buf + 3*P9BITLEN);
+	hiy = p9bitnum(buf + 4*P9BITLEN);
+	/* dep is a divisor below, so zero is rejected along with the error value */
+	if(dep <= 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0)
+		return 0;
+	/* an inverted rectangle would give a negative length and an offset before buf */
+	if(hix < lox || hiy < loy)
+		return 0;
+
+	if(dep < 8){
+		px = 8/dep;	/* pixels per byte */
+		/* set len to number of bytes of data per scan line */
+		len = (hix+px-1)/px - lox/px;
+	}else
+		len = (long)(hix-lox)*dep/8;
+	len *= (hiy-loy);		/* col length */
+	len += 5*P9BITLEN;		/* size of initial ascii */
+
+	/*
+	 * for image file, length is non-zero and must match calculation above
+	 * for /dev/window and /dev/screen the length is always zero
+	 * Ghostscript sometimes produces a little extra on the end
+	 * for subfont, the subfont header should follow immediately.
+	 */
+	if ((len != 0 && mbuf->length == 0)
+	|| mbuf->length == len
+	|| mbuf->length < len+P9BITLEN) {
+		print(mime ? OCTET : "%splan 9 image\n", newlabel);
+		return 1;
+	}
+	/*
+	 * A subfont header lying beyond buf is assumed to be there (the
+	 * same policy as p9subfont); testing the offset first avoids
+	 * forming a pointer outside buf.
+	 */
+	if (len+3*P9BITLEN > (long)sizeof(buf) || p9subfont(buf+len)) {
+		print(mime ? OCTET : "%ssubfont file\n", newlabel);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Check for a subfont header at p: three P9BITLEN-wide numbers (char
+ * count, height, ascent).  Returns 1 if all three parse, or if the
+ * header would lie beyond buf; 0 otherwise.
+ */
+int
+p9subfont(uchar *p)
+{
+	int field;
+
+		/* if image too big, assume it's a subfont */
+	if (p+3*P9BITLEN > buf+sizeof(buf))
+		return 1;
+
+	/* char count, height, ascent - in that order */
+	for (field = 0; field < 3; field++)
+		if (p9bitnum(p + field*P9BITLEN) < 0)
+			return 0;
+	return 1;
+}
+
+/* blank, tab or newline */
+#define	WHITESPACE(c)		((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+/*
+ * Recognize a Plan 9 font file: two numbers (height, ascent) followed
+ * by one or more "min max name" entries in which every name is a file
+ * that exists.  A name that does not start with '/' is looked up in
+ * the directory of the file being classified.
+ */
+int
+isp9font(void)
+{
+	uchar *cp, *p;
+	int i, n;
+	char pathname[1024];
+
+	cp = buf;
+	if (!getfontnum(cp, &cp))	/* height */
+		return 0;
+	if (!getfontnum(cp, &cp))	/* ascent */
+		return 0;
+	/* i counts the complete entries seen */
+	for (i = 0; 1; i++) {
+		if (!getfontnum(cp, &cp))	/* min */
+			break;
+		if (!getfontnum(cp, &cp))	/* max */
+			return 0;
+		while (WHITESPACE(*cp))
+			cp++;
+		/* p..cp brackets the file name of this entry */
+		for (p = cp; *cp && !WHITESPACE(*cp); cp++)
+				;
+			/* construct a path name, if needed */
+		n = 0;
+		if (*p != '/' && slash) {
+			/* copy the directory part of fname, slash included */
+			n = slash-fname+1;
+			if (n < sizeof(pathname))
+				memcpy(pathname, fname, n);
+			else n = 0;
+		}
+		/* a name too long for pathname is accepted without being checked */
+		if (n+cp-p < sizeof(pathname)) {
+			memcpy(pathname+n, p, cp-p);
+			n += cp-p;
+			pathname[n] = 0;
+			if (access(pathname, AEXIST) < 0)
+				return 0;
+		}
+	}
+	if (i) {
+		print(mime ? "text/plain\n" : "font file\n");
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Skip white space at cp and parse an unsigned number that is
+ * followed by white space.  On success returns 1 with *rp pointing
+ * just past the digits.  Returns 0 if no digit is found (*rp is then
+ * untouched) or if the number is not followed by white space.
+ */
+int
+getfontnum(uchar *cp, uchar **rp)
+{
+	uchar *s;
+
+	for (s = cp; WHITESPACE(*s); s++)	/* extract ulong delimited by whitespace */
+		;
+	if ('0' <= *s && *s <= '9') {
+		strtoul((char *)s, (char **)rp, 0);
+		return WHITESPACE(**rp) ? 1 : 0;
+	}
+	return 0;
+}
+
+/*
+ * Recognize rich text format: the string "\rtf1" anywhere in the
+ * buffer up to its first NUL.
+ */
+int
+isrtf(void)
+{
+	if(strstr((char *)buf, "\\rtf1") == nil)
+		return 0;
+	print(mime ? "application/rtf\n" : "rich text format\n");
+	return 1;
+}
+
+/*
+ * Recognize an MSDOS executable by its first two bytes, 0x4d 0x5a ("MZ").
+ */
+int
+ismsdos(void)
+{
+	if (buf[0] != 0x4d || buf[1] != 0x5a)
+		return 0;
+	print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
+	return 1;
+}
+
+/*
+ * Recognize an ELF file by its "\177ELF" magic.
+ * In normal mode the machine type, the 16-bit field at offset 18, is
+ * looked up in cpu[] - first as little-endian, then as big-endian -
+ * and printed with the verdict; "unknown" is printed when neither
+ * order names a known machine or the field was not read.
+ */
+int
+iself(void)
+{
+	char *cpu[] = {		/* NB: incomplete and arbitrary list */
+	[1]	"WE32100",
+	[2]	"SPARC",
+	[3]	"i386",
+	[4]	"M68000",
+	[5]	"M88000",
+	[6]	"i486",
+	[7]	"i860",
+	[8]	"R3000",
+	[9]	"S370",
+	[10]	"R4000",
+	[15]	"HP-PA",
+	[18]	"sparc v8+",
+	[19]	"i960",
+	[20]	"PPC-32",
+	[21]	"PPC-64",
+	[40]	"ARM",
+	[41]	"Alpha",
+	[43]	"sparc v9",
+	[50]	"IA-64",
+	[62]	"AMD64",
+	[75]	"VAX",
+	};
+	char *p;
+	int n;
+
+	/* bytes past nbuf are left over from an earlier file */
+	if (nbuf < 4 || memcmp(buf, "\x7f" "ELF", 4) != 0)
+		return 0;
+
+	if (mime){
+		print("application/x-elf-executable\n");
+		return 1;
+	}
+
+	p = "unknown";
+	if (nbuf >= 20){
+		n = (buf[19] << 8) | buf[18];
+		if (n > 0 && n < nelem(cpu) && cpu[n])
+			p = cpu[n];
+		else {
+			/* try the other byte order */
+			n = (buf[18] << 8) | buf[19];
+			if (n > 0 && n < nelem(cpu) && cpu[n])
+				p = cpu[n];
+		}
+	}
+	print("%s ELF executable\n", p);
+	return 1;
+}
blob - /dev/null
blob + 3fb60ac66c790f5c14c5a8fa69630c10c2bac7d3 (mode 644)
--- /dev/null
+++ src/cmd/postscript/text2post/mkfile
@@ -0,0 +1,23 @@
+# mkfile for text2post: builds the single program text2post from
+# text2post.c and, on install, also copies pjw.char.ps into $POSTLIB.
+<$PLAN9/src/mkhdr
+
+<../config
+
+COMMONDIR=../common
+
+TARG=text2post
+
+OFILES=text2post.$O\
+
+HFILES=$COMMONDIR/comments.h\
+	$COMMONDIR/path.h\
+
+BIN=$POSTBIN
+<$PLAN9/src/mkone
+
+# -I$COMMONDIR lets text2post.c include <comments.h> and <path.h>
+CFLAGS=-c -D'PROGRAMVERSION="0.1"' -D'DOROUND=1' -I$COMMONDIR
+
+# installing also installs the PostScript glyph file
+install:V:	$POSTLIB/pjw.char.ps
+
+$POSTLIB/pjw.char.ps:	pjw.char.ps
+	cp $prereq $target
+
blob - /dev/null
blob + 5530861894ae0f66dc9e565e126a72d9f39d8cdf (mode 644)
--- /dev/null
+++ src/cmd/postscript/text2post/pjw.char.ps
@@ -0,0 +1,142 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%BoundingBox: 1 -1 199 258
+%%Creator: MetaPost
+%%CreationDate: 1994.06.28:1046
+% pjw1: appends one closed subpath (moveto ... closepath) to the
+% current path.  It only builds the path; pw does the painting.
+/pjw1 {
+37 211 moveto
+37 206 lineto
+41 206 lineto
+43 212 lineto
+44 212 46 212 46 210 curveto
+41 198 35 186 35 174 curveto
+50 174 66 175 81 173 curveto
+81 171 lineto
+61 171 lineto
+61 170 lineto
+86 170 lineto
+88 168 89 166 90 164 curveto
+101 164 111 169 122 172 curveto
+139 172 lineto
+125 194 115 219 93 233 curveto
+87 234 80 236 74 235 curveto
+64 229 54 224 46 217 curveto
+45 217 44 217 44 218 curveto
+61 242 lineto
+67 248 72 258 82 258 curveto
+94 257 107 256 119 254 curveto
+134 247 147 239 161 231 curveto
+180 231 lineto
+180 226 174 228 171 226 curveto
+178 220 185 214 192 207 curveto
+189 207 187 207 187 205 curveto
+190 205 193 204 196 203 curveto
+198 193 202 181 193 181 curveto
+193 177 198 175 198 171 curveto
+196 171 194 171 194 169 curveto
+195 165 196 161 198 157 curveto
+194 147 193 135 184 130 curveto
+180 130 lineto
+169 107 lineto
+171 103 176 104 180 104 curveto
+180 93 176 81 166 76 curveto
+162 76 157 77 155 80 curveto
+153 80 152 80 151 80 curveto
+151 71 lineto
+155 71 lineto
+155 68 153 65 152 63 curveto
+145 62 lineto
+141 47 144 31 144 15 curveto
+141 0 lineto
+124 0 107 -1 90 1 curveto
+72 36 lineto
+86 48 105 49 122 54 curveto
+122 54 lineto
+110 61 97 65 84 69 curveto
+81 71 78 73 78 76 curveto
+86 77 93 77 101 77 curveto
+106 79 113 79 113 84 curveto
+95 86 77 87 59 89 curveto
+59 92 61 95 64 95 curveto
+69 88 80 92 89 92 curveto
+95 92 104 90 104 96 curveto
+93 97 lineto
+87 103 82 109 77 115 curveto
+89 116 101 117 113 118 curveto
+99 154 lineto
+90 154 87 144 82 138 curveto
+77 132 73 124 66 120 curveto
+63 120 59 119 59 122 curveto
+62 123 66 123 66 126 curveto
+58 126 50 127 42 128 curveto
+34 137 lineto
+34 140 34 144 31 144 curveto
+30 135 31 125 31 116 curveto
+25 116 22 122 19 127 curveto
+16 131 15 136 12 139 curveto
+5 139 15 127 9 127 curveto
+3 133 3 142 1 150 curveto
+1 158 6 166 9 173 curveto
+18 186 25 199 35 211 curveto
+closepath
+} bind def
+
+% pjw2: appends seven further closed subpaths to the current path.
+% pw fills them together with pjw1 using the even-odd rule (eofill).
+/pjw2 {
+27 112 moveto
+40 75 lineto
+40 74 38 73 37 73 curveto
+25 87 lineto
+17 112 lineto
+closepath 
+43 154 moveto
+43 167 lineto
+41 167 38 167 39 169 curveto
+57 171 lineto
+74 167 lineto
+74 166 73 165 72 165 curveto
+64 164 56 162 49 158 curveto
+48 154 45 149 49 149 curveto
+51 151 53 152 55 154 curveto
+87 153 lineto
+87 144 80 136 74 129 curveto
+64 128 53 126 43 129 curveto
+37 135 lineto
+37 138 36 141 36 145 curveto
+40 145 lineto
+41 148 42 151 43 154 curveto closepath 
+108 145 moveto
+119 145 130 143 141 140 curveto
+146 134 155 129 149 126 curveto
+138 124 126 123 116 129 curveto
+113 134 108 139 108 145 curveto
+closepath 
+114 96 moveto
+116 103 118 110 121 117 curveto
+128 117 134 112 139 107 curveto
+139 101 137 96 132 93 curveto
+closepath 
+134 162 moveto
+115 162 lineto
+115 162 115 163 115 164 curveto
+134 164 lineto
+closepath 
+117 73 moveto
+115 78 121 81 125 85 curveto
+129 85 lineto
+130 83 131 81 131 79 curveto
+128 74 lineto
+124 74 121 73 117 73 curveto closepath 
+141 119 moveto
+134 119 126 117 126 123 curveto
+131 123 136 122 141 121 curveto
+closepath
+} bind def
+
+% pw: paint the glyph at the current point, then move the current
+% point 6 units to the right.
+% `pointsize' is not defined in this file; presumably the including
+% prologue defines it - confirm.
+/pw {
+%% pop
+gsave
+% scale the outline coordinates by .0022 * pointsize in both directions
+pointsize .0022 mul dup scale
+% put the origin of the outline at the current point
+currentpoint translate
+% build both paths, then fill with the even-odd rule
+ pjw1 pjw2 eofill
+grestore
+6 0 rmoveto
+} bind def
blob - /dev/null
blob + cf6b109e4685dcca21bb823593a1313b4a427937 (mode 644)
--- /dev/null
+++ src/cmd/postscript/text2post/text2post.c
@@ -0,0 +1,564 @@
+#include <u.h>
+#include <libc.h>
+#include <ctype.h>
+#include <bio.h>
+#include <comments.h>
+#include <path.h>
+
+/*
+ * PostScript file copied into the setup section by prologues().
+ * NOTE(review): presumably it defines the "pw" operator that txt2post()
+ * emits for characters that have no font -- confirm against the prologue.
+ */
+#define UNKNOWNCHAR	unsharp("#9/postscript/prologues/pjw.char.ps")
+
+/* option letters, getopt style; not referenced anywhere else in this file */
+char	*optnames = "a:c:f:l:m:n:o:p:s:t:x:y:P:";
+
+/*
+ * Buffered standard streams.  Each Bxxx is assigned the same pointer as
+ * the matching bxxx; the Bprint/Bread/Bgetrune calls in this file use
+ * the Bxxx names.
+ */
+Biobuf *bstdin, *bstdout, *bstderr;
+Biobuf *Bstdin, *Bstdout, *Bstderr;
+int char_no = 0;		/* character to be done on a line */
+int line_no = 0;		/* line number on a page */
+int page_no = 0;		/* page number in a document */
+int in_string;		/* Boolean, to know whether or not we are inside a Postscript string */
+/* spaces and tabs seen but not yet written; flushed by prspace() and prtab() */
+int spaces = 0;
+int tabs = 0;
+/* pages actually written so far (those for which pageon() was true) */
+int pages_printed;
+/* -a, -c, -m, -p, -n: written into the setup section when not the default */
+double aspectratio = 1.0;
+int copies = 1;
+double magnification = 1.0;
+int landscape = 0;
+int formsperpage = 1;
+/* -l: txt2post() ends the page once line_no exceeds this */
+int linesperpage = 66;
+/* -s, -x, -y: written into the setup section when not the default */
+int pointsize = 10;
+double xoffset = .25;
+double yoffset = .25;
+/* -P: PostScript text copied verbatim into the setup section, or 0 */
+char *passthrough = 0;
+/* number of bytes currently allocated for pplist */
+static int pplistmaxsize=0;
+
+unsigned char *pplist=0;	/* bitmap list for storing pages to print */
+
+/*
+ * One entry of the charcode and fontname tables below.
+ */
+struct strtab {
+	int size;	/* length of str; 0 marks an empty fontname slot */
+	char *str;	/* text written to the PostScript output */
+	int used;	/* incremented when a font is selected; finish() lists fonts with used != 0 */
+};
+
+/*
+ * Text written inside a PostScript string for each byte value 0-255.
+ * Printable ASCII is written as itself, except that '(' , ')' and '\'
+ * are preceded by a backslash; every other value is written as a
+ * backslash followed by three octal digits.  size is the length of str;
+ * the used field is not initialized here and so is zero.
+ */
+struct strtab charcode[256] = {
+	{4, "\\000"}, {4, "\\001"}, {4, "\\002"}, {4, "\\003"},
+	{4, "\\004"}, {4, "\\005"}, {4, "\\006"}, {4, "\\007"},
+	{4, "\\010"}, {4, "\\011"}, {4, "\\012"}, {4, "\\013"},
+	{4, "\\014"}, {4, "\\015"}, {4, "\\016"}, {4, "\\017"},
+	{4, "\\020"}, {4, "\\021"}, {4, "\\022"}, {4, "\\023"},
+	{4, "\\024"}, {4, "\\025"}, {4, "\\026"}, {4, "\\027"},
+	{4, "\\030"}, {4, "\\031"}, {4, "\\032"}, {4, "\\033"},
+	{4, "\\034"}, {4, "\\035"}, {4, "\\036"}, {4, "\\037"},
+	{1, " "}, {1, "!"}, {1, "\""}, {1, "#"},
+	{1, "$"}, {1, "%"}, {1, "&"}, {1, "'"},
+	{2, "\\("}, {2, "\\)"}, {1, "*"}, {1, "+"},
+	{1, ","}, {1, "-"}, {1, "."}, {1, "/"},
+	{1, "0"}, {1, "1"}, {1, "2"}, {1, "3"},
+	{1, "4"}, {1, "5"}, {1, "6"}, {1, "7"},
+	{1, "8"}, {1, "9"}, {1, ":"}, {1, ";"},
+	{1, "<"}, {1, "="}, {1, ">"}, {1, "?"},
+	{1, "@"}, {1, "A"}, {1, "B"}, {1, "C"},
+	{1, "D"}, {1, "E"}, {1, "F"}, {1, "G"},
+	{1, "H"}, {1, "I"}, {1, "J"}, {1, "K"},
+	{1, "L"}, {1, "M"}, {1, "N"}, {1, "O"},
+	{1, "P"}, {1, "Q"}, {1, "R"}, {1, "S"},
+	{1, "T"}, {1, "U"}, {1, "V"}, {1, "W"},
+	{1, "X"}, {1, "Y"}, {1, "Z"}, {1, "["},
+	{2, "\\\\"}, {1, "]"}, {1, "^"}, {1, "_"},
+	{1, "`"}, {1, "a"}, {1, "b"}, {1, "c"},
+	{1, "d"}, {1, "e"}, {1, "f"}, {1, "g"},
+	{1, "h"}, {1, "i"}, {1, "j"}, {1, "k"},
+	{1, "l"}, {1, "m"}, {1, "n"}, {1, "o"},
+	{1, "p"}, {1, "q"}, {1, "r"}, {1, "s"},
+	{1, "t"}, {1, "u"}, {1, "v"}, {1, "w"},
+	{1, "x"}, {1, "y"}, {1, "z"}, {1, "{"},
+	{1, "|"}, {1, "}"}, {1, "~"}, {4, "\\177"},
+	{4, "\\200"}, {4, "\\201"}, {4, "\\202"}, {4, "\\203"},
+	{4, "\\204"}, {4, "\\205"}, {4, "\\206"}, {4, "\\207"},
+	{4, "\\210"}, {4, "\\211"}, {4, "\\212"}, {4, "\\213"},
+	{4, "\\214"}, {4, "\\215"}, {4, "\\216"}, {4, "\\217"},
+	{4, "\\220"}, {4, "\\221"}, {4, "\\222"}, {4, "\\223"},
+	{4, "\\224"}, {4, "\\225"}, {4, "\\226"}, {4, "\\227"},
+	{4, "\\230"}, {4, "\\231"}, {4, "\\232"}, {4, "\\233"},
+	{4, "\\234"}, {4, "\\235"}, {4, "\\236"}, {4, "\\237"},
+	{4, "\\240"}, {4, "\\241"}, {4, "\\242"}, {4, "\\243"},
+	{4, "\\244"}, {4, "\\245"}, {4, "\\246"}, {4, "\\247"},
+	{4, "\\250"}, {4, "\\251"}, {4, "\\252"}, {4, "\\253"},
+	{4, "\\254"}, {4, "\\255"}, {4, "\\256"}, {4, "\\257"},
+	{4, "\\260"}, {4, "\\261"}, {4, "\\262"}, {4, "\\263"},
+	{4, "\\264"}, {4, "\\265"}, {4, "\\266"}, {4, "\\267"},
+	{4, "\\270"}, {4, "\\271"}, {4, "\\272"}, {4, "\\273"},
+	{4, "\\274"}, {4, "\\275"}, {4, "\\276"}, {4, "\\277"},
+	{4, "\\300"}, {4, "\\301"}, {4, "\\302"}, {4, "\\303"},
+	{4, "\\304"}, {4, "\\305"}, {4, "\\306"}, {4, "\\307"},
+	{4, "\\310"}, {4, "\\311"}, {4, "\\312"}, {4, "\\313"},
+	{4, "\\314"}, {4, "\\315"}, {4, "\\316"}, {4, "\\317"},
+	{4, "\\320"}, {4, "\\321"}, {4, "\\322"}, {4, "\\323"},
+	{4, "\\324"}, {4, "\\325"}, {4, "\\326"}, {4, "\\327"},
+	{4, "\\330"}, {4, "\\331"}, {4, "\\332"}, {4, "\\333"},
+	{4, "\\334"}, {4, "\\335"}, {4, "\\336"}, {4, "\\337"},
+	{4, "\\340"}, {4, "\\341"}, {4, "\\342"}, {4, "\\343"},
+	{4, "\\344"}, {4, "\\345"}, {4, "\\346"}, {4, "\\347"},
+	{4, "\\350"}, {4, "\\351"}, {4, "\\352"}, {4, "\\353"},
+	{4, "\\354"}, {4, "\\355"}, {4, "\\356"}, {4, "\\357"},
+	{4, "\\360"}, {4, "\\361"}, {4, "\\362"}, {4, "\\363"},
+	{4, "\\364"}, {4, "\\365"}, {4, "\\366"}, {4, "\\367"},
+	{4, "\\370"}, {4, "\\371"}, {4, "\\372"}, {4, "\\373"},
+	{4, "\\374"}, {4, "\\375"}, {4, "\\376"}, {4, "\\377"}
+};
+
+#define FONTABSIZE 0x27
+
+struct strtab fontname[FONTABSIZE] = {
+	{19, "LucidaSansUnicode00", 0},
+	{19, "LucidaSansUnicode01", 0},
+	{19, "LucidaSansUnicode02", 0},
+	{19, "LucidaSansUnicode03", 0},
+	{19, "LucidaSansUnicode04", 0},
+	{19, "LucidaSansUnicode05", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{0, "", 0},
+	{19, "LucidaSansUnicode20", 0},
+	{19, "LucidaSansUnicode21", 0},
+	{19, "LucidaSansUnicode22", 0},
+	{0, "", 0},
+	{19, "LucidaSansUnicode24", 0},
+	{19, "LucidaSansUnicode25", 0},
+	{7, "Courier", 0}
+};
+
+/*
+ * cat - copy the contents of filename to Bstdout.
+ * (This was taken from postprint.)
+ *
+ * Returns 0 on success and 1 if the file cannot be opened or if a read
+ * or write fails.  The file is opened once and is closed again on every
+ * path; the earlier version opened it twice, stored one handle in the
+ * global bstdin/Bstdin and never closed either, leaking two descriptors
+ * per call.
+ */
+int
+cat(char *filename) {
+	Biobuf *bfile;
+	long n;
+	int status;
+	static char buf[Bsize];
+
+	if ((bfile = Bopen(filename, OREAD)) == 0)
+		return(1);
+
+	status = 0;
+	while ((n = Bread(bfile, buf, Bsize)) > 0) {
+		if (Bwrite(Bstdout, buf, n) != n) {
+			status = 1;
+			break;
+		}
+	}
+	/* a negative count from Bread is a read error; 0 is end of file */
+	if (n < 0)
+		status = 1;
+	Bterm(bfile);
+	return(status);
+}
+
+/*
+ * prologues - write everything that precedes the first page: the
+ * document header comments, the postprint prologue, the helper
+ * definitions used by txt2post() (f, tabwidth, tab, spacewidth, sp) and
+ * the setup section carrying the values chosen on the command line.
+ *
+ * Exits with status "prologue" if the main prologue file cannot be
+ * copied.  The tab width comes from the environment variable tabstop;
+ * a missing, unparsable, zero or negative value means 8.
+ */
+void
+prologues(void) {
+	char *ts, *unknown;
+	int tabstop;
+
+	Bprint(Bstdout, "%s", CONFORMING);
+	Bprint(Bstdout, "%s %s\n", VERSION, PROGRAMVERSION);
+	Bprint(Bstdout, "%s %s\n", DOCUMENTFONTS, ATEND);
+	Bprint(Bstdout, "%s %s\n", PAGES, ATEND);
+	Bprint(Bstdout, "%s", ENDCOMMENTS);
+
+	if (cat(POSTPRINT)) {
+		Bprint(Bstderr, "can't read %s\n", POSTPRINT);
+		exits("prologue");
+	}
+
+	if (DOROUND)
+		cat(ROUNDPAGE);
+
+	tabstop = 0;
+	ts = getenv("tabstop");
+	if(ts != nil)
+		tabstop = strtol(ts, nil, 0);
+	/* a negative count would make the loop below run almost forever */
+	if(tabstop <= 0)
+		tabstop = 8;
+	Bprint(Bstdout, "/f {findfont pointsize scalefont setfont} bind def\n");
+	/* tabwidth is the width of tabstop letters 'n' in Courier */
+	Bprint(Bstdout, "/tabwidth /Courier f (");
+	while(tabstop-- > 0)
+		Bputc(Bstdout, 'n');
+	Bprint(Bstdout, ") stringwidth pop def\n");
+	Bprint(Bstdout, "/tab {tabwidth 0 ne {currentpoint 3 1 roll exch tabwidth mul add tabwidth\n");
+	Bprint(Bstdout, "\tdiv truncate tabwidth mul exch moveto} if} bind def\n");
+	Bprint(Bstdout, "/spacewidth /%s f ( ) stringwidth pop def\n", fontname[0].str);
+	Bprint(Bstdout, "/sp {spacewidth mul 0 rmoveto} bind def\n");
+	Bprint(Bstdout, "%s", ENDPROLOG);
+	Bprint(Bstdout, "%s", BEGINSETUP);
+	Bprint(Bstdout, "mark\n");
+
+	if (formsperpage > 1) {
+		Bprint(Bstdout, "%s %d\n", FORMSPERPAGE, formsperpage);
+		Bprint(Bstdout, "/formsperpage %d def\n", formsperpage);
+	}
+	if (aspectratio != 1) Bprint(Bstdout, "/aspectratio %g def\n", aspectratio);
+	if (copies != 1) Bprint(Bstdout, "/#copies %d store\n", copies);
+	if (landscape) Bprint(Bstdout, "/landscape true def\n");
+	/* magnification is a double: it must be printed with %g, not %s */
+	if (magnification != 1) Bprint(Bstdout, "/magnification %g def\n", magnification);
+	if (pointsize != 10) Bprint(Bstdout, "/pointsize %d def\n", pointsize);
+	if (xoffset != .25) Bprint(Bstdout, "/xoffset %g def\n", xoffset);
+	if (yoffset != .25) Bprint(Bstdout, "/yoffset %g def\n", yoffset);
+	cat(unsharp("#9/postscript/prologues/Latin1.enc"));
+	if (passthrough != 0) Bprint(Bstdout, "%s\n", passthrough);
+	Bprint(Bstdout, "setup\n");
+	if (formsperpage > 1) {
+		cat(FORMFILE);
+		Bprint(Bstdout, "%d setupforms \n", formsperpage);
+	}
+	/* expand the path once and use it for both the copy and the message */
+	unknown = UNKNOWNCHAR;
+	if (cat(unknown))
+		Bprint(Bstderr, "cannot open %s\n", unknown);
+	Bprint(Bstdout, "%s", ENDSETUP);
+}
+
+/*
+ * pageon - return 1 if the current page (page_no) is to be written,
+ * 0 otherwise.  Without a page list every page numbered above 0 is on;
+ * with one, the page is on when its bit is set in pplist.
+ */
+int
+pageon(void) {
+	int byte, bit;
+
+	/* no page list, print all pages */
+	if (pplist == 0 && page_no != 0)
+		return(1);
+
+	byte = page_no/8;
+	bit = page_no%8;
+	if (byte >= pplistmaxsize)
+		return(0);
+	return((pplist[byte] & 1<<bit) ? 1 : 0);
+}
+
+/*
+ * startpage - advance the character, line and page counters and, when
+ * the new page is selected, write its opening comment and set-up code.
+ */
+void
+startpage(void) {
+	char_no++;
+	line_no++;
+	page_no++;
+	if (!pageon())
+		return;
+
+	pages_printed++;
+	Bprint(Bstdout, "%s %d %d\n", PAGE, page_no, pages_printed);
+	Bprint(Bstdout, "/saveobj save def\n");
+	Bprint(Bstdout, "mark\n");
+	Bprint(Bstdout, "%d pagesetup\n", pages_printed);
+}
+
+/*
+ * endpage - reset the line and character counters and, when the page
+ * is selected, write the code and comment that close it.
+ */
+void
+endpage(void) {
+	char_no = 0;
+	line_no = 0;
+	if (!pageon())
+		return;
+
+	Bprint(Bstdout, "cleartomark\n");
+	Bprint(Bstdout, "showpage\n");
+	Bprint(Bstdout, "saveobj restore\n");
+	Bprint(Bstdout, "%s %d %d\n", ENDPAGE, page_no, pages_printed);
+}
+
+/*
+ * startstring - open a PostScript string unless one is already open.
+ * The state changes even when the page is not being written.
+ */
+void
+startstring(void) {
+	if (in_string)
+		return;
+	if (pageon())
+		Bprint(Bstdout, "(");
+	in_string = 1;
+}
+
+/*
+ * endstring - close the open PostScript string, if any, and show it.
+ * The state changes even when the page is not being written.
+ */
+void
+endstring(void) {
+	if (!in_string)
+		return;
+	if (pageon())
+		Bprint(Bstdout, ") show ");
+	in_string = 0;
+}
+
+/*
+ * prspace - write out the pending run of spaces, if any, as a single
+ * "sp" call, closing the current string first.
+ */
+void
+prspace(void) {
+	if (spaces == 0)
+		return;
+	endstring();
+	if (pageon())
+		Bprint(Bstdout, "%d sp ", spaces);
+	spaces = 0;
+}
+
+/*
+ * prtab - write out the pending run of tabs, if any, as a single
+ * "tab" call, closing the current string first.
+ */
+void
+prtab(void) {
+	if (tabs == 0)
+		return;
+	endstring();
+	if (pageon())
+		Bprint(Bstdout, "%d tab ", tabs);
+	tabs = 0;
+}
+
+/*
+ * txt2post - translate the text read from Bstdin into PostScript on
+ * Bstdout.
+ *
+ * Each rune is split into a character code (its low byte) and a font
+ * index (the bits above it).  Runs of characters in one font become
+ * "(...) show"; spaces after a non-primary font and tabs are collected
+ * and written as "n sp" and "n tab"; newline and form feed end the
+ * line with ")l"; form feed or exceeding linesperpage ends the page.
+ * A character whose font index is outside fontname, or lands on an
+ * empty entry, is written as "pw".  Output is produced only for pages
+ * for which pageon() is true, but all counters advance regardless.
+ */
+void
+txt2post(void) {
+	int lastfont = -1;
+	int lastchar = -1;
+	int thisfont, thischar, known;
+	long r;
+
+	in_string = 0;
+	char_no = 0;
+	line_no = 0;
+	page_no = 0;
+	spaces = 0;
+	tabs = 0;	/* do not inherit pending tabs from a previous file */
+	fontname[0].used++;
+	while ((r=Bgetrune(Bstdin)) >= 0) {
+		thischar = r & 0xff;
+		/*
+		 * Keep every bit above the low byte.  Masking with 0xff here
+		 * would make runes above 0xFFFF look like characters of the
+		 * low-numbered fonts instead of unknown characters.
+		 */
+		thisfont = r >> 8;
+		known = thisfont < FONTABSIZE && fontname[thisfont].size != 0;
+
+		if (line_no == 0 && char_no == 0)
+			startpage();
+
+		if (line_no == 1 && char_no == 1) {
+			/*
+			 * Select the font at the top of each page, but only index
+			 * fontname when the index is valid.  Otherwise forget the
+			 * last font so the next known character selects its own.
+			 */
+			if (known) {
+				if (pageon()) Bprint(Bstdout, " /%s f\n", fontname[thisfont].str);
+				fontname[thisfont].used++;
+				lastfont = thisfont;
+			} else
+				lastfont = -1;
+		}
+
+		switch (r) {
+		case ' ':
+			prtab();
+			if (lastfont > 0) {
+				spaces++;
+				continue;
+			}
+			break;
+		case '\n':
+		case '\f':
+			startstring();
+			if (pageon()) Bprint(Bstdout, ")l\n");
+			char_no = 1;
+			in_string = 0;
+			spaces = 0;
+			tabs = 0;
+			if (++line_no > linesperpage || r == '\f') {
+				endpage();
+			}
+			lastchar = -1;
+			continue;
+		case '\t':
+			prspace();
+			tabs++;
+			char_no++;
+			lastchar = -1;
+			continue;
+		case '\b':
+			/* back up over the previous character, if there was one */
+			if (lastchar != -1) {
+				endstring();
+				if (pageon()) Bprint(Bstdout, "(%s) stringwidth pop neg 0 rmoveto ", charcode[lastchar].str);
+			}
+			char_no++;
+			lastchar = -1;
+			continue;
+		}
+
+		/* do something if font is out of table range */
+		if (!known) {
+			prspace();
+			prtab();
+			endstring();
+			/* like all other output, only on pages being written */
+			if (pageon()) Bprint(Bstdout, "pw ");
+			char_no++;
+			lastchar = -1;
+			continue;
+		}
+
+		if (thisfont != lastfont) {
+			endstring();
+			if (pageon()) {
+				Bprint(Bstdout, "/%s f\n", fontname[thisfont].str);
+			}
+			fontname[thisfont].used++;
+		}
+		prspace();
+		prtab();
+		startstring();
+		if (pageon()) Bprint(Bstdout, "%s", charcode[thischar].str);
+		char_no++;
+		lastchar = thischar;
+		lastfont = thisfont;
+	}
+	if (line_no != 0 || char_no != 0) {
+		/* the input did not end with a newline: finish the line */
+		if (char_no != 1) {
+			Bprint(Bstderr, "premature EOF: newline appended\n");
+			startstring();
+			if (pageon()) Bprint(Bstdout, ")l\n");
+		}
+		endpage();
+	}
+}
+
+/*
+ * pagelist - parse a page list such as "1,3-5,9" and set the bit for
+ * each listed page in the pplist bitmap, growing it as needed.
+ *
+ * list is a string of decimal numbers separated by ',' (single pages)
+ * or '-' (ranges); a missing number counts as 0.  A nil list is
+ * ignored.  Any other character is reported on Bstderr and skipped.
+ * Exits with status "malloc" if the bitmap cannot be grown.
+ */
+void
+pagelist(char *list) {
+	int c;
+	int n, m, need;
+	int state, start;
+	unsigned char *grown;
+
+	if (list == 0) return;
+	state = 1;
+	start = 0;
+	/* go through unsigned char so isdigit never sees a negative value */
+	while ((c=(unsigned char)*list) != '\0') {
+		n = 0;
+		while (isdigit(c)) {
+			/* stop accumulating before n can overflow an int */
+			if (n < 1000000)
+				n = n * 10 + (c - '0');
+			c = (unsigned char)*++list;
+		}
+		switch (state) {
+		case 1:
+			start = n;
+			/* fall through */
+		case 2:
+			need = n/8+1;
+			if (need > pplistmaxsize) {
+				if ((grown = realloc(pplist, need)) == 0) {
+					Bprint(Bstderr, "cannot allocate memory for page list\n");
+					exits("malloc");
+				}
+				/* realloc does not clear the new bytes; stale bits would select random pages */
+				memset(grown+pplistmaxsize, 0, need-pplistmaxsize);
+				pplist = grown;
+				pplistmaxsize = need;
+			}
+			for (m=start; m<=n; m++)
+				pplist[m/8] |= 1<<(m%8);
+			break;
+		}
+		switch (c) {
+		case '-':
+			state = 2;
+			list++;
+			break;
+		case ',':
+			state = 1;
+			list++;
+			break;
+		case '\0':
+			break;
+		default:
+			/* without this the loop would never move past the character */
+			Bprint(Bstderr, "bad character %c in page list\n", c);
+			state = 1;
+			list++;
+			break;
+		}
+	}
+}
+
+/*
+ * finish - write the document trailer: the names of all fonts marked
+ * as used and the number of pages written.
+ */
+void
+finish(void) {
+	struct strtab *f;
+
+	Bprint(Bstdout, "%s", TRAILER);
+	Bprint(Bstdout, "done\n");
+	Bprint(Bstdout, "%s", DOCUMENTFONTS);
+
+	for (f = fontname; f < fontname+FONTABSIZE; f++) {
+		if (f->used == 0)
+			continue;
+		Bprint(Bstdout, " %s", f->str);
+	}
+	Bprint(Bstdout, "\n");
+
+	Bprint(Bstdout, "%s %d\n", PAGES, pages_printed);
+}
+
+/*
+ * needarg - return arg, the argument of option opt, or report that it
+ * is missing and exit with status "usage" when arg is nil.
+ */
+static char*
+needarg(char *arg, int opt) {
+	if (arg == 0) {
+		Bprint(Bstderr, "option %C requires an argument\n", opt);
+		exits("usage");
+	}
+	return(arg);
+}
+
+/*
+ * main - set up buffered stderr and stdout, process the options, write
+ * the prologue, translate standard input (when no files are named) or
+ * each named file in turn, and write the trailer.  A file that cannot
+ * be opened is reported and skipped.
+ */
+main(int argc, char *argv[]) {
+	int i;
+	char *t;
+
+	/* malloc reports failure with a nil pointer, so test for equality */
+	if ((bstderr = malloc(sizeof(Biobuf))) == 0)
+		exits("malloc");
+	if (Binit(bstderr, 2, OWRITE) == Beof)
+		exits("Binit");
+	Bstderr = bstderr;
+
+	if ((bstdout = malloc(sizeof(Biobuf))) == 0)
+		exits("malloc");
+	if (Binit(bstdout, 1, OWRITE) == Beof)
+		exits("Binit");
+	Bstdout = bstdout;
+
+	ARGBEGIN{
+		case 'a':			/* aspect ratio */
+			aspectratio = atof(needarg(ARGF(), ARGC()));
+			break;
+		case 'c':			/* copies */
+			copies = atoi(needarg(ARGF(), ARGC()));
+			break;
+		case 'f':			/* primary font, for now */
+			t = needarg(ARGF(), ARGC());
+			fontname[0].str = malloc(strlen(t)+1);
+			if (fontname[0].str == 0) {
+				Bprint(Bstderr, "cannot allocate memory for argument string\n");
+				exits("malloc");
+			}
+			strcpy(fontname[0].str, t);
+			break;
+		case 'l':			/* lines per page */
+			linesperpage = atoi(needarg(ARGF(), ARGC()));
+			break;
+		case 'm':			/* magnification */
+			magnification = atof(needarg(ARGF(), ARGC()));
+			break;
+		case 'n':			/* forms per page */
+			formsperpage = atoi(needarg(ARGF(), ARGC()));
+			break;
+		case 'o':			/* output page list */
+			pagelist(needarg(ARGF(), ARGC()));
+			break;
+		case 'p':			/* landscape or portrait mode */
+			if ( needarg(ARGF(), ARGC())[0] == 'l' )
+				landscape = 1;
+			else
+				landscape = 0;
+			break;
+		case 's':			/* point size */
+			pointsize = atoi(needarg(ARGF(), ARGC()));
+			break;
+		case 'x':			/* shift things horizontally */
+			xoffset = atof(needarg(ARGF(), ARGC()));
+			break;
+
+		case 'y':			/* and vertically on the page */
+			yoffset = atof(needarg(ARGF(), ARGC()));
+			break;
+		case 'P':			/* PostScript pass through */
+			t = needarg(ARGF(), ARGC());
+			passthrough = malloc(strlen(t)+1);
+			if (passthrough == 0) {
+				Bprint(Bstderr, "cannot allocate memory for argument string\n");
+				exits("malloc");
+			}
+			strcpy(passthrough, t);
+			break;
+		default:			/* don't know what to do for ch */
+			Bprint(Bstderr, "unknown option %C\n", ARGC());
+			break;
+	}ARGEND;
+	prologues();
+	if (argc <= 0) {
+		if ((bstdin = malloc(sizeof(Biobuf))) == 0)
+			exits("malloc");
+		if (Binit(bstdin, 0, OREAD) == Beof) {
+			fprint(2, "cannot Binit stdin\n");
+			exits("Binit");
+		}
+		Bstdin = bstdin;
+		txt2post();
+	}
+	for (i=0; i<argc; i++) {
+		bstdin = Bopen(argv[i], 0);
+		if (bstdin == 0) {
+			fprint(2, "cannot open file %s\n", argv[i]);
+			continue;
+		}
+		Bstdin = bstdin;
+		txt2post();
+		/* close each input once translated instead of leaking it */
+		Bterm(bstdin);
+		bstdin = Bstdin = 0;
+	}
+	finish();
+	exits("");
+}