29 long ninput, noutput, nrunes, nerrors; /* counters printed by the "input bytes, runes, output bytes (errors)" message */
33 char obuf[UTFmax*N]; /* maximum bloat from N runes */
36 extern char version[]; /* defined elsewhere; printed by the "version = " message */
39 void intable(int, long *, struct convert *); /* input routine that PROC invokes for Table charsets */
40 void unicode_in(int, long *, struct convert *); /* input routine registered for "unicode" in convert[] */
41 void unicode_out(Rune *, int, long *); /* output routine registered for "unicode" in convert[] */
44 main(int argc, char **argv)
50 struct convert *t, *f;
87 #define PROC {if(f->flags&Table)\
88 intable(fd, (long *)f->data, t);\
90 ((Infn)(f->fn))(fd, (long *)0, t);}
95 if((fd = open(*argv, 0)) < 0){
96 EPR "%s: %s: %s\n", argv0, *argv, strerror(errno));
98 if((fd = open(*argv, OREAD)) < 0){
99 EPR "%s: %s: %r\n", argv0, *argv);
101 EXIT(1, "open failure");
112 EPR "%s: %ld input bytes, %ld runes, %ld output bytes (%ld errors)\n", argv0,
113 ninput, nrunes, noutput, nerrors);
114 EXIT(((nerrors && squawk)? 1:0), ((nerrors && squawk)? "conversion error":0));
115 return(0); /* shut up compiler */
121 EPR "Usage: %s [-slv] [-f cs] [-t cs] [file ...]\n", argv0);
130 char ch = verbose?'\t':' ';
133 EPR "%s version = '%s'\n", argv0, version);
136 EPR "character sets:\n");
139 for(c = convert; c->name; c++){
140 if((c->flags&From) && c[1].name && (strcmp(c[1].name, c->name) == 0)){
141 EPR "%c%s", ch, c->name);
143 } else if(c->flags&Table)
144 EPR "%c%s", ch, c->name);
145 else if(c->flags&From)
146 EPR "%c%s(from)", ch, c->name);
148 EPR "%c%s(to)", ch, c->name);
150 EPR "\t%s\n", c->chatter);
157 conv(char *name, int from)
161 for(c = convert; c->name; c++){
162 if(strcmp(c->name, name) != 0)
166 if(((c->flags&From) == 0) == (from == 0))
169 EPR "%s: charset `%s' unknown\n", argv0, name);
170 EXIT(1, "unknown character set");
171 return(0); /* just shut the compiler up */
175 swab2(char *b, int n)
179 for(e = b+n; b < e; b++){
187 unicode_in(int fd, long *notused, struct convert *out)
194 if(read(fd, (char *)buf, 2) != 2)
208 while((n = read(fd, (char *)buf, 2*N)) > 0){
212 EPR "%s: odd byte count in %s\n", argv0, file);
218 buf[n/2] = Runeerror;
219 if(swabme) /* swab so later swab undoes it */
220 swab2((char *)&buf[n/2], 2);
224 swab2((char *)buf, n);
230 unicode_out(Rune *base, int n, long *notused)
232 static int first = 1;
237 unsigned short x = 0xFEFF;
239 write(1, (char *)&x, 2);
243 write(1, (char *)base, 2*n);
247 intable(int fd, long *table, struct convert *out)
255 while((n = read(fd, (char *)buf, N)) > 0){
258 for(p = buf, e = buf+n; p < e; p++){
262 EPR "%s: bad char 0x%x near byte %ld in %s\n", argv0, *p, ninput+(p-e), file);
270 OUT(out, runes, r-runes);
274 EPR "%s: input read: %r\n", argv0);
276 EPR "%s: input read: %s\n", argv0, strerror(errno));
278 EXIT(1, "input read error");
283 outtable(Rune *base, int n, long *map)
290 for(i = 0; i < NRUNE; i++)
292 for(i = 0; i < 256; i++)
295 for(i = 0, p = obuf; i < n; i++){
299 EPR "%s: rune 0x%x not in output cs\n", argv0, base[i]);
308 write(1, obuf, p-obuf);
313 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
314 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
315 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
316 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
317 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
318 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
319 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
320 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
321 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
322 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
323 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
324 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
325 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
326 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
327 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
328 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
331 long tabmsdos[256] = /* from jhelling@cs.ruu.nl (Jeroen Hellingman) */
333 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
334 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
335 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
336 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
337 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
338 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
339 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
340 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
341 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* latin */
342 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
343 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
344 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
345 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
346 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
347 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, /* forms */
348 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
349 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
350 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
351 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
352 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
353 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, /* greek */
354 0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
355 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, /* math */
356 0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
358 long tabmsdos2[256] = /* from jhelling@cs.ruu.nl (Jeroen Hellingman) */
360 0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
361 0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
362 0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x2043, 0x21a8,
363 0x2191, 0x2193, 0x2192, 0x2190, 0x2319, 0x2194, 0x25b2, 0x25bc,
364 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
365 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
366 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
367 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
368 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
369 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
370 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* latin */
371 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
372 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
373 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
374 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
375 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
376 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, /* forms */
377 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
378 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
379 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
380 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
381 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
382 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, /* greek */
383 0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
384 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, /* math */
385 0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
387 struct convert convert[] =
388 { /* if two entries have the same name, put the from one first */
389 { "8859-1", "Latin-1 (Western and Northern Europe including Italian)", Table, (void *)tab8859_1 },
390 { "8859-2", "Latin-2 (Eastern Europe except Turkey and the Baltic countries)", Table, (void *)tab8859_2 },
391 { "8859-3", "Latin-3 (Mediterranean, South Africa, Esperanto)", Table, (void *)tab8859_3 },
392 { "8859-4", "Latin-4 (Scandinavia and the Baltic countries; obsolete)", Table, (void *)tab8859_4 },
393 { "8859-5", "Part 5 (Cyrillic)", Table, (void *)tab8859_5 },
394 { "8859-6", "Part 6 (Arabic)", Table, (void *)tab8859_6 },
395 { "8859-7", "Part 7 (Greek)", Table, (void *)tab8859_7 },
396 { "8859-8", "Part 8 (Hebrew)", Table, (void *)tab8859_8 },
397 { "8859-9", "Latin-5 (Turkey, Western Europe except Icelandic and Faroese)", Table, (void *)tab8859_9 },
398 { "8859-10", "Latin-6 (Northern Europe)", Table, (void *)tab8859_10 },
399 { "8859-15", "Latin-9 (Western Europe)", Table, (void *)tab8859_15 },
400 { "ascii", "7-bit ASCII", Table, (void *)tabascii },
401 { "atari", "ATARI-ST character set", Table, (void *)tabatari },
402 { "av", "Alternativnyj Variant", Table, (void *)tabav },
403 { "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in },
404 { "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out },
405 { "cp437", "Code Page 437 (US)", Table, (void*)tabcp437 },
406 { "cp720", "Code Page 720 (Arabic)", Table, (void*)tabcp720 },
407 { "cp737", "Code Page 737 (Greek)", Table, (void*)tabcp737 },
408 { "cp775", "Code Page 775 (Baltic)", Table, (void*)tabcp775 },
409 { "cp850", "Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
410 { "cp852", "Code Page 852 (Latin II)", Table, (void*)tabcp852 },
411 { "cp855", "Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
412 { "cp857", "Code Page 857 (Turkish)", Table, (void*)tabcp857 },
413 { "cp858", "Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
414 { "cp862", "Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
415 { "cp866", "Code Page 866 (Russian)", Table, (void*)tabcp866 },
416 { "cp874", "Code Page 874 (Thai)", Table, (void*)tabcp874 },
417 { "cp1250", "Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
418 { "cp1251", "Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
419 { "cp1252", "Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
420 { "cp1253", "Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
421 { "cp1254", "Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
422 { "cp1255", "Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
423 { "cp1256", "Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
424 { "cp1257", "Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
425 { "cp1258", "Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
426 { "ebcdic", "EBCDIC", Table, (void *)tabebcdic }, /* 6f is recommended bad map */
427 { "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
428 { "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
429 { "gb", "GB2312-80", From|Func, 0, (Fnptr)gb_in },
430 { "gb", "GB2312-80", Func, 0, (Fnptr)gb_out },
431 { "html", "HTML", From|Func, 0, (Fnptr)html_in },
432 { "html", "HTML", Func, 0, (Fnptr)html_out },
433 { "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in },
434 { "jis-kanji", "ISO 2022-JP", From|Func, 0, (Fnptr)jisjis_in },
435 { "jis-kanji", "ISO 2022-JP", Func, 0, (Fnptr)jisjis_out },
436 { "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 },
437 { "latin1", "ISO 8859-1", Table, (void *)tab8859_1 },
438 { "macrom", "Macintosh Standard Roman character set", Table, (void *)tabmacroman },
439 { "microsoft", "Windows (CP 1252)", Table, (void *)tabcp1252 },
440 { "msdos", "IBM PC (CP 437)", Table, (void *)tabcp437 },
441 { "msdos2", "IBM PC (CP 437 with graphics in C0)", Table, (void *)tabmsdos2 },
442 { "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in },
443 { "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out },
444 { "next", "NEXTSTEP character set", Table, (void *)tabnextstep },
445 { "ov", "Osnovnoj Variant", Table, (void *)tabov },
446 { "ps2", "IBM PS/2: (CP 850)", Table, (void *)tabcp850 },
447 { "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf1 },
448 { "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table, (void *)tabsf2 },
449 { "tis", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
450 { "ucode", "Russian U-code", Table, (void *)tabucode },
451 { "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in },
452 { "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out },
453 { "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in },
454 { "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out },
455 { "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
456 { "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
457 { "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in },
458 { "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out },
459 { "utf-l2", "from", From|Func, 0, (Fnptr)utf_in },
460 { "utf-l2", "to", Func, 0, (Fnptr)utf_out },
461 { "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
462 { "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
463 { "viscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },