commit c006e984192aa8bcb755bdd25e111bb37b5db16b
from: Michael Teichgräber <mt4swm@googlemail.com>
via: Russ Cox <rsc@swtch.com>
date: Mon Jun 28 21:43:28 2010 UTC

This patch fixes unicode, unicode-be and unicode-le
input, and unicode output, which were broken by the
change in the size of Rune from two bytes to four
bytes.

(Unicode-le and unicode-be output are not affected,
since they work differently and produce correct
output as long as sizeof(Rune) >= 2.)

In all of the affected functions an extra array of N
elements of type u16int is used to temporarily store
the 16-bit unicode data.  As this increases stack
usage, perhaps a different, slightly more complicated
solution might be preferred, where only a single
`Rune buf[N]' is used for both temporary storage and
the result.

R=rsc
CC=codebot
http://codereview.appspot.com/1574041

commit - 25989d9ecabaf82acbf72f5c8fbf1c369a6b98ae
commit + c006e984192aa8bcb755bdd25e111bb37b5db16b
blob - d7d18e41da4997f2b4b550094f4d540ecccaa291
blob + 6722bb6bef8805ca80b35417cdbe828fa5bd6872
--- src/cmd/tcs/tcs.c
+++ src/cmd/tcs/tcs.c
@@ -187,17 +187,19 @@ swab2(char *b, int n)
 void
 unicode_in(int fd, long *notused, struct convert *out)
 {
+	u16int ubuf[N];
 	Rune buf[N];
-	int n;
+	int i, n;
 	int swabme;
 
 	USED(notused);
-	if(read(fd, (char *)buf, 2) != 2)
+	if(read(fd, (char *)ubuf, 2) != 2)
 		return;
 	ninput += 2;
-	switch(buf[0])
+	switch(ubuf[0])
 	{
 	default:
+		buf[0] = ubuf[0];
 		OUT(out, buf, 1);
 	case 0xFEFF:
 		swabme = 0;
@@ -206,10 +208,12 @@ unicode_in(int fd, long *notused, struct convert *out)
 		swabme = 1;
 		break;
 	}
-	while((n = read(fd, (char *)buf, 2*N)) > 0){
+	while((n = read(fd, (char *)ubuf, 2*N)) > 0){
 		ninput += n;
 		if(swabme)
-			swab2((char *)buf, n);
+			swab2((char *)ubuf, n);
+		for(i=0; i<n/2; i++)
+			buf[i] = ubuf[i];
 		if(n&1){
 			if(squawk)
 				EPR "%s: odd byte count in %s\n", argv0, file);
@@ -227,13 +231,14 @@ void
 unicode_in_be(int fd, long *notused, struct convert *out)
 {
 	int i, n;
+	u16int ubuf[N];
 	Rune buf[N], r;
 	uchar *p;
 
 	USED(notused);
-	while((n = read(fd, (char *)buf, 2*N)) > 0){
+	while((n = read(fd, (char *)ubuf, 2*N)) > 0){
 		ninput += n;
-		p = (uchar*)buf;
+		p = (uchar*)ubuf;
 		for(i=0; i<n/2; i++){
 			r = *p++<<8;
 			r |= *p++;
@@ -257,13 +262,14 @@ void
 unicode_in_le(int fd, long *notused, struct convert *out)
 {
 	int i, n;
+	u16int ubuf[N];
 	Rune buf[N], r;
 	uchar *p;
 
 	USED(notused);
-	while((n = read(fd, (char *)buf, 2*N)) > 0){
+	while((n = read(fd, (char *)ubuf, 2*N)) > 0){
 		ninput += n;
-		p = (uchar*)buf;
+		p = (uchar*)ubuf;
 		for(i=0; i<n/2; i++){
 			r = *p++;
 			r |= *p++<<8;
@@ -287,17 +293,21 @@ void
 unicode_out(Rune *base, int n, long *notused)
 {
 	static int first = 1;
+	u16int buf[N];
+	int i;
 
 	USED(notused);
 	nrunes += n;
 	if(first){
-		unsigned short x = 0xFEFF;
+		u16int x = 0xFEFF;
 		noutput += 2;
 		write(1, (char *)&x, 2);
 		first = 0;
 	}
 	noutput += 2*n;
-	write(1, (char *)base, 2*n);
+	for(i=0; i<n; i++)
+		buf[i] = base[i];
+	write(1, (char *)buf, 2*n);
 }
 
 void