commit - 4dbf255619efac4f0a00e4216d6c999128910df2
commit + 0cadb4301d18724e7513d7489cb5bebd262c82f1
blob - 480ccad58d4538b5bf912b209231d4b41849e925
blob + 795e83e84fd9969bc146792074860d49546530f8
--- include/fmt.h
+++ include/fmt.h
void *farg; /* to make flush a closure */
int nfmt; /* num chars formatted so far */
va_list args; /* args passed to dofmt */
- int r; /* % format Rune */
+ Rune r; /* % format Rune */
int width;
int prec;
unsigned long flags;
blob - eb5ea42a8fb35a599cf3b1be760a3142fc5f7f34
blob + 44052f417281099c9095d6ffd26991018b6d84b9
--- include/utf.h
+++ include/utf.h
extern "C" {
#endif
-typedef unsigned short Rune; /* 16 bits */
+typedef unsigned int Rune; /* 32 bits */
enum
{
- UTFmax = 3, /* maximum bytes per rune */
+ UTFmax = 4, /* maximum bytes per rune */
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
Runeself = 0x80, /* rune and UTF sequences are the same (<) */
- Runeerror = 0xFFFD /* decoding error in UTF */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF /* maximum rune value */
};
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
blob - d77b52ee4e1db06c4a7074c57608adf683880a90
blob + 6f04595ded3c14c65c884bdb6433b9ab64124284
--- src/cmd/9term/wind.c
+++ src/cmd/9term/wind.c
Rune *rp, *bp, *up, *kbdr;
uint qh;
int nr, nb, c, wid, i, npart, initial, lastb;
- char *s, *t, part[3];
+ char *s, *t, part[UTFmax];
Window *w;
Mousestate *mp, m;
enum { WKey, WMouse, WMouseread, WCtl, WCwrite, WCread, WWread, NWALT };
blob - 5197e8a45f7c4a3bc4b6df6167eab470469588cc
blob + a58437ba2178b17fe7fd955297dce4fe1a720f38
--- src/cmd/acme/regx.c
+++ src/cmd/acme/regx.c
exprp++; /* eat '-' */
if((c2 = nextrec()) == ']')
goto Error;
- classp[n+0] = 0xFFFF;
+ classp[n+0] = Runemax;
classp[n+1] = c1;
classp[n+2] = c2;
n += 3;
p = class[classno];
while(*p){
- if(*p == 0xFFFF){
+ if(*p == Runemax){
if(p[1]<=c && c<=p[2])
return !negate;
p += 3;
blob - cdf00b9b087beb0eacef7c37ceb9620033d8d728
blob + 12510689daf638433011cf7f481a585ad46f4024
--- src/cmd/sam/cmd.c
+++ src/cmd/sam/cmd.c
inputc(void)
{
int n, nbuf;
- char buf[3];
+ char buf[UTFmax];
Rune r;
Again:
blob - 3477baeff0492dad47afedb8dc3eb580ade8b44c
blob + 3234d73133414b8f6f6745979575e438c6d575e0
--- src/cmd/sam/regexp.c
+++ src/cmd/sam/regexp.c
exprp++; /* eat '-' */
if((c2 = nextrec()) == ']')
goto Error;
- classp[n+0] = 0xFFFF;
+ classp[n+0] = Runemax;
classp[n+1] = c1;
classp[n+2] = c2;
n += 3;
p = class[classno];
while(*p){
- if(*p == 0xFFFF){
+ if(*p == Runemax){
if(p[1]<=c && c<=p[2])
return !negate;
p += 3;
blob - 5849c83fb955a0f2b4534c6904f42c8a12dd19c4
blob + a78c9c66496d54118c6f150daf516610e6982faf
--- src/cmd/sed.c
+++ src/cmd/sed.c
while ((r = *cp++) != '\0') {
if(r == '\\') {
if (rhs < end)
- *rhs++ = 0xFFFF;
+ *rhs++ = Runemax;
else
return 0;
r = *cp++;
sp = place(sp, loc1, loc2);
continue;
}
- if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') {
+ if (c == Runemax && (c = *rp++) >= '1' && c < MAXSUB+'0') {
n = c-'0';
if (subexp[n].s.rsp && subexp[n].e.rep) {
sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
blob - 39ba747c0b8870a85c3ca588f28a3de7a16a15dc
blob + 46e98e15d182be9044d7862a9555dbb5c351acba
--- src/cmd/tr.c
+++ src/cmd/tr.c
#define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
#define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
-#define MAXRUNE 0xFFFF
+#define MAXRUNE Runemax
uchar f[(MAXRUNE+1)/8];
uchar t[(MAXRUNE+1)/8];
blob - e52348966da5e4df420195df07ac936bc487555e
blob + c97b036ca8ee2f520857f95c466271d0e88efc66
--- src/cmd/troff/mbwc.c
+++ src/cmd/troff/mbwc.c
if(p+d <= pe+3) {
*p++ = buf[0];
if(d > 1) {
- *p++ = buf[2];
+ *p++ = buf[1];
if(d > 2)
- *p++ = buf[3];
+ *p++ = buf[2];
}
}
if(c == 0)
blob - a04472711b6590b8aa95e52960bfdfd447c453b9
blob + aec44b75005e57862fb5e82257ddafe69421addc
--- src/cmd/unicode.c
+++ src/cmd/unicode.c
return "bad range";
}
min = strtoul(q, &q, 16);
- if(min<0 || min>0xFFFF || *q!='-')
+ if(min<0 || min>Runemax || *q!='-')
goto err;
q++;
if(strchr(hex, *q) == 0)
goto err;
max = strtoul(q, &q, 16);
- if(max<0 || max>0xFFFF || max<min || *q!=0)
+ if(max<0 || max>Runemax || max<min || *q!=0)
goto err;
i = 0;
do{
return "bad char";
}
m = strtoul(q, &q, 16);
- if(m<0 || m>0xFFFF || *q!=0)
+ if(m<0 || m>Runemax || *q!=0)
goto err;
Bprint(&bout, "%C", m);
if(!text)
blob - 214b71db0e0fc30eb357ed107b3a3f25249509d2
blob + 353c76e37f25e0af8707c5c72199b28bd663eddb
--- src/lib9/fmt/dofmt.c
+++ src/lib9/fmt/dofmt.c
int
__badfmt(Fmt *f)
{
- char x[3];
+ char x[2+UTFmax]; /* room for '%', the verb rune encoded by runetochar (at most UTFmax bytes), and a closing '%' */
+ int n;
x[0] = '%';
- x[1] = f->r;
- x[2] = '%';
- f->prec = 3;
- __fmtcpy(f, (const void*)x, 3, 3);
+ n = 1 + runetochar(x+1, &f->r); /* encode the verb rune after the leading '%'; n is 1 plus runetochar's return value and is used as the next index into x */
+ x[n++] = '%'; /* closing '%'; n now counts every byte stored in x */
+ f->prec = n; /* precision is set to the full length before the copy, as the old code did with the constant 3 */
+ __fmtcpy(f, (const void*)x, n, n); /* both count arguments are n, replacing the fixed 3, 3 of the 16-bit version */
return 0;
}
blob - 3d6831b029f61522f122c51bebecf0e488ff88ef
blob + f5944806f28d457c6d0875524ca20b338733e440
--- src/lib9/utf/rune.c
+++ src/lib9/utf/rune.c
Bit2 = 5,
Bit3 = 4,
Bit4 = 3,
+ Bit5 = 2,
T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
Maskx = (1<<Bitx)-1, /* 0011 1111 */
Testx = Maskx ^ 0xFF, /* 1100 0000 */
int
chartorune(Rune *rune, char *str)
{
- int c, c1, c2;
+ int c, c1, c2, c3;
long l;
/*
}
/*
+ * four character sequence
+ * 10000-10FFFF => T4 Tx Tx Tx
+ */
+ if(UTFmax >= 4) {
+ c3 = *(uchar*)(str+3) ^ Tx;
+ if(c3 & Testx)
+ goto bad;
+ if(c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if(l <= Rune3)
+ goto bad;
+ if(l > Runemax)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+ }
+
+ /*
* bad decoding
*/
bad:
/*
* two character sequence
- * 0080-07FF => T2 Tx
+ * 00080-007FF => T2 Tx
*/
if(c <= Rune2) {
str[0] = T2 | (c >> 1*Bitx);
/*
* three character sequence
- * 0800-FFFF => T3 Tx Tx
+ * 00800-0FFFF => T3 Tx Tx
*/
- str[0] = T3 | (c >> 2*Bitx);
- str[1] = Tx | ((c >> 1*Bitx) & Maskx);
- str[2] = Tx | (c & Maskx);
- return 3;
+ if(c > Runemax)
+ c = Runeerror;
+ if(c <= Rune3) {
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
+ }
+
+ /*
+ * four character sequence
+ * 10000-10FFFF => T4 Tx Tx Tx
+ */
+ str[0] = T4 | (c >> 3*Bitx);
+ str[1] = Tx | ((c >> 2*Bitx) & Maskx);
+ str[2] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[3] = Tx | (c & Maskx);
+ return 4;
}
int
if(c <= Rune2)
nb += 2;
else
+ if(c <= Rune3 || c > Runemax)
nb += 3;
+ else
+ nb += 4;
}
return nb;
}
{
int c;
- if(n > 0) {
- c = *(uchar*)str;
- if(c < Tx)
- return 1;
- if(n > 1)
- if(c < T3 || n > 2)
- return 1;
- }
- return 0;
+ if(n <= 0)
+ return 0;
+ c = *(uchar*)str;
+ if(c < Tx)
+ return 1;
+ if(c < T3)
+ return n >= 2;
+ if(UTFmax == 3 || c < T4)
+ return n >= 3;
+ return n >= 4;
}
blob - f1b065bcee408c25c163162aed6da34eb61cc815
blob + 46991e161f4ecc5c6958c9b84c7bc207d9b519f3
--- src/libbio/bgetrune.c
+++ src/libbio/bgetrune.c
{
int c, i;
Rune rune;
- char str[4];
+ char str[UTFmax];
c = Bgetc(bp);
if(c < Runeself) { /* one char */
blob - a2eaa83ec28212f175c199290c7931c7ed11671f
blob + 651ae7b762e78ef77b7d78e06e174deb8c5a8ee0
--- src/libbio/bputrune.c
+++ src/libbio/bputrune.c
Bputrune(Biobuf *bp, long c)
{
Rune rune;
- char str[4];
+ char str[UTFmax];
int n;
rune = c;