提交 1a63a9fc 编写于 作者: R Rich Felker

sync case mappings with Unicode 6.1

also special-case ß (U+00DF) as lowercase even though it does not have
a mapping to uppercase. Unicode added an uppercase version of this
character (U+1E9E) but does not map ß to it, presumably because the
uppercase version is not actually used except for some obscure purpose.
上级 38b5d7d0
...@@ -2,5 +2,5 @@ ...@@ -2,5 +2,5 @@
int iswlower(wint_t wc) int iswlower(wint_t wc)
{ {
return towupper(wc) != wc; return towupper(wc) != wc || wc == 0xdf;
} }
...@@ -30,6 +30,7 @@ static const struct { ...@@ -30,6 +30,7 @@ static const struct {
CASELACE(0x4c1,0x4cd), CASELACE(0x4c1,0x4cd),
CASELACE(0x4d0,0x50e), CASELACE(0x4d0,0x50e),
CASELACE(0x514,0x526),
CASEMAP(0x531,0x556,0x561), CASEMAP(0x531,0x556,0x561),
CASELACE(0x01a0,0x01a4), CASELACE(0x01a0,0x01a4),
...@@ -69,12 +70,19 @@ static const struct { ...@@ -69,12 +70,19 @@ static const struct {
CASEMAP(0x2c00,0x2c2e,0x2c30), CASEMAP(0x2c00,0x2c2e,0x2c30),
CASELACE(0x2c67,0x2c6b), CASELACE(0x2c67,0x2c6b),
CASELACE(0x2c80,0x2ce2), CASELACE(0x2c80,0x2ce2),
CASELACE(0x2ceb,0x2ced),
CASELACE(0xa640,0xa66c),
CASELACE(0xa680,0xa696),
CASELACE(0xa722,0xa72e), CASELACE(0xa722,0xa72e),
CASELACE(0xa732,0xa76e), CASELACE(0xa732,0xa76e),
CASELACE(0xa779,0xa77b), CASELACE(0xa779,0xa77b),
CASELACE(0xa77e,0xa786), CASELACE(0xa77e,0xa786),
CASELACE(0xa790,0xa792),
CASELACE(0xa7a0,0xa7a8),
CASEMAP(0xff21,0xff3a,0xff41), CASEMAP(0xff21,0xff3a,0xff41),
{ 0,0,0 } { 0,0,0 }
}; };
...@@ -144,6 +152,8 @@ static const unsigned short pairs[][2] = { ...@@ -144,6 +152,8 @@ static const unsigned short pairs[][2] = {
{ 0x03f7, 0x03f8 }, { 0x03f7, 0x03f8 },
{ 0x03fa, 0x03fb }, { 0x03fa, 0x03fb },
{ 0x1e60, 0x1e9b }, { 0x1e60, 0x1e9b },
{ 0xdf, 0xdf },
{ 0x1e9e, 0xdf },
{ 0x1f59, 0x1f51 }, { 0x1f59, 0x1f51 },
{ 0x1f5b, 0x1f53 }, { 0x1f5b, 0x1f53 },
...@@ -181,10 +191,20 @@ static const unsigned short pairs[][2] = { ...@@ -181,10 +191,20 @@ static const unsigned short pairs[][2] = {
{ 0x2c6d, 0x251 }, { 0x2c6d, 0x251 },
{ 0x2c6e, 0x271 }, { 0x2c6e, 0x271 },
{ 0x2c6f, 0x250 }, { 0x2c6f, 0x250 },
{ 0x2c70, 0x252 },
{ 0x2c72, 0x2c73 }, { 0x2c72, 0x2c73 },
{ 0x2c75, 0x2c76 }, { 0x2c75, 0x2c76 },
{ 0x2c7e, 0x23f },
{ 0x2c7f, 0x240 },
{ 0x2cf2, 0x2cf3 },
{ 0xa77d, 0x1d79 }, { 0xa77d, 0x1d79 },
{ 0xa78b, 0xa78c },
{ 0xa78d, 0x265 },
{ 0xa7aa, 0x266 },
{ 0x10c7, 0x2d27 },
{ 0x10cd, 0x2d2d },
/* bogus greek 'symbol' letters */ /* bogus greek 'symbol' letters */
{ 0x376, 0x377 }, { 0x376, 0x377 },
...@@ -207,17 +227,19 @@ static wchar_t __towcase(wchar_t wc, int lower) ...@@ -207,17 +227,19 @@ static wchar_t __towcase(wchar_t wc, int lower)
int i; int i;
int lmul = 2*lower-1; int lmul = 2*lower-1;
int lmask = lower-1; int lmask = lower-1;
if ((unsigned)wc - 0x10400 < 0x50)
return wc + lmul*0x28;
/* no letters with case in these large ranges */ /* no letters with case in these large ranges */
if (!iswalpha(wc) if (!iswalpha(wc)
|| (unsigned)wc - 0x0600 <= 0x0fff-0x0600 || (unsigned)wc - 0x0600 <= 0x0fff-0x0600
|| (unsigned)wc - 0x2e00 <= 0xa6ff-0x2e00 || (unsigned)wc - 0x2e00 <= 0xa63f-0x2e00
|| (unsigned)wc - 0xa800 <= 0xfeff-0xa800) || (unsigned)wc - 0xa800 <= 0xfeff-0xa800)
return wc; return wc;
/* special case because the diff between upper/lower is too big */ /* special case because the diff between upper/lower is too big */
if ((unsigned)wc - 0x10a0 < 0x26 || (unsigned)wc - 0x2d00 < 0x26) if (lower && (unsigned)wc - 0x10a0 < 0x2e)
return wc + lmul*(0x2d00-0x10a0); if (wc>0x10c5 && wc != 0x10c7 && wc != 0x10cd) return wc;
else return wc + 0x2d00 - 0x10a0;
if (!lower && (unsigned)wc - 0x2d00 < 0x26)
if (wc>0x2d25 && wc != 0x2d27 && wc != 0x2d2d) return wc;
else return wc + 0x10a0 - 0x2d00;
for (i=0; casemaps[i].len; i++) { for (i=0; casemaps[i].len; i++) {
int base = casemaps[i].upper + (lmask & casemaps[i].lower); int base = casemaps[i].upper + (lmask & casemaps[i].lower);
if ((unsigned)wc-base < casemaps[i].len) { if ((unsigned)wc-base < casemaps[i].len) {
...@@ -230,8 +252,8 @@ static wchar_t __towcase(wchar_t wc, int lower) ...@@ -230,8 +252,8 @@ static wchar_t __towcase(wchar_t wc, int lower)
if (pairs[i][1-lower] == wc) if (pairs[i][1-lower] == wc)
return pairs[i][lower]; return pairs[i][lower];
} }
if ((unsigned)wc - 0x10428 + (lower<<5) + (lower<<3) < 0x28) if ((unsigned)wc - (0x10428 - 0x28*lower) < 0x28)
return wc - 0x28 + (lower<<10) + (lower<<6); return wc - 0x28 + 0x50*lower;
return wc; return wc;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册