提交 aed29e12 编写于 作者: P peytoia

6959267: Support Unicode 6.0.0

Reviewed-by: okutsu
上级 f90d6618
/* /*
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -192,7 +192,11 @@ class CharacterData00 extends CharacterData { ...@@ -192,7 +192,11 @@ class CharacterData00 extends CharacterData {
case 0x2C6D : mapChar = 0x0251; break; case 0x2C6D : mapChar = 0x0251; break;
case 0x2C6E : mapChar = 0x0271; break; case 0x2C6E : mapChar = 0x0271; break;
case 0x2C6F : mapChar = 0x0250; break; case 0x2C6F : mapChar = 0x0250; break;
case 0x2C70 : mapChar = 0x0252; break;
case 0x2C7E : mapChar = 0x023F; break;
case 0x2C7F : mapChar = 0x0240; break;
case 0xA77D : mapChar = 0x1D79; break; case 0xA77D : mapChar = 0x1D79; break;
case 0xA78D : mapChar = 0x0265; break;
// default mapChar is already set, so no // default mapChar is already set, so no
// need to redo it here. // need to redo it here.
// default : mapChar = ch; // default : mapChar = ch;
...@@ -246,8 +250,12 @@ class CharacterData00 extends CharacterData { ...@@ -246,8 +250,12 @@ class CharacterData00 extends CharacterData {
case 0x1FC3 : mapChar = 0x1FCC; break; case 0x1FC3 : mapChar = 0x1FCC; break;
case 0x1FF3 : mapChar = 0x1FFC; break; case 0x1FF3 : mapChar = 0x1FFC; break;
case 0x023F : mapChar = 0x2C7E; break;
case 0x0240 : mapChar = 0x2C7F; break;
case 0x0250 : mapChar = 0x2C6F; break; case 0x0250 : mapChar = 0x2C6F; break;
case 0x0251 : mapChar = 0x2C6D; break; case 0x0251 : mapChar = 0x2C6D; break;
case 0x0252 : mapChar = 0x2C70; break;
case 0x0265 : mapChar = 0xA78D; break;
case 0x026B : mapChar = 0x2C62; break; case 0x026B : mapChar = 0x2C62; break;
case 0x0271 : mapChar = 0x2C6E; break; case 0x0271 : mapChar = 0x2C6E; break;
case 0x027D : mapChar = 0x2C64; break; case 0x027D : mapChar = 0x2C64; break;
...@@ -487,8 +495,12 @@ class CharacterData00 extends CharacterData { ...@@ -487,8 +495,12 @@ class CharacterData00 extends CharacterData {
case 0x017F : mapChar = 0x0053; break; case 0x017F : mapChar = 0x0053; break;
case 0x1FBE : mapChar = 0x0399; break; case 0x1FBE : mapChar = 0x0399; break;
case 0x023F : mapChar = 0x2C7E; break;
case 0x0240 : mapChar = 0x2C7F; break;
case 0x0250 : mapChar = 0x2C6F; break; case 0x0250 : mapChar = 0x2C6F; break;
case 0x0251 : mapChar = 0x2C6D; break; case 0x0251 : mapChar = 0x2C6D; break;
case 0x0252 : mapChar = 0x2C70; break;
case 0x0265 : mapChar = 0xA78D; break;
case 0x026B : mapChar = 0x2C62; break; case 0x026B : mapChar = 0x2C62; break;
case 0x0271 : mapChar = 0x2C6E; break; case 0x0271 : mapChar = 0x2C6E; break;
case 0x027D : mapChar = 0x2C64; break; case 0x027D : mapChar = 0x2C64; break;
......
/* /*
* Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -251,9 +251,40 @@ class CharacterData01 extends CharacterData { ...@@ -251,9 +251,40 @@ class CharacterData01 extends CharacterData {
case 0x010341: retval = 90; break; // GOTHIC LETTER NINETY case 0x010341: retval = 90; break; // GOTHIC LETTER NINETY
case 0x01034A: retval = 900; break; // GOTHIC LETTER NINE HUNDRED case 0x01034A: retval = 900; break; // GOTHIC LETTER NINE HUNDRED
case 0x0103D5: retval = 100; break; // OLD PERSIAN NUMBER HUNDRED case 0x0103D5: retval = 100; break; // OLD PERSIAN NUMBER HUNDRED
case 0x01085D: retval = 100; break; // IMPERIAL ARAMAIC NUMBER ONE HUNDRED
case 0x01085E: retval = 1000; break; // IMPERIAL ARAMAIC NUMBER ONE THOUSAND
case 0x01085F: retval = 10000; break; // IMPERIAL ARAMAIC NUMBER TEN THOUSAND
case 0x010919: retval = 100; break; // PHOENICIAN NUMBER ONE HUNDRED case 0x010919: retval = 100; break; // PHOENICIAN NUMBER ONE HUNDRED
case 0x010A46: retval = 100; break; // KHAROSHTHI NUMBER ONE HUNDRED case 0x010A46: retval = 100; break; // KHAROSHTHI NUMBER ONE HUNDRED
case 0x010A47: retval = 1000; break; // KHAROSHTHI NUMBER ONE THOUSAND case 0x010A47: retval = 1000; break; // KHAROSHTHI NUMBER ONE THOUSAND
case 0x010A7E: retval = 50; break; // OLD SOUTH ARABIAN NUMBER FIFTY
case 0x010B5E: retval = 100; break; // INSCRIPTIONAL PARTHIAN NUMBER ONE HUNDRED
case 0x010B5F: retval = 1000; break; // INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
case 0x010B7E: retval = 100; break; // INSCRIPTIONAL PAHLAVI NUMBER ONE HUNDRED
case 0x010B7F: retval = 1000; break; // INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
case 0x010E6C: retval = 40; break; // RUMI NUMBER FORTY
case 0x010E6D: retval = 50; break; // RUMI NUMBER FIFTY
case 0x010E6E: retval = 60; break; // RUMI NUMBER SIXTY
case 0x010E6F: retval = 70; break; // RUMI NUMBER SEVENTY
case 0x010E70: retval = 80; break; // RUMI NUMBER EIGHTY
case 0x010E71: retval = 90; break; // RUMI NUMBER NINETY
case 0x010E72: retval = 100; break; // RUMI NUMBER ONE HUNDRED
case 0x010E73: retval = 200; break; // RUMI NUMBER TWO HUNDRED
case 0x010E74: retval = 300; break; // RUMI NUMBER THREE HUNDRED
case 0x010E75: retval = 400; break; // RUMI NUMBER FOUR HUNDRED
case 0x010E76: retval = 500; break; // RUMI NUMBER FIVE HUNDRED
case 0x010E77: retval = 600; break; // RUMI NUMBER SIX HUNDRED
case 0x010E78: retval = 700; break; // RUMI NUMBER SEVEN HUNDRED
case 0x010E79: retval = 800; break; // RUMI NUMBER EIGHT HUNDRED
case 0x010E7A: retval = 900; break; // RUMI NUMBER NINE HUNDRED
case 0x01105E: retval = 40; break; // BRAHMI NUMBER FORTY
case 0x01105F: retval = 50; break; // BRAHMI NUMBER FIFTY
case 0x011060: retval = 60; break; // BRAHMI NUMBER SIXTY
case 0x011061: retval = 70; break; // BRAHMI NUMBER SEVENTY
case 0x011062: retval = 80; break; // BRAHMI NUMBER EIGHTY
case 0x011063: retval = 90; break; // BRAHMI NUMBER NINETY
case 0x011064: retval = 100; break; // BRAHMI NUMBER ONE HUNDRED
case 0x011065: retval = 1000; break; // BRAHMI NUMBER ONE THOUSAND
case 0x01D36C: retval = 40; break; // COUNTING ROD TENS DIGIT FOUR case 0x01D36C: retval = 40; break; // COUNTING ROD TENS DIGIT FOUR
case 0x01D36D: retval = 50; break; // COUNTING ROD TENS DIGIT FIVE case 0x01D36D: retval = 50; break; // COUNTING ROD TENS DIGIT FIVE
case 0x01D36E: retval = 60; break; // COUNTING ROD TENS DIGIT SIX case 0x01D36E: retval = 60; break; // COUNTING ROD TENS DIGIT SIX
......
此差异已折叠。
# SpecialCasing-5.1.0.txt # SpecialCasing-6.0.0.txt
# Date: 2008-03-03, 21:58:10 GMT [MD] # Date: 2010-05-18, 00:49:39 GMT [MD]
# #
# Unicode Character Database # Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc. # Copyright (c) 1991-2010 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html # For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html # For documentation, see http://www.unicode.org/reports/tr44/
# #
# Special Casing Properties # Special Casing Properties
# #
...@@ -106,11 +106,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH ...@@ -106,11 +106,11 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
# IMPORTANT-when capitalizing iota-subscript (0345) # IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
# It MUST be in normalized form--moved to the end of any sequence of combining marks. # the result will be incorrect unless the iota-subscript is moved to the end
# This is because logically it represents a following base character! # of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
# E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript> # This process can be achieved by first transforming the text to NFC before casing.
# It should never be the first character in a word, so in titlecasing it can be left as is. # E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
# The following cases are already in the UnicodeData file, so are only commented here. # The following cases are already in the UnicodeData file, so are only commented here.
......
/* /*
* Copyright (c) 2000, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -262,7 +262,23 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -262,7 +262,23 @@ public final class NumericShaper implements java.io.Serializable {
/** /**
* The Cham range with the Cham digits. * The Cham range with the Cham digits.
*/ */
CHAM ('\uaa50', '\uaa00', '\uaa60'); CHAM ('\uaa50', '\uaa00', '\uaa60'),
/**
* The Tai Tham Hora range with the Tai Tham Hora digits.
*/
TAI_THAM_HORA ('\u1a80', '\u1a20', '\u1ab0'),
/**
* The Tai Tham Tham range with the Tai Tham Tham digits.
*/
TAI_THAM_THAM ('\u1a90', '\u1a20', '\u1ab0'),
/**
* The Javanese range with the Javanese digits.
*/
JAVANESE ('\ua9d0', '\ua980', '\ua9e0'),
/**
* The Meetei Mayek range with the Meetei Mayek digits.
*/
MEETEI_MAYEK ('\uabf0', '\uabc0', '\uac00');
private static int toRangeIndex(Range script) { private static int toRangeIndex(Range script) {
int index = script.ordinal(); int index = script.ordinal();
...@@ -592,10 +608,16 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -592,10 +608,16 @@ public final class NumericShaper implements java.io.Serializable {
0x07a6, 0x07b1, 0x07a6, 0x07b1,
0x07eb, 0x07f4, 0x07eb, 0x07f4,
0x07f6, 0x07fa, 0x07f6, 0x07fa,
0x0901, 0x0903, 0x0816, 0x081a,
0x081b, 0x0824,
0x0825, 0x0828,
0x0829, 0x0830,
0x0859, 0x085e,
0x0900, 0x0903,
0x093a, 0x093b,
0x093c, 0x093d, 0x093c, 0x093d,
0x0941, 0x0949, 0x0941, 0x0949,
0x094d, 0x0950, 0x094d, 0x094e,
0x0951, 0x0958, 0x0951, 0x0958,
0x0962, 0x0964, 0x0962, 0x0964,
0x0981, 0x0982, 0x0981, 0x0982,
...@@ -604,7 +626,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -604,7 +626,7 @@ public final class NumericShaper implements java.io.Serializable {
0x09cd, 0x09ce, 0x09cd, 0x09ce,
0x09e2, 0x09e6, 0x09e2, 0x09e6,
0x09f2, 0x09f4, 0x09f2, 0x09f4,
0x0a01, 0x0a03, 0x09fb, 0x0a03,
0x0a3c, 0x0a3e, 0x0a3c, 0x0a3e,
0x0a41, 0x0a59, 0x0a41, 0x0a59,
0x0a70, 0x0a72, 0x0a70, 0x0a72,
...@@ -630,9 +652,8 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -630,9 +652,8 @@ public final class NumericShaper implements java.io.Serializable {
0x0cbc, 0x0cbd, 0x0cbc, 0x0cbd,
0x0ccc, 0x0cd5, 0x0ccc, 0x0cd5,
0x0ce2, 0x0ce6, 0x0ce2, 0x0ce6,
0x0cf1, 0x0d02,
0x0d41, 0x0d46, 0x0d41, 0x0d46,
0x0d4d, 0x0d57, 0x0d4d, 0x0d4e,
0x0d62, 0x0d66, 0x0d62, 0x0d66,
0x0dca, 0x0dcf, 0x0dca, 0x0dcf,
0x0dd2, 0x0dd8, 0x0dd2, 0x0dd8,
...@@ -649,7 +670,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -649,7 +670,7 @@ public final class NumericShaper implements java.io.Serializable {
0x0f71, 0x0f7f, 0x0f71, 0x0f7f,
0x0f80, 0x0f85, 0x0f80, 0x0f85,
0x0f86, 0x0f88, 0x0f86, 0x0f88,
0x0f90, 0x0fbe, 0x0f8d, 0x0fbe,
0x0fc6, 0x0fc7, 0x0fc6, 0x0fc7,
0x102d, 0x1031, 0x102d, 0x1031,
0x1032, 0x1038, 0x1032, 0x1038,
...@@ -661,8 +682,10 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -661,8 +682,10 @@ public final class NumericShaper implements java.io.Serializable {
0x1082, 0x1083, 0x1082, 0x1083,
0x1085, 0x1087, 0x1085, 0x1087,
0x108d, 0x108e, 0x108d, 0x108e,
0x135f, 0x1360, 0x109d, 0x109e,
0x135d, 0x1360,
0x1390, 0x13a0, 0x1390, 0x13a0,
0x1400, 0x1401,
0x1680, 0x1681, 0x1680, 0x1681,
0x169b, 0x16a0, 0x169b, 0x16a0,
0x1712, 0x1720, 0x1712, 0x1720,
...@@ -682,6 +705,11 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -682,6 +705,11 @@ public final class NumericShaper implements java.io.Serializable {
0x1939, 0x1946, 0x1939, 0x1946,
0x19de, 0x1a00, 0x19de, 0x1a00,
0x1a17, 0x1a19, 0x1a17, 0x1a19,
0x1a56, 0x1a57,
0x1a58, 0x1a61,
0x1a62, 0x1a63,
0x1a65, 0x1a6d,
0x1a73, 0x1a80,
0x1b00, 0x1b04, 0x1b00, 0x1b04,
0x1b34, 0x1b35, 0x1b34, 0x1b35,
0x1b36, 0x1b3b, 0x1b36, 0x1b3b,
...@@ -691,8 +719,16 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -691,8 +719,16 @@ public final class NumericShaper implements java.io.Serializable {
0x1b80, 0x1b82, 0x1b80, 0x1b82,
0x1ba2, 0x1ba6, 0x1ba2, 0x1ba6,
0x1ba8, 0x1baa, 0x1ba8, 0x1baa,
0x1be6, 0x1be7,
0x1be8, 0x1bea,
0x1bed, 0x1bee,
0x1bef, 0x1bf2,
0x1c2c, 0x1c34, 0x1c2c, 0x1c34,
0x1c36, 0x1c3b, 0x1c36, 0x1c3b,
0x1cd0, 0x1cd3,
0x1cd4, 0x1ce1,
0x1ce2, 0x1ce9,
0x1ced, 0x1cee,
0x1dc0, 0x1e00, 0x1dc0, 0x1e00,
0x1fbd, 0x1fbe, 0x1fbd, 0x1fbe,
0x1fbf, 0x1fc2, 0x1fbf, 0x1fc2,
...@@ -716,14 +752,16 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -716,14 +752,16 @@ public final class NumericShaper implements java.io.Serializable {
0x213a, 0x213c, 0x213a, 0x213c,
0x2140, 0x2145, 0x2140, 0x2145,
0x214a, 0x214e, 0x214a, 0x214e,
0x2153, 0x2160, 0x2150, 0x2160,
0x2190, 0x2336, 0x2189, 0x2336,
0x237b, 0x2395, 0x237b, 0x2395,
0x2396, 0x249c, 0x2396, 0x249c,
0x24ea, 0x26ac, 0x24ea, 0x26ac,
0x26ad, 0x2800, 0x26ad, 0x2800,
0x2900, 0x2c00, 0x2900, 0x2c00,
0x2ce5, 0x2d00, 0x2ce5, 0x2ceb,
0x2cef, 0x2d00,
0x2d7f, 0x2d80,
0x2de0, 0x3005, 0x2de0, 0x3005,
0x3008, 0x3021, 0x3008, 0x3021,
0x302a, 0x3031, 0x302a, 0x3031,
...@@ -742,25 +780,40 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -742,25 +780,40 @@ public final class NumericShaper implements java.io.Serializable {
0x33de, 0x33e0, 0x33de, 0x33e0,
0x33ff, 0x3400, 0x33ff, 0x3400,
0x4dc0, 0x4e00, 0x4dc0, 0x4e00,
0xa490, 0xa500, 0xa490, 0xa4d0,
0xa60d, 0xa610, 0xa60d, 0xa610,
0xa66f, 0xa680, 0xa66f, 0xa680,
0xa6f0, 0xa6f2,
0xa700, 0xa722, 0xa700, 0xa722,
0xa788, 0xa789, 0xa788, 0xa789,
0xa802, 0xa803, 0xa802, 0xa803,
0xa806, 0xa807, 0xa806, 0xa807,
0xa80b, 0xa80c, 0xa80b, 0xa80c,
0xa825, 0xa827, 0xa825, 0xa827,
0xa828, 0xa840, 0xa828, 0xa830,
0xa838, 0xa840,
0xa874, 0xa880, 0xa874, 0xa880,
0xa8c4, 0xa8ce, 0xa8c4, 0xa8ce,
0xa8e0, 0xa8f2,
0xa926, 0xa92e, 0xa926, 0xa92e,
0xa947, 0xa952, 0xa947, 0xa952,
0xa980, 0xa983,
0xa9b3, 0xa9b4,
0xa9b6, 0xa9ba,
0xa9bc, 0xa9bd,
0xaa29, 0xaa2f, 0xaa29, 0xaa2f,
0xaa31, 0xaa33, 0xaa31, 0xaa33,
0xaa35, 0xaa40, 0xaa35, 0xaa40,
0xaa43, 0xaa44, 0xaa43, 0xaa44,
0xaa4c, 0xaa4d, 0xaa4c, 0xaa4d,
0xaab0, 0xaab1,
0xaab2, 0xaab5,
0xaab7, 0xaab9,
0xaabe, 0xaac0,
0xaac1, 0xaac2,
0xabe5, 0xabe6,
0xabe8, 0xabe9,
0xabed, 0xabf0,
0xfb1e, 0xfb1f, 0xfb1e, 0xfb1f,
0xfb29, 0xfb2a, 0xfb29, 0xfb2a,
0xfd3e, 0xfd50, 0xfd3e, 0xfd50,
...@@ -775,12 +828,28 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -775,12 +828,28 @@ public final class NumericShaper implements java.io.Serializable {
0x1091f, 0x10920, 0x1091f, 0x10920,
0x10a01, 0x10a10, 0x10a01, 0x10a10,
0x10a38, 0x10a40, 0x10a38, 0x10a40,
0x10b39, 0x10b40,
0x10e60, 0x11000,
0x11001, 0x11002,
0x11038, 0x11047,
0x11052, 0x11066,
0x11080, 0x11082,
0x110b3, 0x110b7,
0x110b9, 0x110bb,
0x1d167, 0x1d16a, 0x1d167, 0x1d16a,
0x1d173, 0x1d183, 0x1d173, 0x1d183,
0x1d185, 0x1d18c, 0x1d185, 0x1d18c,
0x1d1aa, 0x1d1ae, 0x1d1aa, 0x1d1ae,
0x1d200, 0x1d360, 0x1d200, 0x1d360,
0x1d7ce, 0x20000, 0x1d6db, 0x1d6dc,
0x1d715, 0x1d716,
0x1d74f, 0x1d750,
0x1d789, 0x1d78a,
0x1d7c3, 0x1d7c4,
0x1d7ce, 0x1f110,
0x1f300, 0x1f48c,
0x1f48d, 0x1f524,
0x1f525, 0x20000,
0xe0001, 0xf0000, 0xe0001, 0xf0000,
0x10fffe, 0x10ffff // sentinel 0x10fffe, 0x10ffff // sentinel
}; };
...@@ -947,6 +1016,14 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -947,6 +1016,14 @@ public final class NumericShaper implements java.io.Serializable {
&& rangeSet.contains(Range.ARABIC)) { && rangeSet.contains(Range.ARABIC)) {
rangeSet.remove(Range.ARABIC); rangeSet.remove(Range.ARABIC);
} }
// As in the case above, give precedence to TAI_THAM_THAM if both
// TAI_THAM_HORA and TAI_THAM_THAM are specified.
if (rangeSet.contains(Range.TAI_THAM_THAM)
&& rangeSet.contains(Range.TAI_THAM_HORA)) {
rangeSet.remove(Range.TAI_THAM_HORA);
}
rangeArray = rangeSet.toArray(new Range[rangeSet.size()]); rangeArray = rangeSet.toArray(new Range[rangeSet.size()]);
if (rangeArray.length > BSEARCH_THRESHOLD) { if (rangeArray.length > BSEARCH_THRESHOLD) {
// sort rangeArray for binary search // sort rangeArray for binary search
......
/* /*
* Copyright (c) 2005, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -1472,19 +1472,9 @@ public final class NormalizerImpl { ...@@ -1472,19 +1472,9 @@ public final class NormalizerImpl {
} }
--remove; --remove;
} }
} else if(value2!=0) { } else if(value2!=0) { // for U+1109A, U+1109C, and U+110AB
/* the composition is longer than the starter,
* move the intermediate characters back one */
starterIsSupplementary=true; starterIsSupplementary=true;
/* temporarily increment for the loop boundary */ args.source[starter+1]=(char)value2;
++starter;
q=remove;
r=++remove;
while(starter<q) {
args.source[--r]=args.source[--q];
}
args.source[starter]=(char)value2;
--starter; /* undo the temporary increment */
/* } else { both are on the BMP, nothing more to do */ /* } else { both are on the BMP, nothing more to do */
} }
......
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
/* /*
* @test * @test
* @bug 6842557 6943963 * @bug 6842557 6943963 6959267
* @summary confirm that shaping works as expected. (Mainly for new characters which were added in Unicode 5) * @summary confirm that shaping works as expected. (Mainly for new characters which were added in Unicode 5 and 6)
* used where appropriate. * used where appropriate.
*/ */
...@@ -39,6 +39,7 @@ public class ShapingTest { ...@@ -39,6 +39,7 @@ public class ShapingTest {
public static void main(String[] args) { public static void main(String[] args) {
test6842557(); test6842557();
test6943963(); test6943963();
test6903266();
if (err) { if (err) {
throw new RuntimeException("shape() returned unexpected value."); throw new RuntimeException("shape() returned unexpected value.");
...@@ -109,6 +110,34 @@ public class ShapingTest { ...@@ -109,6 +110,34 @@ public class ShapingTest {
checkResult("Range.ARABIC, Range.EASTERN_ARABIC", ns, given, expected_EASTERN_ARABIC); checkResult("Range.ARABIC, Range.EASTERN_ARABIC", ns, given, expected_EASTERN_ARABIC);
} }
/**
 * Exercises contextual shaping for the TAI_THAM_HORA, TAI_THAM_THAM,
 * JAVANESE and MEETEI_MAYEK ranges, including the case where both Tai Tham
 * ranges are requested at the same time. Each scenario is handed to
 * checkResult together with the text expected after shaping.
 */
private static void test6903266() {
    // Every Tai Tham scenario below starts from the same input text.
    final String taiThamText = "\u1a20 012";
    final String taiThamThamShaped = "\u1a20 \u1a90\u1a91\u1a92";

    // Tai Tham Hora requested on its own.
    NumericShaper shaper =
        getContextualShaper(EnumSet.of(Range.TAI_THAM_HORA));
    checkResult("Range.TAI_THAM_HORA", shaper,
                taiThamText, "\u1a20 \u1a80\u1a81\u1a82");

    // Both Tai Tham ranges requested: Tham digits are prioritized.
    shaper = getContextualShaper(
        EnumSet.of(Range.TAI_THAM_HORA, Range.TAI_THAM_THAM));
    checkResult("Range.TAI_THAM_HORA, Range.TAI_THAM_THAM", shaper,
                taiThamText, taiThamThamShaped);

    // Javanese.
    shaper = getContextualShaper(EnumSet.of(Range.JAVANESE));
    checkResult("Range.JAVANESE", shaper,
                "\ua984 012", "\ua984 \ua9d0\ua9d1\ua9d2");

    // Tai Tham Tham requested on its own.
    shaper = getContextualShaper(EnumSet.of(Range.TAI_THAM_THAM));
    checkResult("Range.TAI_THAM_THAM", shaper,
                taiThamText, taiThamThamShaped);

    // Meetei Mayek.
    shaper = getContextualShaper(EnumSet.of(Range.MEETEI_MAYEK));
    checkResult("Range.MEETEI_MAYEK", shaper,
                "\uabc0 012", "\uabc0 \uabf0\uabf1\uabf2");
}
private static void checkResult(String ranges, NumericShaper ns, private static void checkResult(String ranges, NumericShaper ns,
String given, String expected) { String given, String expected) {
char[] text = given.toCharArray(); char[] text = given.toCharArray();
......
/** /**
* @test * @test
* @bug 6945564 * @bug 6945564 6959267
* @summary Check that the j.l.Character.UnicodeScript * @summary Check that the j.l.Character.UnicodeScript
* @ignore don't run until #6903266 is integrated
*/ */
import java.io.*; import java.io.*;
...@@ -15,11 +14,15 @@ public class CheckScript { ...@@ -15,11 +14,15 @@ public class CheckScript {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
if (args.length != 1) { BufferedReader sbfr = null;
System.out.println("java CharacterScript script.txt"); if (args.length == 0) {
System.exit(1); sbfr = new BufferedReader(new FileReader(new File(System.getProperty("test.src", "."), "Scripts.txt")));
} else if (args.length == 1) {
sbfr = new BufferedReader(new FileReader(args[0]));
} else {
System.out.println("java CharacterScript Scripts.txt");
throw new RuntimeException("Datafile name should be specified.");
} }
BufferedReader sbfr = new BufferedReader(new FileReader(args[0]));
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher(""); Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s+;\\s+(\\w+)\\s+#.*").matcher("");
String line = null; String line = null;
HashMap<String,ArrayList<Integer>> scripts = new HashMap<>(); HashMap<String,ArrayList<Integer>> scripts = new HashMap<>();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册