提交 bc3e9700 编写于 作者: S sherman

7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard

Summary: updated j.l.c.isLowerCase/isUpperCase
Reviewed-by: okutsu
上级 175817fd
...@@ -356,6 +356,7 @@ JAVA_JAVA_java = \ ...@@ -356,6 +356,7 @@ JAVA_JAVA_java = \
java/util/regex/Matcher.java \ java/util/regex/Matcher.java \
java/util/regex/MatchResult.java \ java/util/regex/MatchResult.java \
java/util/regex/ASCII.java \ java/util/regex/ASCII.java \
java/util/regex/UnicodeProp.java \
java/util/regex/PatternSyntaxException.java \ java/util/regex/PatternSyntaxException.java \
java/util/prefs/Preferences.java \ java/util/prefs/Preferences.java \
java/util/prefs/AbstractPreferences.java \ java/util/prefs/AbstractPreferences.java \
......
...@@ -345,30 +345,35 @@ $(GENSRCDIR)/java/lang/CharacterDataLatin1.java \ ...@@ -345,30 +345,35 @@ $(GENSRCDIR)/java/lang/CharacterDataLatin1.java \
-template $(CHARACTERDATA)/CharacterDataLatin1.java.template \ -template $(CHARACTERDATA)/CharacterDataLatin1.java.template \
-spec $(UNICODEDATA)/UnicodeData.txt \ -spec $(UNICODEDATA)/UnicodeData.txt \
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \ -specialcasing $(UNICODEDATA)/SpecialCasing.txt \
-proplist $(UNICODEDATA)/PropList.txt \
-o $(GENSRCDIR)/java/lang/CharacterDataLatin1.java -string \ -o $(GENSRCDIR)/java/lang/CharacterDataLatin1.java -string \
-usecharforbyte -latin1 8 -usecharforbyte -latin1 8
$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 0 \ $(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 0 \
-template $(CHARACTERDATA)/CharacterData00.java.template \ -template $(CHARACTERDATA)/CharacterData00.java.template \
-spec $(UNICODEDATA)/UnicodeData.txt \ -spec $(UNICODEDATA)/UnicodeData.txt \
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \ -specialcasing $(UNICODEDATA)/SpecialCasing.txt \
-proplist $(UNICODEDATA)/PropList.txt \
-o $(GENSRCDIR)/java/lang/CharacterData00.java -string \ -o $(GENSRCDIR)/java/lang/CharacterData00.java -string \
-usecharforbyte 11 4 1 -usecharforbyte 11 4 1
$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 1 \ $(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 1 \
-template $(CHARACTERDATA)/CharacterData01.java.template \ -template $(CHARACTERDATA)/CharacterData01.java.template \
-spec $(UNICODEDATA)/UnicodeData.txt \ -spec $(UNICODEDATA)/UnicodeData.txt \
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \ -specialcasing $(UNICODEDATA)/SpecialCasing.txt \
-proplist $(UNICODEDATA)/PropList.txt \
-o $(GENSRCDIR)/java/lang/CharacterData01.java -string \ -o $(GENSRCDIR)/java/lang/CharacterData01.java -string \
-usecharforbyte 11 4 1 -usecharforbyte 11 4 1
$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 2 \ $(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 2 \
-template $(CHARACTERDATA)/CharacterData02.java.template \ -template $(CHARACTERDATA)/CharacterData02.java.template \
-spec $(UNICODEDATA)/UnicodeData.txt \ -spec $(UNICODEDATA)/UnicodeData.txt \
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \ -specialcasing $(UNICODEDATA)/SpecialCasing.txt \
-proplist $(UNICODEDATA)/PropList.txt \
-o $(GENSRCDIR)/java/lang/CharacterData02.java -string \ -o $(GENSRCDIR)/java/lang/CharacterData02.java -string \
-usecharforbyte 11 4 1 -usecharforbyte 11 4 1
$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 14 \ $(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 14 \
-template $(CHARACTERDATA)/CharacterData0E.java.template \ -template $(CHARACTERDATA)/CharacterData0E.java.template \
-spec $(UNICODEDATA)/UnicodeData.txt \ -spec $(UNICODEDATA)/UnicodeData.txt \
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \ -specialcasing $(UNICODEDATA)/SpecialCasing.txt \
-proplist $(UNICODEDATA)/PropList.txt \
-o $(GENSRCDIR)/java/lang/CharacterData0E.java -string \ -o $(GENSRCDIR)/java/lang/CharacterData0E.java -string \
-usecharforbyte 11 4 1 -usecharforbyte 11 4 1
......
...@@ -73,11 +73,37 @@ class CharacterData00 extends CharacterData { ...@@ -73,11 +73,37 @@ class CharacterData00 extends CharacterData {
return props; return props;
} }
/**
 * Returns the extended property bits for {@code ch}.
 * The cast to char keeps only the low 16 bits of {@code ch} as the lookup
 * offset; the LookupEx expression is a template placeholder that the
 * generator expands into an access of the 16-bit "B" table.
 */
int getPropertiesEx(int ch) {
char offset = (char)ch;
int props = $$LookupEx(offset);
return props;
}
int getType(int ch) { int getType(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return (props & $$maskType); return (props & $$maskType);
} }
/**
 * Tests whether the Other_Lowercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherLowercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherLowercase) != 0;
}
/**
 * Tests whether the Other_Uppercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherUppercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherUppercase) != 0;
}
/**
 * Tests whether the Other_Alphabetic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherAlphabetic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherAlphabetic) != 0;
}
/**
 * Tests whether the Ideographic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isIdeographic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskIdeographic) != 0;
}
boolean isJavaIdentifierStart(int ch) { boolean isJavaIdentifierStart(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return ((props & $$maskIdentifierInfo) >= $$lowJavaStart); return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
......
...@@ -72,11 +72,37 @@ class CharacterData01 extends CharacterData { ...@@ -72,11 +72,37 @@ class CharacterData01 extends CharacterData {
return props; return props;
} }
/**
 * Returns the extended property bits for {@code ch}.
 * The cast to char keeps only the low 16 bits of {@code ch} as the lookup
 * offset; the LookupEx expression is a template placeholder that the
 * generator expands into an access of the 16-bit "B" table.
 */
int getPropertiesEx(int ch) {
char offset = (char)ch;
int props = $$LookupEx(offset);
return props;
}
int getType(int ch) { int getType(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return (props & $$maskType); return (props & $$maskType);
} }
/**
 * Tests whether the Other_Lowercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherLowercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherLowercase) != 0;
}
/**
 * Tests whether the Other_Uppercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherUppercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherUppercase) != 0;
}
/**
 * Tests whether the Other_Alphabetic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherAlphabetic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherAlphabetic) != 0;
}
/**
 * Tests whether the Ideographic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isIdeographic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskIdeographic) != 0;
}
boolean isJavaIdentifierStart(int ch) { boolean isJavaIdentifierStart(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return ((props & $$maskIdentifierInfo) >= $$lowJavaStart); return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
......
...@@ -66,11 +66,37 @@ class CharacterData02 extends CharacterData { ...@@ -66,11 +66,37 @@ class CharacterData02 extends CharacterData {
*/ */
int getProperties(int ch) { int getProperties(int ch) {
char offset = (char)ch; char offset = (char)ch;
int props = $$Lookup(offset); int props = $$Lookup(offset);
return props; return props;
} }
/**
 * Returns the extended property bits for {@code ch}.
 * The cast to char keeps only the low 16 bits of {@code ch} as the lookup
 * offset; the LookupEx expression is a template placeholder that the
 * generator expands into an access of the 16-bit "B" table.
 */
int getPropertiesEx(int ch) {
char offset = (char)ch;
int props = $$LookupEx(offset);
return props;
}
/**
 * Tests whether the Other_Lowercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherLowercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherLowercase) != 0;
}
/**
 * Tests whether the Other_Uppercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherUppercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherUppercase) != 0;
}
/**
 * Tests whether the Other_Alphabetic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherAlphabetic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherAlphabetic) != 0;
}
/**
 * Tests whether the Ideographic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isIdeographic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskIdeographic) != 0;
}
int getType(int ch) { int getType(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return (props & $$maskType); return (props & $$maskType);
......
...@@ -66,11 +66,37 @@ class CharacterData0E extends CharacterData { ...@@ -66,11 +66,37 @@ class CharacterData0E extends CharacterData {
*/ */
int getProperties(int ch) { int getProperties(int ch) {
char offset = (char)ch; char offset = (char)ch;
int props = $$Lookup(offset); int props = $$Lookup(offset);
return props; return props;
} }
/**
 * Returns the extended property bits for {@code ch}.
 * The cast to char keeps only the low 16 bits of {@code ch} as the lookup
 * offset; the LookupEx expression is a template placeholder that the
 * generator expands into an access of the 16-bit "B" table.
 */
int getPropertiesEx(int ch) {
char offset = (char)ch;
int props = $$LookupEx(offset);
return props;
}
/**
 * Tests whether the Other_Lowercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherLowercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherLowercase) != 0;
}
/**
 * Tests whether the Other_Uppercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherUppercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherUppercase) != 0;
}
/**
 * Tests whether the Other_Alphabetic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherAlphabetic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherAlphabetic) != 0;
}
/**
 * Tests whether the Ideographic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isIdeographic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskIdeographic) != 0;
}
int getType(int ch) { int getType(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return (props & $$maskType); return (props & $$maskType);
......
...@@ -67,11 +67,37 @@ class CharacterDataLatin1 extends CharacterData { ...@@ -67,11 +67,37 @@ class CharacterDataLatin1 extends CharacterData {
*/ */
int getProperties(int ch) { int getProperties(int ch) {
char offset = (char)ch; char offset = (char)ch;
int props = $$Lookup(offset); int props = $$Lookup(offset);
return props; return props;
} }
/**
 * Returns the extended property bits for {@code ch}.
 * The cast to char keeps only the low 16 bits of {@code ch} as the lookup
 * offset; the LookupEx expression is a template placeholder that the
 * generator expands into an access of the 16-bit "B" table.
 */
int getPropertiesEx(int ch) {
char offset = (char)ch;
int props = $$LookupEx(offset);
return props;
}
/**
 * Tests whether the Other_Lowercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherLowercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherLowercase) != 0;
}
/**
 * Tests whether the Other_Uppercase bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherUppercase(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherUppercase) != 0;
}
/**
 * Tests whether the Other_Alphabetic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isOtherAlphabetic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskOtherAlphabetic) != 0;
}
/**
 * Tests whether the Ideographic bit is set in the extended properties
 * of {@code ch}. The mask is a template placeholder filled in by the generator.
 */
boolean isIdeographic(int ch) {
int props = getPropertiesEx(ch);
return (props & $$maskIdeographic) != 0;
}
int getType(int ch) { int getType(int ch) {
int props = getProperties(ch); int props = getProperties(ch);
return (props & $$maskType); return (props & $$maskType);
......
此差异已折叠。
/* /*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
...@@ -34,6 +33,7 @@ import java.io.PrintWriter; ...@@ -34,6 +33,7 @@ import java.io.PrintWriter;
import java.io.BufferedWriter; import java.io.BufferedWriter;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.File; import java.io.File;
import java.util.List;
import build.tools.generatecharacter.CharacterName; import build.tools.generatecharacter.CharacterName;
...@@ -68,18 +68,17 @@ public class GenerateCharacter { ...@@ -68,18 +68,17 @@ public class GenerateCharacter {
final static boolean DEBUG = false; final static boolean DEBUG = false;
final static int MAX_UNICODE_VALUE = 0xFFFF;
final static String commandMarker = "$$"; final static String commandMarker = "$$";
static String ROOT = ""; static String ROOT = "";
static String DefaultUnicodeSpecFileName = ROOT + "UnicodeData.txt"; static String DefaultUnicodeSpecFileName = ROOT + "UnicodeData.txt";
static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt"; static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
static String DefaultPropListFileName = ROOT + "PropList.txt";
static String DefaultJavaTemplateFileName = ROOT + "Character.java.template"; static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
static String DefaultJavaOutputFileName = ROOT + "Character.java"; static String DefaultJavaOutputFileName = ROOT + "Character.java";
static String DefaultCTemplateFileName = ROOT + "Character.c.template"; static String DefaultCTemplateFileName = ROOT + "Character.c.template";
static String DefaultCOutputFileName = ROOT + "Character.c"; static String DefaultCOutputFileName = ROOT + "Character.c";
static String CharacterDataClassName = "CharacterData"; static int plane = 0;
static int plane = 0;
/* The overall idea is that, in the generated Character class source code, /* The overall idea is that, in the generated Character class source code,
most character property data is stored in a special multi-level table whose most character property data is stored in a special multi-level table whose
...@@ -105,7 +104,11 @@ public class GenerateCharacter { ...@@ -105,7 +104,11 @@ public class GenerateCharacter {
entries are short rather than byte). entries are short rather than byte).
*/ */
/* The character properties are currently encoded into 32 bits in the following manner: /* The character properties are currently encoded into A (32 bits)and B (16 bits)
two parts.
A: the low 32 bits are defined in the following manner:
1 bit Mirrored property. 1 bit Mirrored property.
4 bits Bidirectional category (see below) (unused if -nobidi switch specified) 4 bits Bidirectional category (see below) (unused if -nobidi switch specified)
9 bits A signed offset used for converting case . 9 bits A signed offset used for converting case .
...@@ -148,6 +151,14 @@ public class GenerateCharacter { ...@@ -148,6 +151,14 @@ public class GenerateCharacter {
will produce the desired numeric value. will produce the desired numeric value.
5 bits The digit offset (see description of previous field) 5 bits The digit offset (see description of previous field)
5 bits Character type (see below) 5 bits Character type (see below)
B: the high 16 bits are defined as:
1 bit Other_Lowercase property
1 bit Other_Uppercase property
1 bit Other_Alphabetic property
1 bit Other_Math property
1 bit Ideographic property
1 bit Noncharacter codepoint property
*/ */
...@@ -173,9 +184,22 @@ public class GenerateCharacter { ...@@ -173,9 +184,22 @@ public class GenerateCharacter {
// case offset are 9 bits // case offset are 9 bits
maskCase = 0x01FF, maskCase = 0x01FF,
shiftBidi = 27, maskBidi = 0x78000000, shiftBidi = 27, maskBidi = 0x78000000,
shiftMirrored = 31, maskMirrored = 0x80000000, shiftMirrored = 31, //maskMirrored = 0x80000000,
shiftPlane = 16, maskPlane = 0xFF0000; shiftPlane = 16, maskPlane = 0xFF0000;
// maskMirrored must be a long now that map entries carry property bits above
// bit 31: as an int, 0x80000000 is negative and would sign-extend into those
// upper bits when widened to long.
private static final long maskMirrored = 0x80000000L;
// Bit masks for the extended ("B" table) properties described above. They
// occupy bits 32-37 of each long map entry, i.e. directly above the 32-bit
// "A" properties. NOTE: buildMap currently leaves the Other_Math and
// Noncharacter code point bits unpopulated (their addExProp calls are
// commented out).
private static final long
maskOtherLowercase = 0x100000000L,
maskOtherUppercase = 0x200000000L,
maskOtherAlphabetic = 0x400000000L,
maskOtherMath = 0x800000000L,
maskIdeographic = 0x1000000000L,
maskNoncharacterCP = 0x2000000000L;
// Can compare masked values with these to determine // Can compare masked values with these to determine
// numeric or lexical types. // numeric or lexical types.
public static int public static int
...@@ -258,7 +282,7 @@ public class GenerateCharacter { ...@@ -258,7 +282,7 @@ public class GenerateCharacter {
* The specification file is assumed to contain its data in sorted order by * The specification file is assumed to contain its data in sorted order by
* character code; as a result, the array passed as an argument to this method * character code; as a result, the array passed as an argument to this method
* has its components in the same sorted order, with one entry for each defined * has its components in the same sorted order, with one entry for each defined
* Unicode character or character range. (A range is indicated by two consecutive * Unicode character or character range. (A range is indicated by two consecutive
* entries, such that the name of the first entry begins with "<" and ends with * entries, such that the name of the first entry begins with "<" and ends with
* "First>" and the second entry begins with "<" and ends with "Last>".) This is * "First>" and the second entry begins with "<" and ends with "Last>".) This is
* therefore a sparse representation of the character property data. * therefore a sparse representation of the character property data.
...@@ -282,7 +306,8 @@ public class GenerateCharacter { ...@@ -282,7 +306,8 @@ public class GenerateCharacter {
* @see GenerateCharacter#buildOne * @see GenerateCharacter#buildOne
*/ */
static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps) { static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList)
{
long[] result; long[] result;
if (bLatin1 == true) { if (bLatin1 == true) {
result = new long[256]; result = new long[256];
...@@ -290,13 +315,13 @@ public class GenerateCharacter { ...@@ -290,13 +315,13 @@ public class GenerateCharacter {
result = new long[1<<16]; result = new long[1<<16];
} }
int k=0; int k=0;
int codePoint = plane<<16; int codePoint = plane<<16;
UnicodeSpec nonCharSpec = new UnicodeSpec(); UnicodeSpec nonCharSpec = new UnicodeSpec();
for (int j = 0; j < data.length && k < result.length; j++) { for (int j = 0; j < data.length && k < result.length; j++) {
if (data[j].codePoint == codePoint) { if (data[j].codePoint == codePoint) {
result[k] = buildOne(codePoint, data[j], specialMaps); result[k] = buildOne(codePoint, data[j], specialMaps);
++k; ++k;
++codePoint; ++codePoint;
} }
else if(data[j].codePoint > codePoint) { else if(data[j].codePoint > codePoint) {
if (data[j].name.endsWith("Last>")) { if (data[j].name.endsWith("Last>")) {
...@@ -304,7 +329,7 @@ public class GenerateCharacter { ...@@ -304,7 +329,7 @@ public class GenerateCharacter {
while (codePoint < data[j].codePoint && k < result.length) { while (codePoint < data[j].codePoint && k < result.length) {
result[k] = buildOne(codePoint, data[j], specialMaps); result[k] = buildOne(codePoint, data[j], specialMaps);
++k; ++k;
++codePoint; ++codePoint;
} }
} }
else { else {
...@@ -312,15 +337,14 @@ public class GenerateCharacter { ...@@ -312,15 +337,14 @@ public class GenerateCharacter {
while (codePoint < data[j].codePoint && k < result.length) { while (codePoint < data[j].codePoint && k < result.length) {
result[k] = buildOne(codePoint, nonCharSpec, specialMaps); result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
++k; ++k;
++codePoint; ++codePoint;
} }
} }
k = data[j].codePoint & 0xFFFF; k = data[j].codePoint & 0xFFFF;
codePoint = data[j].codePoint; codePoint = data[j].codePoint;
result[k] = buildOne(codePoint, data[j], specialMaps); result[k] = buildOne(codePoint, data[j], specialMaps);
++k; ++k;
++codePoint; ++codePoint;
} }
else { else {
System.out.println("An error has occured during spec mapping."); System.out.println("An error has occured during spec mapping.");
...@@ -333,8 +357,17 @@ public class GenerateCharacter { ...@@ -333,8 +357,17 @@ public class GenerateCharacter {
while (k < result.length) { while (k < result.length) {
result[k] = buildOne(codePoint, nonCharSpec, specialMaps); result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
++k; ++k;
++codePoint; ++codePoint;
} }
// now add all extra supported properties from PropList, to the
// upper 16-bit
addExProp(result, propList, "Other_Lowercase", maskOtherLowercase);
addExProp(result, propList, "Other_Uppercase", maskOtherUppercase);
addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic);
addExProp(result, propList, "Ideographic", maskIdeographic);
//addExProp(result, propList, "Other_Math", maskOtherMath);
//addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
return result; return result;
} }
...@@ -381,15 +414,15 @@ public class GenerateCharacter { ...@@ -381,15 +414,15 @@ public class GenerateCharacter {
// record the general category // record the general category
resultA |= us.generalCategory; resultA |= us.generalCategory;
// record the numeric properties // record the numeric properties
NUMERIC: { NUMERIC: {
STRANGE: { STRANGE: {
int val = 0; int val = 0;
// c is A-Z // c is A-Z
if ((c >= 0x0041) && (c <= 0x005A)) { if ((c >= 0x0041) && (c <= 0x005A)) {
val = c - 0x0041; val = c - 0x0041;
resultA |= valueJavaSupradecimal; resultA |= valueJavaSupradecimal;
// c is a-z // c is a-z
} else if ((c >= 0x0061) && (c <= 0x007A)) { } else if ((c >= 0x0061) && (c <= 0x007A)) {
val = c - 0x0061; val = c - 0x0061;
resultA |= valueJavaSupradecimal; resultA |= valueJavaSupradecimal;
...@@ -428,7 +461,7 @@ public class GenerateCharacter { ...@@ -428,7 +461,7 @@ public class GenerateCharacter {
resultA |= valueStrangeNumeric; resultA |= valueStrangeNumeric;
} // end NUMERIC } // end NUMERIC
// record case mapping // record case mapping
int offset = 0; int offset = 0;
// might have a 1:M mapping // might have a 1:M mapping
int specialMap = SpecialCaseMap.find(c, specialCaseMaps); int specialMap = SpecialCaseMap.find(c, specialCaseMaps);
...@@ -458,12 +491,12 @@ public class GenerateCharacter { ...@@ -458,12 +491,12 @@ public class GenerateCharacter {
} }
} }
if ((us.hasTitleMap() && us.titleMap != us.upperMap) || if ((us.hasTitleMap() && us.titleMap != us.upperMap) ||
(bHasUpper && us.hasLowerMap())) { (bHasUpper && us.hasLowerMap())) {
resultA |= maskTitleCase; resultA |= maskTitleCase;
} }
if (bHasUpper && !us.hasLowerMap() && !us.hasTitleMap() && verbose) { if (bHasUpper && !us.hasLowerMap() && !us.hasTitleMap() && verbose) {
System.out.println("Warning: Character " + hex4(c) + " has upper but " + System.out.println("Warning: Character " + hex4(c) + " has upper but " +
"no title case; Java won't know this"); "no title case; Java won't know this");
} }
if (offset < minOffsetSeen) minOffsetSeen = offset; if (offset < minOffsetSeen) minOffsetSeen = offset;
if (offset > maxOffsetSeen) maxOffsetSeen = offset; if (offset > maxOffsetSeen) maxOffsetSeen = offset;
...@@ -475,8 +508,7 @@ public class GenerateCharacter { ...@@ -475,8 +508,7 @@ public class GenerateCharacter {
} }
resultA |= ((offset & maskCase) << shiftCaseOffset); resultA |= ((offset & maskCase) << shiftCaseOffset);
// record lexical info about this character
// record lexical info about this character
if (us.generalCategory == UnicodeSpec.LOWERCASE_LETTER if (us.generalCategory == UnicodeSpec.LOWERCASE_LETTER
|| us.generalCategory == UnicodeSpec.UPPERCASE_LETTER || us.generalCategory == UnicodeSpec.UPPERCASE_LETTER
|| us.generalCategory == UnicodeSpec.TITLECASE_LETTER || us.generalCategory == UnicodeSpec.TITLECASE_LETTER
...@@ -539,6 +571,16 @@ public class GenerateCharacter { ...@@ -539,6 +571,16 @@ public class GenerateCharacter {
return resultA; return resultA;
} }
/**
 * ORs {@code mask} into the map entry of every code point that
 * {@code propList} reports for the property {@code prop}.
 * Code points at or beyond {@code map.length} are ignored, and a property
 * that is not present in {@code propList} leaves the map untouched.
 *
 * @param map      property map to update in place, indexed by code point offset
 * @param propList parsed property list to take the code points from
 * @param prop     name of the property to look up
 * @param mask     bit(s) to set on each matching entry
 */
static void addExProp(long[] map, PropList propList, String prop, long mask) {
    List<Integer> codePoints = propList.codepoints(prop);
    if (codePoints == null) {
        return;
    }
    final int limit = map.length;
    for (int index : codePoints) {
        if (index >= limit) {
            continue;
        }
        map[index] |= mask;
    }
}
/** /**
* This is the heart of the table compression strategy. The inputs are a map * This is the heart of the table compression strategy. The inputs are a map
* and a number of bits (size). The map is simply an array of long integer values; * and a number of bits (size). The map is simply an array of long integer values;
...@@ -645,8 +687,8 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -645,8 +687,8 @@ OUTER: for (int i = 0; i < n; i += m) {
*/ */
static void generateCharacterClass(String theTemplateFileName, static void generateCharacterClass(String theTemplateFileName,
String theOutputFileName) String theOutputFileName)
throws FileNotFoundException, IOException { throws FileNotFoundException, IOException {
BufferedReader in = new BufferedReader(new FileReader(theTemplateFileName)); BufferedReader in = new BufferedReader(new FileReader(theTemplateFileName));
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(theOutputFileName))); PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(theOutputFileName)));
out.println(commentStart + out.println(commentStart +
...@@ -719,6 +761,9 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -719,6 +761,9 @@ OUTER: for (int i = 0; i < n; i += m) {
if (x.length() >= 9 && x.substring(0, 7).equals("Lookup(") && if (x.length() >= 9 && x.substring(0, 7).equals("Lookup(") &&
x.substring(x.length()-1).equals(")") ) x.substring(x.length()-1).equals(")") )
return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32)); return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32));
if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") &&
x.substring(x.length()-1).equals(")") )
return genAccess("B", x.substring(9, x.length()-1), 16);
if (x.equals("shiftType")) return Long.toString(shiftType); if (x.equals("shiftType")) return Long.toString(shiftType);
if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo); if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo);
if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo); if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo);
...@@ -731,6 +776,10 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -731,6 +776,10 @@ OUTER: for (int i = 0; i < n; i += m) {
if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase); if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase);
if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase); if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase);
if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase); if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase);
if (x.equals("maskOtherLowercase")) return "0x" + hex4(maskOtherLowercase >> 32);
if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable); if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart); if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart); if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
...@@ -899,7 +948,7 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -899,7 +948,7 @@ OUTER: for (int i = 0; i < n; i += m) {
// If we ever need more than 32 bits to represent the character properties, // If we ever need more than 32 bits to represent the character properties,
// then a table "B" may be needed as well. // then a table "B" may be needed as well.
// genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false); genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false);
totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2); totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2);
result.append(commentStart); result.append(commentStart);
...@@ -1080,9 +1129,9 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -1080,9 +1129,9 @@ OUTER: for (int i = 0; i < n; i += m) {
*/ */
static void genTable(StringBuffer result, String name, static void genTable(StringBuffer result, String name,
long[] table, int extract, int bits, int size, long[] table, int extract, int bits, int size,
boolean preshifted, int shift, boolean hexFormat, boolean preshifted, int shift, boolean hexFormat,
boolean properties, boolean hexComment) { boolean properties, boolean hexComment) {
String atype = bits == 1 ? (Csyntax ? "unsigned long" : "int") : String atype = bits == 1 ? (Csyntax ? "unsigned long" : "int") :
bits == 2 ? (Csyntax ? "unsigned long" : "int") : bits == 2 ? (Csyntax ? "unsigned long" : "int") :
...@@ -1137,7 +1186,12 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -1137,7 +1186,12 @@ OUTER: for (int i = 0; i < n; i += m) {
char ch = '\u0000'; char ch = '\u0000';
int charsPerEntry = -entriesPerChar; int charsPerEntry = -entriesPerChar;
for (int j=0; j<table.length; ++j) { for (int j=0; j<table.length; ++j) {
long entry = table[j] >> extract; //long entry = table[j] >> extract;
long entry;
if ("A".equals(name))
entry = (table[j] & 0xffffffffL) >> extract;
else
entry = (table[j] >> extract);
if (shiftEntries) entry <<= shift; if (shiftEntries) entry <<= shift;
if (entry >= (1L << bits)) { if (entry >= (1L << bits)) {
FAIL("Entry too big"); FAIL("Entry too big");
...@@ -1549,6 +1603,7 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -1549,6 +1603,7 @@ OUTER: for (int i = 0; i < n; i += m) {
static String OutputFileName = null; static String OutputFileName = null;
static String UnicodeSpecFileName = null; // liu static String UnicodeSpecFileName = null; // liu
static String SpecialCasingFileName = null; static String SpecialCasingFileName = null;
static String PropListFileName = null;
static boolean useCharForByte = false; static boolean useCharForByte = false;
static int[] sizes; static int[] sizes;
static int bins = 0; // liu; if > 0, then perform search static int bins = 0; // liu; if > 0, then perform search
...@@ -1668,20 +1723,28 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -1668,20 +1723,28 @@ OUTER: for (int i = 0; i < n; i += m) {
SpecialCasingFileName = args[++j]; SpecialCasingFileName = args[++j];
} }
} }
else if (args[j].equals("-plane")) { else if (args[j].equals("-proplist")) {
if (j == args.length -1) { if (j == args.length -1) {
FAIL("Plane number missing after -plane"); FAIL("File name missing after -proplist");
} }
else { else {
plane = Integer.parseInt(args[++j]); PropListFileName = args[++j];
} }
if (plane > 0) { }
bLatin1 = false; else if (args[j].equals("-plane")) {
} if (j == args.length -1) {
} FAIL("Plane number missing after -plane");
else if ("-usecharforbyte".equals(args[j])) { }
useCharForByte = true; else {
} plane = Integer.parseInt(args[++j]);
}
if (plane > 0) {
bLatin1 = false;
}
}
else if ("-usecharforbyte".equals(args[j])) {
useCharForByte = true;
}
else if (args[j].equals("-latin1")) { else if (args[j].equals("-latin1")) {
bLatin1 = true; bLatin1 = true;
plane = 0; plane = 0;
...@@ -1728,6 +1791,10 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -1728,6 +1791,10 @@ OUTER: for (int i = 0; i < n; i += m) {
SpecialCasingFileName = DefaultSpecialCasingFileName; SpecialCasingFileName = DefaultSpecialCasingFileName;
desc.append(" [-specialcasing " + SpecialCasingFileName + ']'); desc.append(" [-specialcasing " + SpecialCasingFileName + ']');
} }
if (PropListFileName == null) {
PropListFileName = DefaultPropListFileName;
desc.append(" [-proplist " + PropListFileName + ']');
}
if (TemplateFileName == null) { if (TemplateFileName == null) {
TemplateFileName = (Csyntax ? DefaultCTemplateFileName TemplateFileName = (Csyntax ? DefaultCTemplateFileName
: DefaultJavaTemplateFileName); : DefaultJavaTemplateFileName);
...@@ -1877,12 +1944,13 @@ OUTER: for (int i = 0; i < n; i += m) { ...@@ -1877,12 +1944,13 @@ OUTER: for (int i = 0; i < n; i += m) {
try { try {
UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane); UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane); specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
if (verbose) { if (verbose) {
System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
} }
long[] map = buildMap(data, specialCaseMaps); long[] map = buildMap(data, specialCaseMaps, propList);
if (verbose) { if (verbose) {
System.err.println("Completed building of initial map"); System.err.println("Completed building of initial map");
} }
......
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package build.tools.generatecharacter;
import java.util.regex.*;
import java.util.*;
import java.io.*;
/**
* A PropList object contains the lists of code points that have
* the same Unicode property defined in PropList.txt
*
* @author Xueming Shen
*/
public class PropList {

    /**
     * Shape of a PropList.txt data line:
     * {@code <hex>[..<hex>] ; <Property_Name> # comment}.
     * Group 1 is the first code point, group 2 the optional last code point
     * of a range, group 3 the property name.
     */
    private static final Pattern DATA_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*");

    /**
     * Property name to the code points (as 16-bit offsets inside the
     * requested plane) that carry it, in file order.
     */
    private final Map<String, ArrayList<Integer>> propMap =
        new LinkedHashMap<String, ArrayList<Integer>>();

    /**
     * Reads a PropList.txt style file and keeps the entries that fall into
     * the given plane.
     *
     * @param file  the property list file to parse
     * @param plane the plane number (code point {@code >> 16}) to keep
     * @return the parsed property list
     * @throws IOException if the file cannot be opened or read
     */
    public static PropList readSpecFile(File file, int plane)
        throws IOException
    {
        return new PropList(file, plane);
    }

    /**
     * Returns the code points recorded for a property.
     *
     * @param name the property name as spelled in the file
     * @return the plane-relative offsets (0..0xffff) having the property,
     *         or {@code null} if the property was not seen for this plane
     */
    public List<Integer> codepoints(String name) {
        return propMap.get(name);
    }

    /**
     * Returns the names of all properties seen for this plane, in the order
     * they were first encountered.
     */
    public Set<String> names() {
        return propMap.keySet();
    }

    /**
     * Parses {@code file}, collecting for every property the code points
     * that lie in {@code plane}.
     *
     * Blank lines and lines starting with '#' are ignored; any other line
     * that does not look like a data line is reported on standard output
     * and skipped.
     */
    private PropList(File file, int plane) throws IOException {
        final int planeFirst = plane << 16;
        final int planeLast = planeFirst | 0xffff;

        BufferedReader sbfr = new BufferedReader(new FileReader(file));
        try {
            Matcher m = DATA_LINE.matcher("");
            String line;
            int lineNo = 0;
            while ((line = sbfr.readLine()) != null) {
                lineNo++;
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                int end = (m.group(2) == null) ? start
                                               : Integer.parseInt(m.group(2), 16);
                // Skip ranges that do not touch the requested plane at all.
                if (end < planeFirst || start > planeLast) {
                    continue;
                }
                String name = m.group(3);
                // Clip the range to the plane before reducing it to 16-bit
                // offsets, so that a range crossing a plane boundary
                // contributes exactly its in-plane part.
                int lo = Math.max(start, planeFirst) & 0xffff;
                int hi = Math.min(end, planeLast) & 0xffff;

                ArrayList<Integer> list = propMap.get(name);
                if (list == null) {
                    list = new ArrayList<Integer>();
                    propMap.put(name, list);
                }
                for (int cp = lo; cp <= hi; cp++) {
                    list.add(cp);
                }
            }
        } finally {
            // Release the file even when reading or parsing fails.
            sbfr.close();
        }
    }

    /**
     * Command line entry point for trying the parser by hand.
     *
     * @param args {@code args[0]} is the file to read, {@code args[1]} the
     *             plane number in any form accepted by {@link Integer#decode}
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        readSpecFile(new File(args[0]), Integer.decode(args[1]));
    }
}
...@@ -59,14 +59,14 @@ import java.util.Locale; ...@@ -59,14 +59,14 @@ import java.util.Locale;
* <p>The {@code char} data type (and therefore the value that a * <p>The {@code char} data type (and therefore the value that a
* {@code Character} object encapsulates) are based on the * {@code Character} object encapsulates) are based on the
* original Unicode specification, which defined characters as * original Unicode specification, which defined characters as
* fixed-width 16-bit entities. The Unicode standard has since been * fixed-width 16-bit entities. The Unicode Standard has since been
* changed to allow for characters whose representation requires more * changed to allow for characters whose representation requires more
* than 16 bits. The range of legal <em>code point</em>s is now * than 16 bits. The range of legal <em>code point</em>s is now
* U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
* (Refer to the <a * (Refer to the <a
* href="http://www.unicode.org/reports/tr27/#notation"><i> * href="http://www.unicode.org/reports/tr27/#notation"><i>
* definition</i></a> of the U+<i>n</i> notation in the Unicode * definition</i></a> of the U+<i>n</i> notation in the Unicode
* standard.) * Standard.)
* *
* <p><a name="BMP">The set of characters from U+0000 to U+FFFF is * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
...@@ -5200,7 +5200,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5200,7 +5200,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
* <p> * <p>
* A character is lowercase if its general category type, provided * A character is lowercase if its general category type, provided
* by {@code Character.getType(ch)}, is * by {@code Character.getType(ch)}, is
* {@code LOWERCASE_LETTER}. * {@code LOWERCASE_LETTER}, or it has contributory property
* Other_Lowercase as defined by the Unicode Standard.
* <p> * <p>
* The following are examples of lowercase characters: * The following are examples of lowercase characters:
* <p><blockquote><pre> * <p><blockquote><pre>
...@@ -5235,7 +5236,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5235,7 +5236,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
* <p> * <p>
* A character is lowercase if its general category type, provided * A character is lowercase if its general category type, provided
* by {@link Character#getType getType(codePoint)}, is * by {@link Character#getType getType(codePoint)}, is
* {@code LOWERCASE_LETTER}. * {@code LOWERCASE_LETTER}, or it has contributory property
* Other_Lowercase as defined by the Unicode Standard.
* <p> * <p>
* The following are examples of lowercase characters: * The following are examples of lowercase characters:
* <p><blockquote><pre> * <p><blockquote><pre>
...@@ -5257,7 +5259,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5257,7 +5259,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
* @since 1.5 * @since 1.5
*/ */
public static boolean isLowerCase(int codePoint) { public static boolean isLowerCase(int codePoint) {
return getType(codePoint) == Character.LOWERCASE_LETTER; return getType(codePoint) == Character.LOWERCASE_LETTER ||
CharacterData.of(codePoint).isOtherLowercase(codePoint);
} }
/** /**
...@@ -5265,6 +5268,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5265,6 +5268,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
* <p> * <p>
* A character is uppercase if its general category type, provided by * A character is uppercase if its general category type, provided by
* {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
* A character is also uppercase if it has contributory property Other_Uppercase, as defined by the Unicode Standard.
* <p> * <p>
* The following are examples of uppercase characters: * The following are examples of uppercase characters:
* <p><blockquote><pre> * <p><blockquote><pre>
...@@ -5298,7 +5302,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5298,7 +5302,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
* Determines if the specified character (Unicode code point) is an uppercase character. * Determines if the specified character (Unicode code point) is an uppercase character.
* <p> * <p>
* A character is uppercase if its general category type, provided by * A character is uppercase if its general category type, provided by
* {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}. * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
* or it has contributory property Other_Uppercase as defined by the Unicode Standard.
* <p> * <p>
* The following are examples of uppercase characters: * The following are examples of uppercase characters:
* <p><blockquote><pre> * <p><blockquote><pre>
...@@ -5320,7 +5325,8 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5320,7 +5325,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
* @since 1.5 * @since 1.5
*/ */
public static boolean isUpperCase(int codePoint) { public static boolean isUpperCase(int codePoint) {
return getType(codePoint) == Character.UPPERCASE_LETTER; return getType(codePoint) == Character.UPPERCASE_LETTER ||
CharacterData.of(codePoint).isOtherUppercase(codePoint);
} }
/** /**
...@@ -5724,6 +5730,52 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -5724,6 +5730,52 @@ class Character implements java.io.Serializable, Comparable<Character> {
return isJavaIdentifierPart(ch); return isJavaIdentifierPart(ch);
} }
/**
 * Determines whether the specified character (Unicode code point) is
 * alphabetic.
 * <p>
 * A character is alphabetic when its general category type, as reported by
 * {@link Character#getType(int) getType(codePoint)}, is one of
 * <ul>
 * <li> {@code UPPERCASE_LETTER}
 * <li> {@code LOWERCASE_LETTER}
 * <li> {@code TITLECASE_LETTER}
 * <li> {@code MODIFIER_LETTER}
 * <li> {@code OTHER_LETTER}
 * <li> {@code LETTER_NUMBER}
 * </ul>
 * or when it has the contributory property Other_Alphabetic as defined by
 * the Unicode Standard.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is a Unicode alphabetic
 *          character, {@code false} otherwise.
 * @since   1.7
 */
public static boolean isAlphabetic(int codePoint) {
    // One bit per general category that makes a character alphabetic.
    final int alphabeticCategories =
        (1 << Character.UPPERCASE_LETTER) |
        (1 << Character.LOWERCASE_LETTER) |
        (1 << Character.TITLECASE_LETTER) |
        (1 << Character.MODIFIER_LETTER)  |
        (1 << Character.OTHER_LETTER)     |
        (1 << Character.LETTER_NUMBER);

    if (((alphabeticCategories >> getType(codePoint)) & 1) != 0) {
        return true;
    }
    // Not alphabetic by category: fall back to the Other_Alphabetic lookup.
    return CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
}
/**
 * Determines whether the specified character (Unicode code point) is a
 * CJKV (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
 * the Unicode Standard.
 *
 * @param   codePoint the character (Unicode code point) to be tested.
 * @return  {@code true} if the character is a Unicode ideograph
 *          character, {@code false} otherwise.
 * @since   1.7
 */
public static boolean isIdeographic(int codePoint) {
    // The answer comes entirely from the CharacterData selected for this code point.
    final CharacterData properties = CharacterData.of(codePoint);
    return properties.isIdeographic(codePoint);
}
/** /**
* Determines if the specified character is * Determines if the specified character is
* permissible as the first character in a Java identifier. * permissible as the first character in a Java identifier.
...@@ -6430,7 +6482,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -6430,7 +6482,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
/** /**
* Determines if the specified character is a Unicode space character. * Determines if the specified character is a Unicode space character.
* A character is considered to be a space character if and only if * A character is considered to be a space character if and only if
* it is specified to be a space character by the Unicode standard. This * it is specified to be a space character by the Unicode Standard. This
* method returns true if the character's general category type is any of * method returns true if the character's general category type is any of
* the following: * the following:
* <ul> * <ul>
...@@ -6458,7 +6510,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -6458,7 +6510,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
* Determines if the specified character (Unicode code point) is a * Determines if the specified character (Unicode code point) is a
* Unicode space character. A character is considered to be a * Unicode space character. A character is considered to be a
* space character if and only if it is specified to be a space * space character if and only if it is specified to be a space
* character by the Unicode standard. This method returns true if * character by the Unicode Standard. This method returns true if
* the character's general category type is any of the following: * the character's general category type is any of the following:
* *
* <ul> * <ul>
...@@ -6908,7 +6960,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -6908,7 +6960,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
* @since 1.4 * @since 1.4
*/ */
static char[] toUpperCaseCharArray(int codePoint) { static char[] toUpperCaseCharArray(int codePoint) {
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP. // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
assert isBmpCodePoint(codePoint); assert isBmpCodePoint(codePoint);
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
} }
...@@ -6941,7 +6993,7 @@ class Character implements java.io.Serializable, Comparable<Character> { ...@@ -6941,7 +6993,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
* Note: if the specified character is not assigned a name by * Note: if the specified character is not assigned a name by
* the <i>UnicodeData</i> file (part of the Unicode Character * the <i>UnicodeData</i> file (part of the Unicode Character
* Database maintained by the Unicode Consortium), the returned * Database maintained by the Unicode Consortium), the returned
* name is the same as the result of expression * name is the same as the result of expression.
* *
* <blockquote>{@code * <blockquote>{@code
* Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
......
...@@ -46,10 +46,27 @@ abstract class CharacterData { ...@@ -46,10 +46,27 @@ abstract class CharacterData {
int toUpperCaseEx(int ch) { int toUpperCaseEx(int ch) {
return toUpperCase(ch); return toUpperCase(ch);
} }
char[] toUpperCaseCharArray(int ch) { char[] toUpperCaseCharArray(int ch) {
return null; return null;
} }
// Default answer for the Other_Lowercase contributory property: no character has it.
// Character.isLowerCase(int) ORs this result with its LOWERCASE_LETTER category test.
// NOTE(review): presumably overridden by the generated CharacterData subclasses
// that are built from PropList.txt - confirm against the templates.
boolean isOtherLowercase(int ch) {
return false;
}
// Default answer for the Other_Uppercase contributory property: no character has it.
// Character.isUpperCase(int) ORs this result with its UPPERCASE_LETTER category test.
// NOTE(review): presumably overridden by the generated CharacterData subclasses
// that are built from PropList.txt - confirm against the templates.
boolean isOtherUppercase(int ch) {
return false;
}
// Default answer for the Other_Alphabetic contributory property: no character has it.
// Character.isAlphabetic(int) consults this only after its general-category test fails.
// NOTE(review): presumably overridden by the generated CharacterData subclasses
// that are built from PropList.txt - confirm against the templates.
boolean isOtherAlphabetic(int ch) {
return false;
}
// Default answer for the Ideographic property: no character is an ideograph.
// Character.isIdeographic(int) returns this result unchanged.
// NOTE(review): presumably overridden by the generated CharacterData subclasses
// that are built from PropList.txt - confirm against the templates.
boolean isIdeographic(int ch) {
return false;
}
// Character <= 0xff (basic latin) is handled by internal fast-path // Character <= 0xff (basic latin) is handled by internal fast-path
// to avoid initializing large tables. // to avoid initializing large tables.
// Note: performance of this "fast-path" code may be sub-optimal // Note: performance of this "fast-path" code may be sub-optimal
......
/*
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 7037261
* @summary Check j.l.Character.isLowerCase/isUpperCase/isAlphabetic/isIdeographic
*/
import java.util.regex.*;
import java.util.*;
import java.io.*;
import static java.lang.Character.*;
public class CheckProp {

    /**
     * Shape of a PropList.txt data line:
     * {@code <hex>[..<hex>] ; <Property_Name> # comment}.
     * Group 1 is the first code point, group 2 the optional last code point
     * of a range, group 3 the property name.
     */
    private static final Pattern DATA_LINE = Pattern.compile(
        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*");

    /**
     * Compares {@code Character.isLowerCase}, {@code isUpperCase},
     * {@code isAlphabetic} and {@code isIdeographic} against the general
     * category plus the contributory properties listed in
     * {@code PropList.txt} (looked up in the directory named by the
     * {@code test.src} system property, default {@code "."}) for every
     * code point from {@code MIN_CODE_POINT} to {@code MAX_CODE_POINT}
     * inclusive.
     *
     * @param args ignored
     * @throws IOException if PropList.txt cannot be read
     * @throws RuntimeException if any code point disagrees, or if one of
     *         the required properties is missing from the file
     */
    public static void main(String[] args) throws IOException {
        File fPropList = new File(System.getProperty("test.src", "."), "PropList.txt");
        Map<String, BitSet> propMap = readPropList(fPropList);

        BitSet otherLowercase = requireProperty(propMap, "Other_Lowercase", fPropList);
        BitSet otherUppercase = requireProperty(propMap, "Other_Uppercase", fPropList);
        BitSet otherAlphabetic = requireProperty(propMap, "Other_Alphabetic", fPropList);
        BitSet ideographic = requireProperty(propMap, "Ideographic", fPropList);

        int fails = 0;
        // "<=" so that the last code point, U+10FFFF, is checked as well.
        for (int cp = MIN_CODE_POINT; cp <= MAX_CODE_POINT; cp++) {
            int type = getType(cp);
            if (isLowerCase(cp) !=
                (type == LOWERCASE_LETTER || otherLowercase.get(cp)))
            {
                fails++;
                System.err.printf("Wrong isLowerCase(U+%04x)\n", cp);
            }
            if (isUpperCase(cp) !=
                (type == UPPERCASE_LETTER || otherUppercase.get(cp)))
            {
                fails++;
                System.err.printf("Wrong isUpperCase(U+%04x)\n", cp);
            }
            if (isAlphabetic(cp) !=
                (type == UPPERCASE_LETTER || type == LOWERCASE_LETTER ||
                 type == TITLECASE_LETTER || type == MODIFIER_LETTER ||
                 type == OTHER_LETTER || type == LETTER_NUMBER ||
                 otherAlphabetic.get(cp)))
            {
                fails++;
                System.err.printf("Wrong isAlphabetic(U+%04x)\n", cp);
            }
            if (isIdeographic(cp) != ideographic.get(cp))
            {
                fails++;
                System.err.printf("Wrong isIdeographic(U+%04x)\n", cp);
            }
        }
        if (fails != 0)
            throw new RuntimeException("CheckProp failed=" + fails);
    }

    /**
     * Parses a PropList.txt style file into one bit set of code points per
     * property name. Blank lines and lines starting with '#' are ignored;
     * other lines that are not data lines are reported and skipped.
     */
    private static Map<String, BitSet> readPropList(File file) throws IOException {
        Map<String, BitSet> propMap = new LinkedHashMap<>();
        try (BufferedReader sbfr = new BufferedReader(new FileReader(file))) {
            Matcher m = DATA_LINE.matcher("");
            String line;
            int lineNo = 0;
            while ((line = sbfr.readLine()) != null) {
                lineNo++;
                if (line.length() <= 1 || line.charAt(0) == '#') {
                    continue;
                }
                m.reset(line);
                if (!m.matches()) {
                    System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
                    continue;
                }
                int start = Integer.parseInt(m.group(1), 16);
                int end = (m.group(2) == null) ? start
                                               : Integer.parseInt(m.group(2), 16);
                String name = m.group(3);
                BitSet members = propMap.get(name);
                if (members == null) {
                    members = new BitSet();
                    propMap.put(name, members);
                }
                // BitSet.set(from, to) rejects from > to; an inverted range
                // simply contributes nothing.
                if (start <= end) {
                    members.set(start, end + 1);
                }
            }
        }
        return propMap;
    }

    /**
     * Returns the code point set of a property, failing with a descriptive
     * message when the data file does not define it.
     */
    private static BitSet requireProperty(Map<String, BitSet> propMap,
                                          String name, File source) {
        BitSet members = propMap.get(name);
        if (members == null) {
            throw new RuntimeException(
                "Property " + name + " not found in " + source);
        }
        return members;
    }
}
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册