6404304: RFE: Unicode 5.1 support

Reviewed-by: okutsu, naoto

6404304: RFE: Unicode 5.1 support
Reviewed-by: okutsu, naoto
59e20a9f · peytoia · 84a49425 · 59e20a9f · 59e20a9f · 59e20a9f
30 changed files
--- a/make/java/text/FILES_java.gmk
+++ b/make/java/text/FILES_java.gmk
@@ -92,11 +92,11 @@ FILES_java = \
            sun/text/normalizer/SymbolTable.java \
            sun/text/normalizer/Trie.java \
            sun/text/normalizer/TrieIterator.java \
+            sun/text/normalizer/UBiDiProps.java \
            sun/text/normalizer/UCharacter.java \
            sun/text/normalizer/UCharacterIterator.java \
            sun/text/normalizer/UCharacterProperty.java \
            sun/text/normalizer/UCharacterPropertyReader.java \
-            sun/text/normalizer/UProperty.java \
            sun/text/normalizer/UTF16.java \
            sun/text/normalizer/UnicodeMatcher.java \
            sun/text/normalizer/UnicodeSet.java \

--- a/make/java/text/Makefile
+++ b/make/java/text/Makefile
@@ -64,7 +64,8 @@ BIFILES = $(TEXT_CLASSDIR)/CharacterBreakIteratorData \
          $(TEXT_CLASSDIR)/SentenceBreakIteratorData

 ICU_FILES = $(TEXT_CLASSDIR)/unorm.icu \
-            $(TEXT_CLASSDIR)/uprops.icu
+            $(TEXT_CLASSDIR)/uprops.icu \
+            $(TEXT_CLASSDIR)/ubidi.icu

 # builder
 GENERATEBREAKITERATORDATA_JARFILE = \
@@ -89,7 +90,7 @@ $(BIFILES): $(GENERATEBREAKITERATORDATA_JARFILE) \
 build: $(BIFILES) $(ICU_FILES)

 #
-# Extra rules to copy unorm.icu and uprops.icu
+# Extra rules to copy unorm.icu, uprops.icu, and ubidi.icu
 #
 $(TEXT_CLASSDIR)/unorm.icu: $(TEXT_SRCDIR)/unorm.icu
 	$(install-file)
@@ -97,6 +98,9 @@ $(TEXT_CLASSDIR)/unorm.icu: $(TEXT_SRCDIR)/unorm.icu
 $(TEXT_CLASSDIR)/uprops.icu: $(TEXT_SRCDIR)/uprops.icu
 	$(install-file)

+$(TEXT_CLASSDIR)/ubidi.icu: $(TEXT_SRCDIR)/ubidi.icu
+	$(install-file)
+
 clean clobber::
 	$(RM) -r $(TEXT_CLASSES)
 	$(RM) -r $(BIFILES)

--- a/make/tools/GenerateCharacter/CharacterData00.java.template
+++ b/make/tools/GenerateCharacter/CharacterData00.java.template
@@ -144,6 +144,55 @@ class CharacterData00 extends CharacterData {
            case 0x1FBC : mapChar = 0x1FB3; break;
            case 0x1FCC : mapChar = 0x1FC3; break;
            case 0x1FFC : mapChar = 0x1FF3; break;
+
+            case 0x023A : mapChar = 0x2C65; break;
+            case 0x023E : mapChar = 0x2C66; break;
+            case 0x10A0 : mapChar = 0x2D00; break;
+            case 0x10A1 : mapChar = 0x2D01; break;
+            case 0x10A2 : mapChar = 0x2D02; break;
+            case 0x10A3 : mapChar = 0x2D03; break;
+            case 0x10A4 : mapChar = 0x2D04; break;
+            case 0x10A5 : mapChar = 0x2D05; break;
+            case 0x10A6 : mapChar = 0x2D06; break;
+            case 0x10A7 : mapChar = 0x2D07; break;
+            case 0x10A8 : mapChar = 0x2D08; break;
+            case 0x10A9 : mapChar = 0x2D09; break;
+            case 0x10AA : mapChar = 0x2D0A; break;
+            case 0x10AB : mapChar = 0x2D0B; break;
+            case 0x10AC : mapChar = 0x2D0C; break;
+            case 0x10AD : mapChar = 0x2D0D; break;
+            case 0x10AE : mapChar = 0x2D0E; break;
+            case 0x10AF : mapChar = 0x2D0F; break;
+            case 0x10B0 : mapChar = 0x2D10; break;
+            case 0x10B1 : mapChar = 0x2D11; break;
+            case 0x10B2 : mapChar = 0x2D12; break;
+            case 0x10B3 : mapChar = 0x2D13; break;
+            case 0x10B4 : mapChar = 0x2D14; break;
+            case 0x10B5 : mapChar = 0x2D15; break;
+            case 0x10B6 : mapChar = 0x2D16; break;
+            case 0x10B7 : mapChar = 0x2D17; break;
+            case 0x10B8 : mapChar = 0x2D18; break;
+            case 0x10B9 : mapChar = 0x2D19; break;
+            case 0x10BA : mapChar = 0x2D1A; break;
+            case 0x10BB : mapChar = 0x2D1B; break;
+            case 0x10BC : mapChar = 0x2D1C; break;
+            case 0x10BD : mapChar = 0x2D1D; break;
+            case 0x10BE : mapChar = 0x2D1E; break;
+            case 0x10BF : mapChar = 0x2D1F; break;
+            case 0x10C0 : mapChar = 0x2D20; break;
+            case 0x10C1 : mapChar = 0x2D21; break;
+            case 0x10C2 : mapChar = 0x2D22; break;
+            case 0x10C3 : mapChar = 0x2D23; break;
+            case 0x10C4 : mapChar = 0x2D24; break;
+            case 0x10C5 : mapChar = 0x2D25; break;
+            case 0x1E9E : mapChar = 0x00DF; break;
+            case 0x2C62 : mapChar = 0x026B; break;
+            case 0x2C63 : mapChar = 0x1D7D; break;
+            case 0x2C64 : mapChar = 0x027D; break;
+            case 0x2C6D : mapChar = 0x0251; break;
+            case 0x2C6E : mapChar = 0x0271; break;
+            case 0x2C6F : mapChar = 0x0250; break;
+            case 0xA77D : mapChar = 0x1D79; break;
              // default mapChar is already set, so no
              // need to redo it here.
              // default       : mapChar = ch;
@@ -196,6 +245,54 @@ class CharacterData00 extends CharacterData {
            case 0x1FB3 : mapChar = 0x1FBC; break;
            case 0x1FC3 : mapChar = 0x1FCC; break;
            case 0x1FF3 : mapChar = 0x1FFC; break;
+
+            case 0x0250 : mapChar = 0x2C6F; break;
+            case 0x0251 : mapChar = 0x2C6D; break;
+            case 0x026B : mapChar = 0x2C62; break;
+            case 0x0271 : mapChar = 0x2C6E; break;
+            case 0x027D : mapChar = 0x2C64; break;
+            case 0x1D79 : mapChar = 0xA77D; break;
+            case 0x1D7D : mapChar = 0x2C63; break;
+            case 0x2C65 : mapChar = 0x023A; break;
+            case 0x2C66 : mapChar = 0x023E; break;
+            case 0x2D00 : mapChar = 0x10A0; break;
+            case 0x2D01 : mapChar = 0x10A1; break;
+            case 0x2D02 : mapChar = 0x10A2; break;
+            case 0x2D03 : mapChar = 0x10A3; break;
+            case 0x2D04 : mapChar = 0x10A4; break;
+            case 0x2D05 : mapChar = 0x10A5; break;
+            case 0x2D06 : mapChar = 0x10A6; break;
+            case 0x2D07 : mapChar = 0x10A7; break;
+            case 0x2D08 : mapChar = 0x10A8; break;
+            case 0x2D09 : mapChar = 0x10A9; break;
+            case 0x2D0A : mapChar = 0x10AA; break;
+            case 0x2D0B : mapChar = 0x10AB; break;
+            case 0x2D0C : mapChar = 0x10AC; break;
+            case 0x2D0D : mapChar = 0x10AD; break;
+            case 0x2D0E : mapChar = 0x10AE; break;
+            case 0x2D0F : mapChar = 0x10AF; break;
+            case 0x2D10 : mapChar = 0x10B0; break;
+            case 0x2D11 : mapChar = 0x10B1; break;
+            case 0x2D12 : mapChar = 0x10B2; break;
+            case 0x2D13 : mapChar = 0x10B3; break;
+            case 0x2D14 : mapChar = 0x10B4; break;
+            case 0x2D15 : mapChar = 0x10B5; break;
+            case 0x2D16 : mapChar = 0x10B6; break;
+            case 0x2D17 : mapChar = 0x10B7; break;
+            case 0x2D18 : mapChar = 0x10B8; break;
+            case 0x2D19 : mapChar = 0x10B9; break;
+            case 0x2D1A : mapChar = 0x10BA; break;
+            case 0x2D1B : mapChar = 0x10BB; break;
+            case 0x2D1C : mapChar = 0x10BC; break;
+            case 0x2D1D : mapChar = 0x10BD; break;
+            case 0x2D1E : mapChar = 0x10BE; break;
+            case 0x2D1F : mapChar = 0x10BF; break;
+            case 0x2D20 : mapChar = 0x10C0; break;
+            case 0x2D21 : mapChar = 0x10C1; break;
+            case 0x2D22 : mapChar = 0x10C2; break;
+            case 0x2D23 : mapChar = 0x10C3; break;
+            case 0x2D24 : mapChar = 0x10C4; break;
+            case 0x2D25 : mapChar = 0x10C5; break;
              // ch must have a 1:M case mapping, but we
              // can't handle it here. Return ch.
              // since mapChar is already set, no need
@@ -315,6 +412,12 @@ class CharacterData00 extends CharacterData {
                case 0x32BE: retval = 49; break;          // CIRCLED NUMBER FORTY NINE
                case 0x32BF: retval = 50; break;          // CIRCLED NUMBER FIFTY

+                case 0x0D71: retval = 100; break;         // MALAYALAM NUMBER ONE HUNDRED
+                case 0x0D72: retval = 1000; break;        // MALAYALAM NUMBER ONE THOUSAND
+                case 0x2186: retval = 50; break;          // ROMAN NUMERAL FIFTY EARLY FORM
+                case 0x2187: retval = 50000; break;       // ROMAN NUMERAL FIFTY THOUSAND
+                case 0x2188: retval = 100000; break;      // ROMAN NUMERAL ONE HUNDRED THOUSAND
+
                default:       retval = -2; break;
            }
            break;
@@ -383,6 +486,54 @@ class CharacterData00 extends CharacterData {
                    case 0x00B5 : mapChar = 0x039C; break;
                    case 0x017F : mapChar = 0x0053; break;
                    case 0x1FBE : mapChar = 0x0399; break;
+
+                    case 0x0250 : mapChar = 0x2C6F; break;
+                    case 0x0251 : mapChar = 0x2C6D; break;
+                    case 0x026B : mapChar = 0x2C62; break;
+                    case 0x0271 : mapChar = 0x2C6E; break;
+                    case 0x027D : mapChar = 0x2C64; break;
+                    case 0x1D79 : mapChar = 0xA77D; break;
+                    case 0x1D7D : mapChar = 0x2C63; break;
+                    case 0x2C65 : mapChar = 0x023A; break;
+                    case 0x2C66 : mapChar = 0x023E; break;
+                    case 0x2D00 : mapChar = 0x10A0; break;
+                    case 0x2D01 : mapChar = 0x10A1; break;
+                    case 0x2D02 : mapChar = 0x10A2; break;
+                    case 0x2D03 : mapChar = 0x10A3; break;
+                    case 0x2D04 : mapChar = 0x10A4; break;
+                    case 0x2D05 : mapChar = 0x10A5; break;
+                    case 0x2D06 : mapChar = 0x10A6; break;
+                    case 0x2D07 : mapChar = 0x10A7; break;
+                    case 0x2D08 : mapChar = 0x10A8; break;
+                    case 0x2D09 : mapChar = 0x10A9; break;
+                    case 0x2D0A : mapChar = 0x10AA; break;
+                    case 0x2D0B : mapChar = 0x10AB; break;
+                    case 0x2D0C : mapChar = 0x10AC; break;
+                    case 0x2D0D : mapChar = 0x10AD; break;
+                    case 0x2D0E : mapChar = 0x10AE; break;
+                    case 0x2D0F : mapChar = 0x10AF; break;
+                    case 0x2D10 : mapChar = 0x10B0; break;
+                    case 0x2D11 : mapChar = 0x10B1; break;
+                    case 0x2D12 : mapChar = 0x10B2; break;
+                    case 0x2D13 : mapChar = 0x10B3; break;
+                    case 0x2D14 : mapChar = 0x10B4; break;
+                    case 0x2D15 : mapChar = 0x10B5; break;
+                    case 0x2D16 : mapChar = 0x10B6; break;
+                    case 0x2D17 : mapChar = 0x10B7; break;
+                    case 0x2D18 : mapChar = 0x10B8; break;
+                    case 0x2D19 : mapChar = 0x10B9; break;
+                    case 0x2D1A : mapChar = 0x10BA; break;
+                    case 0x2D1B : mapChar = 0x10BB; break;
+                    case 0x2D1C : mapChar = 0x10BC; break;
+                    case 0x2D1D : mapChar = 0x10BD; break;
+                    case 0x2D1E : mapChar = 0x10BE; break;
+                    case 0x2D1F : mapChar = 0x10BF; break;
+                    case 0x2D20 : mapChar = 0x10C0; break;
+                    case 0x2D21 : mapChar = 0x10C1; break;
+                    case 0x2D22 : mapChar = 0x10C2; break;
+                    case 0x2D23 : mapChar = 0x10C3; break;
+                    case 0x2D24 : mapChar = 0x10C4; break;
+                    case 0x2D25 : mapChar = 0x10C5; break;
                    default       : mapChar = Character.ERROR; break;
                }
            }

--- a/make/tools/GenerateCharacter/CharacterData01.java.template
+++ b/make/tools/GenerateCharacter/CharacterData01.java.template
@@ -218,6 +218,48 @@ class CharacterData01 extends CharacterData {
            case 0x10132: retval = 80000; break;   // AEGEAN NUMBER EIGHTY THOUSAND
            case 0x10133: retval = 90000; break;   // AEGEAN NUMBER NINETY THOUSAND
            case 0x10323: retval = 50; break;      // OLD ITALIC NUMERAL FIFTY
+
+            case 0x010144: retval = 50; break;     // ACROPHONIC ATTIC FIFTY
+            case 0x010145: retval = 500; break;    // ACROPHONIC ATTIC FIVE HUNDRED
+            case 0x010146: retval = 5000; break;   // ACROPHONIC ATTIC FIVE THOUSAND
+            case 0x010147: retval = 50000; break;  // ACROPHONIC ATTIC FIFTY THOUSAND
+            case 0x01014A: retval = 50; break;     // ACROPHONIC ATTIC FIFTY TALENTS
+            case 0x01014B: retval = 100; break;    // ACROPHONIC ATTIC ONE HUNDRED TALENTS
+            case 0x01014C: retval = 500; break;    // ACROPHONIC ATTIC FIVE HUNDRED TALENTS
+            case 0x01014D: retval = 1000; break;   // ACROPHONIC ATTIC ONE THOUSAND TALENTS
+            case 0x01014E: retval = 5000; break;   // ACROPHONIC ATTIC FIVE THOUSAND TALENTS
+            case 0x010151: retval = 50; break;     // ACROPHONIC ATTIC FIFTY STATERS
+            case 0x010152: retval = 100; break;    // ACROPHONIC ATTIC ONE HUNDRED STATERS
+            case 0x010153: retval = 500; break;    // ACROPHONIC ATTIC FIVE HUNDRED STATERS
+            case 0x010154: retval = 1000; break;   // ACROPHONIC ATTIC ONE THOUSAND STATERS
+            case 0x010155: retval = 10000; break;  // ACROPHONIC ATTIC TEN THOUSAND STATERS
+            case 0x010156: retval = 50000; break;  // ACROPHONIC ATTIC FIFTY THOUSAND STATERS
+            case 0x010166: retval = 50; break;     // ACROPHONIC TROEZENIAN FIFTY
+            case 0x010167: retval = 50; break;     // ACROPHONIC TROEZENIAN FIFTY ALTERNATE FORM
+            case 0x010168: retval = 50; break;     // ACROPHONIC HERMIONIAN FIFTY
+            case 0x010169: retval = 50; break;     // ACROPHONIC THESPIAN FIFTY
+            case 0x01016A: retval = 100; break;    // ACROPHONIC THESPIAN ONE HUNDRED
+            case 0x01016B: retval = 300; break;    // ACROPHONIC THESPIAN THREE HUNDRED
+            case 0x01016C: retval = 500; break;    // ACROPHONIC EPIDAUREAN FIVE HUNDRED
+            case 0x01016D: retval = 500; break;    // ACROPHONIC TROEZENIAN FIVE HUNDRED
+            case 0x01016E: retval = 500; break;    // ACROPHONIC THESPIAN FIVE HUNDRED
+            case 0x01016F: retval = 500; break;    // ACROPHONIC CARYSTIAN FIVE HUNDRED
+            case 0x010170: retval = 500; break;    // ACROPHONIC NAXIAN FIVE HUNDRED
+            case 0x010171: retval = 1000; break;   // ACROPHONIC THESPIAN ONE THOUSAND
+            case 0x010172: retval = 5000; break;   // ACROPHONIC THESPIAN FIVE THOUSAND
+            case 0x010174: retval = 50; break;     // ACROPHONIC STRATIAN FIFTY MNAS
+            case 0x010341: retval = 90; break;     // GOTHIC LETTER NINETY
+            case 0x01034A: retval = 900; break;    // GOTHIC LETTER NINE HUNDRED
+            case 0x0103D5: retval = 100; break;    // OLD PERSIAN NUMBER HUNDRED
+            case 0x010919: retval = 100; break;    // PHOENICIAN NUMBER ONE HUNDRED
+            case 0x010A46: retval = 100; break;    // KHAROSHTHI NUMBER ONE HUNDRED
+            case 0x010A47: retval = 1000; break;   // KHAROSHTHI NUMBER ONE THOUSAND
+            case 0x01D36C: retval = 40; break;     // COUNTING ROD TENS DIGIT FOUR
+            case 0x01D36D: retval = 50; break;     // COUNTING ROD TENS DIGIT FIVE
+            case 0x01D36E: retval = 60; break;     // COUNTING ROD TENS DIGIT SIX
+            case 0x01D36F: retval = 70; break;     // COUNTING ROD TENS DIGIT SEVEN
+            case 0x01D370: retval = 80; break;     // COUNTING ROD TENS DIGIT EIGHT
+            case 0x01D371: retval = 90; break;     // COUNTING ROD TENS DIGIT NINE
            default: retval = -2; break;
            }
            

--- a/make/tools/UnicodeData/SpecialCasing.txt
+++ b/make/tools/UnicodeData/SpecialCasing.txt
-# SpecialCasing-4.0.0.txt
-# Date: 2003-03-14, 20:22:04 GMT [MD]
+# SpecialCasing-5.1.0.txt
+# Date: 2008-03-03, 21:58:10 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2008 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see UCD.html
 #
 # Special Casing Properties
 #
 # This file is a supplement to the UnicodeData file.
 # It contains additional information about the casing of Unicode characters.
 # (For compatibility, the UnicodeData.txt file only contains case mappings for
-# characters where they are 1-1, and does not have locale-specific mappings.)
+# characters where they are 1-1, and independent of context and language.
 # For more information, see the discussion of Case Mappings in the Unicode Standard.
 #
 # All code points not listed in this file that do not have a simple case mappings
@@ -18,31 +23,31 @@
 #
 # <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)? # <comment>
 #
-# <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more than
-# one character, they are separated by spaces. Other than as used to separate elements,
-# spaces are to be ignored.
+# <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more
+# than one character, they are separated by spaces. Other than as used to separate 
+# elements, spaces are to be ignored.
 #
-# The <condition_list> is optional. Where present, it consists of one or more locales or contexts,
-# separated by spaces. In these conditions:
+# The <condition_list> is optional. Where present, it consists of one or more language IDs
+# or contexts, separated by spaces. In these conditions:
 # - A condition list overrides the normal behavior if all of the listed conditions are true.
 # - The context is always the context of the characters in the original string,
 #   NOT in the resulting string.
 # - Case distinctions in the condition list are not significant.
 # - Conditions preceded by "Not_" represent the negation of the condition.
+# The condition list is not represented in the UCD as a formal property.
 #
-# A locale is defined as:
-# <locale> := <ISO_639_code> ( "_" <ISO_3166_code> ( "_" <variant> )? )?
-# <ISO_3166_code> := 2-letter ISO country code,
-# <ISO_639_code> :=  2-letter ISO language code
+# A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
 #
-# A context is one of the following, as defined in the Unicode Standard:
-#   Final_Sigma, After_Soft_Dotted, More_Above, Before_Dot, Not_Before_Dot, After_I
+# A context for a character C is defined by Section 3.13 Default Case 
+# Operations, of The Unicode Standard, Version 5.0.
+# (This is identical to the context defined by Unicode 4.1.0,
+#  as specified in http://www.unicode.org/versions/Unicode4.1.0/)
 #
 # Parsers of this file must be prepared to deal with future additions to this format:
 #  * Additional contexts
 #  * Additional fields
 # ================================================================================
-
+# @missing 0000..10FFFF; <slc>; <stc>; <suc>
 # ================================================================================
 # Unconditional mappings
 # ================================================================================
@@ -170,7 +175,7 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
 1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI

-# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases
+# Some characters with YPOGEGRAMMENI also have no corresponding titlecases

 1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
 1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
@@ -184,7 +189,14 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI

 # ================================================================================
-# Conditional mappings
+# Conditional Mappings
+# The remainder of this file provides conditional casing data used to produce 
+# full case mappings.
+# ================================================================================
+# Language-Insensitive Mappings
+# These are characters whose full case mappings do not depend on language, but do
+# depend on context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
 # ================================================================================

 # Special case for final form of sigma
@@ -203,7 +215,10 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA

 # ================================================================================
-# Locale-sensitive mappings
+# Language-Sensitive Mappings
+# These are characters whose full case mappings depend on language and perhaps also
+# context (which characters come before or after). For more information
+# see the header of this file and the Unicode Standard.
 # ================================================================================

 # Lithuanian
@@ -254,3 +269,6 @@ FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
 # Note: the following case is already in the UnicodeData file.

 # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
+
+# EOF
+
--- a/make/tools/UnicodeData/UnicodeData.txt
+++ b/make/tools/UnicodeData/UnicodeData.txt
--- a/make/tools/UnicodeData/VERSION
+++ b/make/tools/UnicodeData/VERSION
+5.1.0
--- a/src/share/classes/java/lang/Character.java
+++ b/src/share/classes/java/lang/Character.java
--- a/src/share/classes/java/lang/ConditionalSpecialCasing.java
+++ b/src/share/classes/java/lang/ConditionalSpecialCasing.java
@@ -74,6 +74,7 @@ final class ConditionalSpecialCasing {
        new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
        new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
        new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
+        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, "lt", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE

        //# ================================================================================
        //# Turkish and Azeri
@@ -84,7 +85,10 @@ final class ConditionalSpecialCasing {
        new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
        new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
-        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0)  // # LATIN SMALL LETTER I
+        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN SMALL LETTER I
+        //# ================================================================================
+        //# Other
+        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, "en", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
    };

    // A hash table that contains the above entries

--- a/src/share/classes/java/lang/String.java
+++ b/src/share/classes/java/lang/String.java
@@ -2451,14 +2451,21 @@ public final class String
            }
            if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
                lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
+            } else if (srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE
+                lowerChar = Character.ERROR;
            } else {
                lowerChar = Character.toLowerCase(srcChar);
            }
            if ((lowerChar == Character.ERROR) ||
                (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
                if (lowerChar == Character.ERROR) {
-                    lowerCharArray =
-                        ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
+                     if (!localeDependent && srcChar == '\u0130') {
+                         lowerCharArray =
+                             ConditionalSpecialCasing.toLowerCaseCharArray(this, i, Locale.ENGLISH);
+                     } else {
+                        lowerCharArray =
+                            ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
+                     }
                } else if (srcCount == 2) {
                    resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
                    continue;

--- a/src/share/classes/sun/text/normalizer/CharTrie.java
+++ b/src/share/classes/sun/text/normalizer/CharTrie.java
 /*
- * Portions Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -77,6 +76,66 @@ public class CharTrie extends Trie
        m_friendAgent_ = new FriendAgent();
    }

+    /**
+     * Make a dummy CharTrie.
+     * A dummy trie is an empty runtime trie, used when a real data trie cannot
+     * be loaded.
+     *
+     * The trie always returns the initialValue,
+     * or the leadUnitValue for lead surrogate code points.
+     * The Latin-1 part is always set up to be linear.
+     *
+     * @param initialValue the initial value that is set for all code points (stored as a char, so only the low 16 bits are kept)
+     * @param leadUnitValue the value for lead surrogate code _units_ that do not
+     *                      have associated supplementary data; it gets its own data block only when it differs from initialValue
+     * @param dataManipulate object which provides methods to parse the char data
+     */
+    public CharTrie(int initialValue, int leadUnitValue, DataManipulate dataManipulate) {
+        super(new char[BMP_INDEX_LENGTH+SURROGATE_BLOCK_COUNT], HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_, dataManipulate);
+
+        int dataLength, latin1Length, i, limit;
+        char block;
+
+        /* calculate the actual size of the dummy trie data */
+
+        /* block 0 spans max(Latin-1, one data block): 256 entries when the stage-1 shift is <=8, else DATA_BLOCK_LENGTH */
+        dataLength=latin1Length= INDEX_STAGE_1_SHIFT_<=8 ? 256 : DATA_BLOCK_LENGTH;
+        if(leadUnitValue!=initialValue) { // reserve one extra data block for the lead surrogate value
+            dataLength+=DATA_BLOCK_LENGTH;
+        }
+        m_data_=new char[dataLength];
+        m_dataLength_=dataLength;
+
+        m_initialValue_=(char)initialValue; // narrowing cast: only the low 16 bits are kept
+
+        /* fill the index and data arrays */
+
+        /* indexes stay at 0 (block 0); NOTE(review): presumably m_index_ is the zero-filled array passed to super() - confirm in Trie */
+
+        /* block 0 (includes Latin-1): every entry is initialValue */
+        for(i=0; i<latin1Length; ++i) {
+            m_data_[i]=(char)initialValue;
+        }
+
+        if(leadUnitValue!=initialValue) {
+            /* indexes for lead surrogate code units to the block after Latin-1 */
+            block=(char)(latin1Length>>INDEX_STAGE_2_SHIFT_);
+            i=0xd800>>INDEX_STAGE_1_SHIFT_; // first index entry for U+D800
+            limit=0xdc00>>INDEX_STAGE_1_SHIFT_; // exclusive end: index entry for U+DC00
+            for(; i<limit; ++i) {
+                m_index_[i]=block;
+            }
+
+            /* data for lead surrogate code units */
+            limit=latin1Length+DATA_BLOCK_LENGTH;
+            for(i=latin1Length; i<limit; ++i) {
+                m_data_[i]=(char)leadUnitValue;
+            }
+        }
+
+        m_friendAgent_ = new FriendAgent();
+    }
+
    /**
     * Java friend implementation
     */
@@ -130,7 +189,18 @@ public class CharTrie extends Trie
    */
    public final char getCodePointValue(int ch)
    {
-        int offset = getCodePointOffset(ch);
+        int offset;
+
+        // fastpath for U+0000..U+D7FF
+        if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            // copy of getRawOffset()
+            offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+                    + (ch & INDEX_STAGE_3_MASK_);
+            return m_data_[offset];
+        }
+
+        // handle U+D800..U+10FFFF
+        offset = getCodePointOffset(ch);

        // return -1 if there is an error, in this case we return the default
        // value: m_initialValue_

--- a/src/share/classes/sun/text/normalizer/NormalizerBase.java
+++ b/src/share/classes/sun/text/normalizer/NormalizerBase.java
 /*
- * Portions Copyright 2001-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -127,7 +126,7 @@ import java.text.Normalizer;
 * normalize(FCD) may be implemented with NFD.
 *
 * For more details on FCD see the collation design document:
- * http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/collation/ICU_collation_design.htm
+ * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
 *
 * ICU collation performs either NFD or FCD normalization automatically if
 * normalization is turned on for the collator object. Beyond collation and

--- a/src/share/classes/sun/text/normalizer/NormalizerDataReader.java
+++ b/src/share/classes/sun/text/normalizer/NormalizerDataReader.java
 /*
- * Portions Copyright 2003-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -331,7 +330,7 @@ final class NormalizerDataReader implements ICUBinary.Authenticate {
                        throws IOException{

         //Read the bytes that make up the normTrie
-         dataInputStream.read(normBytes);
+         dataInputStream.readFully(normBytes);

         //normTrieStream= new ByteArrayInputStream(normBytes);

@@ -346,11 +345,11 @@ final class NormalizerDataReader implements ICUBinary.Authenticate {
         }

         //Read the fcdTrie
-         dataInputStream.read(fcdBytes);
+         dataInputStream.readFully(fcdBytes);


         //Read the AuxTrie
-        dataInputStream.read(auxBytes);
+        dataInputStream.readFully(auxBytes);
    }

    public byte[] getDataFormatVersion(){

--- a/src/share/classes/sun/text/normalizer/NormalizerImpl.java
+++ b/src/share/classes/sun/text/normalizer/NormalizerImpl.java
 /*
- * Portions Copyright 2003-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -102,7 +101,7 @@ public final class NormalizerImpl {
    private static final long  MIN_SPECIAL    =  (long)(0xfc000000 & UNSIGNED_INT_MASK);
    private static final long  SURROGATES_TOP =  (long)(0xfff00000 & UNSIGNED_INT_MASK);
    private static final long  MIN_HANGUL     =  (long)(0xfff00000 & UNSIGNED_INT_MASK);
-    private static final long  MIN_JAMO_V     =  (long)(0xfff20000 & UNSIGNED_INT_MASK);
+//  private static final long  MIN_JAMO_V     =  (long)(0xfff20000 & UNSIGNED_INT_MASK);
    private static final long  JAMO_V_TOP     =  (long)(0xfff30000 & UNSIGNED_INT_MASK);


@@ -908,7 +907,7 @@ public final class NormalizerImpl {
                    buffer = composePart(args,prevStarter,src,srcStart,srcLimit,options,nx);

                    // compare the normalized version with the original
-                    if(0!=strCompare(buffer,0,args.length,src,prevStarter,(srcStart-prevStarter), false)) {
+                    if(0!=strCompare(buffer,0,args.length,src,prevStarter,srcStart, false)) {
                        result=NormalizerBase.NO; // normalization differs
                        break;
                    }
@@ -2291,7 +2290,7 @@ public final class NormalizerImpl {
    private static final int OPTIONS_NX_MASK=0x1f;
    private static final int OPTIONS_UNICODE_MASK=0xe0;
    public  static final int OPTIONS_SETS_MASK=0xff;
-    private static final int OPTIONS_UNICODE_SHIFT=5;
+//  private static final int OPTIONS_UNICODE_SHIFT=5;
    private static final UnicodeSet[] nxCache = new UnicodeSet[OPTIONS_SETS_MASK+1];

    /* Constants for options flags for normalization.*/

--- a/src/share/classes/sun/text/normalizer/Trie.java
+++ b/src/share/classes/sun/text/normalizer/Trie.java
 /*
- * Portions Copyright 2003-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -37,10 +36,9 @@

 package sun.text.normalizer;

-import java.io.InputStream;
 import java.io.DataInputStream;
+import java.io.InputStream;
 import java.io.IOException;
-import java.util.Arrays;

 /**
 * <p>A trie is a kind of compressed, serializable table of values
@@ -81,7 +79,6 @@ public abstract class Trie
    * This interface specifies methods to be implemented in order for
    * com.ibm.impl.Trie, to surrogate offset information encapsulated within
    * the data.
-    * @draft 2.1
    */
    public static interface DataManipulate
    {
@@ -92,11 +89,17 @@ public abstract class Trie
        * @param value data value for a surrogate from the trie, including the
        *        folding offset
        * @return data offset or 0 if there is no data for the lead surrogate
-        * @draft 2.1
        */
        public int getFoldingOffset(int value);
    }

+    // default implementation: returns the value unchanged; used when no DataManipulate is given
+    private static class DefaultGetFoldingOffset implements DataManipulate {
+        public int getFoldingOffset(int value) {
+            return value;
+        }
+    }
+
    // protected constructor -------------------------------------------

    /**
@@ -107,7 +110,6 @@ public abstract class Trie
    *                       trie data
    * @throws IOException thrown when input stream does not have the
    *                        right header.
-    * @draft 2.1
    */
    protected Trie(InputStream inputStream,
                   DataManipulate  dataManipulate) throws IOException
@@ -121,7 +123,11 @@ public abstract class Trie
            throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file");
        }

-        m_dataManipulate_ = dataManipulate;
+        if(dataManipulate != null) {
+            m_dataManipulate_ = dataManipulate;
+        } else {
+            m_dataManipulate_ = new DefaultGetFoldingOffset();
+        }
        m_isLatin1Linear_ = (m_options_ &
                             HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0;
        m_dataOffset_     = input.readInt();
@@ -135,19 +141,21 @@ public abstract class Trie
    * @param options used by the trie
    * @param dataManipulate object containing the information to parse the
    *                       trie data
-    * @draft 2.2
    */
    protected Trie(char index[], int options, DataManipulate dataManipulate)
    {
        m_options_ = options;
-        m_dataManipulate_ = dataManipulate;
+        if(dataManipulate != null) {
+            m_dataManipulate_ = dataManipulate;
+        } else {
+            m_dataManipulate_ = new DefaultGetFoldingOffset();
+        }
        m_isLatin1Linear_ = (m_options_ &
                             HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0;
        m_index_ = index;
        m_dataOffset_ = m_index_.length;
    }

-
    // protected data members ------------------------------------------

    /**
@@ -158,7 +166,6 @@ public abstract class Trie
    protected static final int LEAD_INDEX_OFFSET_ = 0x2800 >> 5;
    /**
    * Shift size for shifting right the input index. 1..9
-    * @draft 2.1
    */
    protected static final int INDEX_STAGE_1_SHIFT_ = 5;
    /**
@@ -168,31 +175,39 @@ public abstract class Trie
    * This requires blocks of stage 2 data to be aligned by
    * DATA_GRANULARITY.
    * 0..INDEX_STAGE_1_SHIFT
-    * @draft 2.1
    */
    protected static final int INDEX_STAGE_2_SHIFT_ = 2;
+    /**
+     * Number of data values in a stage 2 (data array) block.
+     */
+    protected static final int DATA_BLOCK_LENGTH=1<<INDEX_STAGE_1_SHIFT_;
    /**
    * Mask for getting the lower bits from the input index.
-    * DATA_BLOCK_LENGTH_ - 1.
-    * @draft 2.1
+    * DATA_BLOCK_LENGTH - 1.
    */
-    protected static final int INDEX_STAGE_3_MASK_ =
-                                              (1 << INDEX_STAGE_1_SHIFT_) - 1;
+    protected static final int INDEX_STAGE_3_MASK_ = DATA_BLOCK_LENGTH - 1;
+    /** Number of bits of a trail surrogate that are used in index table lookups. */
+    protected static final int SURROGATE_BLOCK_BITS=10-INDEX_STAGE_1_SHIFT_;
+    /**
+     * Number of index (stage 1) entries per lead surrogate.
+     * Same as number of index entries for 1024 trail surrogates,
+     * ==0x400>>INDEX_STAGE_1_SHIFT_
+     */
+    protected static final int SURROGATE_BLOCK_COUNT=(1<<SURROGATE_BLOCK_BITS);
+    /** Length of the BMP portion of the index (stage 1) array. */
+    protected static final int BMP_INDEX_LENGTH=0x10000>>INDEX_STAGE_1_SHIFT_;
    /**
    * Surrogate mask to use when shifting offset to retrieve supplementary
    * values
-    * @draft 2.1
    */
    protected static final int SURROGATE_MASK_ = 0x3FF;
    /**
    * Index or UTF16 characters
-    * @draft 2.1
    */
    protected char m_index_[];
    /**
    * Internal TrieValue which handles the parsing of the data value.
    * This class is to be implemented by the user
-    * @draft 2.1
    */
    protected DataManipulate m_dataManipulate_;
    /**
@@ -200,7 +215,6 @@ public abstract class Trie
    * index and data into a char array, so this is used to indicate the
    * initial offset to the data portion.
    * Note this index always points to the initial value.
-    * @draft 2.1
    */
    protected int m_dataOffset_;
    /**
@@ -215,7 +229,6 @@ public abstract class Trie
    * @param lead lead surrogate
    * @param trail trailing surrogate
    * @return offset to data
-    * @draft 2.1
    */
    protected abstract int getSurrogateOffset(char lead, char trail);

@@ -223,14 +236,12 @@ public abstract class Trie
    * Gets the value at the argument index
    * @param index value at index will be retrieved
    * @return 32 bit value
-    * @draft 2.1
    */
    protected abstract int getValue(int index);

    /**
    * Gets the default initial value
    * @return 32 bit value
-    * @draft 2.1
    */
    protected abstract int getInitialValue();

@@ -247,7 +258,6 @@ public abstract class Trie
    * @param offset index offset which ch is to start from
    * @param ch index to be used after offset
    * @return offset to the data
-    * @draft 2.1
    */
    protected final int getRawOffset(int offset, char ch)
    {
@@ -261,7 +271,6 @@ public abstract class Trie
    * Treats a lead surrogate as a normal code point.
    * @param ch BMP character
    * @return offset to data
-    * @draft 2.1
    */
    protected final int getBMPOffset(char ch)
    {
@@ -279,7 +288,6 @@ public abstract class Trie
    * the next trailing surrogate character.
    * @param ch lead surrogate character
    * @return offset to data
-    * @draft 2.1
    */
    protected final int getLeadOffset(char ch)
    {
@@ -293,26 +301,27 @@ public abstract class Trie
    * Gets the offset to data which the codepoint points to
    * @param ch codepoint
    * @return offset to data
-    * @draft 2.1
    */
    protected final int getCodePointOffset(int ch)
    {
        // if ((ch >> 16) == 0) slower
-        if (ch >= UTF16.CODEPOINT_MIN_VALUE
-            && ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
+        if (ch < 0) {
+            return -1;
+        } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
+            // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
+            return getRawOffset(0, (char)ch);
+        } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
            // BMP codepoint
            return getBMPOffset((char)ch);
-        }
-        // for optimization
-        if (ch >= UTF16.CODEPOINT_MIN_VALUE
-            && ch <= UCharacter.MAX_VALUE) {
+        } else if (ch <= UCharacter.MAX_VALUE) {
            // look at the construction of supplementary characters
            // trail forms the ends of it.
            return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
                                      (char)(ch & SURROGATE_MASK_));
+        } else {
+            // code point above UCharacter.MAX_VALUE: report the error by returning -1
+            return -1;
        }
-        // return -1 if there is an error, in this case we return
-        return -1;
    }

    /**
@@ -320,7 +329,6 @@ public abstract class Trie
    * <p>This is overwritten by the child classes.
    * @param inputStream input stream containing the trie information
    * @exception IOException thrown when data reading fails.
-    * @draft 2.1
    */
    protected void unserialize(InputStream inputStream) throws IOException
    {
@@ -335,7 +343,6 @@ public abstract class Trie
    /**
    * Determines if this is a 32 bit trie
    * @return true if options specifies this is a 32 bit trie
-    * @draft 2.1
    */
    protected final boolean isIntTrie()
    {
@@ -345,7 +352,6 @@ public abstract class Trie
    /**
    * Determines if this is a 16 bit trie
    * @return true if this is a 16 bit trie
-    * @draft 2.1
    */
    protected final boolean isCharTrie()
    {
@@ -354,40 +360,20 @@ public abstract class Trie

    // private data members --------------------------------------------

-    /**
-    * Signature index
-    */
-    private static final int HEADER_SIGNATURE_INDEX_ = 0;
-    /**
-    * Options index
-    */
-    private static final int HEADER_OPTIONS_INDEX_ = 1 << 1;
-    /**
-    * Index length index
-    */
-    private static final int HEADER_INDEX_LENGTH_INDEX_ = 2 << 1;
-    /**
-    * Data length index
-    */
-    private static final int HEADER_DATA_LENGTH_INDEX_ = 3 << 1;
-    /**
-    * Size of header
-    */
-    private static final int HEADER_LENGTH_ = 4 << 1;
    /**
    * Latin 1 option mask
    */
-    private static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200;
+    protected static final int HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_ = 0x200;
    /**
    * Constant number to authenticate the byte block
    */
-    private static final int HEADER_SIGNATURE_ = 0x54726965;
+    protected static final int HEADER_SIGNATURE_ = 0x54726965;
    /**
    * Header option formatting
    */
    private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF;
-    private static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4;
-    private static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100;
+    protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4;
+    protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100;

    /**
    * Flag indicator for Latin quick access data block
@@ -409,9 +395,8 @@ public abstract class Trie
    /**
    * Authenticates raw data header.
    * Checking the header information, signature and options.
-    * @param rawdata array of char data to be checked
+    * @param signature This contains the options and type of a Trie
    * @return true if the header is authenticated valid
-    * @draft 2.1
    */
    private final boolean checkHeader(int signature)
    {

--- a/src/share/classes/sun/text/normalizer/TrieIterator.java
+++ b/src/share/classes/sun/text/normalizer/TrieIterator.java
 /*
- * Portions Copyright 2005-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -108,15 +107,14 @@ package sun.text.normalizer;
 * @since release 2.1, Jan 17 2002
 */
 public class TrieIterator implements RangeValueIterator
-
 {
+
    // public constructor ---------------------------------------------

    /**
    * TrieEnumeration constructor
    * @param trie to be used
    * @exception IllegalArgumentException throw when argument is null.
-    * @draft 2.1
    */
    public TrieIterator(Trie trie)
    {
@@ -141,7 +139,6 @@ public class TrieIterator implements RangeValueIterator
    * @return true if we are not at the end of the iteration, false otherwise.
    * @exception NoSuchElementException - if no more elements exist.
    * @see com.ibm.icu.util.RangeValueIterator.Element
-    * @draft 2.1
    */
    public final boolean next(Element element)
    {
@@ -158,7 +155,6 @@ public class TrieIterator implements RangeValueIterator

    /**
    * Resets the iterator to the beginning of the iteration
-    * @draft 2.1
    */
    public final void reset()
    {
@@ -186,7 +182,6 @@ public class TrieIterator implements RangeValueIterator
    * The default function is to return the value as it is.
    * @param value a value from the trie
    * @return extracted value
-    * @draft 2.1
    */
    protected int extract(int value)
    {
@@ -278,7 +273,6 @@ public class TrieIterator implements RangeValueIterator
    * Note, if there are no more iterations, it will never get to here.
    * Blocked out by next().
    * @param element return result object
-    * @draft 2.1
    */
    private final void calculateNextSupplementaryElement(Element element)
    {
@@ -516,10 +510,6 @@ public class TrieIterator implements RangeValueIterator
    */
    private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00;
    /**
-    * Trail surrogate maximum value
-    */
-    private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF;
-    /**
    * Number of trail surrogate
    */
    private static final int TRAIL_SURROGATE_COUNT_ = 0x400;
@@ -538,11 +528,6 @@ public class TrieIterator implements RangeValueIterator
    private static final int DATA_BLOCK_LENGTH_ =
                                              1 << Trie.INDEX_STAGE_1_SHIFT_;
    /**
-    * Number of codepoints in a stage 2 block
-    */
-    private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ =
-                                                     DATA_BLOCK_LENGTH_ << 10;
-    /**
    * Trie instance
    */
    private Trie m_trie_;
@@ -560,10 +545,4 @@ public class TrieIterator implements RangeValueIterator
    private int m_nextBlock_;
    private int m_nextBlockIndex_;
    private int m_nextTrailIndexOffset_;
-    /**
-    * This is the return result element
-    */
-    private int m_start_;
-    private int m_limit_;
-    private int m_value_;
 }
--- a/src/share/classes/sun/text/normalizer/UBiDiProps.java
+++ b/src/share/classes/sun/text/normalizer/UBiDiProps.java
+/*
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+/*
+ *******************************************************************************
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
+ *                                                                             *
+ * The original version of this source code and documentation is copyrighted   *
+ * and owned by IBM, These materials are provided under terms of a License     *
+ * Agreement between IBM and Sun. This technology is protected by multiple     *
+ * US and International patents. This notice and attribution to IBM may not    *
+ * to removed.                                                                 *
+ *******************************************************************************
+*   file name:  UBiDiProps.java
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005jan16
+*   created by: Markus W. Scherer
+*
+*   Low-level Unicode bidi/shaping properties access.
+*   Java port of ubidi_props.h/.c.
+*/
+
+package sun.text.normalizer;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.InputStream;
+import java.io.IOException;
+
+public final class UBiDiProps {
+    // constructors etc. --------------------------------------------------- ***
+
+    /**
+     * Loads the bidi properties from the data file named by DATA_FILE_NAME
+     * (port of ubidi_openProps()).
+     * @exception IOException thrown when the data file cannot be read or
+     *            its contents are rejected by readData()
+     */
+    public UBiDiProps() throws IOException{
+        InputStream is=ICUData.getStream(DATA_FILE_NAME);
+        BufferedInputStream b=new BufferedInputStream(is, 4096 /* data buffer size */);
+        try {
+            readData(b);
+        } finally {
+            // closing the buffered wrapper also closes the underlying stream;
+            // doing it in finally releases the stream even when readData() throws
+            b.close();
+        }
+    }
+
+    /**
+     * Reads the contents of the bidi properties data file: the header,
+     * indexes[], the properties trie, mirrors[] and jgArray[].
+     * @param is input stream positioned at the start of the data file
+     * @exception IOException thrown when reading fails or when the values
+     *            stored in indexes[] are inconsistent
+     */
+    private void readData(InputStream is) throws IOException {
+        DataInputStream inputStream=new DataInputStream(is);
+
+        // read the header
+        ICUBinary.readHeader(inputStream, FMT, new IsAcceptable());
+
+        // read indexes[]
+        int i, count;
+        count=inputStream.readInt();
+        // indexes[] must at least reach IX_JG_LIMIT, the highest entry used below;
+        // a shorter array would otherwise fail with ArrayIndexOutOfBoundsException
+        if(count<=IX_JG_LIMIT) {
+            throw new IOException("indexes[0] too small in "+DATA_FILE_NAME);
+        }
+        indexes=new int[count];
+
+        // the first entry is the length that was just read
+        indexes[IX_INDEX_TOP]=count;
+        for(i=1; i<count; ++i) {
+            indexes[i]=inputStream.readInt();
+        }
+
+        // read the trie
+        trie=new CharTrie(inputStream, null);
+
+        // read mirrors[]
+        count=indexes[IX_MIRROR_LENGTH];
+        if(count>0) {
+            mirrors=new int[count];
+            for(i=0; i<count; ++i) {
+                mirrors[i]=inputStream.readInt();
+            }
+        }
+
+        // read jgArray[]
+        count=indexes[IX_JG_LIMIT]-indexes[IX_JG_START];
+        if(count<0) {
+            // corrupt data: report it as an I/O problem instead of letting
+            // the array allocation throw NegativeArraySizeException
+            throw new IOException("invalid jgArray range in "+DATA_FILE_NAME);
+        }
+        jgArray=new byte[count];
+        inputStream.readFully(jgArray);
+    }
+
+    // ICUBinary.Authenticate implementation handed to ICUBinary.readHeader() by readData()
+    private final class IsAcceptable implements ICUBinary.Authenticate {
+        /**
+         * Accepts the data when version[0] is 1 and version[2] and version[3]
+         * equal Trie.INDEX_STAGE_1_SHIFT_ and Trie.INDEX_STAGE_2_SHIFT_.
+         * @param version version bytes to check
+         * @return true if the version is acceptable
+         */
+        public boolean isDataVersionAcceptable(byte version[]) {
+            if(version[0]!=1) {
+                return false;
+            }
+            if(version[2]!=Trie.INDEX_STAGE_1_SHIFT_) {
+                return false;
+            }
+            return version[3]==Trie.INDEX_STAGE_2_SHIFT_;
+        }
+    }
+
+    // shared instance backed by the data file; created on first request
+    private static UBiDiProps gBdp=null;
+
+    /**
+     * Returns the shared UBiDiProps instance, creating it on the first call
+     * (port of ubidi_getSingleton()).
+     * @return the shared instance
+     * @exception IOException thrown when the instance cannot be constructed;
+     *            nothing is cached in that case
+     */
+    public static final synchronized UBiDiProps getSingleton() throws IOException {
+        UBiDiProps instance=gBdp;
+        if(instance==null) {
+            instance=new UBiDiProps();
+            gBdp=instance;
+        }
+        return instance;
+    }
+
+    // shared instance that works without the data file; created on first request
+    private static UBiDiProps gBdpDummy=null;
+
+    // The boolean argument is ignored; it only gives this constructor a
+    // signature distinct from the public no-argument one.
+    private UBiDiProps(boolean makeDummy) {
+        trie=new CharTrie(0, 0, null); // dummy trie, always returns 0
+        indexes=new int[IX_TOP];
+        indexes[0]=IX_TOP;
+    }
+
+    /**
+     * Returns a shared dummy instance that is built without reading any data.
+     * It can stand in for the real object when the data is not available,
+     * which reduces checks for available data after an initial failure.
+     * The original comment calls this a port of ucase_getDummy(); presumably
+     * the bidi counterpart was meant -- TODO confirm.
+     * @return the shared dummy instance
+     */
+    public static final synchronized UBiDiProps getDummy() {
+        UBiDiProps dummy=gBdpDummy;
+        if(dummy==null) {
+            dummy=new UBiDiProps(true);
+            gBdpDummy=dummy;
+        }
+        return dummy;
+    }
+
+    /**
+     * Looks up the trie value for a code point and extracts the class bits
+     * from it with getClassFromProps().
+     * @param c code point to look up
+     * @return the trie value of c masked with CLASS_MASK
+     */
+    public final int getClass(int c) {
+        int props=trie.getCodePointValue(c);
+        return getClassFromProps(props);
+    }
+
+    // data members -------------------------------------------------------- ***
+    // int values read from the data file after its header; indexes[0] holds the array's own length
+    private int indexes[];
+    // filled by readData() only when indexes[IX_MIRROR_LENGTH]>0, otherwise left null
+    private int mirrors[];
+    // indexes[IX_JG_LIMIT]-indexes[IX_JG_START] bytes read by readData();
+    // not set by the dummy constructor
+    private byte jgArray[];
+
+    // properties trie; getClass() looks up code points in it
+    private CharTrie trie;
+
+    // data format constants ----------------------------------------------- ***
+    // name of the data file opened by the public constructor
+    private static final String DATA_FILE_NAME = "/sun/text/resources/ubidi.icu";
+
+    /* format "BiDi" */
+    private static final byte FMT[]={ 0x42, 0x69, 0x44, 0x69 };
+
+    /* indexes into indexes[] */
+    private static final int IX_INDEX_TOP=0;
+    private static final int IX_MIRROR_LENGTH=3;
+
+    private static final int IX_JG_START=4;
+    private static final int IX_JG_LIMIT=5;
+
+    // size of indexes[] allocated for the dummy instance
+    private static final int IX_TOP=16;
+
+    // selects the low five bits of a trie value; see getClassFromProps()
+    private static final int CLASS_MASK=    0x0000001f;
+
+    /**
+     * Extracts the class bits from a trie value.
+     * @param props value returned by the properties trie
+     * @return props masked with CLASS_MASK (the low five bits)
+     */
+    private static final int getClassFromProps(int props) {
+        final int classBits=props&CLASS_MASK;
+        return classBits;
+    }
+
+}
--- a/src/share/classes/sun/text/normalizer/UCharacter.java
+++ b/src/share/classes/sun/text/normalizer/UCharacter.java
--- a/src/share/classes/sun/text/normalizer/UCharacterProperty.java
+++ b/src/share/classes/sun/text/normalizer/UCharacterProperty.java
--- a/src/share/classes/sun/text/normalizer/UCharacterPropertyReader.java
+++ b/src/share/classes/sun/text/normalizer/UCharacterPropertyReader.java
 /*
- * Portions Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -37,8 +36,8 @@

 package sun.text.normalizer;

-import java.io.InputStream;
 import java.io.DataInputStream;
+import java.io.InputStream;
 import java.io.IOException;

 /**
@@ -50,254 +49,13 @@ import java.io.IOException;
 * </p>
 * <p>uprops.icu which is in big-endian format is jared together with this
 * package.</p>
+*
+* Unicode character properties file format see
+* (ICU4C)/source/tools/genprops/store.c
+*
 * @author Syn Wee Quek
 * @since release 2.1, February 1st 2002
-* @draft 2.1
 */
-/* Unicode character properties file format ------------------------------------
-
-The file format prepared and written here contains several data
-structures that store indexes or data.
-
-
-
-The following is a description of format version 3 .
-
-Data contents:
-
-The contents is a parsed, binary form of several Unicode character
-database files, most prominently UnicodeData.txt.
-
-Any Unicode code point from 0 to 0x10ffff can be looked up to get
-the properties, if any, for that code point. This means that the input
-to the lookup are 21-bit unsigned integers, with not all of the
-21-bit range used.
-
-It is assumed that client code keeps a uint32_t pointer
-to the beginning of the data:
-
-    const uint32_t *p32;
-
-Formally, the file contains the following structures:
-
-    const int32_t indexes[16] with values i0..i15:
-
-    i0 propsIndex; -- 32-bit unit index to the table of 32-bit properties words
-    i1 exceptionsIndex;  -- 32-bit unit index to the table of 32-bit exception words
-    i2 exceptionsTopIndex; -- 32-bit unit index to the array of UChars for special mappings
-
-    i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
-    i4 additionalVectorsIndex; -- 32-bit unit index to the table of properties vectors
-    i5 additionalVectorsColumns; -- number of 32-bit words per properties vector
-
-    i6 reservedItemIndex; -- 32-bit unit index to the top of the properties vectors table
-    i7..i9 reservedIndexes; -- reserved values; 0 for now
-
-    i10 maxValues; -- maximum code values for vector word 0, see uprops.h (format version 3.1+)
-    i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (format version 3.2)
-    i12..i15 reservedIndexes; -- reserved values; 0 for now
-
-    PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
-
-    P  const uint32_t props32[i1-i0];
-    E  const uint32_t exceptions[i2-i1];
-    U  const UChar uchars[2*(i3-i2)];
-
-    AT serialized trie for additional properties (byte size: 4*(i4-i3))
-    PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
-
-Trie lookup and properties:
-
-In order to condense the data for the 21-bit code space, several properties of
-the Unicode code assignment are exploited:
- The code space is sparse.
- There are several 10k of consecutive codes with the same properties.
- Characters and scripts are allocated in groups of 16 code points.
- Inside blocks for scripts the properties are often repetitive.
- The 21-bit space is not fully used for Unicode.
-
-The lookup of properties for a given code point is done with a trie lookup,
-using the UTrie implementation.
-The trie lookup result is a 16-bit index in the props32[] table where the
-actual 32-bit properties word is stored. This is done to save space.
-
-(There are thousands of 16-bit entries in the trie data table, but
-only a few hundred unique 32-bit properties words.
-If the trie data table contained 32-bit words directly, then that would be
-larger because the length of the table would be the same as now but the
-width would be 32 bits instead of 16. This saves more than 10kB.)
-
-With a given Unicode code point
-
-    UChar32 c;
-
-and 0<=c<0x110000, the lookup is done like this:
-
-    uint16_t i;
-    UTRIE_GET16(c, i);
-    uint32_t props=p32[i];
-
-For some characters, not all of the properties can be efficiently encoded
-using 32 bits. For them, the 32-bit word contains an index into the exceptions[]
-array:
-
-    if(props&EXCEPTION_BIT)) {
-        uint16_t e=(uint16_t)(props>>VALUE_SHIFT);
-        ...
-    }
-
-The exception values are a variable number of uint32_t starting at
-
-    const uint32_t *pe=p32+exceptionsIndex+e;
-
-The first uint32_t there contains flags about what values actually follow it.
-Some of the exception values are UChar32 code points for the case mappings,
-others are numeric values etc.
-
-32-bit properties sets:
-
-Each 32-bit properties word contains:
-
- 0.. 4  general category
- 5      has exception values
- 6..10  BiDi category
-11      is mirrored
-12..14  numericType:
-            0 no numeric value
-            1 decimal digit value
-            2 digit value
-            3 numeric value
-            ### TODO: type 4 for Han digits & numbers?!
-15..19  reserved
-20..31  value according to bits 0..5:
-        if(has exception) {
-            exception index;
-        } else switch(general category) {
-        case Ll: delta to uppercase; -- same as titlecase
-        case Lu: -delta to lowercase; -- titlecase is same as c
-        case Lt: -delta to lowercase; -- uppercase is same as c
-        default:
-            if(is mirrored) {
-                delta to mirror;
-            } else if(numericType!=0) {
-                numericValue;
-            } else {
-                0;
-            };
-        }
-
-Exception values:
-
-In the first uint32_t exception word for a code point,
-bits
-31..16  reserved
-15..0   flags that indicate which values follow:
-
-bit
- 0      has uppercase mapping
- 1      has lowercase mapping
- 2      has titlecase mapping
- 3      unused
- 4      has numeric value (numerator)
-            if numericValue=0x7fffff00+x then numericValue=10^x
- 5      has denominator value
- 6      has a mirror-image Unicode code point
- 7      has SpecialCasing.txt entries
- 8      has CaseFolding.txt entries
-
-According to the flags in this word, one or more uint32_t words follow it
-in the sequence of the bit flags in the flags word; if a flag is not set,
-then the value is missing or 0:
-
-For the case mappings and the mirror-image Unicode code point,
-one uint32_t or UChar32 each is the code point.
-If the titlecase mapping is missing, then it is the same as the uppercase mapping.
-
-For the digit values, bits 31..16 contain the decimal digit value, and
-bits 15..0 contain the digit value. A value of -1 indicates that
-this value is missing.
-
-For the numeric/numerator value, an int32_t word contains the value directly,
-except for when there is no numerator but a denominator, then the numerator
-is implicitly 1. This means:
-    numerator denominator result
-    none      none        none
-    x         none        x
-    none      y           1/y
-    x         y           x/y
-
-If the numerator value is 0x7fffff00+x then it is replaced with 10^x.
-
-For the denominator value, a uint32_t word contains the value directly.
-
-For special casing mappings, the 32-bit exception word contains:
-31      if set, this character has complex, conditional mappings
-        that are not stored;
-        otherwise, the mappings are stored according to the following bits
-30..24  number of UChars used for mappings
-23..16  reserved
-15.. 0  UChar offset from the beginning of the UChars array where the
-        UChars for the special case mappings are stored in the following format:
-
-Format of special casing UChars:
-One UChar value with lengths as follows:
-14..10  number of UChars for titlecase mapping
- 9.. 5  number of UChars for uppercase mapping
- 4.. 0  number of UChars for lowercase mapping
-
-Followed by the UChars for lowercase, uppercase, titlecase mappings in this order.
-
-For case folding mappings, the 32-bit exception word contains:
-31..24  number of UChars used for the full mapping
-23..16  reserved
-15.. 0  UChar offset from the beginning of the UChars array where the
-        UChars for the special case mappings are stored in the following format:
-
-Format of case folding UChars:
-Two UChars contain the simple mapping as follows:
-    0,  0   no simple mapping
-    BMP,0   a simple mapping to a BMP code point
-    s1, s2  a simple mapping to a supplementary code point stored as two surrogates
-This is followed by the UChars for the full case folding mappings.
-
-Example:
-U+2160, ROMAN NUMERAL ONE, needs an exception because it has a lowercase
-mapping and a numeric value.
-Its exception values would be stored as 3 uint32_t words:
-
- flags=0x0a (see above) with combining class 0
- lowercase mapping 0x2170
- numeric value=1
-
--- Additional properties (new in format version 2.1) ---
-
-The second trie for additional properties (AT) is also a UTrie with 16-bit data.
-The data words consist of 32-bit unit indexes (not row indexes!) into the
-table of unique properties vectors (PV).
-Each vector contains a set of properties.
-The width of a vector (number of uint32_t per row) may change
-with the formatVersion, it is stored in i5.
-
-Current properties: see icu/source/common/uprops.h
-
--- Changes in format version 3.1 ---
-
-See i10 maxValues above, contains only UBLOCK_COUNT and USCRIPT_CODE_LIMIT.
-
--- Changes in format version 3.2 ---
-
- The tries use linear Latin-1 ranges.
- The additional properties bits store full properties XYZ instead
-  of partial Other_XYZ, so that changes in the derivation formulas
-  need not be tracked in runtime library code.
- Joining Type and Line Break are also stored completely, so that uprops.c
-  needs no runtime formulas for enumerated properties either.
- Store the case-sensitive flag in the main properties word.
- i10 also contains U_LB_COUNT and U_EA_COUNT.
- i11 contains maxValues2 for vector word 2.
-
----------------------------------------------------------------------------- */
-
 final class UCharacterPropertyReader implements ICUBinary.Authenticate
 {
    // public methods ----------------------------------------------------
@@ -315,7 +73,6 @@ final class UCharacterPropertyReader implements ICUBinary.Authenticate
    * <p>Protected constructor.</p>
    * @param inputStream ICU uprop.dat file input stream
    * @exception IOException throw if data file fails authentication
-    * @draft 2.1
    */
    protected UCharacterPropertyReader(InputStream inputStream)
                                                        throws IOException
@@ -331,8 +88,7 @@ final class UCharacterPropertyReader implements ICUBinary.Authenticate
    * <p>Reads uprops.icu, parse it into blocks of data to be stored in
-    * UCharacterProperty.</P
+    * UCharacterProperty.</p>
    * @param ucharppty UCharacterProperty instance
-    * @exception thrown when data reading fails
-    * @draft 2.1
+    * @exception IOException thrown when data reading fails
    */
    protected void read(UCharacterProperty ucharppty) throws IOException
    {
@@ -362,38 +118,30 @@ final class UCharacterPropertyReader implements ICUBinary.Authenticate

        // read the trie index block
        // m_props_index_ in terms of ints
-        ucharppty.m_trie_ = new CharTrie(m_dataInputStream_, ucharppty);
+        ucharppty.m_trie_ = new CharTrie(m_dataInputStream_, null);

-        // reads the 32 bit properties block
+        // skip the 32 bit properties block
        int size = m_exceptionOffset_ - m_propertyOffset_;
-        ucharppty.m_property_ = new int[size];
-        for (int i = 0; i < size; i ++) {
-            ucharppty.m_property_[i] = m_dataInputStream_.readInt();
-        }
+        m_dataInputStream_.skipBytes(size * 4);

-        // reads the 32 bit exceptions block
+        // skip the 32 bit exceptions block
        size = m_caseOffset_ - m_exceptionOffset_;
-        ucharppty.m_exception_ = new int[size];
-        for (int i = 0; i < size; i ++) {
-            ucharppty.m_exception_[i] = m_dataInputStream_.readInt();
-        }
+        m_dataInputStream_.skipBytes(size * 4);

-        // reads the 32 bit case block
+        // skip the 32 bit case block
        size = (m_additionalOffset_ - m_caseOffset_) << 1;
-        ucharppty.m_case_ = new char[size];
-        for (int i = 0; i < size; i ++) {
-            ucharppty.m_case_[i] = m_dataInputStream_.readChar();
-        }
-
-        // reads the additional property block
-        ucharppty.m_additionalTrie_ = new CharTrie(m_dataInputStream_,
-                                                   ucharppty);
-
-        // additional properties
-        size = m_reservedOffset_ - m_additionalVectorsOffset_;
-        ucharppty.m_additionalVectors_ = new int[size];
-        for (int i = 0; i < size; i ++) {
-            ucharppty.m_additionalVectors_[i] = m_dataInputStream_.readInt();
+        m_dataInputStream_.skipBytes(size * 2);
+
+        if(m_additionalColumnsCount_ > 0) {
+            // reads the additional property block
+            ucharppty.m_additionalTrie_ = new CharTrie(m_dataInputStream_, null);
+
+            // additional properties
+            size = m_reservedOffset_ - m_additionalVectorsOffset_;
+            ucharppty.m_additionalVectors_ = new int[size];
+            for (int i = 0; i < size; i ++) {
+                ucharppty.m_additionalVectors_[i] = m_dataInputStream_.readInt();
+            }
        }

        m_dataInputStream_.close();
@@ -428,12 +176,15 @@ final class UCharacterPropertyReader implements ICUBinary.Authenticate
    private byte m_unicodeVersion_[];

    /**
-    * File format version that this class understands.
-    * No guarantees are made if a older version is used
+    * Data format "UPro".
    */
    private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x50,
                                                    (byte)0x72, (byte)0x6F};
-    private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x3, (byte)0x1,
+    /**
+     * Format version; this code works with all versions with the same major
+     * version number and the same Trie bit distribution.
+     */
+    private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x5, (byte)0,
                                             (byte)Trie.INDEX_STAGE_1_SHIFT_,
                                             (byte)Trie.INDEX_STAGE_2_SHIFT_};
 }
--- a/src/share/classes/sun/text/normalizer/UProperty.java
+++ b/src/share/classes/sun/text/normalizer/UProperty.java
-/*
- * Portions Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Sun designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Sun in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- */
-
-/*
- *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
- *                                                                             *
- * The original version of this source code and documentation is copyrighted   *
- * and owned by IBM, These materials are provided under terms of a License     *
- * Agreement between IBM and Sun. This technology is protected by multiple     *
- * US and International patents. This notice and attribution to IBM may not    *
- * to removed.                                                                 *
- *******************************************************************************
- */
-
-package sun.text.normalizer;
-
-/**
- * <p>Selection constants for Unicode properties. </p>
- * <p>These constants are used in functions like
- * UCharacter.hasBinaryProperty(int) to select one of the Unicode properties.
- * </p>
- * <p>The properties APIs are intended to reflect Unicode properties as
- * defined in the Unicode Character Database (UCD) and Unicode Technical
- * Reports (UTR).</p>
- * <p>For details about the properties see <a href=http://www.unicode.org>
- * http://www.unicode.org</a>.</p>
- * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
- * </p>
- * <p>Important: If ICU is built with UCD files from Unicode versions below
- * 3.2, then properties marked with "new" are not or not fully
- * available. Check UCharacter.getUnicodeVersion() to be sure.</p>
- * @author Syn Wee Quek
- * @stable ICU 2.6
- * @see com.ibm.icu.lang.UCharacter
- */
-public interface UProperty
-{
-    // public data member --------------------------------------------------
-
-    /**
-     * Enumerated property Hangul_Syllable_Type, new in Unicode 4.
-     * Returns HangulSyllableType values.
-     * @stable ICU 2.6
-     */
-    public static final int HANGUL_SYLLABLE_TYPE = 0x100B;
-
-    /**
-     * Bitmask property General_Category_Mask.
-     * This is the General_Category property returned as a bit mask.
-     * When used in UCharacter.getIntPropertyValue(c),
-     * returns bit masks for UCharacterCategory values where exactly one bit is set.
-     * When used with UCharacter.getPropertyValueName() and UCharacter.getPropertyValueEnum(),
-     * a multi-bit mask is used for sets of categories like "Letters".
-     * @stable ICU 2.4
-     */
-    public static final int GENERAL_CATEGORY_MASK = 0x2000;
-}
--- a/src/share/classes/sun/text/normalizer/UTF16.java
+++ b/src/share/classes/sun/text/normalizer/UTF16.java
 /*
- * Portions Copyright 2005-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -184,15 +183,16 @@ public final class UTF16
     *            bounds.
     * @stable ICU 2.1
     */
-    public static int charAt(String source, int offset16)
-    {
-        if (offset16 < 0 || offset16 >= source.length()) {
-            throw new StringIndexOutOfBoundsException(offset16);
+    public static int charAt(String source, int offset16) {
+        char single = source.charAt(offset16);
+        if (single < LEAD_SURROGATE_MIN_VALUE) {
+            return single;
        }
+        return _charAt(source, offset16, single);
+    }

-        char single = source.charAt(offset16);
-        if (single < LEAD_SURROGATE_MIN_VALUE ||
-            single > TRAIL_SURROGATE_MAX_VALUE) {
+    private static int _charAt(String source, int offset16, char single) {
+        if (single > TRAIL_SURROGATE_MAX_VALUE) {
            return single;
        }

@@ -201,29 +201,23 @@ public final class UTF16
        // low, look both directions.

        if (single <= LEAD_SURROGATE_MAX_VALUE) {
-            ++ offset16;
+            ++offset16;
            if (source.length() != offset16) {
                char trail = source.charAt(offset16);
-                if (trail >= TRAIL_SURROGATE_MIN_VALUE &&
-                    trail <= TRAIL_SURROGATE_MAX_VALUE) {
-                    return UCharacterProperty.getRawSupplementary(single,
-                                                                  trail);
+                if (trail >= TRAIL_SURROGATE_MIN_VALUE && trail <= TRAIL_SURROGATE_MAX_VALUE) {
+                    return UCharacterProperty.getRawSupplementary(single, trail);
                }
            }
-        }
-        else
-            {
-                -- offset16;
-                if (offset16 >= 0) {
-                    // single is a trail surrogate so
-                    char lead = source.charAt(offset16);
-                    if (lead >= LEAD_SURROGATE_MIN_VALUE &&
-                        lead <= LEAD_SURROGATE_MAX_VALUE) {
-                        return UCharacterProperty.getRawSupplementary(lead,
-                                                                      single);
-                    }
+        } else {
+            --offset16;
+            if (offset16 >= 0) {
+                // single is a trail surrogate so
+                char lead = source.charAt(offset16);
+                if (lead >= LEAD_SURROGATE_MIN_VALUE && lead <= LEAD_SURROGATE_MAX_VALUE) {
+                    return UCharacterProperty.getRawSupplementary(lead, single);
                }
            }
+        }
        return single; // return unmatched surrogate
    }


--- a/src/share/classes/sun/text/normalizer/UnicodeSet.java
+++ b/src/share/classes/sun/text/normalizer/UnicodeSet.java
 /*
- * Portions Copyright 2005-2006 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -38,11 +37,8 @@
 package sun.text.normalizer;

 import java.text.ParsePosition;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.TreeSet;
 import java.util.Iterator;
-import java.util.Collection;
+import java.util.TreeSet;

 /**
 * A mutable set of Unicode characters and multicharacter strings.  Objects of this class
@@ -130,8 +126,8 @@ import java.util.Collection;
 * "[:Lu:]" and the Perl-like syntax "\p{Lu}" are recognized.  For a
 * complete list of supported property patterns, see the User's Guide
 * for UnicodeSet at
- * <a href="http://oss.software.ibm.com/icu/userguide/unicodeSet.html">
- * http://oss.software.ibm.com/icu/userguide/unicodeSet.html</a>.
+ * <a href="http://www.icu-project.org/userguide/unicodeSet.html">
+ * http://www.icu-project.org/userguide/unicodeSet.html</a>.
 * Actual determination of property data is defined by the underlying
 * Unicode database as implemented by UCharacter.
 *
@@ -271,9 +267,11 @@ import java.util.Collection;
 *     </tr>
 *   </table>
 * </blockquote>
+ * <p>To iterate over the contents of a UnicodeSet, use the UnicodeSetIterator class.
 *
 * @author Alan Liu
 * @stable ICU 2.0
+ * @see UnicodeSetIterator
 */
 public class UnicodeSet implements UnicodeMatcher {

@@ -322,7 +320,7 @@ public class UnicodeSet implements UnicodeMatcher {
     * properties are all exactly alike, e.g. CJK Ideographs from
     * U+4E00 to U+9FA5.
     */
-    private static UnicodeSet INCLUSIONS = null;
+    private static UnicodeSet INCLUSIONS[] = null;

    //----------------------------------------------------------------
    // Public API
@@ -471,17 +469,18 @@ public class UnicodeSet implements UnicodeMatcher {
            return result;
        }

-        return _generatePattern(result, escapeUnprintable);
+        return _generatePattern(result, escapeUnprintable, true);
    }

    /**
     * Generate and append a string representation of this set to result.
     * This does not use this.pat, the cleaned up copy of the string
     * passed to applyPattern().
-     * @stable ICU 2.0
+     * @param includeStrings if false, the set's strings are omitted from the generated pattern.
+     * @stable ICU 3.8
     */
    public StringBuffer _generatePattern(StringBuffer result,
-                                         boolean escapeUnprintable) {
+                                         boolean escapeUnprintable, boolean includeStrings) {
        result.append('[');

        int count = getRangeCount();
@@ -524,7 +523,7 @@ public class UnicodeSet implements UnicodeMatcher {
            }
        }

-        if (strings.size() > 0) {
+        if (includeStrings && strings.size() > 0) {
            Iterator it = strings.iterator();
            while (it.hasNext()) {
                result.append('{');
@@ -535,19 +534,8 @@ public class UnicodeSet implements UnicodeMatcher {
        return result.append(']');
    }

-    /**
-     * Adds the specified range to this set if it is not already
-     * present.  If this set already contains the specified range,
-     * the call leaves this set unchanged.  If <code>end > start</code>
-     * then an empty range is added, leaving the set unchanged.
-     *
-     * @param start first character, inclusive, of range to be added
-     * to this set.
-     * @param end last character, inclusive, of range to be added
-     * to this set.
-     * @stable ICU 2.0
-     */
-    public UnicodeSet add(int start, int end) {
+    // for internal use, after checkFrozen has been called
+    private UnicodeSet add_unchecked(int start, int end) {
        if (start < MIN_VALUE || start > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
        }
@@ -569,6 +557,11 @@ public class UnicodeSet implements UnicodeMatcher {
     * @stable ICU 2.0
     */
    public final UnicodeSet add(int c) {
+        return add_unchecked(c);
+    }
+
+    // for internal use only, after checkFrozen has been called
+    private final UnicodeSet add_unchecked(int c) {
        if (c < MIN_VALUE || c > MAX_VALUE) {
            throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
        }
@@ -663,13 +656,12 @@ public class UnicodeSet implements UnicodeMatcher {
     * @stable ICU 2.0
     */
    public final UnicodeSet add(String s) {
-
        int cp = getSingleCP(s);
        if (cp < 0) {
            strings.add(s);
            pat = null;
        } else {
-            add(cp, cp);
+            add_unchecked(cp, cp);
        }
        return this;
    }
@@ -981,7 +973,6 @@ public class UnicodeSet implements UnicodeMatcher {
     */
    void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
                      StringBuffer rebuiltPat, int options) {
-
        // Syntax characters: [ ] ^ - & { }

        // Recognized special forms for chars, sets: c-c s-s s&s
@@ -992,7 +983,7 @@ public class UnicodeSet implements UnicodeMatcher {
            opts |= RuleCharacterIterator.SKIP_WHITESPACE;
        }

-        StringBuffer pat = new StringBuffer(), buf = null;
+        StringBuffer patBuf = new StringBuffer(), buf = null;
        boolean usePat = false;
        UnicodeSet scratch = null;
        Object backup = null;
@@ -1049,13 +1040,13 @@ public class UnicodeSet implements UnicodeMatcher {
                    } else {
                        // Handle opening '[' delimiter
                        mode = 1;
-                        pat.append('[');
+                        patBuf.append('[');
                        backup = chars.getPos(backup); // prepare to backup
                        c = chars.next(opts);
                        literal = chars.isEscaped();
                        if (c == '^' && !literal) {
                            invert = true;
-                            pat.append('^');
+                            patBuf.append('^');
                            backup = chars.getPos(backup); // prepare to backup
                            c = chars.next(opts);
                            literal = chars.isEscaped();
@@ -1093,13 +1084,13 @@ public class UnicodeSet implements UnicodeMatcher {
                    if (op != 0) {
                        syntaxError(chars, "Char expected after operator");
                    }
-                    add(lastChar, lastChar);
-                    _appendToPat(pat, lastChar, false);
+                    add_unchecked(lastChar, lastChar);
+                    _appendToPat(patBuf, lastChar, false);
                    lastItem = op = 0;
                }

                if (op == '-' || op == '&') {
-                    pat.append(op);
+                    patBuf.append(op);
                }

                if (nested == null) {
@@ -1108,14 +1099,14 @@ public class UnicodeSet implements UnicodeMatcher {
                }
                switch (setMode) {
                case 1:
-                    nested.applyPattern(chars, symbols, pat, options);
+                    nested.applyPattern(chars, symbols, patBuf, options);
                    break;
                case 2:
                    chars.skipIgnored(opts);
-                    nested.applyPropertyPattern(chars, pat, symbols);
+                    nested.applyPropertyPattern(chars, patBuf, symbols);
                    break;
                case 3: // `nested' already parsed
-                    nested._toPattern(pat, false);
+                    nested._toPattern(patBuf, false);
                    break;
                }

@@ -1158,17 +1149,17 @@ public class UnicodeSet implements UnicodeMatcher {
                switch (c) {
                case ']':
                    if (lastItem == 1) {
-                        add(lastChar, lastChar);
-                        _appendToPat(pat, lastChar, false);
+                        add_unchecked(lastChar, lastChar);
+                        _appendToPat(patBuf, lastChar, false);
                    }
                    // Treat final trailing '-' as a literal
                    if (op == '-') {
-                        add(op, op);
-                        pat.append(op);
+                        add_unchecked(op, op);
+                        patBuf.append(op);
                    } else if (op == '&') {
                        syntaxError(chars, "Trailing '&'");
                    }
-                    pat.append(']');
+                    patBuf.append(']');
                    mode = 2;
                    continue;
                case '-':
@@ -1178,11 +1169,11 @@ public class UnicodeSet implements UnicodeMatcher {
                            continue;
                        } else {
                            // Treat final trailing '-' as a literal
-                            add(c, c);
+                            add_unchecked(c, c);
                            c = chars.next(opts);
                            literal = chars.isEscaped();
                            if (c == ']' && !literal) {
-                                pat.append("-]");
+                                patBuf.append("-]");
                                mode = 2;
                                continue;
                            }
@@ -1202,8 +1193,8 @@ public class UnicodeSet implements UnicodeMatcher {
                        syntaxError(chars, "Missing operand after operator");
                    }
                    if (lastItem == 1) {
-                        add(lastChar, lastChar);
-                        _appendToPat(pat, lastChar, false);
+                        add_unchecked(lastChar, lastChar);
+                        _appendToPat(patBuf, lastChar, false);
                    }
                    lastItem = 0;
                    if (buf == null) {
@@ -1228,9 +1219,9 @@ public class UnicodeSet implements UnicodeMatcher {
                    // we don't need to drop through to the further
                    // processing
                    add(buf.toString());
-                    pat.append('{');
-                    _appendToPat(pat, buf.toString(), false);
-                    pat.append('}');
+                    patBuf.append('{');
+                    _appendToPat(patBuf, buf.toString(), false);
+                    patBuf.append('}');
                    continue;
                case SymbolTable.SYMBOL_REF:
                    //         symbols  nosymbols
@@ -1250,12 +1241,12 @@ public class UnicodeSet implements UnicodeMatcher {
                    }
                    if (anchor && op == 0) {
                        if (lastItem == 1) {
-                            add(lastChar, lastChar);
-                            _appendToPat(pat, lastChar, false);
+                            add_unchecked(lastChar, lastChar);
+                            _appendToPat(patBuf, lastChar, false);
                        }
-                        add(UnicodeMatcher.ETHER);
+                        add_unchecked(UnicodeMatcher.ETHER);
                        usePat = true;
-                        pat.append(SymbolTable.SYMBOL_REF).append(']');
+                        patBuf.append(SymbolTable.SYMBOL_REF).append(']');
                        mode = 2;
                        continue;
                    }
@@ -1281,14 +1272,14 @@ public class UnicodeSet implements UnicodeMatcher {
                        // these are most likely typos.
                        syntaxError(chars, "Invalid range");
                    }
-                    add(lastChar, c);
-                    _appendToPat(pat, lastChar, false);
-                    pat.append(op);
-                    _appendToPat(pat, c, false);
+                    add_unchecked(lastChar, c);
+                    _appendToPat(patBuf, lastChar, false);
+                    patBuf.append(op);
+                    _appendToPat(patBuf, c, false);
                    lastItem = op = 0;
                } else {
-                    add(lastChar, lastChar);
-                    _appendToPat(pat, lastChar, false);
+                    add_unchecked(lastChar, lastChar);
+                    _appendToPat(patBuf, lastChar, false);
                    lastChar = c;
                }
                break;
@@ -1315,9 +1306,9 @@ public class UnicodeSet implements UnicodeMatcher {
-        // Use the rebuilt pattern (pat) only if necessary.  Prefer the
+        // Use the rebuilt pattern (patBuf) only if necessary.  Prefer the
        // generated pattern.
        if (usePat) {
-            rebuiltPat.append(pat.toString());
+            rebuiltPat.append(patBuf.toString());
        } else {
-            _generatePattern(rebuiltPat, false);
+            _generatePattern(rebuiltPat, false, true);
        }
    }

@@ -1590,7 +1581,9 @@ public class UnicodeSet implements UnicodeMatcher {

    private static class VersionFilter implements Filter {
        VersionInfo version;
+
        VersionFilter(VersionInfo version) { this.version = version; }
+
        public boolean contains(int ch) {
            VersionInfo v = UCharacter.getAge(ch);
            // Reference comparison ok; VersionInfo caches and reuses
@@ -1600,18 +1593,28 @@ public class UnicodeSet implements UnicodeMatcher {
        }
    }

-    private static synchronized UnicodeSet getInclusions() {
+    private static synchronized UnicodeSet getInclusions(int src) {
        if (INCLUSIONS == null) {
-            UCharacterProperty property = UCharacterProperty.getInstance();
-            INCLUSIONS = property.getInclusions();
+            INCLUSIONS = new UnicodeSet[UCharacterProperty.SRC_COUNT];
+        }
+        if(INCLUSIONS[src] == null) {
+            UnicodeSet incl = new UnicodeSet();
+            switch(src) {
+            case UCharacterProperty.SRC_PROPSVEC:
+                UCharacterProperty.getInstance().upropsvec_addPropertyStarts(incl);
+                break;
+            default:
+                throw new IllegalStateException("UnicodeSet.getInclusions(unknown src "+src+")");
+            }
+            INCLUSIONS[src] = incl;
        }
-        return INCLUSIONS;
+        return INCLUSIONS[src];
    }

    /**
     * Generic filter-based scanning code for UCD property UnicodeSets.
     */
-    private UnicodeSet applyFilter(Filter filter) {
+    private UnicodeSet applyFilter(Filter filter, int src) {
        // Walk through all Unicode characters, noting the start
        // and end of each range for which filter.contain(c) is
        // true.  Add each range to a set.
@@ -1629,7 +1632,7 @@ public class UnicodeSet implements UnicodeMatcher {
        clear();

        int startHasProperty = -1;
-        UnicodeSet inclusions = getInclusions();
+        UnicodeSet inclusions = getInclusions(src);
        int limitRange = inclusions.getRangeCount();

        for (int j=0; j<limitRange; ++j) {
@@ -1646,19 +1649,18 @@ public class UnicodeSet implements UnicodeMatcher {
                        startHasProperty = ch;
                    }
                } else if (startHasProperty >= 0) {
-                    add(startHasProperty, ch-1);
+                    add_unchecked(startHasProperty, ch-1);
                    startHasProperty = -1;
                }
            }
        }
        if (startHasProperty >= 0) {
-            add(startHasProperty, 0x10FFFF);
+            add_unchecked(startHasProperty, 0x10FFFF);
        }

        return this;
    }

-
    /**
     * Remove leading and trailing rule white space and compress
     * internal rule white space to a single space character.
@@ -1686,10 +1688,6 @@ public class UnicodeSet implements UnicodeMatcher {
        return buf.toString();
    }

-    //----------------------------------------------------------------
-    // Property set API
-    //----------------------------------------------------------------
-
    /**
     * Modifies this set to contain those code points which have the
     * given value for the given property.  Prior contents of this
@@ -1699,22 +1697,21 @@ public class UnicodeSet implements UnicodeMatcher {
     * @param symbols if not null, then symbols are first called to see if a property
     * is available. If true, then everything else is skipped.
     * @return this set
-     * @draft ICU 3.2
-     * @deprecated This is a draft API and might change in a future release of ICU.
+     * @stable ICU 3.2
     */
    public UnicodeSet applyPropertyAlias(String propertyAlias,
                                         String valueAlias, SymbolTable symbols) {
-                if (propertyAlias.equals("Age"))
-                    {
-                        // Must munge name, since
-                        // VersionInfo.getInstance() does not do
-                        // 'loose' matching.
-                        VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
-                        applyFilter(new VersionFilter(version));
-                        return this;
-                    }
-                else
-                    throw new IllegalArgumentException("Unsupported property");
+        if (valueAlias.length() > 0) {
+            if (propertyAlias.equals("Age")) {
+                // Must munge name, since
+                // VersionInfo.getInstance() does not do
+                // 'loose' matching.
+                VersionInfo version = VersionInfo.getInstance(mungeCharName(valueAlias));
+                applyFilter(new VersionFilter(version), UCharacterProperty.SRC_PROPSVEC);
+                return this;
+            }
+        }
+        throw new IllegalArgumentException("Unsupported property: " + propertyAlias);
    }

    /**
@@ -1840,14 +1837,14 @@ public class UnicodeSet implements UnicodeMatcher {
     */
    private void applyPropertyPattern(RuleCharacterIterator chars,
                                      StringBuffer rebuiltPat, SymbolTable symbols) {
-        String pat = chars.lookahead();
+        String patStr = chars.lookahead();
        ParsePosition pos = new ParsePosition(0);
-        applyPropertyPattern(pat, pos, symbols);
+        applyPropertyPattern(patStr, pos, symbols);
        if (pos.getIndex() == 0) {
            syntaxError(chars, "Invalid property pattern");
        }
        chars.jumpahead(pos.getIndex());
-        rebuiltPat.append(pat.substring(0, pos.getIndex()));
+        rebuiltPat.append(patStr.substring(0, pos.getIndex()));
    }

    //----------------------------------------------------------------
@@ -1860,8 +1857,9 @@ public class UnicodeSet implements UnicodeMatcher {
     * which UCharacterProperty.isRuleWhiteSpace() returns true,
     * unless they are quoted or escaped.  This may be ORed together
     * with other selectors.
-     * @internal
+     * @stable ICU 3.8
     */
    public static final int IGNORE_SPACE = 1;

 }
+
--- a/src/share/classes/sun/text/normalizer/UnicodeSetIterator.java
+++ b/src/share/classes/sun/text/normalizer/UnicodeSetIterator.java
 /*
- * Portions Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,10 +22,9 @@
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
-
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -167,8 +166,8 @@ public class UnicodeSetIterator {
     * @param set the set to iterate over.
     * @stable ICU 2.0
     */
-    public void reset(UnicodeSet set) {
-        this.set = set;
+    public void reset(UnicodeSet uset) {
+        set = uset;
        reset();
    }

@@ -213,8 +212,8 @@ public class UnicodeSetIterator {
    /**
     * @internal
     */
-    protected void loadRange(int range) {
-        nextElement = set.getRangeStart(range);
-        endElement = set.getRangeEnd(range);
+    protected void loadRange(int aRange) {
+        nextElement = set.getRangeStart(aRange);
+        endElement = set.getRangeEnd(aRange);
    }
 }
--- a/src/share/classes/sun/text/normalizer/Utility.java
+++ b/src/share/classes/sun/text/normalizer/Utility.java
 /*
- * Portions Copyright 2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,7 @@
 */
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
@@ -36,10 +36,27 @@

 package sun.text.normalizer;

-// This class contains utility functions so testing not needed
-///CLOVER:OFF
 public final class Utility {

+    /**
+     * Convenience utility to compare two char[]s.
+     * Ought to be in System.
+     * @param len the length to compare.
+     * The start indices and start+len must be valid.
+     */
+    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
+                                            char[] target, int targetStart,
+                                            int len)
+    {
+        int sourceEnd = sourceStart + len;
+        int delta = targetStart - sourceStart;
+        for (int i = sourceStart; i < sourceEnd; i++) {
+            if (source[i]!=target[i + delta])
+            return false;
+        }
+        return true;
+    }
+
    /**
     * Convert characters outside the range U+0020 to U+007F to
     * Unicode escapes, and convert backslash to a double backslash.
@@ -344,7 +361,6 @@ public final class Utility {
        return false;
    }

-    //// for StringPrep
    /**
    * Similar to StringBuffer.getChars, version 1.3.
    * Since JDK 1.2 implements StringBuffer.getChars differently, this method
@@ -356,7 +372,6 @@ public final class Utility {
    * @param dst char array to store the retrieved chars
    * @param dstBegin offset to the start of the destination char array to
    *                 store the retrieved chars
-    * @draft since ICU4J 2.0
    */
    public static void getChars(StringBuffer src, int srcBegin, int srcEnd,
                                char dst[], int dstBegin)
@@ -367,23 +382,4 @@ public final class Utility {
        src.getChars(srcBegin, srcEnd, dst, dstBegin);
    }

-    /**
-     * Convenience utility to compare two char[]s.
-     * @param len the length to compare.
-     * The start indices and start+len must be valid.
-     */
-    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
-                                            char[] target, int targetStart,
-                                            int len)
-    {
-        int sourceEnd = sourceStart + len;
-        int delta = targetStart - sourceStart;
-        for (int i = sourceStart; i < sourceEnd; i++) {
-            if (source[i] != target[i + delta])
-            return false;
-        }
-        return true;
-    }
-
 }
-///CLOVER:ON
--- a/src/share/classes/sun/text/normalizer/VersionInfo.java
+++ b/src/share/classes/sun/text/normalizer/VersionInfo.java
 /*
- * Portions Copyright 2005-2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * Portions Copyright 2005-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,7 @@
 */
 /*
 *******************************************************************************
- * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
+ * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved         *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *

--- a/src/share/classes/sun/text/resources/ubidi.icu
+++ b/src/share/classes/sun/text/resources/ubidi.icu
--- a/src/share/classes/sun/text/resources/unorm.icu
+++ b/src/share/classes/sun/text/resources/unorm.icu
--- a/src/share/classes/sun/text/resources/uprops.icu
+++ b/src/share/classes/sun/text/resources/uprops.icu
--- a/test/java/lang/String/ToLowerCase.java
+++ b/test/java/lang/String/ToLowerCase.java
@@ -72,7 +72,7 @@ public class ToLowerCase {
        // I-dot tests (Turkish and Azeri)
        test("\u0130", turkish, "i");
        test("\u0130", az, "i");
-        test("\u0130", Locale.US, "i");
+        test("\u0130", Locale.US, "i\u0307");

        // Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
        test("I\u0307", turkish, "i");