From 557a5a332a0b71e8abbe1e892a85eca2c19b3ee1 Mon Sep 17 00:00:00 2001 From: naoto Date: Fri, 10 Sep 2010 15:29:40 -0700 Subject: [PATCH] 6875847: Java Locale Enhancement Reviewed-by: srl Contributed-by: Yoshito Umaoka , Doug Felt , Mark Davis --- make/java/java/FILES_java.gmk | 12 + .../java/text/DecimalFormatSymbols.java | 21 +- src/share/classes/java/util/Calendar.java | 33 +- .../java/util/IllformedLocaleException.java | 90 + src/share/classes/java/util/Locale.java | 1766 ++++++++++++++--- .../classes/java/util/ResourceBundle.java | 557 ++++-- .../java/util/spi/LocaleNameProvider.java | 51 +- .../java/util/spi/LocaleServiceProvider.java | 30 +- .../sun/util/LocaleServiceProviderPool.java | 177 +- .../classes/sun/util/locale/AsciiUtil.java | 208 ++ .../classes/sun/util/locale/BaseLocale.java | 253 +++ .../classes/sun/util/locale/Extension.java | 63 + .../util/locale/InternalLocaleBuilder.java | 705 +++++++ .../classes/sun/util/locale/LanguageTag.java | 726 +++++++ .../sun/util/locale/LocaleExtensions.java | 246 +++ .../sun/util/locale/LocaleObjectCache.java | 108 + .../util/locale/LocaleSyntaxException.java | 52 + .../classes/sun/util/locale/ParseStatus.java | 60 + .../sun/util/locale/StringTokenIterator.java | 117 ++ .../util/locale/UnicodeLocaleExtension.java | 127 ++ .../sun/util/resources/LocaleData.java | 7 +- .../sun/util/resources/LocaleNames.properties | 184 ++ .../util/resources/LocaleNames_zh.properties | 6 + .../resources/LocaleNames_zh_TW.properties | 6 + test/java/util/Locale/LocaleEnhanceTest.java | 1293 ++++++++++++ test/java/util/Locale/LocaleTestFmwk.java | 19 +- test/java/util/Locale/icuLocales.txt | 292 +++ .../util/Locale/serialized/java6locale_ROOT | Bin 0 -> 127 bytes .../util/Locale/serialized/java6locale__US | Bin 0 -> 127 bytes .../util/Locale/serialized/java6locale___Java | Bin 0 -> 129 bytes .../util/Locale/serialized/java6locale_en | Bin 0 -> 127 bytes .../util/Locale/serialized/java6locale_en_US | Bin 0 -> 127 bytes 
.../Locale/serialized/java6locale_en_US_Java | Bin 0 -> 131 bytes .../util/Locale/serialized/java6locale_iw_IL | Bin 0 -> 127 bytes .../Locale/serialized/java6locale_ja_JP_JP | Bin 0 -> 129 bytes .../Locale/serialized/java6locale_no_NO_NY | Bin 0 -> 129 bytes .../Locale/serialized/java6locale_th_TH_TH | Bin 0 -> 129 bytes 37 files changed, 6643 insertions(+), 566 deletions(-) create mode 100644 src/share/classes/java/util/IllformedLocaleException.java create mode 100644 src/share/classes/sun/util/locale/AsciiUtil.java create mode 100644 src/share/classes/sun/util/locale/BaseLocale.java create mode 100644 src/share/classes/sun/util/locale/Extension.java create mode 100644 src/share/classes/sun/util/locale/InternalLocaleBuilder.java create mode 100644 src/share/classes/sun/util/locale/LanguageTag.java create mode 100644 src/share/classes/sun/util/locale/LocaleExtensions.java create mode 100644 src/share/classes/sun/util/locale/LocaleObjectCache.java create mode 100644 src/share/classes/sun/util/locale/LocaleSyntaxException.java create mode 100644 src/share/classes/sun/util/locale/ParseStatus.java create mode 100644 src/share/classes/sun/util/locale/StringTokenIterator.java create mode 100644 src/share/classes/sun/util/locale/UnicodeLocaleExtension.java create mode 100644 test/java/util/Locale/LocaleEnhanceTest.java create mode 100644 test/java/util/Locale/icuLocales.txt create mode 100644 test/java/util/Locale/serialized/java6locale_ROOT create mode 100644 test/java/util/Locale/serialized/java6locale__US create mode 100644 test/java/util/Locale/serialized/java6locale___Java create mode 100644 test/java/util/Locale/serialized/java6locale_en create mode 100644 test/java/util/Locale/serialized/java6locale_en_US create mode 100644 test/java/util/Locale/serialized/java6locale_en_US_Java create mode 100644 test/java/util/Locale/serialized/java6locale_iw_IL create mode 100644 test/java/util/Locale/serialized/java6locale_ja_JP_JP create mode 100644 
test/java/util/Locale/serialized/java6locale_no_NO_NY create mode 100644 test/java/util/Locale/serialized/java6locale_th_TH_TH diff --git a/make/java/java/FILES_java.gmk b/make/java/java/FILES_java.gmk index 416eeb343..d9af3c10f 100644 --- a/make/java/java/FILES_java.gmk +++ b/make/java/java/FILES_java.gmk @@ -183,10 +183,22 @@ JAVA_JAVA_java = \ java/util/MissingFormatWidthException.java \ java/util/UnknownFormatConversionException.java \ java/util/UnknownFormatFlagsException.java \ + java/util/IllformedLocaleException.java \ java/util/FormatterClosedException.java \ java/util/ListResourceBundle.java \ sun/util/EmptyListResourceBundle.java \ java/util/Locale.java \ + sun/util/locale/AsciiUtil.java \ + sun/util/locale/BaseLocale.java \ + sun/util/locale/Extension.java \ + sun/util/locale/InternalLocaleBuilder.java \ + sun/util/locale/LanguageTag.java \ + sun/util/locale/LocaleExtensions.java \ + sun/util/locale/LocaleObjectCache.java \ + sun/util/locale/LocaleSyntaxException.java \ + sun/util/locale/ParseStatus.java \ + sun/util/locale/StringTokenIterator.java \ + sun/util/locale/UnicodeLocaleExtension.java \ java/util/LocaleISOData.java \ sun/util/LocaleServiceProviderPool.java \ sun/util/LocaleDataMetaInfo.java \ diff --git a/src/share/classes/java/text/DecimalFormatSymbols.java b/src/share/classes/java/text/DecimalFormatSymbols.java index 358156142..b3c55b82c 100644 --- a/src/share/classes/java/text/DecimalFormatSymbols.java +++ b/src/share/classes/java/text/DecimalFormatSymbols.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -43,10 +43,10 @@ import java.io.ObjectInputStream; import java.io.Serializable; import java.text.spi.DecimalFormatSymbolsProvider; import java.util.Currency; -import java.util.Hashtable; import java.util.Locale; import java.util.ResourceBundle; -import java.util.spi.LocaleServiceProvider; +import java.util.concurrent.ConcurrentHashMap; + import sun.util.LocaleServiceProviderPool; import sun.util.resources.LocaleData; @@ -527,10 +527,17 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { // get resource bundle data - try the cache first boolean needCacheUpdate = false; - Object[] data = (Object[]) cachedLocaleData.get(locale); + Object[] data = cachedLocaleData.get(locale); if (data == null) { /* cache miss */ + // When numbering system is thai (Locale's extension contains u-nu-thai), + // we read the data from th_TH_TH. + Locale lookupLocale = locale; + String numberType = locale.getUnicodeLocaleType("nu"); + if (numberType != null && numberType.equals("thai")) { + lookupLocale = new Locale("th", "TH", "TH"); + } data = new Object[3]; - ResourceBundle rb = LocaleData.getNumberFormatData(locale); + ResourceBundle rb = LocaleData.getNumberFormatData(lookupLocale); data[0] = rb.getStringArray("NumberElements"); needCacheUpdate = true; } @@ -586,7 +593,7 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { monetarySeparator = decimalSeparator; if (needCacheUpdate) { - cachedLocaleData.put(locale, data); + cachedLocaleData.putIfAbsent(locale, data); } } @@ -806,7 +813,7 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { * cache to hold the NumberElements and the Currency * of a Locale. 
*/ - private static final Hashtable cachedLocaleData = new Hashtable(3); + private static final ConcurrentHashMap cachedLocaleData = new ConcurrentHashMap(3); /** * Obtains a DecimalFormatSymbols instance from a DecimalFormatSymbolsProvider diff --git a/src/share/classes/java/util/Calendar.java b/src/share/classes/java/util/Calendar.java index 1fb891cf7..7ec5e87ec 100644 --- a/src/share/classes/java/util/Calendar.java +++ b/src/share/classes/java/util/Calendar.java @@ -1013,19 +1013,30 @@ public abstract class Calendar implements Serializable, Cloneable, ComparableIllformedLocaleException with no + * detail message and -1 as the error index. + */ + public IllformedLocaleException() { + super(); + } + + /** + * Constructs a new IllformedLocaleException with the + * given message and -1 as the error index. + * + * @param message the message + */ + public IllformedLocaleException(String message) { + super(message); + } + + /** + * Constructs a new IllformedLocaleException with the + * given message and the error index. The error index is the approximate + * offset from the start of the ill-formed value to the point where the + * parse first detected an error. A negative error index value indicates + * either the error index is not applicable or unknown. + * + * @param message the message + * @param errorIndex the index + */ + public IllformedLocaleException(String message, int errorIndex) { + super(message + ((errorIndex < 0) ? "" : " [at index " + errorIndex + "]")); + _errIdx = errorIndex; + } + + /** + * Returns the index where the error was found. A negative value indicates + * either the error index is not applicable or unknown. 
+ * + * @return the error index + */ + public int getErrorIndex() { + return _errIdx; + } +} diff --git a/src/share/classes/java/util/Locale.java b/src/share/classes/java/util/Locale.java index 9c53e6e78..49b85866e 100644 --- a/src/share/classes/java/util/Locale.java +++ b/src/share/classes/java/util/Locale.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -40,86 +40,240 @@ package java.util; -import java.io.*; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.ObjectStreamField; +import java.io.Serializable; import java.security.AccessController; import java.text.MessageFormat; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; import java.util.spi.LocaleNameProvider; -import java.util.spi.LocaleServiceProvider; + import sun.security.action.GetPropertyAction; import sun.util.LocaleServiceProviderPool; +import sun.util.locale.AsciiUtil; +import sun.util.locale.BaseLocale; +import sun.util.locale.InternalLocaleBuilder; +import sun.util.locale.LanguageTag; +import sun.util.locale.LocaleExtensions; +import sun.util.locale.LocaleObjectCache; +import sun.util.locale.LocaleSyntaxException; +import sun.util.locale.ParseStatus; +import sun.util.locale.UnicodeLocaleExtension; import sun.util.resources.LocaleData; import sun.util.resources.OpenListResourceBundle; /** - * * A Locale object represents a specific geographical, political, * or cultural region. An operation that requires a Locale to perform * its task is called locale-sensitive and uses the Locale * to tailor information for the user. 
For example, displaying a number - * is a locale-sensitive operation--the number should be formatted - * according to the customs/conventions of the user's native country, + * is a locale-sensitive operation— the number should be formatted + * according to the customs and conventions of the user's native country, * region, or culture. * - *

- * Create a Locale object using the constructors in this class: + *

The Locale class implements identifiers + * interchangeable with BCP 47 (IETF BCP 47, "Tags for Identifying + * Languages"), with support for the LDML (UTS#35, "Unicode Locale + * Data Markup Language") BCP 47-compatible extensions for locale data + * exchange. + * + *

A Locale object logically consists of the fields + * described below. + * + *

+ *
language
+ * + *
ISO 639 alpha-2 or alpha-3 language code, or registered + * language subtags up to 8 alpha letters (for future enhancements). + * When a language has both an alpha-2 code and an alpha-3 code, the + * alpha-2 code must be used. You can find a full list of valid + * language codes in the IANA Language Subtag Registry (search for + * "Type: language"). The language field is case insensitive, but + * Locale always canonicalizes to lower case.

+ * + *
Well-formed language values have the form + * [a-zA-Z]{2,8}. Note that this is not the full + * BCP47 language production, since it excludes extlang. They are + * not needed since modern three-letter language codes replace + * them.

+ * + *
Example: "en" (English), "ja" (Japanese), "kok" (Konkani)

+ * + *
script
+ * + *
ISO 15924 alpha-4 script code. You can find a full list of + * valid script codes in the IANA Language Subtag Registry (search + * for "Type: script"). The script field is case insensitive, but + * Locale always canonicalizes to title case (the first + * letter is upper case and the rest of the letters are lower + * case).

+ * + *
Well-formed script values have the form + * [a-zA-Z]{4}

+ * + *
Example: "Latn" (Latin), "Cyrl" (Cyrillic)

+ * + *
country (region)
+ * + *
ISO 3166 alpha-2 country code or UN M.49 numeric-3 area code. + * You can find a full list of valid country and region codes in the + * IANA Language Subtag Registry (search for "Type: region"). The + * country (region) field is case insensitive, but + * Locale always canonicalizes to upper case.

+ * + *
Well-formed country/region values have + * the form [a-zA-Z]{2} | [0-9]{3}

+ * + *
Example: "US" (United States), "FR" (France), "029" + * (Caribbean)

+ * + *
variant
+ * + *
Any arbitrary value used to indicate a variation of a + * Locale. Where there are two or more variant values + * each indicating its own semantics, these values should be ordered + * by importance, with most important first, separated by + * underscore('_'). The variant field is case sensitive.

+ * + *
Note: IETF BCP 47 places syntactic restrictions on variant + * subtags. Also BCP 47 subtags are strictly used to indicate + * additional variations that define a language or its dialects that + * are not covered by any combinations of language, script and + * region subtags. You can find a full list of valid variant codes + * in the IANA Language Subtag Registry (search for "Type: variant"). + * + *

However, the variant field in Locale has + * historically been used for any kind of variation, not just + * language variations. For example, some supported variants + * available in Java SE Runtime Environments indicate alternative + * cultural behaviors such as calendar type or number script. In + * BCP 47 this kind of information, which does not identify the + * language, is supported by extension subtags or private use + * subtags.


+ * + *
Well-formed variant values have the form SUBTAG + * (('_'|'-') SUBTAG)* where SUBTAG = + * [0-9][0-9a-zA-Z]{3} | [0-9a-zA-Z]{5,8}. (Note: BCP 47 only + * uses hyphen ('-') as a delimiter; this form is more lenient).

+ * + *
Example: "polyton" (Polytonic Greek), "POSIX"

+ * + *
extensions
+ * + *
A map from single character keys to string values, indicating + * extensions apart from language identification. The extensions in + * Locale implement the semantics and syntax of BCP 47 + * extension subtags and private use subtags. The extensions are + * case insensitive, but Locale canonicalizes all + * extension keys and values to lower case. Note that extensions + * cannot have empty values.

+ * + *
Well-formed keys are single characters from the set + * [0-9a-zA-Z]. Well-formed values have the form + * SUBTAG ('-' SUBTAG)* where for the key 'x' + * SUBTAG = [0-9a-zA-Z]{1,8} and for other keys + * SUBTAG = [0-9a-zA-Z]{2,8} (that is, 'x' allows + * single-character subtags).

+ * + *
Example: key="u"/value="ca-japanese" (Japanese Calendar), + * key="x"/value="java-1-7"
+ *
+ * + * Note: Although BCP 47 requires field values to be registered + * in the IANA Language Subtag Registry, the Locale class + * does not provide any validation features. The Builder + * only checks if an individual field satisfies the syntactic + * requirement (is well-formed), but does not validate the value + * itself. See {@link Builder} for details. + * + *

Unicode locale/language extension

+ * + *

UTS#35, "Unicode Locale Data Markup Language" defines optional + * attributes and keywords to override or refine the default behavior + * associated with a locale. A keyword is represented by a pair of + * key and type. For example, "nu-thai" indicates that Thai local + * digits (value:"thai") should be used for formatting numbers + * (key:"nu"). + * + *

The keywords are mapped to a BCP 47 extension value using the + * extension key 'u' ({@link #UNICODE_LOCALE_EXTENSION}). The above + * example, "nu-thai", becomes the extension "u-nu-thai". + * 

Thus, when a Locale object contains Unicode locale + * attributes and keywords, + * getExtension(UNICODE_LOCALE_EXTENSION) will return a + * String representing this information, for example, "nu-thai". The + * Locale class also provides {@link + * #getUnicodeLocaleAttributes}, {@link #getUnicodeLocaleKeys}, and + * {@link #getUnicodeLocaleType} which allow you to access Unicode + * locale attributes and key/type pairs directly. When represented as + * a string, the Unicode Locale Extension lists attributes + * alphabetically, followed by key/type sequences with keys listed + * alphabetically (the order of subtags comprising a key's type is + * fixed when the type is defined). + * 

A well-formed locale key has the form + * [0-9a-zA-Z]{2}. A well-formed locale type has the + * form "" | [0-9a-zA-Z]{3,8} ('-' [0-9a-zA-Z]{3,8})* (it + * can be empty, or a series of subtags 3-8 alphanums in length). A + * well-formed locale attribute has the form + * [0-9a-zA-Z]{3,8} (it is a single subtag with the same + * form as a locale type subtag). + * + *

The Unicode locale extension specifies optional behavior in + * locale-sensitive services. Although the LDML specification defines + * various keys and values, actual locale-sensitive service + * implementations in a Java Runtime Environment might not support any + * particular Unicode locale attributes or key/type pairs. + * + *

Creating a Locale

+ * + *

There are several different ways to create a Locale + * object. + * + *

Builder
+ * + *

Using {@link Builder} you can construct a Locale object + * that conforms to BCP 47 syntax. + * + *

Constructors
+ * + *

The Locale class provides three constructors: *

*
- * Locale(String language)
- * Locale(String language, String country)
- * Locale(String language, String country, String variant)
+ *     {@link #Locale(String language)}
+ *     {@link #Locale(String language, String country)}
+ *     {@link #Locale(String language, String country, String variant)}
  * 
*
- * The language argument is a valid ISO Language Code. - * These codes are the lower-case, two-letter codes as defined by ISO-639. - * You can find a full list of these codes at a number of sites, such as: - *
- * http://www.loc.gov/standards/iso639-2/php/English_list.php - * - *

- * The country argument is a valid ISO Country Code. These - * codes are the upper-case, two-letter codes as defined by ISO-3166. - * You can find a full list of these codes at a number of sites, such as: - *
- * http://www.iso.ch/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1.html - * - *

- * The variant argument is a vendor or browser-specific code. - * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX. - * Where there are two variants, separate them with an underscore, and - * put the most important one first. For example, a Traditional Spanish collation - * might construct a locale with parameters for language, country and variant as: - * "es", "ES", "Traditional_WIN". - * - *

- * Because a Locale object is just an identifier for a region, - * no validity check is performed when you construct a Locale. - * If you want to see whether particular resources are available for the - * Locale you construct, you must query those resources. For - * example, ask the NumberFormat for the locales it supports - * using its getAvailableLocales method. - *
Note: When you ask for a resource for a particular - * locale, you get back the best available match, not necessarily - * precisely what you asked for. For more information, look at - * {@link ResourceBundle}. - * - *

- * The Locale class provides a number of convenient constants + * These constructors allow you to create a Locale object + * with language, country and variant, but you cannot specify + * script or extensions. + * + *

Factory Methods
+ * + *

The method {@link #forLanguageTag} creates a Locale + * object for a well-formed BCP 47 language tag. + * + *

Locale Constants
+ * + *

The Locale class provides a number of convenient constants * that you can use to create Locale objects for commonly used * locales. For example, the following creates a Locale object * for the United States: *

*
- * Locale.US
+ *     Locale.US
  * 
*
* - *

- * Once you've created a Locale you can query it for information about - * itself. Use getCountry to get the ISO Country Code and - * getLanguage to get the ISO Language Code. You can - * use getDisplayCountry to get the + *

Use of Locale

+ * + *

Once you've created a Locale you can query it for information + * about itself. Use getCountry to get the country (or region) + * code and getLanguage to get the language code. + * You can use getDisplayCountry to get the * name of the country suitable for displaying to the user. Similarly, * you can use getDisplayLanguage to get the name of * the language suitable for displaying to the user. Interestingly, @@ -127,28 +281,27 @@ import sun.util.resources.OpenListResourceBundle; * and have two versions: one that uses the default locale and one * that uses the locale specified as an argument. * - *

- * The Java Platform provides a number of classes that perform locale-sensitive + *

The Java Platform provides a number of classes that perform locale-sensitive * operations. For example, the NumberFormat class formats - * numbers, currency, or percentages in a locale-sensitive manner. Classes - * such as NumberFormat have a number of convenience methods + * numbers, currency, and percentages in a locale-sensitive manner. Classes + * such as NumberFormat have several convenience methods * for creating a default object of that type. For example, the * NumberFormat class provides these three convenience methods * for creating a default NumberFormat object: *

*
- * NumberFormat.getInstance()
- * NumberFormat.getCurrencyInstance()
- * NumberFormat.getPercentInstance()
+ *     NumberFormat.getInstance()
+ *     NumberFormat.getCurrencyInstance()
+ *     NumberFormat.getPercentInstance()
  * 
*
- * These methods have two variants; one with an explicit locale - * and one without; the latter using the default locale. + * Each of these methods has two variants; one with an explicit locale + * and one without; the latter uses the default locale: *
*
- * NumberFormat.getInstance(myLocale)
- * NumberFormat.getCurrencyInstance(myLocale)
- * NumberFormat.getPercentInstance(myLocale)
+ *     NumberFormat.getInstance(myLocale)
+ *     NumberFormat.getCurrencyInstance(myLocale)
+ *     NumberFormat.getPercentInstance(myLocale)
  * 
*
* A Locale is the mechanism for identifying the kind of object @@ -156,75 +309,162 @@ import sun.util.resources.OpenListResourceBundle; * just a mechanism for identifying objects, * not a container for the objects themselves. * - * @see ResourceBundle - * @see java.text.Format - * @see java.text.NumberFormat - * @see java.text.Collator - * @author Mark Davis - * @since 1.1 + *

Compatibility

+ * + *

In order to maintain compatibility with existing usage, Locale's + * constructors retain their behavior prior to the Java Runtime + * Environment version 1.7. The same is largely true for the + * toString method. Thus Locale objects can continue to + * be used as they were. In particular, clients who parse the output + * of toString into language, country, and variant fields can continue + * to do so (although this is strongly discouraged), but note that the + * variant field will have additional information in it if script or + * extensions are present. + * 

In addition, BCP 47 imposes syntax restrictions that are not + * imposed by Locale's constructors. This means that conversions + * between some Locales and BCP 47 language tags cannot be made without + * losing information. Thus toLanguageTag cannot + * represent the state of locales whose language, country, or variant + * do not conform to BCP 47. + * + *

Because of these issues, it is recommended that clients migrate + * away from constructing non-conforming locales and use the + * forLanguageTag and Locale.Builder APIs instead. + * Clients desiring a string representation of the complete locale can + * then always rely on toLanguageTag for this purpose. + * + *

Special cases
+ * + *

For compatibility reasons, two + * non-conforming locales are treated as special cases. These are + * ja_JP_JP and th_TH_TH. These are ill-formed + * in BCP 47 since the variants are too short. To ease migration to BCP 47, + * these are treated specially during construction. These two cases (and only + * these) cause a constructor to generate an extension; all other values behave + * exactly as they did prior to Java 7. + * 

Java has used ja_JP_JP to represent Japanese as used in + * Japan together with the Japanese Imperial calendar. This is now + * representable using a Unicode locale extension, by specifying the + * Unicode locale key ca (for "calendar") and type + * japanese. When the Locale constructor is called with the + * arguments "ja", "JP", "JP", the extension "u-ca-japanese" is + * automatically added. + * + *

Java has used th_TH_TH to represent Thai as used in + * Thailand together with Thai digits. This is also now representable using + * a Unicode locale extension, by specifying the Unicode locale key + * nu (for "number") and value thai. When the Locale + * constructor is called with the arguments "th", "TH", "TH", the + * extension "u-nu-thai" is automatically added. + * + *

Serialization
+ * + *

During serialization, writeObject writes all fields to the output + * stream, including extensions. + * + *

During deserialization, readResolve adds extensions as described + * in Special Cases, only + * for the two cases th_TH_TH and ja_JP_JP. + * + *

Legacy language codes
+ * + *

Locale's constructor has always converted three language codes to + * their earlier, obsoleted forms: he maps to iw, + * yi maps to ji, and id maps to + * in. This continues to be the case, in order to not break + * backwards compatibility. + * + *

The APIs added in 1.7 map between the old and new language codes, + * maintaining the old codes internal to Locale (so that + * getLanguage and toString reflect the old + * code), but using the new codes in the BCP 47 language tag APIs (so + * that toLanguageTag reflects the new one). This + * preserves the equivalence between Locales no matter which code or + * API is used to construct them. Java's default resource bundle + * lookup mechanism also implements this mapping, so that resources + * can be named using either convention, see {@link ResourceBundle.Control}. + * + *

Three-letter language/country(region) codes
+ * + *

The Locale constructors have always specified that the language + * and the country param be two characters in length, although in + * practice they have accepted any length. The specification has now + * been relaxed to allow language codes of two to eight characters and + * country (region) codes of two to three characters, and in + * particular, three-letter language codes and three-digit region + * codes as specified in the IANA Language Subtag Registry. For + * compatibility, the implementation still does not impose a length + * constraint. + * + * @see Builder + * @see ResourceBundle + * @see java.text.Format + * @see java.text.NumberFormat + * @see java.text.Collator + * @author Mark Davis + * @since 1.1 */ - public final class Locale implements Cloneable, Serializable { - // cache to store singleton Locales - private final static ConcurrentHashMap cache = - new ConcurrentHashMap(32); + static private final Cache LOCALECACHE = new Cache(); /** Useful constant for language. */ - static public final Locale ENGLISH = createSingleton("en__", "en", ""); + static public final Locale ENGLISH = getInstance("en", "", ""); /** Useful constant for language. */ - static public final Locale FRENCH = createSingleton("fr__", "fr", ""); + static public final Locale FRENCH = getInstance("fr", "", ""); /** Useful constant for language. */ - static public final Locale GERMAN = createSingleton("de__", "de", ""); + static public final Locale GERMAN = getInstance("de", "", ""); /** Useful constant for language. */ - static public final Locale ITALIAN = createSingleton("it__", "it", ""); + static public final Locale ITALIAN = getInstance("it", "", ""); /** Useful constant for language. */ - static public final Locale JAPANESE = createSingleton("ja__", "ja", ""); + static public final Locale JAPANESE = getInstance("ja", "", ""); /** Useful constant for language. 
*/ - static public final Locale KOREAN = createSingleton("ko__", "ko", ""); + static public final Locale KOREAN = getInstance("ko", "", ""); /** Useful constant for language. */ - static public final Locale CHINESE = createSingleton("zh__", "zh", ""); + static public final Locale CHINESE = getInstance("zh", "", ""); /** Useful constant for language. */ - static public final Locale SIMPLIFIED_CHINESE = createSingleton("zh_CN_", "zh", "CN"); + static public final Locale SIMPLIFIED_CHINESE = getInstance("zh", "CN", ""); /** Useful constant for language. */ - static public final Locale TRADITIONAL_CHINESE = createSingleton("zh_TW_", "zh", "TW"); + static public final Locale TRADITIONAL_CHINESE = getInstance("zh", "TW", ""); /** Useful constant for country. */ - static public final Locale FRANCE = createSingleton("fr_FR_", "fr", "FR"); + static public final Locale FRANCE = getInstance("fr", "FR", ""); /** Useful constant for country. */ - static public final Locale GERMANY = createSingleton("de_DE_", "de", "DE"); + static public final Locale GERMANY = getInstance("de", "DE", ""); /** Useful constant for country. */ - static public final Locale ITALY = createSingleton("it_IT_", "it", "IT"); + static public final Locale ITALY = getInstance("it", "IT", ""); /** Useful constant for country. */ - static public final Locale JAPAN = createSingleton("ja_JP_", "ja", "JP"); + static public final Locale JAPAN = getInstance("ja", "JP", ""); /** Useful constant for country. */ - static public final Locale KOREA = createSingleton("ko_KR_", "ko", "KR"); + static public final Locale KOREA = getInstance("ko", "KR", ""); /** Useful constant for country. */ @@ -240,19 +480,19 @@ public final class Locale implements Cloneable, Serializable { /** Useful constant for country. */ - static public final Locale UK = createSingleton("en_GB_", "en", "GB"); + static public final Locale UK = getInstance("en", "GB", ""); /** Useful constant for country. 
*/ - static public final Locale US = createSingleton("en_US_", "en", "US"); + static public final Locale US = getInstance("en", "US", ""); /** Useful constant for country. */ - static public final Locale CANADA = createSingleton("en_CA_", "en", "CA"); + static public final Locale CANADA = getInstance("en", "CA", ""); /** Useful constant for country. */ - static public final Locale CANADA_FRENCH = createSingleton("fr_CA_", "fr", "CA"); + static public final Locale CANADA_FRENCH = getInstance("fr", "CA", ""); /** * Useful constant for the root locale. The root locale is the locale whose @@ -262,7 +502,25 @@ public final class Locale implements Cloneable, Serializable { * * @since 1.6 */ - static public final Locale ROOT = createSingleton("__", "", ""); + static public final Locale ROOT = getInstance("", "", ""); + + /** + * The key for the private use extension ('x'). + * + * @see #getExtension(char) + * @see Builder#setExtension(char, String) + * @since 1.7 + */ + static public final char PRIVATE_USE_EXTENSION = 'x'; + + /** + * The key for Unicode locale extension ('u'). + * + * @see #getExtension(char) + * @see Builder#setExtension(char, String) + * @since 1.7 + */ + static public final char UNICODE_LOCALE_EXTENSION = 'u'; /** serialization ID */ @@ -274,32 +532,67 @@ public final class Locale implements Cloneable, Serializable { private static final int DISPLAY_LANGUAGE = 0; private static final int DISPLAY_COUNTRY = 1; private static final int DISPLAY_VARIANT = 2; + private static final int DISPLAY_SCRIPT = 3; /** - * Construct a locale from language, country, variant. - * NOTE: ISO 639 is not a stable standard; some of the language codes it defines - * (specifically iw, ji, and in) have changed. 
This constructor accepts both the - * old codes (iw, ji, and in) and the new codes (he, yi, and id), but all other + * Private constructor used by getInstance method + */ + private Locale(BaseLocale baseLocale, LocaleExtensions extensions) { + _baseLocale = baseLocale; + _extensions = extensions; + } + + /** + * Construct a locale from language, country and variant. + * This constructor normalizes the language value to lowercase and + * the country value to uppercase. + *

+ * Note: + *

    + *
  • ISO 639 is not a stable standard; some of the language codes it defines + * (specifically "iw", "ji", and "in") have changed. This constructor accepts both the + * old codes ("iw", "ji", and "in") and the new codes ("he", "yi", and "id"), but all other * API on Locale will return only the OLD codes. - * @param language lowercase two-letter ISO-639 code. - * @param country uppercase two-letter ISO-3166 code. - * @param variant vendor and browser specific code. See class description. + *
  • For backward compatibility reasons, this constructor does not make + * any syntactic checks on the input. + *
  • The two cases ("ja", "JP", "JP") and ("th", "TH", "TH") are handled specially, + * see Special Cases for more information. + *
+ * + * @param language An ISO 639 alpha-2 or alpha-3 language code, or a language subtag + * up to 8 characters in length. See the Locale class description about + * valid language values. + * @param country An ISO 3166 alpha-2 country code or a UN M.49 numeric-3 area code. + * See the Locale class description about valid country values. + * @param variant Any arbitrary value used to indicate a variation of a Locale. + * See the Locale class description for the details. * @exception NullPointerException thrown if any argument is null. */ public Locale(String language, String country, String variant) { - this.language = convertOldISOCodes(language); - this.country = toUpperCase(country).intern(); - this.variant = variant.intern(); + _baseLocale = BaseLocale.getInstance(convertOldISOCodes(language), "", country, variant); + _extensions = getCompatibilityExtensions(language, "", country, variant); } /** - * Construct a locale from language, country. - * NOTE: ISO 639 is not a stable standard; some of the language codes it defines - * (specifically iw, ji, and in) have changed. This constructor accepts both the - * old codes (iw, ji, and in) and the new codes (he, yi, and id), but all other + * Construct a locale from language and country. + * This constructor normalizes the language value to lowercase and + * the country value to uppercase. + *

+ * Note: + *

    + *
  • ISO 639 is not a stable standard; some of the language codes it defines + * (specifically "iw", "ji", and "in") have changed. This constructor accepts both the + * old codes ("iw", "ji", and "in") and the new codes ("he", "yi", and "id"), but all other * API on Locale will return only the OLD codes. - * @param language lowercase two-letter ISO-639 code. - * @param country uppercase two-letter ISO-3166 code. + *
  • For backward compatibility reasons, this constructor does not make + * any syntactic checks on the input. + *
+ * + * @param language An ISO 639 alpha-2 or alpha-3 language code, or a language subtag + * up to 8 characters in length. See the Locale class description about + * valid language values. + * @param country An ISO 3166 alpha-2 country code or a UN M.49 numeric-3 area code. + * See the Locale class description about valid country values. * @exception NullPointerException thrown if either argument is null. */ public Locale(String language, String country) { @@ -308,11 +601,21 @@ public final class Locale implements Cloneable, Serializable { /** * Construct a locale from a language code. - * NOTE: ISO 639 is not a stable standard; some of the language codes it defines - * (specifically iw, ji, and in) have changed. This constructor accepts both the - * old codes (iw, ji, and in) and the new codes (he, yi, and id), but all other + * This constructor normalizes the language value to lowercase. + *

+ * Note: + *

    + *
  • ISO 639 is not a stable standard; some of the language codes it defines + * (specifically "iw", "ji", and "in") have changed. This constructor accepts both the + * old codes ("iw", "ji", and "in") and the new codes ("he", "yi", and "id"), but all other * API on Locale will return only the OLD codes. - * @param language lowercase two-letter ISO-639 code. + *
  • For backward compatibility reasons, this constructor does not make + * any syntactic checks on the input. + *
+ * + * @param language An ISO 639 alpha-2 or alpha-3 language code, or a language subtag + * up to 8 characters in length. See the Locale class description about + * valid language values. * @exception NullPointerException thrown if argument is null. * @since 1.4 */ @@ -320,32 +623,6 @@ public final class Locale implements Cloneable, Serializable { this(language, "", ""); } - /** - * Constructs a Locale using language - * and country. This constructor assumes that - * language and contry are interned and - * it is invoked by createSingleton only. (flag is just for - * avoiding the conflict with the public constructors. - */ - private Locale(String language, String country, boolean flag) { - this.language = language; - this.country = country; - this.variant = ""; - } - - /** - * Creates a Locale instance with the given - * language and counry and puts the - * instance under the given key in the cache. This - * method must be called only when initializing the Locale - * constants. - */ - private static Locale createSingleton(String key, String language, String country) { - Locale locale = new Locale(language, country, false); - cache.put(key, locale); - return locale; - } - /** * Returns a Locale constructed from the given * language, country and @@ -354,29 +631,70 @@ public final class Locale implements Cloneable, Serializable { * returned. Otherwise, a new Locale instance is * created and cached. * - * @param language lowercase two-letter ISO-639 code. - * @param country uppercase two-letter ISO-3166 code. + * @param language lowercase 2 to 8 language code. + * @param country uppercase two-letter ISO-3166 code and numric-3 UN M.49 area code. * @param variant vendor and browser specific code. See class description. * @return the Locale instance requested * @exception NullPointerException if any argument is null. 
*/ static Locale getInstance(String language, String country, String variant) { - if (language== null || country == null || variant == null) { + return getInstance(language, "", country, variant, LocaleExtensions.EMPTY_EXTENSIONS); + } + + static Locale getInstance(String language, String script, String country, + String variant, LocaleExtensions extensions) { + if (language== null || script == null || country == null || variant == null) { throw new NullPointerException(); } - StringBuilder sb = new StringBuilder(); - sb.append(language).append('_').append(country).append('_').append(variant); - String key = sb.toString(); - Locale locale = cache.get(key); - if (locale == null) { - locale = new Locale(language, country, variant); - Locale l = cache.putIfAbsent(key, locale); - if (l != null) { - locale = l; + if (extensions == null) { + extensions = LocaleExtensions.EMPTY_EXTENSIONS; + } + + if (extensions.equals(LocaleExtensions.EMPTY_EXTENSIONS)) { + extensions = getCompatibilityExtensions(language, script, country, variant); + } + + BaseLocale baseloc = BaseLocale.getInstance(language, script, country, variant); + return getInstance(baseloc, extensions); + } + + static Locale getInstance(BaseLocale baseloc, LocaleExtensions extensions) { + LocaleKey key = new LocaleKey(baseloc, extensions); + return LOCALECACHE.get(key); + } + + private static class Cache extends LocaleObjectCache { + public Cache() { + } + protected Locale createObject(LocaleKey key) { + return new Locale(key._base, key._exts); + } + } + + private static class LocaleKey { + private BaseLocale _base; + private LocaleExtensions _exts; + + private LocaleKey(BaseLocale baseLocale, LocaleExtensions extensions) { + _base = baseLocale; + _exts = extensions; + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; } + if (!(obj instanceof LocaleKey)) { + return false; + } + LocaleKey other = (LocaleKey)obj; + return _base.equals(other._base) && _exts.equals(other._exts); + } + + 
public int hashCode() { + return _base.hashCode() ^ _exts.hashCode(); } - return locale; } /** @@ -595,6 +913,11 @@ public final class Locale implements Cloneable, Serializable { /** * Returns a list of all 2-letter country codes defined in ISO 3166. * Can be used to create Locales. + *

+ * Note: The Locale class also supports other codes for + * country (region), such as 3-letter numeric UN M.49 area codes. + * Therefore, the list returned by this method does not contain ALL valid + * codes that can be used to create Locales. */ public static String[] getISOCountries() { if (isoCountries == null) { @@ -608,9 +931,16 @@ public final class Locale implements Cloneable, Serializable { /** * Returns a list of all 2-letter language codes defined in ISO 639. * Can be used to create Locales. - * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed. + *

+ * Note: + *

    + *
  • ISO 639 is not a stable standard— some languages' codes have changed. * The list this function returns includes both the new and the old codes for the - * languages whose codes have changed.] + * languages whose codes have changed. + *
  • The Locale class also supports language codes up to + * 8 characters in length. Therefore, the list returned by this method does + * not contain ALL valid codes that can be used to create Locales. + *
*/ public static String[] getISOLanguages() { if (isoLanguages == null) { @@ -631,100 +961,516 @@ public final class Locale implements Cloneable, Serializable { } /** - * Returns the language code for this locale, which will either be the empty string - * or a lowercase ISO 639 code. - *

NOTE: ISO 639 is not a stable standard-- some languages' codes have changed. + * Returns the language code of this Locale. + * + *

Note: ISO 639 is not a stable standard— some languages' codes have changed. * Locale's constructor recognizes both the new and the old codes for the languages * whose codes have changed, but this function always returns the old code. If you - * want to check for a specific language whose code has changed, don't do

-     * if (locale.getLanguage().equals("he"))
+     * want to check for a specific language whose code has changed, don't do
+     * 
+     * if (locale.getLanguage().equals("he")) // BAD!
      *    ...
-     * 
Instead, do
-     * if (locale.getLanguage().equals(new Locale("he", "", "").getLanguage()))
-     *    ...
+ *
+ * Instead, do + *
+     * if (locale.getLanguage().equals(new Locale("he").getLanguage()))
+     *    ...
+     * 
+ * @return The language code, or the empty string if none is defined. * @see #getDisplayLanguage */ public String getLanguage() { - return language; + return _baseLocale.getLanguage(); } /** - * Returns the country/region code for this locale, which will - * either be the empty string or an uppercase ISO 3166 2-letter code. + * Returns the script for this locale, which should + * either be the empty string or an ISO 15924 4-letter script + * code. The first letter is uppercase and the rest are + * lowercase, for example, 'Latn', 'Cyrl'. + * + * @return The script code, or the empty string if none is defined. + * @see #getDisplayScript + * @since 1.7 + */ + public String getScript() { + return _baseLocale.getScript(); + } + + /** + * Returns the country/region code for this locale, which should + * either be the empty string, an uppercase ISO 3166 2-letter code, + * or a UN M.49 3-digit code. + * + * @return The country/region code, or the empty string if none is defined. * @see #getDisplayCountry */ public String getCountry() { - return country; + return _baseLocale.getRegion(); } /** * Returns the variant code for this locale. + * + * @return The variant code, or the empty string if none is defined. * @see #getDisplayVariant */ public String getVariant() { - return variant; + return _baseLocale.getVariant(); + } + + /** + * Returns the extension (or private use) value associated with + * the specified key, or null if there is no extension + * associated with the key. To be well-formed, the key must be one + * of [0-9A-Za-z]. Keys are case-insensitive, so + * for example 'z' and 'Z' represent the same extension. + * + * @param key the extension key + * @return The extension, or null if this locale defines no + * extension for the specified key. 
+ * @throws IllegalArgumentException if key is not well-formed + * @see #PRIVATE_USE_EXTENSION + * @see #UNICODE_LOCALE_EXTENSION + * @since 1.7 + */ + public String getExtension(char key) { + if (!LocaleExtensions.isValidKey(key)) { + throw new IllegalArgumentException("Ill-formed extension key: " + key); + } + return _extensions.getExtensionValue(key); } /** - * Getter for the programmatic name of the entire locale, - * with the language, country and variant separated by underbars. - * Language is always lower case, and country is always upper case. - * If the language is missing, the string will begin with an underbar. - * If both the language and country fields are missing, this function - * will return the empty string, even if the variant field is filled in - * (you can't have a locale with just a variant-- the variant must accompany - * a valid language or country code). - * Examples: "en", "de_DE", "_GB", "en_US_WIN", "de__POSIX", "fr__MAC" + * Returns the set of extension keys associated with this locale, or the + * empty set if it has no extensions. The returned set is unmodifiable. + * The keys will all be lower-case. + * + * @return The set of extension keys, or the empty set if this locale has + * no extensions. + * @since 1.7 + */ + public Set getExtensionKeys() { + return _extensions.getKeys(); + } + + /** + * Returns the set of unicode locale attributes associated with + * this locale, or the empty set if it has no attributes. The + * returned set is unmodifiable. + * + * @return The set of attributes. + * @since 1.7 + */ + public Set getUnicodeLocaleAttributes() { + return _extensions.getUnicodeLocaleAttributes(); + } + + /** + * Returns the Unicode locale type associated with the specified Unicode locale key + * for this locale. Returns the empty string for keys that are defined with no type. + * Returns null if the key is not defined. Keys are case-insensitive. 
The key must + * be two alphanumeric characters ([0-9a-zA-Z]), or an IllegalArgumentException is + * thrown. + * + * @param key the Unicode locale key + * @return The Unicode locale type associated with the key, or null if the + * locale does not define the key. + * @throws IllegalArgumentException if the key is not well-formed + * @throws NullPointerException if key is null + * @since 1.7 + */ + public String getUnicodeLocaleType(String key) { + if (!UnicodeLocaleExtension.isKey(key)) { + throw new IllegalArgumentException("Ill-formed Unicode locale key: " + key); + } + return _extensions.getUnicodeLocaleType(key); + } + + /** + * Returns the set of Unicode locale keys defined by this locale, or the empty set if + * this locale has none. The returned set is immutable. Keys are all lower case. + * + * @return The set of Unicode locale keys, or the empty set if this locale has + * no Unicode locale keywords. + * @since 1.7 + */ + public Set getUnicodeLocaleKeys() { + return _extensions.getUnicodeLocaleKeys(); + } + + /** + * Package locale method returning the Locale's BaseLocale, + * used by ResourceBundle + * @return base locale of this Locale + */ + BaseLocale getBaseLocale() { + return _baseLocale; + } + + /** + * Package local method returning the Locale's LocaleExtensions, + * used by ResourceBundle + * @return locale exnteions of this Locale + */ + LocaleExtensions getLocaleExtensions() { + return _extensions; + } + + /** + * Returns a string representation of this Locale + * object, consisting of language, country, variant, script, + * and extensions as below: + *

+ * language + "_" + country + "_" + (variant + "_#" | "#") + script + "-" + extensions + *
+ * + * Language is always lower case, country is always upper case, script is always title + * case, and extensions are always lower case. Extensions and private use subtags + * will be in canonical order as explained in {@link #toLanguageTag}. + * + *

When the locale has neither script nor extensions, the result is the same as in + * Java 6 and prior. + * + *

If both the language and country fields are missing, this function will return + * the empty string, even if the variant, script, or extensions field is present (you + * can't have a locale with just a variant, the variant must accompany a well-formed + * language or country code). + * + *

If script or extensions are present and variant is missing, no underscore is + * added before the "#". + * + *

This behavior is designed to support debugging and to be compatible with + * previous uses of toString that expected language, country, and variant + * fields only. To represent a Locale as a String for interchange purposes, use + * {@link #toLanguageTag}. + * + *

Examples:

    + *
  • en + *
  • de_DE + *
  • _GB + *
  • en_US_WIN + *
  • de__POSIX + *
  • zh_CN_#Hans + *
  • zh_TW_#Hant-x-java + *
  • th_TH_TH_#u-nu-thai
+ * + * @return A string representation of the Locale, for debugging. * @see #getDisplayName + * @see #toLanguageTag */ public final String toString() { - boolean l = language.length() != 0; - boolean c = country.length() != 0; - boolean v = variant.length() != 0; - StringBuilder result = new StringBuilder(language); - if (c||(l&&v)) { - result.append('_').append(country); // This may just append '_' + boolean l = (_baseLocale.getLanguage().length() != 0); + boolean s = (_baseLocale.getScript().length() != 0); + boolean r = (_baseLocale.getRegion().length() != 0); + boolean v = (_baseLocale.getVariant().length() != 0); + boolean e = (_extensions.getID().length() != 0); + + StringBuilder result = new StringBuilder(_baseLocale.getLanguage()); + if (r || (l && v)) { + result.append('_') + .append(_baseLocale.getRegion()); // This may just append '_' + } + if (v && (l || r)) { + result.append('_') + .append(_baseLocale.getVariant()); + } + + if (s && (l || r)) { + result.append("_#") + .append(_baseLocale.getScript()); } - if (v&&(l||c)) { - result.append('_').append(variant); + + if (e && (l || r)) { + result.append('_'); + if (!s) { + result.append('#'); + } + result.append(_extensions.getID()); } + return result.toString(); } /** - * Returns a three-letter abbreviation for this locale's language. If the locale - * doesn't specify a language, this will be the empty string. Otherwise, this will - * be a lowercase ISO 639-2/T language code. - * The ISO 639-2 language codes can be found on-line at - * - * http://www.loc.gov/standards/iso639-2/englangn.html. - * @exception MissingResourceException Throws MissingResourceException if the + * Returns a well-formed IETF BCP 47 language tag representing + * this locale. + * + *

If this Locale has a language, country, or + * variant that does not satisfy the IETF BCP 47 language tag + * syntax requirements, this method handles these fields as + * described below: + * + *

Language: If language is empty, or not well-formed (for example "a" or + * "e2"), it will be emitted as "und" (Undetermined). + * + *

Country: If country is not well-formed (for example "12" or "USA"), + * it will be omitted. + * + *

Variant: If variant is well-formed, each sub-segment + * (delimited by '-' or '_') is emitted as a subtag. Otherwise: + *

    + * + *
  • if all sub-segments match [0-9a-zA-Z]{1,8} + * (for example "WIN" or "Oracle_JDK_Standard_Edition"), the first + * ill-formed sub-segment and all following will be appended to + * the private use subtag. The first appended subtag will be + * "lvariant", followed by the sub-segments in order, separated by + * hyphen. For example, "x-lvariant-WIN", + * "Oracle-x-lvariant-JDK-Standard-Edition". + * + *
  • if any sub-segment does not match + * [0-9a-zA-Z]{1,8}, the variant will be truncated + * and the problematic sub-segment and all following sub-segments + * will be omitted. If the remainder is non-empty, it will be + * emitted as a private use subtag as above (even if the remainder + * turns out to be well-formed). For example, + * "Solaris_isjustthecoolestthing" is emitted as + * "x-lvariant-Solaris", not as "solaris".
+ * + *

Compatibility special cases:

    + * + *
  • The language codes "iw", "ji", and "in" are handled + * specially. Java uses these deprecated codes for compatibility + * reasons. The toLanguageTag method converts these + * three codes (and only these three) to "he", "yi", and "id" + * respectively. + * + *
  • A locale with language "no", country "NO", and variant + * "NY", representing Norwegian Nynorsk, will be represented as + * having language "nn", country "NO", and empty variant. This is + * because some JVMs used the deprecated form to represent the + * user's default locale, and for compatibility reasons that has + * not been changed.
+ * + *

Note: Although the language tag created by this + * method is well-formed (satisfies the syntax requirements + * defined by the IETF BCP 47 specification), it is not + * necessarily a valid BCP 47 language tag. For example, + *

+     *   new Locale("xx", "YY").toLanguageTag();
+ * + * will return "xx-YY", but the language subtag "xx" and the + * region subtag "YY" are invalid because they are not registered + * in the IANA Language Subtag Registry. + * + * @return a BCP47 language tag representing the locale + * @see #forLanguageTag(String) + * @since 1.7 + */ + public String toLanguageTag() { + LanguageTag tag = LanguageTag.parseLocale(_baseLocale, _extensions); + StringBuilder buf = new StringBuilder(); + + String subtag = tag.getLanguage(); + buf.append(LanguageTag.canonicalizeLanguage(subtag)); + + subtag = tag.getScript(); + if (subtag.length() > 0) { + buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeScript(subtag)); + } + + subtag = tag.getRegion(); + if (subtag.length() > 0) { + buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeRegion(subtag)); + } + + Listsubtags = tag.getVariants(); + for (String s : subtags) { + buf.append(LanguageTag.SEP); + // preserve casing + buf.append(s); + } + + subtags = tag.getExtensions(); + for (String s : subtags) { + buf.append(LanguageTag.SEP); + buf.append(LanguageTag.canonicalizeExtension(s)); + } + + subtag = tag.getPrivateuse(); + if (subtag.length() > 0) { + buf.append(LanguageTag.SEP).append(LanguageTag.PRIVATEUSE).append(LanguageTag.SEP); + // preserve casing + buf.append(subtag); + } + + return buf.toString(); + } + + /** + * Returns a locale for the specified IETF BCP 47 language tag string. + * + *

If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. Compare + * to {@link Locale.Builder#setLanguageTag} which throws an exception + * in this case. + * + *

The following conversions are performed:

    + * + *
  • The language code "und" is mapped to language "". + * + *
  • The language codes "he", "yi", and "id" are mapped to "iw", + * "ji", and "in" respectively. (This is the same canonicalization + * that's done in Locale's constructors.) + * + *
  • The portion of a private use subtag prefixed by "lvariant", + * if any, is removed and appended to the variant field in the + * result locale (without case normalization). If it is then + * empty, the private use subtag is discarded: + * + *
    +     *     Locale loc;
    +     *     loc = Locale.forLanguageTag("en-US-x-lvariant-POSIX");
    +     *     loc.getVariant(); // returns "POSIX"
    +     *     loc.getExtension('x'); // returns null
    +     *
    +     *     loc = Locale.forLanguageTag("de-POSIX-x-URP-lvariant-Abc-Def");
    +     *     loc.getVariant(); // returns "POSIX_Abc_Def"
    +     *     loc.getExtension('x'); // returns "urp"
    +     * 
    + * + *
  • When the languageTag argument contains an extlang subtag, + * the first such subtag is used as the language, and the primary + * language subtag and other extlang subtags are ignored: + * + *
    +     *     Locale.forLanguageTag("ar-aao").getLanguage(); // returns "aao"
    +     *     Locale.forLanguageTag("en-abc-def-us").toString(); // returns "abc_US"
    +     * 
    + * + *
  • Case is normalized except for variant tags, which are left + * unchanged. Language is normalized to lower case, script to + * title case, country to upper case, and extensions to lower + * case. + * + *
  • If, after processing, the locale would exactly match either + * ja_JP_JP or th_TH_TH with no extensions, the appropriate + * extensions are added as though the constructor had been called: + * + *
    +     *    Locale.forLanguageTag("ja-JP-x-lvariant-JP").toLanguageTag();
    +     *    // returns ja-JP-u-ca-japanese-x-lvariant-JP
    +     *    Locale.forLanguageTag("th-TH-x-lvariant-TH").toLanguageTag();
    +     *    // returns th-TH-u-nu-thai-x-lvariant-TH
    +     * 
+ * + *

This implements the 'Language-Tag' production of BCP47, and + * so supports grandfathered (regular and irregular) as well as + * private use language tags. Stand alone private use tags are + * represented as empty language and extension 'x-whatever', + * and grandfathered tags are converted to their canonical replacements + * where they exist. + * + *

Grandfathered tags with canonical replacements are as follows: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
grandfathered tag modern replacement
art-lojban jbo
i-ami ami
i-bnn bnn
i-hak hak
i-klingon tlh
i-lux lb
i-navajo nv
i-pwn pwn
i-tao tao
i-tay tay
i-tsu tsu
no-bok nb
no-nyn nn
sgn-BE-FR sfb
sgn-BE-NL vgt
sgn-CH-DE sgg
zh-guoyu cmn
zh-hakka hak
zh-min-nan nan
zh-xiang hsn
+ * + *

Grandfathered tags with no modern replacement will be + * converted as follows: + * + * + * + * + * + * + * + * + * + * + * + *
grandfathered tag converts to
cel-gaulish xtg-x-cel-gaulish
en-GB-oed en-GB-x-oed
i-default en-x-i-default
i-enochian und-x-i-enochian
i-mingo see-x-i-mingo
zh-min nan-x-zh-min
+ * + *

For a list of all grandfathered tags, see the + * IANA Language Subtag Registry (search for "Type: grandfathered"). + * + *

Note: there is no guarantee that toLanguageTag + * and forLanguageTag will round-trip. + * + * @param languageTag the language tag + * @return The locale that best represents the language tag. + * @throws NullPointerException if languageTag is null + * @see #toLanguageTag() + * @see java.util.Locale.Builder#setLanguageTag(String) + * @since 1.7 + */ + public static Locale forLanguageTag(String languageTag) { + LanguageTag tag = LanguageTag.parse(languageTag, null); + InternalLocaleBuilder bldr = new InternalLocaleBuilder(); + bldr.setLanguageTag(tag); + return getInstance(bldr.getBaseLocale(), bldr.getLocaleExtensions()); + } + + /** + * Returns a three-letter abbreviation of this locale's language. + * If the language matches an ISO 639-1 two-letter code, the + * corresponding ISO 639-2/T three-letter lowercase code is + * returned. The ISO 639-2 language codes can be found on-line, + * see "Codes for the Representation of Names of Languages Part 2: + * Alpha-3 Code". If the locale specifies a three-letter + * language, the language is returned as is. If the locale does + * not specify a language the empty string is returned. + * + * @return A three-letter abbreviation of this locale's language. + * @exception MissingResourceException Throws MissingResourceException if * three-letter language abbreviation is not available for this locale. */ public String getISO3Language() throws MissingResourceException { - String language3 = getISO3Code(language, LocaleISOData.isoLanguageTable); + String language3 = getISO3Code(_baseLocale.getLanguage(), LocaleISOData.isoLanguageTable); if (language3 == null) { throw new MissingResourceException("Couldn't find 3-letter language code for " - + language, "FormatData_" + toString(), "ShortLanguage"); + + _baseLocale.getLanguage(), "FormatData_" + toString(), "ShortLanguage"); } return language3; } /** - * Returns a three-letter abbreviation for this locale's country. 
If the locale - * doesn't specify a country, this will be the empty string. Otherwise, this will - * be an uppercase ISO 3166 3-letter country code. - * The ISO 3166-2 country codes can be found on-line at - * - * http://www.davros.org/misc/iso3166.txt. + * Returns a three-letter abbreviation for this locale's country. + * If the country matches an ISO 3166-1 alpha-2 code, the + * corresponding ISO 3166-1 alpha-3 uppercase code is returned. + * If the locale doesn't specify a country, this will be the empty + * string. + * + *

The ISO 3166-1 codes can be found on-line. + * + * @return A three-letter abbreviation of this locale's country. * @exception MissingResourceException Throws MissingResourceException if the * three-letter country abbreviation is not available for this locale. */ public String getISO3Country() throws MissingResourceException { - String country3 = getISO3Code(country, LocaleISOData.isoCountryTable); + String country3 = getISO3Code(_baseLocale.getRegion(), LocaleISOData.isoCountryTable); if (country3 == null) { throw new MissingResourceException("Couldn't find 3-letter country code for " - + country, "FormatData_" + toString(), "ShortCountry"); + + _baseLocale.getRegion(), "FormatData_" + toString(), "ShortCountry"); } return country3; } @@ -782,7 +1528,33 @@ public final class Locale implements Cloneable, Serializable { * @exception NullPointerException if inLocale is null */ public String getDisplayLanguage(Locale inLocale) { - return getDisplayString(language, inLocale, DISPLAY_LANGUAGE); + return getDisplayString(_baseLocale.getLanguage(), inLocale, DISPLAY_LANGUAGE); + } + + /** + * Returns a name for the the locale's script that is appropriate for display to + * the user. If possible, the name will be localized for the default locale. Returns + * the empty string if this locale doesn't specify a script code. + * + * @return the display name of the script code for the current default locale + * @since 1.7 + */ + public String getDisplayScript() { + return getDisplayScript(getDefault()); + } + + /** + * Returns a name for the locale's script that is appropriate + * for display to the user. If possible, the name will be + * localized for the given locale. Returns the empty string if + * this locale doesn't specify a script code. 
+ * + * @return the display name of the script code for the current default locale + * @throws NullPointerException if inLocale is null + * @since 1.7 + */ + public String getDisplayScript(Locale inLocale) { + return getDisplayString(_baseLocale.getScript(), inLocale, DISPLAY_SCRIPT); } /** @@ -817,7 +1589,7 @@ public final class Locale implements Cloneable, Serializable { * @exception NullPointerException if inLocale is null */ public String getDisplayCountry(Locale inLocale) { - return getDisplayString(country, inLocale, DISPLAY_COUNTRY); + return getDisplayString(_baseLocale.getRegion(), inLocale, DISPLAY_COUNTRY); } private String getDisplayString(String code, Locale inLocale, int type) { @@ -876,7 +1648,7 @@ public final class Locale implements Cloneable, Serializable { * @exception NullPointerException if inLocale is null */ public String getDisplayVariant(Locale inLocale) { - if (variant.length() == 0) + if (_baseLocale.getVariant().length() == 0) return ""; OpenListResourceBundle bundle = LocaleData.getLocaleNames(inLocale); @@ -897,39 +1669,44 @@ public final class Locale implements Cloneable, Serializable { /** * Returns a name for the locale that is appropriate for display to the - * user. This will be the values returned by getDisplayLanguage(), getDisplayCountry(), - * and getDisplayVariant() assembled into a single string. The display name will have - * one of the following forms:

- * language (country, variant)

- * language (country)

- * language (variant)

- * country (variant)

- * language

- * country

- * variant

- * depending on which fields are specified in the locale. If the language, country, - * and variant fields are all empty, this function returns the empty string. + * user. This will be the values returned by getDisplayLanguage(), + * getDisplayScript(), getDisplayCountry(), and getDisplayVariant() assembled + * into a single string. The non-empty values are used in order, + * with the second and subsequent names in parentheses. For example: + *
+ * language (script, country, variant)
+ * language (country)
+ * language (variant)
+ * script (country)
+ * country
+ *
+ * depending on which fields are specified in the locale. If the + * language, script, country, and variant fields are all empty, + * this function returns the empty string. */ public final String getDisplayName() { return getDisplayName(getDefault(Category.DISPLAY)); } /** - * Returns a name for the locale that is appropriate for display to the - * user. This will be the values returned by getDisplayLanguage(), getDisplayCountry(), - * and getDisplayVariant() assembled into a single string. The display name will have - * one of the following forms:

- * language (country, variant)

- * language (country)

- * language (variant)

- * country (variant)

- * language

- * country

- * variant

- * depending on which fields are specified in the locale. If the language, country, - * and variant fields are all empty, this function returns the empty string. + * Returns a name for the locale that is appropriate for display + * to the user. This will be the values returned by + * getDisplayLanguage(), getDisplayScript(), getDisplayCountry(), + * and getDisplayVariant() assembled into a single string. + * The non-empty values are used in order, + * with the second and subsequent names in parentheses. For example: + *
+ * language (script, country, variant)
+ * language (country)
+ * language (variant)
+ * script (country)
+ * country
+ *
+ * depending on which fields are specified in the locale. If the + * language, script, country, and variant fields are all empty, + * this function returns the empty string. * - * @exception NullPointerException if inLocale is null + * @throws NullPointerException if inLocale is null */ public String getDisplayName(Locale inLocale) { OpenListResourceBundle bundle = LocaleData.getLocaleNames(inLocale); @@ -1009,7 +1786,7 @@ public final class Locale implements Cloneable, Serializable { } /** - * Overrides Cloneable + * Overrides Cloneable. */ public Object clone() { @@ -1029,7 +1806,7 @@ public final class Locale implements Cloneable, Serializable { public int hashCode() { int hc = hashCodeValue; if (hc == 0) { - hc = (language.hashCode() << 8) ^ country.hashCode() ^ (variant.hashCode() << 4); + hc = _baseLocale.hashCode() ^ _extensions.hashCode(); hashCodeValue = hc; } return hc; @@ -1039,8 +1816,8 @@ public final class Locale implements Cloneable, Serializable { /** * Returns true if this Locale is equal to another object. A Locale is - * deemed equal to another Locale with identical language, country, - * and variant, and unequal to all other objects. + * deemed equal to another Locale with identical language, script, country, + * variant and extensions, and unequal to all other objects. * * @return true if this Locale is equal to the specified object. */ @@ -1050,43 +1827,18 @@ public final class Locale implements Cloneable, Serializable { return true; if (!(obj instanceof Locale)) return false; - Locale other = (Locale) obj; - return language == other.language - && country == other.country - && variant == other.variant; + BaseLocale otherBase = ((Locale)obj)._baseLocale; + LocaleExtensions otherExt = ((Locale)obj)._extensions; + return _baseLocale.equals(otherBase) && _extensions.equals(otherExt); } // ================= privates ===================================== - // XXX instance and class variables. 
For now keep these separate, since it is - // faster to match. Later, make into single string. + private transient BaseLocale _baseLocale; + private transient LocaleExtensions _extensions; /** - * @serial - * @see #getLanguage - */ - private final String language; - - /** - * @serial - * @see #getCountry - */ - private final String country; - - /** - * @serial - * @see #getVariant - */ - private final String variant; - - /** - * Placeholder for the object's hash code. Always -1. - * @serial - */ - private volatile int hashcode = -1; // lazy evaluate - - /** - * Calculated hashcode to fix 4518797. + * Calculated hashcode */ private transient volatile int hashCodeValue = 0; @@ -1101,7 +1853,7 @@ public final class Locale implements Cloneable, Serializable { */ private String[] getDisplayVariantArray(OpenListResourceBundle bundle, Locale inLocale) { // Split the variant name into tokens separated by '_'. - StringTokenizer tokenizer = new StringTokenizer(variant, "_"); + StringTokenizer tokenizer = new StringTokenizer(_baseLocale.getVariant(), "_"); String[] names = new String[tokenizer.countTokens()]; // For each variant token, lookup the display name. If @@ -1179,49 +1931,102 @@ public final class Locale implements Cloneable, Serializable { } /** - * Replace the deserialized Locale object with a newly - * created object. Newer language codes are replaced with older ISO - * codes. The country and variant codes are replaced with internalized - * String copies. + * @serialField language String + * language subtag in lower case. (See getLanguage()) + * @serialField country String + * country subtag in upper case. (See getCountry()) + * @serialField variant String + * variant subtags separated by LOWLINE characters. 
(See getVariant()) + * @serialField hashcode int + * deprecated, for forward compatibility only + * @serialField script String + * script subtag in title case (See getScript()) + * @serialField extensions String + * canonical representation of extensions, that is, + * BCP47 extensions in alphabetical order followed by + * BCP47 private use subtags, all in lower case letters + * separated by HYPHEN-MINUS characters. + * (See getExtensionKeys(), + * getExtension(char)) + */ + private static final ObjectStreamField[] serialPersistentFields = { + new ObjectStreamField("language", String.class), + new ObjectStreamField("country", String.class), + new ObjectStreamField("variant", String.class), + new ObjectStreamField("hashcode", int.class), + new ObjectStreamField("script", String.class), + new ObjectStreamField("extensions", String.class), + }; + + /** + * Serializes this Locale to the specified ObjectOutputStream. + * @param out the ObjectOutputStream to write + * @throws IOException + * @since 1.7 */ - private Object readResolve() throws java.io.ObjectStreamException { - return getInstance(language, country, variant); + private void writeObject(ObjectOutputStream out) throws IOException { + ObjectOutputStream.PutField fields = out.putFields(); + fields.put("language", _baseLocale.getLanguage()); + fields.put("script", _baseLocale.getScript()); + fields.put("country", _baseLocale.getRegion()); + fields.put("variant", _baseLocale.getVariant()); + fields.put("extensions", _extensions.getID()); + fields.put("hashcode", -1); // place holder just for backward support + out.writeFields(); } - private static volatile String[] isoLanguages = null; - - private static volatile String[] isoCountries = null; - - /* - * Locale needs its own, locale insensitive version of toLowerCase to - * avoid circularity problems between Locale and String. - * The most straightforward algorithm is used. Look at optimizations later. + /** + * Deserialize this Locale. 
+ * @param in the ObjectInputStream to read + * @throws IOException + * @throws ClassNotFoundException + * @throws IllformdLocaleException + * @since 1.7 */ - private String toLowerCase(String str) { - char[] buf = new char[str.length()]; - for (int i = 0; i < buf.length; i++) { - buf[i] = Character.toLowerCase(str.charAt(i)); + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { + ObjectInputStream.GetField fields = in.readFields(); + String language = (String)fields.get("language", ""); + String script = (String)fields.get("script", ""); + String country = (String)fields.get("country", ""); + String variant = (String)fields.get("variant", ""); + String extStr = (String)fields.get("extensions", ""); + _baseLocale = BaseLocale.getInstance(convertOldISOCodes(language), script, country, variant); + try { + InternalLocaleBuilder bldr = new InternalLocaleBuilder(); + bldr.setExtensions(extStr); + _extensions = bldr.getLocaleExtensions(); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage()); } - return new String( buf ); } - /* - * Locale needs its own, locale insensitive version of toUpperCase to - * avoid circularity problems between Locale and String. - * The most straightforward algorithm is used. Look at optimizations later. + /** + * Returns a cached Locale instance equivalent to + * the deserialized Locale. When serialized + * language, country and variant fields read from the object data stream + * are exactly "ja", "JP", "JP" or "th", "TH", "TH" and script/extensions + * fields are empty, this method supplies UNICODE_LOCALE_EXTENSION + * "ca"/"japanese" (calendar type is "japanese") or "nu"/"thai" (number script + * type is "thai"). See Special Cases + * for more information. + * + * @return an instance of Locale equivalent to + * the deserialized Locale. 
+ * @throws java.io.ObjectStreamException */ - private String toUpperCase(String str) { - char[] buf = new char[str.length()]; - for (int i = 0; i < buf.length; i++) { - buf[i] = Character.toUpperCase(str.charAt(i)); - } - return new String( buf ); + private Object readResolve() throws java.io.ObjectStreamException { + return getInstance(_baseLocale.getLanguage(), _baseLocale.getScript(), + _baseLocale.getRegion(), _baseLocale.getVariant(), _extensions); } - private String convertOldISOCodes(String language) { + private static volatile String[] isoLanguages = null; + + private static volatile String[] isoCountries = null; + + private static String convertOldISOCodes(String language) { // we accept both the old and the new ISO codes for the languages whose ISO // codes have changed, but we always store the OLD code, for backward compatibility - language = toLowerCase(language).intern(); + language = AsciiUtil.toLowerString(language).intern(); if (language == "he") { return "iw"; } else if (language == "yi") { @@ -1233,6 +2038,25 @@ public final class Locale implements Cloneable, Serializable { } } + private static LocaleExtensions getCompatibilityExtensions(String language, String script, String country, String variant) { + LocaleExtensions extensions = LocaleExtensions.EMPTY_EXTENSIONS; + // Special cases for backward compatibility support + if (AsciiUtil.caseIgnoreMatch(language, "ja") + && script.length() == 0 + && AsciiUtil.caseIgnoreMatch(country, "JP") + && AsciiUtil.caseIgnoreMatch(variant, "JP")) { + // ja_JP_JP -> u-ca-japanese (calendar = japanese) + extensions = LocaleExtensions.CALENDAR_JAPANESE; + } else if (AsciiUtil.caseIgnoreMatch(language, "th") + && script.length() == 0 + && AsciiUtil.caseIgnoreMatch(country, "TH") + && AsciiUtil.caseIgnoreMatch(variant, "TH")) { + // th_TH_TH -> u-nu-thai (numbersystem = thai) + extensions = LocaleExtensions.NUMBER_THAI; + } + return extensions; + } + /** * Obtains a localized locale names from a 
LocaleNameProvider * implementation. @@ -1256,6 +2080,8 @@ public final class Locale implements Cloneable, Serializable { return localeNameProvider.getDisplayCountry(code, locale); case DISPLAY_VARIANT: return localeNameProvider.getDisplayVariant(code, locale); + case DISPLAY_SCRIPT: + return localeNameProvider.getDisplayScript(code, locale); default: assert false; // shouldn't happen } @@ -1287,4 +2113,346 @@ public final class Locale implements Cloneable, Serializable { */ FORMAT, } + + /** + * Builder is used to build instances of Locale + * from values configured by the setters. Unlike the Locale + * constructors, the Builder checks if a value configured by a + * setter satisfies the syntax requirements defined by the Locale + * class. A Locale object created by a Builder is + * well-formed and can be transformed to a well-formed IETF BCP 47 language tag + * without losing information. + * + *

Note: The Locale class does not provide any + * syntactic restrictions on variant, while BCP 47 requires each variant + * subtag to be 5 to 8 alphanumerics or a single numeric followed by 3 + * alphanumerics. The method setVariant throws + * IllformedLocaleException for a variant that does not satisfy + * this restriction. If it is necessary to support such a variant, use a + * Locale constructor. However, keep in mind that a Locale + * object created this way might lose the variant information when + * transformed to a BCP 47 language tag. + * + *

The following example shows how to create a Locale object + * with the Builder. + *

+ *
+     *     Locale aLocale = new Builder().setLanguage("sr").setScript("Latn").setRegion("RS").build();
+     * 
+ *
+ * + *

Builders can be reused; clear() resets all + * fields to their default values. + * + * @see Locale#forLanguageTag + * @since 1.7 + */ + public static final class Builder { + private InternalLocaleBuilder _locbld; + + /** + * Constructs an empty Builder. The default value of all + * fields, extensions, and private use information is the + * empty string. + */ + public Builder() { + _locbld = new InternalLocaleBuilder(); + } + + /** + * Resets the Builder to match the provided + * locale. Existing state is discarded. + * + *

All fields of the locale must be well-formed, see {@link Locale}. + * + *

Locales with any ill-formed fields cause + * IllformedLocaleException to be thrown, except for the + * following three cases which are accepted for compatibility + * reasons:

    + *
  • Locale("ja", "JP", "JP") is treated as "ja-JP-u-ca-japanese" + *
  • Locale("th", "TH", "TH") is treated as "th-TH-u-nu-thai" + *
  • Locale("no", "NO", "NY") is treated as "nn-NO"
+ * + * @param locale the locale + * @return This builder. + * @throws IllformedLocaleException if locale has + * any ill-formed fields. + * @throws NullPointerException if locale is null. + */ + public Builder setLocale(Locale locale) { + try { + _locbld.setLocale(locale._baseLocale, locale._extensions); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Resets the Builder to match the provided IETF BCP 47 + * language tag. Discards the existing state. Null and the + * empty string cause the builder to be reset, like {@link + * #clear}. Grandfathered tags (see {@link + * Locale#forLanguageTag}) are converted to their canonical + * form before being processed. Otherwise, the language tag + * must be well-formed (see {@link Locale}) or an exception is + * thrown (unlike Locale.forLanguageTag, which + * just discards ill-formed and following portions of the + * tag). + * + * @param languageTag the language tag + * @return This builder. + * @throws IllformedLocaleException if languageTag is ill-formed + * @see Locale#forLanguageTag(String) + */ + public Builder setLanguageTag(String languageTag) { + ParseStatus sts = new ParseStatus(); + LanguageTag tag = LanguageTag.parse(languageTag, sts); + if (sts.isError()) { + throw new IllformedLocaleException(sts.getErrorMessage(), sts.getErrorIndex()); + } + _locbld.setLanguageTag(tag); + + return this; + } + + /** + * Sets the language. If language is the empty string or + * null, the language in this Builder is removed. Otherwise, + * the language must be well-formed + * or an exception is thrown. + * + *

The typical language value is a two or three-letter language + * code as defined in ISO639. + * + * @param language the language + * @return This builder. + * @throws IllformedLocaleException if language is ill-formed + */ + public Builder setLanguage(String language) { + try { + _locbld.setLanguage(language); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Sets the script. If script is null or the empty string, + * the script in this Builder is removed. + * Otherwise, the script must be well-formed or an + * exception is thrown. + * + *

The typical script value is a four-letter script code as defined by ISO 15924. + * + * @param script the script + * @return This builder. + * @throws IllformedLocaleException if script is ill-formed + */ + public Builder setScript(String script) { + try { + _locbld.setScript(script); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Sets the region. If region is null or the empty string, the region + * in this Builder is removed. Otherwise, + * the region must be well-formed or an + * exception is thrown. + * + *

The typical region value is a two-letter ISO 3166 code or a + * three-digit UN M.49 area code. + * + *

The country value in the Locale created by the + * Builder is always normalized to upper case. + * + * @param region the region + * @return This builder. + * @throws IllformedLocaleException if region is ill-formed + */ + public Builder setRegion(String region) { + try { + _locbld.setRegion(region); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Sets the variant. If variant is null or the empty string, the + * variant in this Builder is removed. Otherwise, it + * must consist of one or more well-formed + * subtags, or an exception is thrown. + * + *

Note: This method checks if variant + * satisfies the IETF BCP 47 variant subtag's syntax requirements, + * and normalizes the value to lowercase letters. However, + * the Locale class does not impose any syntactic + * restriction on variant, and the variant value in + * Locale is case sensitive. To set such a variant, + * use a Locale constructor. + * + * @param variant the variant + * @return This builder. + * @throws IllformedLocaleException if variant is ill-formed + */ + public Builder setVariant(String variant) { + try { + _locbld.setVariant(variant); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Sets the extension for the given key. If the value is null or the + * empty string, the extension is removed. Otherwise, the extension + * must be well-formed or an exception + * is thrown. + * + *

Note: The key {@link Locale#UNICODE_LOCALE_EXTENSION + * UNICODE_LOCALE_EXTENSION} ('u') is used for the Unicode locale extension. + * Setting a value for this key replaces any existing Unicode locale key/type + * pairs with those defined in the extension. + * + *

Note: The key {@link Locale#PRIVATE_USE_EXTENSION + * PRIVATE_USE_EXTENSION} ('x') is used for the private use code. To be + * well-formed, the value for this key needs only to have subtags of one to + * eight alphanumeric characters, not two to eight as in the general case. + * + * @param key the extension key + * @param value the extension value + * @return This builder. + * @throws IllformedLocaleException if key is illegal + * or value is ill-formed + * @see #setUnicodeLocaleKeyword(String, String) + */ + public Builder setExtension(char key, String value) { + try { + _locbld.setExtension(key, value); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Sets the Unicode locale keyword type for the given key. If the type + * is null, the Unicode keyword is removed. Otherwise, the key must be + * non-null and both key and type must be well-formed or an exception + * is thrown. + * + *

Keys and types are converted to lower case. + * + *

Note: Setting the 'u' extension via {@link #setExtension} + * replaces all Unicode locale keywords with those defined in the + * extension. + * + * @param key the Unicode locale key + * @param type the Unicode locale type + * @return This builder. + * @throws IllformedLocaleException if key or type + * is ill-formed + * @throws NullPointerException if key is null + * @see #setExtension(char, String) + */ + public Builder setUnicodeLocaleKeyword(String key, String type) { + try { + _locbld.setUnicodeLocaleKeyword(key, type); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Adds a unicode locale attribute, if not already present, otherwise + * has no effect. The attribute must not be null and must be well-formed or an exception + * is thrown. + * + * @param attribute the attribute + * @return This builder. + * @throws NullPointerException if attribute is null + * @throws IllformedLocaleException if attribute is ill-formed + * @see #setExtension(char, String) + */ + public Builder addUnicodeLocaleAttribute(String attribute) { + try { + _locbld.addUnicodeLocaleAttribute(attribute); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Removes a unicode locale attribute, if present, otherwise has no + * effect. The attribute must not be null and must be well-formed or an exception + * is thrown. + * + *

Attribute comparison for removal is case-insensitive. + * + * @param attribute the attribute + * @return This builder. + * @throws NullPointerException if attribute is null + * @throws IllformedLocaleException if attribute is ill-formed + * @see #setExtension(char, String) + */ + public Builder removeUnicodeLocaleAttribute(String attribute) { + try { + _locbld.removeUnicodeLocaleAttribute(attribute); + } catch (LocaleSyntaxException e) { + throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex()); + } + return this; + } + + /** + * Resets the builder to its initial, empty state. + * + * @return This builder. + */ + public Builder clear() { + _locbld.clear(); + return this; + } + + /** + * Resets the extensions to their initial, empty state. + * Language, script, region and variant are unchanged. + * + * @return This builder. + * @see #setExtension(char, String) + */ + public Builder clearExtensions() { + _locbld.clearExtensions(); + return this; + } + + /** + * Returns an instance of Locale created from the fields set + * on this builder. + * + *

This applies the conversions listed in {@link Locale#forLanguageTag} + * when constructing a Locale. (Grandfathered tags are handled in + * {@link #setLanguageTag}.) + * + * @return A Locale. + */ + public Locale build() { + BaseLocale baseloc = _locbld.getBaseLocale(); + LocaleExtensions extensions = _locbld.getLocaleExtensions(); + return Locale.getInstance(baseloc, extensions); + } + } } diff --git a/src/share/classes/java/util/ResourceBundle.java b/src/share/classes/java/util/ResourceBundle.java index 56888362b..e645fe914 100644 --- a/src/share/classes/java/util/ResourceBundle.java +++ b/src/share/classes/java/util/ResourceBundle.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -56,16 +56,18 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.jar.JarEntry; +import sun.util.locale.BaseLocale; +import sun.util.locale.LocaleExtensions; +import sun.util.locale.LocaleObjectCache; + /** * - * Resource bundles contain locale-specific objects. - * When your program needs a locale-specific resource, - * a String for example, your program can load it - * from the resource bundle that is appropriate for the - * current user's locale. In this way, you can write - * program code that is largely independent of the user's - * locale isolating most, if not all, of the locale-specific + * Resource bundles contain locale-specific objects. When your program needs a + * locale-specific resource, a String for example, your program can + * load it from the resource bundle that is appropriate for the current user's + * locale. 
In this way, you can write program code that is largely independent + * of the user's locale isolating most, if not all, of the locale-specific * information in resource bundles. * *

@@ -854,87 +856,140 @@ public abstract class ResourceBundle { } /** - * Gets a resource bundle using the specified base name, locale, and class loader. + * Gets a resource bundle using the specified base name, locale, and class + * loader. + * + *

This method behaves the same as calling + * {@link #getBundle(String, Locale, ClassLoader, Control)} passing a + * default instance of {@link Control}. The following describes this behavior. + * + *

getBundle uses the base name, the specified locale, and + * the default locale (obtained from {@link java.util.Locale#getDefault() + * Locale.getDefault}) to generate a sequence of candidate bundle names. If the specified + * locale's language, script, country, and variant are all empty strings, + * then the base name is the only candidate bundle name. Otherwise, a list + * of candidate locales is generated from the attribute values of the + * specified locale (language, script, country and variant) and appended to + * the base name. Typically, this will look like the following: * - *

- * Conceptually, getBundle uses the following strategy for locating and instantiating - * resource bundles: - *

- * getBundle uses the base name, the specified locale, and the default - * locale (obtained from {@link java.util.Locale#getDefault() Locale.getDefault}) - * to generate a sequence of candidate bundle names. - * If the specified locale's language, country, and variant are all empty - * strings, then the base name is the only candidate bundle name. - * Otherwise, the following sequence is generated from the attribute - * values of the specified locale (language1, country1, and variant1) - * and of the default locale (language2, country2, and variant2): - *

    - *
  • baseName + "_" + language1 + "_" + country1 + "_" + variant1 - *
  • baseName + "_" + language1 + "_" + country1 - *
  • baseName + "_" + language1 - *
  • baseName + "_" + language2 + "_" + country2 + "_" + variant2 - *
  • baseName + "_" + language2 + "_" + country2 - *
  • baseName + "_" + language2 - *
  • baseName - *
- *

- * Candidate bundle names where the final component is an empty string are omitted. - * For example, if country1 is an empty string, the second candidate bundle name is omitted. + *

+     *     baseName + "_" + language + "_" + script + "_" + country + "_" + variant
+     *     baseName + "_" + language + "_" + script + "_" + country
+     *     baseName + "_" + language + "_" + script
+     *     baseName + "_" + language + "_" + country + "_" + variant
+     *     baseName + "_" + language + "_" + country
+     *     baseName + "_" + language
+     * 
* - *

- * getBundle then iterates over the candidate bundle names to find the first - * one for which it can instantiate an actual resource bundle. For each candidate - * bundle name, it attempts to create a resource bundle: - *

    - *
  • - * First, it attempts to load a class using the candidate bundle name. - * If such a class can be found and loaded using the specified class loader, is assignment - * compatible with ResourceBundle, is accessible from ResourceBundle, and can be instantiated, - * getBundle creates a new instance of this class and uses it as the result - * resource bundle. - *
  • - * Otherwise, getBundle attempts to locate a property resource file. - * It generates a path name from the candidate bundle name by replacing all "." characters - * with "/" and appending the string ".properties". - * It attempts to find a "resource" with this name using - * {@link java.lang.ClassLoader#getResource(java.lang.String) ClassLoader.getResource}. - * (Note that a "resource" in the sense of getResource has nothing to do with - * the contents of a resource bundle, it is just a container of data, such as a file.) - * If it finds a "resource", it attempts to create a new - * {@link PropertyResourceBundle} instance from its contents. - * If successful, this instance becomes the result resource bundle. - *
+ *

Candidate bundle names where the final component is an empty string + * are omitted, along with the underscore. For example, if country is an + * empty string, the second and the fifth candidate bundle names above + * would be omitted. Also, if script is an empty string, the candidate names + * including script are omitted. For example, a locale with language "de" + * and variant "JAVA" will produce candidate names with base name + * "MyResource" below. * - *

- * If no result resource bundle has been found, a MissingResourceException - * is thrown. - * - *

- * Once a result resource bundle has been found, its parent chain is instantiated. - * getBundle iterates over the candidate bundle names that can be - * obtained by successively removing variant, country, and language - * (each time with the preceding "_") from the bundle name of the result resource bundle. - * As above, candidate bundle names where the final component is an empty string are omitted. - * With each of the candidate bundle names it attempts to instantiate a resource bundle, as - * described above. - * Whenever it succeeds, it calls the previously instantiated resource + *

+     *     MyResource_de__JAVA
+     *     MyResource_de
+     * 
+ * + * In the case that the variant contains one or more underscores ('_'), a + * sequence of bundle names generated by truncating the last underscore and + * the part following it is inserted after a candidate bundle name with the + * original variant. For example, for a locale with language "en", script + * "Latn", country "US" and variant "WINDOWS_VISTA", and bundle base name + * "MyResource", the list of candidate bundle names below is generated: + *
+     * MyResource_en_Latn_US_WINDOWS_VISTA
+     * MyResource_en_Latn_US_WINDOWS
+     * MyResource_en_Latn_US
+     * MyResource_en_Latn
+     * MyResource_en_US_WINDOWS_VISTA
+     * MyResource_en_US_WINDOWS
+     * MyResource_en_US
+     * MyResource_en
+     * 
+ * + *
Note: For some Locales, the list of + * candidate bundle names contains extra names, or the order of bundle names + * is slightly modified. See the description of the default implementation + * of {@link Control#getCandidateLocales(String, Locale) + * getCandidateLocales} for details.
+ * + *

getBundle then iterates over the candidate bundle names + * to find the first one for which it can instantiate an actual + * resource bundle. It uses the default controls' {@link Control#getFormats + * getFormats} method, which generates two bundle names for each generated + * name, the first a class name and the second a properties file name. For + * each candidate bundle name, it attempts to create a resource bundle: + * + *

  • First, it attempts to load a class using the generated class name. + * If such a class can be found and loaded using the specified class + * loader, is assignment compatible with ResourceBundle, is accessible from + * ResourceBundle, and can be instantiated, getBundle creates a + * new instance of this class and uses it as the result resource + * bundle. + * + *
  • Otherwise, getBundle attempts to locate a property + * resource file using the generated properties file name. It generates a + * path name from the candidate bundle name by replacing all "." characters + * with "/" and appending the string ".properties". It attempts to find a + * "resource" with this name using {@link + * java.lang.ClassLoader#getResource(java.lang.String) + * ClassLoader.getResource}. (Note that a "resource" in the sense of + * getResource has nothing to do with the contents of a + * resource bundle, it is just a container of data, such as a file.) If it + * finds a "resource", it attempts to create a new {@link + * PropertyResourceBundle} instance from its contents. If successful, this + * instance becomes the result resource bundle.
+ * + *

This continues until a result resource bundle is instantiated or the + * list of candidate bundle names is exhausted. If no matching resource + * bundle is found, the default control's {@link Control#getFallbackLocale + * getFallbackLocale} method is called, which returns the current default + * locale. A new sequence of candidate locale names is generated using this + * locale and searched again, as above. + * + *

If still no result bundle is found, the base name alone is looked up. If + * this still fails, a MissingResourceException is thrown. + * + *

Once a result resource bundle has been found, + * its parent chain is instantiated. If the result bundle already + * has a parent (perhaps because it was returned from a cache) the chain is + * complete. + * + *

Otherwise, getBundle examines the remainder of the + * candidate locale list that was used during the pass that generated the + * result resource bundle. (As before, candidate bundle names where the + * final component is an empty string are omitted.) When it comes to the + * end of the candidate list, it tries the plain bundle name. With each of the + * candidate bundle names it attempts to instantiate a resource bundle (first + * looking for a class and then a properties file, as described above). + * + *

Whenever it succeeds, it calls the previously instantiated resource * bundle's {@link #setParent(java.util.ResourceBundle) setParent} method - * with the new resource bundle, unless the previously instantiated resource - * bundle already has a non-null parent. + * with the new resource bundle. This continues until the list of names + * is exhausted or the current bundle already has a non-null parent. * - *

- * getBundle caches instantiated resource bundles and - * may return the same resource bundle instance multiple - * times. + *

Once the parent chain is complete, the bundle is returned. * - *

- * The baseName argument should be a fully qualified class name. However, for - * compatibility with earlier versions, Sun's Java SE Runtime Environments do not verify this, - * and so it is possible to access PropertyResourceBundles by specifying a - * path name (using "/") instead of a fully qualified class name (using "."). + *

Note: getBundle caches instantiated resource + * bundles and might return the same resource bundle instance multiple times. + * + *

Note: The baseName argument should be a fully + * qualified class name. However, for compatibility with earlier versions, + * Sun's Java SE Runtime Environments do not verify this, and so it is + * possible to access PropertyResourceBundles by specifying a + * path name (using "/") instead of a fully qualified class name (using + * "."). * *

- * Example:
The following class and property files are provided: + * Example: + *

+ * The following class and property files are provided: *

      *     MyResources.class
      *     MyResources.properties
@@ -944,22 +999,26 @@ public abstract class ResourceBundle {
      *     MyResources_en.properties
      *     MyResources_es_ES.class
      * 
- * The contents of all files are valid (that is, public non-abstract subclasses of ResourceBundle for - * the ".class" files, syntactically correct ".properties" files). - * The default locale is Locale("en", "GB"). - *

- * Calling getBundle with the shown locale argument values instantiates - * resource bundles from the following sources: - *

    - *
  • Locale("fr", "CH"): result MyResources_fr_CH.class, parent MyResources_fr.properties, parent MyResources.class - *
  • Locale("fr", "FR"): result MyResources_fr.properties, parent MyResources.class - *
  • Locale("de", "DE"): result MyResources_en.properties, parent MyResources.class - *
  • Locale("en", "US"): result MyResources_en.properties, parent MyResources.class - *
  • Locale("es", "ES"): result MyResources_es_ES.class, parent MyResources.class - *
- *

The file MyResources_fr_CH.properties is never used because it is hidden by - * MyResources_fr_CH.class. Likewise, MyResources.properties is also hidden by - * MyResources.class. + * + * The contents of all files are valid (that is, public non-abstract + * subclasses of ResourceBundle for the ".class" files, + * syntactically correct ".properties" files). The default locale is + * Locale("en", "GB"). + * + *

Calling getBundle with the locale arguments below will + * instantiate resource bundles as follows: + * + * + * + * + * + * + * + *
Locale("fr", "CH")MyResources_fr_CH.class, parent MyResources_fr.properties, parent MyResources.class
Locale("fr", "FR")MyResources_fr.properties, parent MyResources.class
Locale("de", "DE")MyResources_en.properties, parent MyResources.class
Locale("en", "US")MyResources_en.properties, parent MyResources.class
Locale("es", "ES")MyResources_es_ES.class, parent MyResources.class
+ * + *

The file MyResources_fr_CH.properties is never used because it is + * hidden by the MyResources_fr_CH.class. Likewise, MyResources.properties + * is also hidden by MyResources.class. * * @param baseName the base name of the resource bundle, a fully qualified class name * @param locale the locale for which a resource bundle is desired @@ -1095,8 +1154,6 @@ public abstract class ResourceBundle { * href="./ResourceBundle.html#parent_chain">parent chain is * instantiated based on the list of candidate locales from which it was * found. Finally, the bundle is returned to the caller. - * - * * * *

During the resource bundle loading process above, this factory @@ -1119,7 +1176,6 @@ public abstract class ResourceBundle { * {@link Control#getTimeToLive(String,Locale) * control.getTimeToLive} for details. * - * *

The following is an example of the bundle loading process with the * default ResourceBundle.Control implementation. * @@ -1131,7 +1187,6 @@ public abstract class ResourceBundle { *

  • Available resource bundles: * foo/bar/Messages_fr.properties and * foo/bar/Messages.properties
  • - * * * *

    First, getBundle tries loading a resource bundle in @@ -1811,8 +1866,8 @@ public abstract class ResourceBundle { * handleGetObject} method returns null. Once the * Set has been created, the value is kept in this * ResourceBundle in order to avoid producing the - * same Set in the next calls. Override this method - * in subclass implementations for faster handling. + * same Set in subsequent calls. Subclasses can + * override this method for faster handling. * * @return a Set of the keys contained only in this * ResourceBundle @@ -2177,24 +2232,133 @@ public abstract class ResourceBundle { * ResourceBundle.getBundle factory method loads only * the base bundle as the resulting resource bundle. * - *

    It is not a requirement to return an immutable - * (unmodifiable) List. However, the returned - * List must not be mutated after it has been - * returned by getCandidateLocales. + *

    It is not a requirement to return an immutable (unmodifiable) + * List. However, the returned List must not + * be mutated after it has been returned by + * getCandidateLocales. * *

    The default implementation returns a List containing - * Locales in the following sequence: - *

    -         *     Locale(language, country, variant)
    -         *     Locale(language, country)
    -         *     Locale(language)
    -         *     Locale.ROOT
    -         * 
    - * where language, country and - * variant are the language, country and variant values - * of the given locale, respectively. Locales where the + * Locales using the rules described below. In the + * description below, L, S, C and V + * respectively represent non-empty language, script, country, and + * variant. For example, [L, C] represents a + * Locale that has non-empty values only for language and + * country. The form L("xx") represents the (non-empty) + * language value "xx". For all cases, Locales whose * final component values are empty strings are omitted. * + *
    1. For an input Locale with an empty script value, + * append candidate Locales by omitting the final component + * one by one as below: + * + *
        + *
      • [L, C, V] + *
      • [L, C] + *
      • [L] + *
      • Locale.ROOT + *
      + * + *
    2. For an input Locale with a non-empty script value, + * append candidate Locales by omitting the final component + * up to language, then append candidates generated from the + * Locale with country and variant restored: + * + *
        + *
      • [L, S, C, V] + *
      • [L, S, C] + *
      • [L, S] + *
      • [L, C, V] + *
      • [L, C] + *
      • [L] + *
      • Locale.ROOT + *
      + * + *
    3. For an input Locale with a variant value consisting + * of multiple subtags separated by underscore, generate candidate + * Locales by omitting the variant subtags one by one, then + * insert them after every occurrence of Locales with the + * full variant value in the original list. For example, if + * the variant consists of two subtags V1 and V2: + * + *
        + *
      • [L, S, C, V1, V2] + *
      • [L, S, C, V1] + *
      • [L, S, C] + *
      • [L, S] + *
      • [L, C, V1, V2] + *
      • [L, C, V1] + *
      • [L, C] + *
      • [L] + *
      • Locale.ROOT + *
      + * + *
    4. Special cases for Chinese. When an input Locale has the + * language "zh" (Chinese) and an empty script value, either "Hans" (Simplified) or + * "Hant" (Traditional) might be supplied, depending on the country. + * When the country is "CN" (China) or "SG" (Singapore), "Hans" is supplied. + * When the country is "HK" (Hong Kong SAR China), "MO" (Macau SAR China), + * or "TW" (Taiwan), "Hant" is supplied. For all other countries or when the country + * is empty, no script is supplied. For example, for Locale("zh", "CN") + * , the candidate list will be: + *
        + *
      • [L("zh"), S("Hans"), C("CN")] + *
      • [L("zh"), S("Hans")] + *
      • [L("zh"), C("CN")] + *
      • [L("zh")] + *
      • Locale.ROOT + *
      + * + * For Locale("zh", "TW"), the candidate list will be: + *
        + *
      • [L("zh"), S("Hant"), C("TW")] + *
      • [L("zh"), S("Hant")] + *
      • [L("zh"), C("TW")] + *
      • [L("zh")] + *
      • Locale.ROOT + *
      + * + *
    5. Special cases for Norwegian. Both Locale("no", "NO", + * "NY") and Locale("nn", "NO") represent Norwegian + * Nynorsk. When a locale's language is "nn", the standard candidate + * list is generated up to [L("nn")], and then the following + * candidates are added: + * + *
      • [L("no"), C("NO"), V("NY")] + *
      • [L("no"), C("NO")] + *
      • [L("no")] + *
      • Locale.ROOT + *
      + * + * If the locale is exactly Locale("no", "NO", "NY"), it is first + * converted to Locale("nn", "NO") and then the above procedure is + * followed. + * + *

      Also, Java treats the language "no" as a synonym of Norwegian + * Bokmål "nb". Except for the single case Locale("no", + * "NO", "NY") (handled above), when an input Locale + * has language "no" or "nb", candidate Locales with + * language code "no" and "nb" are interleaved, first using the + * requested language, then using its synonym. For example, + * Locale("nb", "NO", "POSIX") generates the following + * candidate list: + * + *

        + *
      • [L("nb"), C("NO"), V("POSIX")] + *
      • [L("no"), C("NO"), V("POSIX")] + *
      • [L("nb"), C("NO")] + *
      • [L("no"), C("NO")] + *
      • [L("nb")] + *
      • [L("no")] + *
      • Locale.ROOT + *
      + * + * Locale("no", "NO", "POSIX") would generate the same list + * except that locales with "no" would appear before the corresponding + * locales with "nb".
    6. + * + * + *
    + * *

    The default implementation uses an {@link ArrayList} that * overriding implementations may modify before returning it to the * caller. However, a subclass must not modify it after it has @@ -2231,24 +2395,119 @@ public abstract class ResourceBundle { if (baseName == null) { throw new NullPointerException(); } - String language = locale.getLanguage(); - String country = locale.getCountry(); - String variant = locale.getVariant(); + return new ArrayList(CANDIDATES_CACHE.get(locale.getBaseLocale())); + } - List locales = new ArrayList(4); - if (variant.length() > 0) { - locales.add(locale); - } - if (country.length() > 0) { - locales.add((locales.size() == 0) ? - locale : Locale.getInstance(language, country, "")); + private static final CandidateListCache CANDIDATES_CACHE = new CandidateListCache(); + + private static class CandidateListCache extends LocaleObjectCache> { + protected List createObject(BaseLocale base) { + String language = base.getLanguage(); + String script = base.getScript(); + String region = base.getRegion(); + String variant = base.getVariant(); + + // Special handling for Norwegian + boolean isNorwegianBokmal = false; + boolean isNorwegianNynorsk = false; + if (language.equals("no")) { + if (region.equals("NO") && variant.equals("NY")) { + variant = ""; + isNorwegianNynorsk = true; + } else { + isNorwegianBokmal = true; + } + } + if (language.equals("nb") || isNorwegianBokmal) { + List tmpList = getDefaultList("nb", script, region, variant); + // Insert a locale replacing "nb" with "no" for every list entry + List bokmalList = new LinkedList(); + for (Locale l : tmpList) { + bokmalList.add(l); + if (l.getLanguage().length() == 0) { + break; + } + bokmalList.add(Locale.getInstance("no", l.getScript(), l.getCountry(), + l.getVariant(), LocaleExtensions.EMPTY_EXTENSIONS)); + } + return bokmalList; + } else if (language.equals("nn") || isNorwegianNynorsk) { + // Insert no_NO_NY, no_NO, no after nn + List nynorskList = getDefaultList("nn", 
script, region, variant); + int idx = nynorskList.size() - 1; + nynorskList.add(idx++, Locale.getInstance("no", "NO", "NY")); + nynorskList.add(idx++, Locale.getInstance("no", "NO", "")); + nynorskList.add(idx++, Locale.getInstance("no", "", "")); + return nynorskList; + } + // Special handling for Chinese + else if (language.equals("zh")) { + if (script.length() == 0 && region.length() > 0) { + // Supply script for users who want to use zh_Hans/zh_Hant + // as bundle names (recommended for Java7+) + if (region.equals("TW") || region.equals("HK") || region.equals("MO")) { + script = "Hant"; + } else if (region.equals("CN") || region.equals("SG")) { + script = "Hans"; + } + } else if (script.length() > 0 && region.length() == 0) { + // Supply region(country) for users who still package Chinese + // bundles using old convension. + if (script.equals("Hans")) { + region = "CN"; + } else if (script.equals("Hant")) { + region = "TW"; + } + } + } + + return getDefaultList(language, script, region, variant); } - if (language.length() > 0) { - locales.add((locales.size() == 0) ? 
- locale : Locale.getInstance(language, "", "")); + + private static List getDefaultList(String language, String script, String region, String variant) { + List variants = null; + + if (variant.length() > 0) { + variants = new LinkedList(); + int idx = variant.length(); + while (idx != -1) { + variants.add(variant.substring(0, idx)); + idx = variant.lastIndexOf('_', --idx); + } + } + + LinkedList list = new LinkedList(); + + if (variants != null) { + for (String v : variants) { + list.add(Locale.getInstance(language, script, region, v, LocaleExtensions.EMPTY_EXTENSIONS)); + } + } + if (region.length() > 0) { + list.add(Locale.getInstance(language, script, region, "", LocaleExtensions.EMPTY_EXTENSIONS)); + } + if (script.length() > 0) { + list.add(Locale.getInstance(language, script, "", "", LocaleExtensions.EMPTY_EXTENSIONS)); + + // With script, after truncating variant, region and script, + // start over without script. + if (variants != null) { + for (String v : variants) { + list.add(Locale.getInstance(language, "", region, v, LocaleExtensions.EMPTY_EXTENSIONS)); + } + } + if (region.length() > 0) { + list.add(Locale.getInstance(language, "", region, "", LocaleExtensions.EMPTY_EXTENSIONS)); + } + } + if (language.length() > 0) { + list.add(Locale.getInstance(language, "", "", "", LocaleExtensions.EMPTY_EXTENSIONS)); + } + // Add root locale at the end + list.add(Locale.ROOT); + + return list; } - locales.add(Locale.ROOT); - return locales; } /** @@ -2606,13 +2865,14 @@ public abstract class ResourceBundle { * *

    This implementation returns the following value: *

    -         *     baseName + "_" + language + "_" + country + "_" + variant
    +         *     baseName + "_" + language + "_" + script + "_" + country + "_" + variant
              * 
    - * where language, country and - * variant are the language, country and variant values - * of locale, respectively. Final component values that - * are empty Strings are omitted along with the preceding '_'. If - * all of the values are empty strings, then baseName + * where language, script, country, + * and variant are the language, script, country, and variant + * values of locale, respectively. Final component values that + * are empty Strings are omitted along with the preceding '_'. When the + * script is empty, the script value is omitted along with the preceding '_'. + * If all of the values are empty strings, then baseName * is returned. * *

    For example, if baseName is @@ -2643,6 +2903,7 @@ public abstract class ResourceBundle { } String language = locale.getLanguage(); + String script = locale.getScript(); String country = locale.getCountry(); String variant = locale.getVariant(); @@ -2652,12 +2913,22 @@ public abstract class ResourceBundle { StringBuilder sb = new StringBuilder(baseName); sb.append('_'); - if (variant != "") { - sb.append(language).append('_').append(country).append('_').append(variant); - } else if (country != "") { - sb.append(language).append('_').append(country); + if (script != "") { + if (variant != "") { + sb.append(language).append('_').append(script).append('_').append(country).append('_').append(variant); + } else if (country != "") { + sb.append(language).append('_').append(script).append('_').append(country); + } else { + sb.append(language).append('_').append(script); + } } else { - sb.append(language); + if (variant != "") { + sb.append(language).append('_').append(country).append('_').append(variant); + } else if (country != "") { + sb.append(language).append('_').append(country); + } else { + sb.append(language); + } } return sb.toString(); diff --git a/src/share/classes/java/util/spi/LocaleNameProvider.java b/src/share/classes/java/util/spi/LocaleNameProvider.java index b54195932..8c3639e17 100644 --- a/src/share/classes/java/util/spi/LocaleNameProvider.java +++ b/src/share/classes/java/util/spi/LocaleNameProvider.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -44,22 +44,23 @@ public abstract class LocaleNameProvider extends LocaleServiceProvider { } /** - * Returns a localized name for the given ISO 639 language code and the - * given locale that is appropriate for display to the user. 
+ * Returns a localized name for the given + * IETF BCP47 language code and the given locale that is appropriate for + * display to the user. * For example, if languageCode is "fr" and locale * is en_US, getDisplayLanguage() will return "French"; if languageCode * is "en" and locale is fr_FR, getDisplayLanguage() will return "anglais". * If the name returned cannot be localized according to locale, * (say, the provider does not have a Japanese name for Croatian), * this method returns null. - * @param languageCode the ISO 639 language code string in the form of two + * @param languageCode the language code string in the form of two to eight * lower-case letters between 'a' (U+0061) and 'z' (U+007A) * @param locale the desired locale * @return the name of the given language code for the specified locale, or null if it's not * available. * @exception NullPointerException if languageCode or locale is null * @exception IllegalArgumentException if languageCode is not in the form of - * two lower-case letters, or locale isn't + * two or three lower-case letters, or locale isn't * one of the locales returned from * {@link java.util.spi.LocaleServiceProvider#getAvailableLocales() * getAvailableLocales()}. @@ -68,22 +69,52 @@ public abstract class LocaleNameProvider extends LocaleServiceProvider { public abstract String getDisplayLanguage(String languageCode, Locale locale); /** - * Returns a localized name for the given ISO 3166 country code and the - * given locale that is appropriate for display to the user. + * Returns a localized name for the given + * IETF BCP47 script code and the given locale that is appropriate for + * display to the user. + * For example, if scriptCode is "Latn" and locale + * is en_US, getDisplayScript() will return "Latin"; if scriptCode + * is "Cyrl" and locale is fr_FR, getDisplayScript() will return "cyrillique". 
+ * If the name returned cannot be localized according to locale, + * (say, the provider does not have a Japanese name for Cyrillic), + * this method returns null. + * @param scriptCode the four letter script code string in the form of title-case + * letters (the first letter is upper-case character between 'A' (U+0041) and + * 'Z' (U+005A) followed by three lower-case character between 'a' (U+0061) + * and 'z' (U+007A)). + * @param locale the desired locale + * @return the name of the given script code for the specified locale, or null if it's not + * available. + * @exception NullPointerException if scriptCode or locale is null + * @exception IllegalArgumentException if scriptCode is not in the form of + * four title case letters, or locale isn't + * one of the locales returned from + * {@link java.util.spi.LocaleServiceProvider#getAvailableLocales() + * getAvailableLocales()}. + * @see java.util.Locale#getDisplayScript(java.util.Locale) + * @since 1.7 + */ + public abstract String getDisplayScript(String scriptCode, Locale locale); + + /** + * Returns a localized name for the given + * IETF BCP47 region code (either ISO 3166 country code or UN M.49 area + * codes) and the given locale that is appropriate for display to the user. * For example, if countryCode is "FR" and locale * is en_US, getDisplayCountry() will return "France"; if countryCode * is "US" and locale is fr_FR, getDisplayCountry() will return "Etats-Unis". * If the name returned cannot be localized according to locale, * (say, the provider does not have a Japanese name for Croatia), * this method returns null. - * @param countryCode the ISO 3166 country code string in the form of two - * upper-case letters between 'A' (U+0041) and 'Z' (U+005A) + * @param countryCode the country(region) code string in the form of two + * upper-case letters between 'A' (U+0041) and 'Z' (U+005A) or the UN M.49 area code + * in the form of three digit letters between '0' (U+0030) and '9' (U+0039). 
* @param locale the desired locale * @return the name of the given country code for the specified locale, or null if it's not * available. * @exception NullPointerException if countryCode or locale is null * @exception IllegalArgumentException if countryCode is not in the form of - * two upper-case letters, or locale isn't + * two upper-case letters or three digit letters, or locale isn't * one of the locales returned from * {@link java.util.spi.LocaleServiceProvider#getAvailableLocales() * getAvailableLocales()}. diff --git a/src/share/classes/java/util/spi/LocaleServiceProvider.java b/src/share/classes/java/util/spi/LocaleServiceProvider.java index 567e89271..02626e35e 100644 --- a/src/share/classes/java/util/spi/LocaleServiceProvider.java +++ b/src/share/classes/java/util/spi/LocaleServiceProvider.java @@ -86,18 +86,19 @@ import java.util.Locale; * Otherwise, they call the getAvailableLocales() methods of * installed providers for the appropriate interface to find one that * supports the requested locale. If such a provider is found, its other - * methods are called to obtain the requested object or name. If neither - * the Java runtime environment itself nor an installed provider supports - * the requested locale, a fallback locale is constructed by replacing the - * first of the variant, country, or language strings of the locale that's - * not an empty string with an empty string, and the lookup process is - * restarted. In the case that the variant contains one or more '_'s, the - * fallback locale is constructed by replacing the variant with a new variant - * which eliminates the last '_' and the part following it. Even if a - * fallback occurs, methods that return requested objects or name are - * invoked with the original locale before the fallback.The Java runtime - * environment must support the root locale for all locale sensitive services - * in order to guarantee that this process terminates. 
+ * methods are called to obtain the requested object or name. When checking + * whether a locale is supported, the locale's extensions are ignored. + * If neither the Java runtime environment itself nor an installed provider + * supports the requested locale, the methods go through a list of candidate + * locales and repeat the availability check for each until a match is found. + * The algorithm used for creating a list of candidate locales is the same as + * the one used by ResourceBundle by default (see + * {@link java.util.ResourceBundle.Control#getCandidateLocales getCandidateLocales} + * for the details). Even if a locale is resolved from the candidate list, + * methods that return requested objects or names are invoked with the original + * requested locale including extensions. The Java runtime environment must + * support the root locale for all locale sensitive services in order to + * guarantee that this process terminates. *

    * Providers of names (but not providers of other objects) are allowed to * return null for some name requests even for locales that they claim to @@ -124,6 +125,11 @@ public abstract class LocaleServiceProvider { /** * Returns an array of all locales for which this locale service provider * can provide localized objects or names. + *

    + * Note: Extensions in a Locale are ignored during + * service provider lookup. So the array returned by this method should + * not include two or more Locale objects only differing in + * their extensions. * * @return An array of all locales for which this locale service provider * can provide localized objects or names. diff --git a/src/share/classes/sun/util/LocaleServiceProviderPool.java b/src/share/classes/sun/util/LocaleServiceProviderPool.java index 8cc22603d..8c45c6fd6 100644 --- a/src/share/classes/sun/util/LocaleServiceProviderPool.java +++ b/src/share/classes/sun/util/LocaleServiceProviderPool.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,18 +28,20 @@ package sun.util; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; -import java.util.Arrays; +import java.util.ArrayList; import java.util.HashSet; -import java.util.Iterator; +import java.util.IllformedLocaleException; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; +import java.util.Locale.Builder; import java.util.Map; +import java.util.ResourceBundle.Control; import java.util.ServiceLoader; -import java.util.ServiceConfigurationError; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.spi.LocaleServiceProvider; + import sun.util.logging.PlatformLogger; import sun.util.resources.LocaleData; import sun.util.resources.OpenListResourceBundle; @@ -89,6 +91,16 @@ public final class LocaleServiceProviderPool { */ private Set providerLocales = null; + /** + * Special locale for ja_JP with Japanese calendar + */ + private static Locale locale_ja_JP_JP = new Locale("ja", "JP", 
"JP"); + + /** + * Special locale for th_TH with Thai numbering system + */ + private static Locale locale_th_TH_TH = new Locale("th", "TH", "TH"); + /** * A factory method that returns a singleton instance */ @@ -153,14 +165,20 @@ public final class LocaleServiceProviderPool { java.util.spi.CurrencyNameProvider.class, java.util.spi.LocaleNameProvider.class, java.util.spi.TimeZoneNameProvider.class }; - Set all = new HashSet(Arrays.asList( - LocaleData.getAvailableLocales()) - ); + + // Normalize locales for look up + Locale[] allLocales = LocaleData.getAvailableLocales(); + Set all = new HashSet(allLocales.length); + for (Locale locale : allLocales) { + all.add(getLookupLocale(locale)); + } + for (Class providerClass : providerClasses) { LocaleServiceProviderPool pool = LocaleServiceProviderPool.getPool(providerClass); all.addAll(pool.getProviderLocales()); } + allAvailableLocales = all.toArray(new Locale[0]); } } @@ -196,7 +214,8 @@ public final class LocaleServiceProviderPool { } /** - * Returns an array of available locales from providers. + * Returns an array of available locales (already normalized + * for service lookup) from providers. * Note that this method does not return a defensive copy. * * @return list of the provider locales @@ -208,7 +227,7 @@ public final class LocaleServiceProviderPool { for (LocaleServiceProvider lsp : providers) { Locale[] locales = lsp.getAvailableLocales(); for (Locale locale: locales) { - providerLocales.add(locale); + providerLocales.add(getLookupLocale(locale)); } } } @@ -227,15 +246,19 @@ public final class LocaleServiceProviderPool { } /** - * Returns an array of available locales supported by the JRE. + * Returns an array of available locales (already normalized for + * service lookup) supported by the JRE. * Note that this method does not return a defensive copy. 
* * @return list of the available JRE locales */ private synchronized List getJRELocales() { if (availableJRELocales == null) { - availableJRELocales = - Arrays.asList(LocaleData.getAvailableLocales()); + Locale[] allLocales = LocaleData.getAvailableLocales(); + availableJRELocales = new ArrayList(allLocales.length); + for (Locale locale : allLocales) { + availableJRELocales.add(getLookupLocale(locale)); + } } return availableJRELocales; } @@ -249,7 +272,7 @@ public final class LocaleServiceProviderPool { */ private boolean isJRESupported(Locale locale) { List locales = getJRELocales(); - return locales.contains(locale); + return locales.contains(getLookupLocale(locale)); } /** @@ -325,7 +348,7 @@ public final class LocaleServiceProviderPool { bundleKey = key; } Locale bundleLocale = (bundle != null ? bundle.getLocale() : null); - Locale requested = locale; + List lookupLocales = getLookupLocales(locale); P lsp; S providersObj = null; @@ -333,21 +356,30 @@ public final class LocaleServiceProviderPool { // to the requested locale than the bundle we've found (for // localized names), or Java runtime's supported locale // (for localized objects) - while ((locale = findProviderLocale(locale, bundleLocale)) != null) { - - lsp = (P)findProvider(locale); - - if (lsp != null) { - providersObj = getter.getObject(lsp, requested, key, params); - if (providersObj != null) { - return providersObj; - } else if (isObjectProvider) { - config( - "A locale sensitive service provider returned null for a localized objects, which should not happen. 
provider: " + lsp + " locale: " + requested); + Set provLoc = getProviderLocales(); + for (int i = 0; i < lookupLocales.size(); i++) { + Locale current = lookupLocales.get(i); + if (bundleLocale != null) { + if (current.equals(bundleLocale)) { + break; + } + } else { + if (isJRESupported(current)) { + break; + } + } + if (provLoc.contains(current)) { + lsp = (P)findProvider(current); + if (lsp != null) { + providersObj = getter.getObject(lsp, locale, key, params); + if (providersObj != null) { + return providersObj; + } else if (isObjectProvider) { + config( + "A locale sensitive service provider returned null for a localized objects, which should not happen. provider: " + lsp + " locale: " + locale); + } } } - - locale = getParentLocale(locale); } // look up the JRE bundle and its parent chain. Only @@ -361,7 +393,7 @@ public final class LocaleServiceProviderPool { } else { lsp = (P)findProvider(bundleLocale); if (lsp != null) { - providersObj = getter.getObject(lsp, requested, key, params); + providersObj = getter.getObject(lsp, locale, key, params); if (providersObj != null) { return providersObj; } @@ -399,6 +431,8 @@ public final class LocaleServiceProviderPool { for (LocaleServiceProvider lsp : providers) { Locale[] locales = lsp.getAvailableLocales(); for (Locale available: locales) { + // normalize + available = getLookupLocale(available); if (locale.equals(available)) { LocaleServiceProvider providerInCache = providersCache.put(locale, lsp); @@ -414,66 +448,51 @@ public final class LocaleServiceProviderPool { } /** - * Returns the provider's locale that is the most appropriate - * within the range - * - * @param start the given locale that is used as the starting one - * @param end the given locale that is used as the end one (exclusive), - * or null if it reaching any of the JRE supported locale should - * terminate the look up. - * @return the most specific locale within the range, or null - * if no provider locale found in that range. 
+ * Returns a list of candidate locales for service look up. + * @param locale the input locale + * @return the list of candidate locales for the given locale */ - private Locale findProviderLocale(Locale start, Locale end) { - Set provLoc = getProviderLocales(); - Locale current = start; - - while (current != null) { - if (end != null) { - if (current.equals(end)) { - current = null; - break; - } - } else { - if (isJRESupported(current)) { - current = null; - break; - } - } - - if (provLoc.contains(current)) { - break; - } - - current = getParentLocale(current); - } - - return current; + private static List getLookupLocales(Locale locale) { + // Note: We currently use the default implementation of + // ResourceBundle.Control.getCandidateLocales. The results + // returned by getCandidateLocales are already normalized + // (no extensions) for service look up. + List lookupLocales = new Control(){}.getCandidateLocales("", locale); + return lookupLocales; } /** - * Returns the parent locale. + * Returns an instance of Locale used for service look up.
+ * The result Locale has no extensions except for ja_JP_JP + * and th_TH_TH * * @param locale the locale - * @return the parent locale + * @return the locale used for service look up */ - private static Locale getParentLocale(Locale locale) { - String variant = locale.getVariant(); - if (variant != "") { - int underscoreIndex = variant.lastIndexOf('_'); - if (underscoreIndex != (-1)) { - return new Locale(locale.getLanguage(), locale.getCountry(), - variant.substring(0, underscoreIndex)); - } else { - return new Locale(locale.getLanguage(), locale.getCountry()); + private static Locale getLookupLocale(Locale locale) { + Locale lookupLocale = locale; + Set extensions = locale.getExtensionKeys(); + if (!extensions.isEmpty() + && !locale.equals(locale_ja_JP_JP) + && !locale.equals(locale_th_TH_TH)) { + // remove extensions + Builder locbld = new Builder(); + try { + locbld.setLocale(locale); + locbld.clearExtensions(); + lookupLocale = locbld.build(); + } catch (IllformedLocaleException e) { + // A Locale with non-empty extensions + // should have well-formed fields except + // for ja_JP_JP and th_TH_TH. Therefore, + // it should never enter in this catch clause. + config("A locale(" + locale + ") has non-empty extensions, but has illformed fields."); + + // Fallback - script field will be lost. + lookupLocale = new Locale(locale.getLanguage(), locale.getCountry(), locale.getVariant()); } - } else if (locale.getCountry() != "") { - return new Locale(locale.getLanguage()); - } else if (locale.getLanguage() != "") { - return Locale.ROOT; - } else { - return null; } + return lookupLocale; } /** diff --git a/src/share/classes/sun/util/locale/AsciiUtil.java b/src/share/classes/sun/util/locale/AsciiUtil.java new file mode 100644 index 000000000..b122292ca --- /dev/null +++ b/src/share/classes/sun/util/locale/AsciiUtil.java @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package sun.util.locale; + +public final class AsciiUtil { + public static boolean caseIgnoreMatch(String s1, String s2) { + if (s1 == s2) { + return true; + } + int len = s1.length(); + if (len != s2.length()) { + return false; + } + int i = 0; + while (i < len) { + char c1 = s1.charAt(i); + char c2 = s2.charAt(i); + if (c1 != c2 && toLower(c1) != toLower(c2)) { + break; + } + i++; + } + return (i == len); + } + + public static int caseIgnoreCompare(String s1, String s2) { + if (s1 == s2) { + return 0; + } + return AsciiUtil.toLowerString(s1).compareTo(AsciiUtil.toLowerString(s2)); + } + + + public static char toUpper(char c) { + if (c >= 'a' && c <= 'z') { + c -= 0x20; + } + return c; + } + + public static char toLower(char c) { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + return c; + } + + public static String toLowerString(String s) { + int idx = 0; + for (; idx < s.length(); idx++) { + char c = s.charAt(idx); + if (c >= 'A' && c <= 'Z') { + break; + } + } + if (idx == s.length()) { + return s; + } + StringBuilder buf = new StringBuilder(s.substring(0, idx)); + for (; idx < s.length(); idx++) { + buf.append(toLower(s.charAt(idx))); + } + return buf.toString(); + } + + public static String toUpperString(String s) { + int idx = 0; + for (; idx < s.length(); idx++) { + char c = s.charAt(idx); + if (c >= 'a' && c <= 'z') { + break; + } + } + if (idx == s.length()) { + return s; + } + StringBuilder buf = new StringBuilder(s.substring(0, idx)); + for (; idx < s.length(); idx++) { + buf.append(toUpper(s.charAt(idx))); + } + return buf.toString(); + } + + public static String toTitleString(String s) { + if (s.length() == 0) { + return s; + } + int idx = 0; + char c = s.charAt(idx); + if (!(c >= 'a' && c <= 'z')) { + for (idx = 1; idx < s.length(); idx++) { + if (s.charAt(idx) >= 'A' && s.charAt(idx) <= 'Z') { // test the character at idx; c still holds s.charAt(0) + break; + } + } + } + if (idx == s.length()) { + return s; + } + StringBuilder buf = new
StringBuilder(s.substring(0, idx)); + if (idx == 0) { + buf.append(toUpper(s.charAt(idx))); + idx++; + } + for (; idx < s.length(); idx++) { + buf.append(toLower(s.charAt(idx))); + } + return buf.toString(); + } + + public static boolean isAlpha(char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } + + public static boolean isAlphaString(String s) { + boolean b = true; + for (int i = 0; i < s.length(); i++) { + if (!isAlpha(s.charAt(i))) { + b = false; + break; + } + } + return b; + } + + public static boolean isNumeric(char c) { + return (c >= '0' && c <= '9'); + } + + public static boolean isNumericString(String s) { + boolean b = true; + for (int i = 0; i < s.length(); i++) { + if (!isNumeric(s.charAt(i))) { + b = false; + break; + } + } + return b; + } + + public static boolean isAlphaNumeric(char c) { + return isAlpha(c) || isNumeric(c); + } + + public static boolean isAlphaNumericString(String s) { + boolean b = true; + for (int i = 0; i < s.length(); i++) { + if (!isAlphaNumeric(s.charAt(i))) { + b = false; + break; + } + } + return b; + } + + public static class CaseInsensitiveKey { + private String _key; + private int _hash; + + public CaseInsensitiveKey(String key) { + _key = key; + _hash = AsciiUtil.toLowerString(key).hashCode(); + } + + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o instanceof CaseInsensitiveKey) { + return AsciiUtil.caseIgnoreMatch(_key, ((CaseInsensitiveKey)o)._key); + } + return false; + } + + public int hashCode() { + return _hash; + } + } +} diff --git a/src/share/classes/sun/util/locale/BaseLocale.java b/src/share/classes/sun/util/locale/BaseLocale.java new file mode 100644 index 000000000..a3314826f --- /dev/null +++ b/src/share/classes/sun/util/locale/BaseLocale.java @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ + +package sun.util.locale; + + +public final class BaseLocale { + + public static final String SEP = "_"; + + private static final Cache CACHE = new Cache(); + public static final BaseLocale ROOT = BaseLocale.getInstance("", "", "", ""); + + private String _language = ""; + private String _script = ""; + private String _region = ""; + private String _variant = ""; + + private transient volatile int _hash = 0; + + private BaseLocale(String language, String script, String region, String variant) { + if (language != null) { + _language = AsciiUtil.toLowerString(language).intern(); + } + if (script != null) { + _script = AsciiUtil.toTitleString(script).intern(); + } + if (region != null) { + _region = AsciiUtil.toUpperString(region).intern(); + } + if (variant != null) { + _variant = variant.intern(); + } + } + + public static BaseLocale getInstance(String language, String script, String region, String variant) { + // JDK uses deprecated ISO639.1 language codes for he, yi and id; a null language is skipped here and treated as "" by Key + if (language != null && AsciiUtil.caseIgnoreMatch(language, "he")) { + language = "iw"; + } else if (language != null && AsciiUtil.caseIgnoreMatch(language, "yi")) { + language = "ji"; + } else if (language != null && AsciiUtil.caseIgnoreMatch(language, "id")) { + language = "in"; + } + + Key key = new Key(language, script, region, variant); + BaseLocale baseLocale = CACHE.get(key); + return baseLocale; + } + + public String getLanguage() { + return _language; + } + + public String getScript() { + return _script; + } + + public String getRegion() { + return _region; + } + + public String getVariant() { + return _variant; + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof BaseLocale)) { + return false; + } + BaseLocale other = (BaseLocale)obj; + return hashCode() == other.hashCode() + && _language.equals(other._language) + && _script.equals(other._script) + && _region.equals(other._region) + &&
_variant.equals(other._variant); + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + if (_language.length() > 0) { + buf.append("language="); + buf.append(_language); + } + if (_script.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("script="); + buf.append(_script); + } + if (_region.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("region="); + buf.append(_region); + } + if (_variant.length() > 0) { + if (buf.length() > 0) { + buf.append(", "); + } + buf.append("variant="); + buf.append(_variant); + } + return buf.toString(); + } + + public int hashCode() { + int h = _hash; + if (h == 0) { + // Generating a hash value from language, script, region and variant + for (int i = 0; i < _language.length(); i++) { + h = 31*h + _language.charAt(i); + } + for (int i = 0; i < _script.length(); i++) { + h = 31*h + _script.charAt(i); + } + for (int i = 0; i < _region.length(); i++) { + h = 31*h + _region.charAt(i); + } + for (int i = 0; i < _variant.length(); i++) { + h = 31*h + _variant.charAt(i); + } + _hash = h; + } + return h; + } + + private static class Key implements Comparable { + private String _lang = ""; + private String _scrt = ""; + private String _regn = ""; + private String _vart = ""; + + private volatile int _hash; // Default to 0 + + public Key(String language, String script, String region, String variant) { + if (language != null) { + _lang = language; + } + if (script != null) { + _scrt = script; + } + if (region != null) { + _regn = region; + } + if (variant != null) { + _vart = variant; + } + } + + public boolean equals(Object obj) { + return (this == obj) || + (obj instanceof Key) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt) + && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn) + && ((Key)obj)._vart.equals(_vart); // variant is case sensitive in JDK! 
+ } + + public int compareTo(Key other) { + int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang); + if (res == 0) { + res = AsciiUtil.caseIgnoreCompare(this._scrt, other._scrt); + if (res == 0) { + res = AsciiUtil.caseIgnoreCompare(this._regn, other._regn); + if (res == 0) { + res = this._vart.compareTo(other._vart); + } + } + } + return res; + } + + public int hashCode() { + int h = _hash; + if (h == 0) { + // Generating a hash value from language, script, region and variant + for (int i = 0; i < _lang.length(); i++) { + h = 31*h + AsciiUtil.toLower(_lang.charAt(i)); + } + for (int i = 0; i < _scrt.length(); i++) { + h = 31*h + AsciiUtil.toLower(_scrt.charAt(i)); + } + for (int i = 0; i < _regn.length(); i++) { + h = 31*h + AsciiUtil.toLower(_regn.charAt(i)); + } + for (int i = 0; i < _vart.length(); i++) { + h = 31*h + _vart.charAt(i); + } + _hash = h; + } + return h; + } + + public static Key normalize(Key key) { + String lang = AsciiUtil.toLowerString(key._lang).intern(); + String scrt = AsciiUtil.toTitleString(key._scrt).intern(); + String regn = AsciiUtil.toUpperString(key._regn).intern(); + String vart = key._vart.intern(); // preserve upper/lower cases + + return new Key(lang, scrt, regn, vart); + } + } + + private static class Cache extends LocaleObjectCache { + + public Cache() { + } + + protected Key normalizeKey(Key key) { + return Key.normalize(key); + } + + protected BaseLocale createObject(Key key) { + return new BaseLocale(key._lang, key._scrt, key._regn, key._vart); + } + + } +} diff --git a/src/share/classes/sun/util/locale/Extension.java b/src/share/classes/sun/util/locale/Extension.java new file mode 100644 index 000000000..8d98faf2d --- /dev/null +++ b/src/share/classes/sun/util/locale/Extension.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
* + ******************************************************************************* + */ +package sun.util.locale; + + +public class Extension { + private char _key; + protected String _value; + + protected Extension(char key) { + _key = key; + } + + Extension(char key, String value) { + _key = key; + _value = value; + } + + public char getKey() { + return _key; + } + + public String getValue() { + return _value; + } + + public String getID() { + return _key + LanguageTag.SEP + _value; + } + + public String toString() { + return getID(); + } +} diff --git a/src/share/classes/sun/util/locale/InternalLocaleBuilder.java b/src/share/classes/sun/util/locale/InternalLocaleBuilder.java new file mode 100644 index 000000000..6c33036b9 --- /dev/null +++ b/src/share/classes/sun/util/locale/InternalLocaleBuilder.java @@ -0,0 +1,705 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package sun.util.locale; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public final class InternalLocaleBuilder { + + private String _language = ""; + private String _script = ""; + private String _region = ""; + private String _variant = ""; + + private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0)); + + private HashMap _extensions; + private HashSet _uattributes; + private HashMap _ukeywords; + + + public InternalLocaleBuilder() { + } + + public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException { + if (language == null || language.length() == 0) { + _language = ""; + } else { + if (!LanguageTag.isLanguage(language)) { + throw new LocaleSyntaxException("Ill-formed language: " + language, 0); + } + _language = language; + } + return this; + } + + public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException { + if (script == null || script.length() == 0) { + _script = ""; + } else { + if (!LanguageTag.isScript(script)) { + throw new LocaleSyntaxException("Ill-formed script: " + script, 0); + } + _script = script; + } + return this; + } + + public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException { + if (region == null || region.length() == 0) { + _region = ""; + } else { + if (!LanguageTag.isRegion(region)) { + throw new LocaleSyntaxException("Ill-formed region: " + region, 0); + } + 
_region = region; + } + return this; + } + + public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException { + if (variant == null || variant.length() == 0) { + _variant = ""; + } else { + // normalize separators to "_" + String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP); + int errIdx = checkVariants(var, BaseLocale.SEP); + if (errIdx != -1) { + throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); + } + _variant = var; + } + return this; + } + + public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { + if (!UnicodeLocaleExtension.isAttribute(attribute)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); + } + // Use case insensitive string to prevent duplication + if (_uattributes == null) { + _uattributes = new HashSet(4); + } + _uattributes.add(new CaseInsensitiveString(attribute)); + return this; + } + + public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { + if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); + } + if (_uattributes != null) { + _uattributes.remove(new CaseInsensitiveString(attribute)); + } + return this; + } + + public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException { + if (!UnicodeLocaleExtension.isKey(key)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key); + } + + CaseInsensitiveString cikey = new CaseInsensitiveString(key); + if (type == null) { + if (_ukeywords != null) { + // null type is used for remove the key + _ukeywords.remove(cikey); + } + } else { + if (type.length() != 0) { + // normalize separator to "-" + String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP); + // validate + StringTokenIterator itr = new StringTokenIterator(tp, 
LanguageTag.SEP); + while (!itr.isDone()) { + String s = itr.current(); + if (!UnicodeLocaleExtension.isTypeSubtag(s)) { + throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart()); + } + itr.next(); + } + } + if (_ukeywords == null) { + _ukeywords = new HashMap(4); + } + _ukeywords.put(cikey, type); + } + return this; + } + + public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException { + // validate key + boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton); + if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) { + throw new LocaleSyntaxException("Ill-formed extension key: " + singleton); + } + + boolean remove = (value == null || value.length() == 0); + CaseInsensitiveChar key = new CaseInsensitiveChar(singleton); + + if (remove) { + if (UnicodeLocaleExtension.isSingletonChar(key.value())) { + // clear entire Unicode locale extension + if (_uattributes != null) { + _uattributes.clear(); + } + if (_ukeywords != null) { + _ukeywords.clear(); + } + } else { + if (_extensions != null && _extensions.containsKey(key)) { + _extensions.remove(key); + } + } + } else { + // validate value + String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP); + StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP); + while (!itr.isDone()) { + String s = itr.current(); + boolean validSubtag; + if (isBcpPrivateuse) { + validSubtag = LanguageTag.isPrivateuseSubtag(s); + } else { + validSubtag = LanguageTag.isExtensionSubtag(s); + } + if (!validSubtag) { + throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart()); + } + itr.next(); + } + + if (UnicodeLocaleExtension.isSingletonChar(key.value())) { + setUnicodeLocaleExtension(val); + } else { + if (_extensions == null) { + _extensions = new HashMap(4); + } + _extensions.put(key, val); + } + } + return this; + } + + /* + * Set extension/private subtags in a 
single string representation + */ + public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException { + if (subtags == null || subtags.length() == 0) { + clearExtensions(); + return this; + } + subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP); + StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); + + List extensions = null; + String privateuse = null; + + int parsed = 0; + int start; + + // Make a list of extension subtags + while (!itr.isDone()) { + String s = itr.current(); + if (LanguageTag.isExtensionSingleton(s)) { + start = itr.currentStart(); + String singleton = s; + StringBuilder sb = new StringBuilder(singleton); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (LanguageTag.isExtensionSubtag(s)) { + sb.append(LanguageTag.SEP).append(s); + parsed = itr.currentEnd(); + } else { + break; + } + itr.next(); + } + + if (parsed <= start) { // same test as the privateuse branch; '<' missed a leading singleton with no subtags (parsed == start, presumably 0) + throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start); + } + + if (extensions == null) { + extensions = new ArrayList(4); + } + extensions.add(sb.toString()); + } else { + break; + } + } + if (!itr.isDone()) { + String s = itr.current(); + if (LanguageTag.isPrivateusePrefix(s)) { + start = itr.currentStart(); + StringBuilder sb = new StringBuilder(s); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (!LanguageTag.isPrivateuseSubtag(s)) { + break; + } + sb.append(LanguageTag.SEP).append(s); + parsed = itr.currentEnd(); + + itr.next(); + } + if (parsed <= start) { + throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start); + } else { + privateuse = sb.toString(); + } + } + } + + if (!itr.isDone()) { + throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart()); + } + + return setExtensions(extensions, privateuse); + } + + /* + * Set a list of BCP47 extensions and private use subtags + * BCP47 extensions
are already validated and well-formed, but may contain duplicates + */ + private InternalLocaleBuilder setExtensions(List bcpExtensions, String privateuse) { + clearExtensions(); + + if (bcpExtensions != null && bcpExtensions.size() > 0) { + HashSet processedExtensions = new HashSet(bcpExtensions.size()); + for (String bcpExt : bcpExtensions) { + CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0)); + // ignore duplicates: add() returns false when this singleton was already processed, so the first occurrence wins + if (processedExtensions.add(key)) { + // each extension string contains singleton, e.g. "a-abc-def" + if (UnicodeLocaleExtension.isSingletonChar(key.value())) { + setUnicodeLocaleExtension(bcpExt.substring(2)); + } else { + if (_extensions == null) { + _extensions = new HashMap(4); + } + _extensions.put(key, bcpExt.substring(2)); + } + } + } + } + if (privateuse != null && privateuse.length() > 0) { + // privateuse string contains prefix, e.g. "x-abc-def" + if (_extensions == null) { + _extensions = new HashMap(1); + } + _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2)); + } + + return this; + } + + /* + * Reset Builder's internal state with the given language tag + */ + public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) { + clear(); + if (langtag.getExtlangs().size() > 0) { + _language = langtag.getExtlangs().get(0); + } else { + String language = langtag.getLanguage(); + if (!language.equals(LanguageTag.UNDETERMINED)) { + _language = language; + } + } + _script = langtag.getScript(); + _region = langtag.getRegion(); + + List bcpVariants = langtag.getVariants(); + if (bcpVariants.size() > 0) { + StringBuilder var = new StringBuilder(bcpVariants.get(0)); + for (int i = 1; i < bcpVariants.size(); i++) { + var.append(BaseLocale.SEP).append(bcpVariants.get(i)); + } + _variant = var.toString(); + } + + setExtensions(langtag.getExtensions(), langtag.getPrivateuse()); + + return this; + } + + public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions)
throws LocaleSyntaxException { + String language = base.getLanguage(); + String script = base.getScript(); + String region = base.getRegion(); + String variant = base.getVariant(); + + // Special backward compatibility support + + // Exception 1 - ja_JP_JP + if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) { + // When locale ja_JP_JP is created, ca-japanese is always there. + // The builder ignores the variant "JP" + assert("japanese".equals(extensions.getUnicodeLocaleType("ca"))); + variant = ""; + } + // Exception 2 - th_TH_TH + else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) { + // When locale th_TH_TH is created, nu-thai is always there. + // The builder ignores the variant "TH" + assert("thai".equals(extensions.getUnicodeLocaleType("nu"))); + variant = ""; + } + // Exception 3 - no_NO_NY + else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) { + // no_NO_NY is a valid locale and used by Java 6 or older versions. + // The build ignores the variant "NY" and change the language to "nn". + language = "nn"; + variant = ""; + } + + // Validate base locale fields before updating internal state. + // LocaleExtensions always store validated/canonicalized values, + // so no checks are necessary. + if (language.length() > 0 && !LanguageTag.isLanguage(language)) { + throw new LocaleSyntaxException("Ill-formed language: " + language); + } + + if (script.length() > 0 && !LanguageTag.isScript(script)) { + throw new LocaleSyntaxException("Ill-formed script: " + script); + } + + if (region.length() > 0 && !LanguageTag.isRegion(region)) { + throw new LocaleSyntaxException("Ill-formed region: " + region); + } + + if (variant.length() > 0) { + int errIdx = checkVariants(variant, BaseLocale.SEP); + if (errIdx != -1) { + throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); + } + } + + // The input locale is validated at this point. + // Now, updating builder's internal fields. 
+ _language = language; + _script = script; + _region = region; + _variant = variant; + clearExtensions(); + + Set extKeys = (extensions == null) ? null : extensions.getKeys(); + if (extKeys != null) { + // map extensions back to builder's internal format + for (Character key : extKeys) { + Extension e = extensions.getExtension(key); + if (e instanceof UnicodeLocaleExtension) { + UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e; + for (String uatr : ue.getUnicodeLocaleAttributes()) { + if (_uattributes == null) { + _uattributes = new HashSet(4); + } + _uattributes.add(new CaseInsensitiveString(uatr)); + } + for (String ukey : ue.getUnicodeLocaleKeys()) { + if (_ukeywords == null) { + _ukeywords = new HashMap(4); + } + _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey)); + } + } else { + if (_extensions == null) { + _extensions = new HashMap(4); + } + _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue()); + } + } + } + return this; + } + + public InternalLocaleBuilder clear() { + _language = ""; + _script = ""; + _region = ""; + _variant = ""; + clearExtensions(); + return this; + } + + public InternalLocaleBuilder clearExtensions() { + if (_extensions != null) { + _extensions.clear(); + } + if (_uattributes != null) { + _uattributes.clear(); + } + if (_ukeywords != null) { + _ukeywords.clear(); + } + return this; + } + + public BaseLocale getBaseLocale() { + String language = _language; + String script = _script; + String region = _region; + String variant = _variant; + + // Special private use subtag sequence identified by "lvariant" will be + // interpreted as Java variant. 
+ if (_extensions != null) { + String privuse = _extensions.get(PRIVUSE_KEY); + if (privuse != null) { + StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP); + boolean sawPrefix = false; + int privVarStart = -1; + while (!itr.isDone()) { + if (sawPrefix) { + privVarStart = itr.currentStart(); + break; + } + if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { + sawPrefix = true; + } + itr.next(); + } + if (privVarStart != -1) { + StringBuilder sb = new StringBuilder(variant); + if (sb.length() != 0) { + sb.append(BaseLocale.SEP); + } + sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP)); + variant = sb.toString(); + } + } + } + + return BaseLocale.getInstance(language, script, region, variant); + } + + public LocaleExtensions getLocaleExtensions() { + if ((_extensions == null || _extensions.size() == 0) + && (_uattributes == null || _uattributes.size() == 0) + && (_ukeywords == null || _ukeywords.size() == 0)) { + return LocaleExtensions.EMPTY_EXTENSIONS; + } + + return new LocaleExtensions(_extensions, _uattributes, _ukeywords); + } + + /* + * Remove special private use subtag sequence identified by "lvariant" + * and return the rest. Only used by LocaleExtensions + */ + static String removePrivateuseVariant(String privuseVal) { + StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP); + + // Note: privateuse value "abc-lvariant" is unchanged + // because no subtags after "lvariant". + + int prefixStart = -1; + boolean sawPrivuseVar = false; + while (!itr.isDone()) { + if (prefixStart != -1) { + // Note: privateuse value "abc-lvariant" is unchanged + // because no subtags after "lvariant". 
+ sawPrivuseVar = true; + break; + } + if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { + prefixStart = itr.currentStart(); + } + itr.next(); + } + if (!sawPrivuseVar) { + return privuseVal; + } + + assert(prefixStart == 0 || prefixStart > 1); + return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1); + } + + /* + * Check if the given variant subtags separated by the given + * separator(s) are valid + */ + private int checkVariants(String variants, String sep) { + StringTokenIterator itr = new StringTokenIterator(variants, sep); + while (!itr.isDone()) { + String s = itr.current(); + if (!LanguageTag.isVariant(s)) { + return itr.currentStart(); + } + itr.next(); + } + return -1; + } + + /* + * Private methods parsing Unicode Locale Extension subtags. + * Duplicated attributes/keywords will be ignored. + * The input must be a valid extension subtags (excluding singleton). + */ + private void setUnicodeLocaleExtension(String subtags) { + // wipe out existing attributes/keywords + if (_uattributes != null) { + _uattributes.clear(); + } + if (_ukeywords != null) { + _ukeywords.clear(); + } + + StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); + + // parse attributes + while (!itr.isDone()) { + if (!UnicodeLocaleExtension.isAttribute(itr.current())) { + break; + } + if (_uattributes == null) { + _uattributes = new HashSet(4); + } + _uattributes.add(new CaseInsensitiveString(itr.current())); + itr.next(); + } + + // parse keywords + CaseInsensitiveString key = null; + String type; + int typeStart = -1; + int typeEnd = -1; + while (!itr.isDone()) { + if (key != null) { + if (UnicodeLocaleExtension.isKey(itr.current())) { + // next keyword - emit previous one + assert(typeStart == -1 || typeEnd != -1); + type = (typeStart == -1) ? 
"" : subtags.substring(typeStart, typeEnd);
+                    if (_ukeywords == null) {
+                        _ukeywords = new HashMap(4);
+                    }
+                    _ukeywords.put(key, type);
+
+                    // reset keyword info
+                    CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current());
+                    key = _ukeywords.containsKey(tmpKey) ? null : tmpKey;
+                    typeStart = typeEnd = -1;
+                } else {
+                    if (typeStart == -1) {
+                        typeStart = itr.currentStart();
+                    }
+                    typeEnd = itr.currentEnd();
+                }
+            } else if (UnicodeLocaleExtension.isKey(itr.current())) {
+                // 1. first keyword or
+                // 2. next keyword, but previous one was duplicate
+                key = new CaseInsensitiveString(itr.current());
+                if (_ukeywords != null && _ukeywords.containsKey(key)) {
+                    // duplicate
+                    key = null;
+                }
+            }
+
+            if (!itr.hasNext()) {
+                if (key != null) {
+                    // last keyword
+                    assert(typeStart == -1 || typeEnd != -1);
+                    type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd);
+                    if (_ukeywords == null) {
+                        _ukeywords = new HashMap(4);
+                    }
+                    _ukeywords.put(key, type);
+                }
+                break;
+            }
+
+            itr.next();
+        }
+    }
+
+    static class CaseInsensitiveString { // String wrapper whose equals/hashCode ignore ASCII case
+        private String _s;
+
+        CaseInsensitiveString(String s) {
+            _s = s;
+        }
+
+        public String value() {
+            return _s;
+        }
+
+        public int hashCode() {
+            return AsciiUtil.toLowerString(_s).hashCode();
+        }
+
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (!(obj instanceof CaseInsensitiveString)) {
+                return false;
+            }
+            return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value());
+        }
+    }
+
+    static class CaseInsensitiveChar { // char wrapper whose equals/hashCode ignore ASCII case
+        private char _c;
+
+        CaseInsensitiveChar(char c) {
+            _c = c;
+        }
+
+        public char value() {
+            return _c; // original case is preserved; only comparisons fold it
+        }
+
+        public int hashCode() {
+            return AsciiUtil.toLower(_c);
+        }
+
+        public boolean equals(Object obj) {
+            if (this == obj) {
+                return true;
+            }
+            if (!(obj instanceof CaseInsensitiveChar)) {
+                return false;
+            }
+            return AsciiUtil.toLower(_c) == AsciiUtil.toLower(((CaseInsensitiveChar)obj).value()); // fold BOTH sides: symmetric and consistent with hashCode()
+        }
+
+    }
+}
diff --git
a/src/share/classes/sun/util/locale/LanguageTag.java b/src/share/classes/sun/util/locale/LanguageTag.java new file mode 100644 index 000000000..653bb8e05 --- /dev/null +++ b/src/share/classes/sun/util/locale/LanguageTag.java @@ -0,0 +1,726 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2010, International Business Machines Corporation and * + * others. All Rights Reserved. 
*
+ *******************************************************************************
+ */
+package sun.util.locale;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class LanguageTag {
+    //
+    // static fields
+    //
+    public static final String SEP = "-";
+    public static final String PRIVATEUSE = "x";
+    public static final String UNDETERMINED = "und"; // used when no language is available; final so it cannot be reassigned
+    public static final String PRIVUSE_VARIANT_PREFIX = "lvariant";
+
+    //
+    // Language subtag fields
+    //
+    private String _language = "";      // language subtag
+    private String _script = "";        // script subtag
+    private String _region = "";        // region subtag
+    private String _privateuse = "";    // privateuse
+
+    private List<String> _extlangs = Collections.emptyList();   // extlang subtags
+    private List<String> _variants = Collections.emptyList();   // variant subtags
+    private List<String> _extensions = Collections.emptyList(); // extensions
+
+    // Map contains grandfathered tags and their preferred mappings from
+    // http://www.ietf.org/rfc/rfc5646.txt
+    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+        new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
+
+    static {
+        // grandfathered = irregular           ; non-redundant tags registered
+        //               / regular             ; during the RFC 3066 era
+        //
+        // irregular     = "en-GB-oed"         ; irregular tags do not match
+        //               / "i-ami"             ; the 'langtag' production and
+        //               / "i-bnn"             ; would not otherwise be
+        //               / "i-default"         ; considered 'well-formed'
+        //               / "i-enochian"        ; These tags are all valid,
+        //               / "i-hak"             ; but most are deprecated
+        //               / "i-klingon"         ; in favor of more modern
+        //               / "i-lux"             ; subtags or subtag
+        //               / "i-mingo"           ; combination
+        //               / "i-navajo"
+        //               / "i-pwn"
+        //               / "i-tao"
+        //               / "i-tay"
+        //               / "i-tsu"
+        //               / "sgn-BE-FR"
+        //               / "sgn-BE-NL"
+        //               / "sgn-CH-DE"
+        //
+        // regular       = "art-lojban"        ; these tags match the 'langtag'
+        //               / "cel-gaulish"       ; production, but their subtags
+        //               / "no-bok"            ; are not extended language
+        //               / "no-nyn"            ; or variant subtags: their meaning
+        //               / "zh-guoyu"          ; is defined by their registration
+        //               / "zh-hakka"          ; and all of these are deprecated
+        //               / "zh-min"            ; in favor of a more modern
+        //               / "zh-min-nan"        ; subtag or sequence of subtags
+        //               / "zh-xiang"
+
+        final String[][] entries = {
+          //{"tag",         "preferred"},
+            {"art-lojban",  "jbo"},
+            {"cel-gaulish", "xtg-x-cel-gaulish"},   // fallback
+            {"en-GB-oed",   "en-GB-x-oed"},         // fallback
+            {"i-ami",       "ami"},
+            {"i-bnn",       "bnn"},
+            {"i-default",   "en-x-i-default"},      // fallback
+            {"i-enochian",  "und-x-i-enochian"},    // fallback
+            {"i-hak",       "hak"},
+            {"i-klingon",   "tlh"},
+            {"i-lux",       "lb"},
+            {"i-mingo",     "see-x-i-mingo"},       // fallback
+            {"i-navajo",    "nv"},
+            {"i-pwn",       "pwn"},
+            {"i-tao",       "tao"},
+            {"i-tay",       "tay"},
+            {"i-tsu",       "tsu"},
+            {"no-bok",      "nb"},
+            {"no-nyn",      "nn"},
+            {"sgn-BE-FR",   "sfb"},
+            {"sgn-BE-NL",   "vgt"},
+            {"sgn-CH-DE",   "sgg"},
+            {"zh-guoyu",    "cmn"},
+            {"zh-hakka",    "hak"},
+            {"zh-min",      "nan-x-zh-min"},        // fallback
+            {"zh-min-nan",  "nan"},
+            {"zh-xiang",    "hsn"},
+        };
+        for (String[] e : entries) {
+            GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e); // keyed case-insensitively; value keeps the whole {tag, preferred} pair
+        }
+    }
+
+    private LanguageTag() {
+    }
+
+    /*
+     * BNF in RFC5646
+     *
+     * Language-Tag  = langtag             ; normal language tags
+     *               / privateuse          ; private use tag
+     *               / grandfathered       ; grandfathered tags
+     *
+     *
+     * langtag       = language
+     *                 ["-" script]
+     *                 ["-" region]
+     *                 *("-" variant)
+     *                 *("-" extension)
+     *                 ["-" privateuse]
+     *
+     * language      = 2*3ALPHA            ; shortest ISO 639 code
+     *                 ["-" extlang]       ; sometimes followed by
+     *                                     ; extended language subtags
+     *               / 4ALPHA              ; or reserved for future use
+     *               / 5*8ALPHA            ; or registered language subtag
+     *
+     * extlang       = 3ALPHA              ; selected ISO 639 codes
+     *                 *2("-" 3ALPHA)      ; permanently reserved
+     *
+     * script        = 4ALPHA              ; ISO 15924 code
+     *
+     * region        = 2ALPHA              ; ISO 3166-1 code
+     *               / 3DIGIT              ; UN M.49 code
+     *
+     * variant       = 5*8alphanum         ; registered variants
+     *               / (DIGIT 3alphanum)
+     *
+     * extension     = singleton 1*("-" (2*8alphanum))
+     *
+     *                                     ;
Single alphanumerics + * ; "x" reserved for private use + * singleton = DIGIT ; 0 - 9 + * / %x41-57 ; A - W + * / %x59-5A ; Y - Z + * / %x61-77 ; a - w + * / %x79-7A ; y - z + * + * privateuse = "x" 1*("-" (1*8alphanum)) + * + */ + public static LanguageTag parse(String languageTag, ParseStatus sts) { + if (sts == null) { + sts = new ParseStatus(); + } else { + sts.reset(); + } + + StringTokenIterator itr; + + // Check if the tag is grandfathered + String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag)); + if (gfmap != null) { + // use preferred mapping + itr = new StringTokenIterator(gfmap[1], SEP); + } else { + itr = new StringTokenIterator(languageTag, SEP); + } + + LanguageTag tag = new LanguageTag(); + + // langtag must start with either language or privateuse + if (tag.parseLanguage(itr, sts)) { + tag.parseExtlangs(itr, sts); + tag.parseScript(itr, sts); + tag.parseRegion(itr, sts); + tag.parseVariants(itr, sts); + tag.parseExtensions(itr, sts); + } + tag.parsePrivateuse(itr, sts); + + if (!itr.isDone() && !sts.isError()) { + String s = itr.current(); + sts._errorIndex = itr.currentStart(); + if (s.length() == 0) { + sts._errorMsg = "Empty subtag"; + } else { + sts._errorMsg = "Invalid subtag: " + s; + } + } + + return tag; + } + + // + // Language subtag parsers + // + + private boolean parseLanguage(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isLanguage(s)) { + found = true; + _language = s; + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseExtlangs(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + while (!itr.isDone()) { + String s = itr.current(); + if (!isExtlang(s)) { + break; + } + found = true; + if (_extlangs.isEmpty()) { + _extlangs = new ArrayList(3); + } + 
_extlangs.add(s); + sts._parseLength = itr.currentEnd(); + itr.next(); + + if (_extlangs.size() == 3) { + // Maximum 3 extlangs + break; + } + } + + return found; + } + + private boolean parseScript(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isScript(s)) { + found = true; + _script = s; + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseRegion(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isRegion(s)) { + found = true; + _region = s; + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseVariants(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + while (!itr.isDone()) { + String s = itr.current(); + if (!isVariant(s)) { + break; + } + found = true; + if (_variants.isEmpty()) { + _variants = new ArrayList(3); + } + _variants.add(s); + sts._parseLength = itr.currentEnd(); + itr.next(); + } + + return found; + } + + private boolean parseExtensions(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + while (!itr.isDone()) { + String s = itr.current(); + if (isExtensionSingleton(s)) { + int start = itr.currentStart(); + String singleton = s; + StringBuilder sb = new StringBuilder(singleton); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (isExtensionSubtag(s)) { + sb.append(SEP).append(s); + sts._parseLength = itr.currentEnd(); + } else { + break; + } + itr.next(); + } + + if (sts._parseLength <= start) { + sts._errorIndex = start; + sts._errorMsg = "Incomplete extension '" + singleton + "'"; + break; + } + + if (_extensions.size() == 0) { + 
_extensions = new ArrayList(4); + } + _extensions.add(sb.toString()); + found = true; + } else { + break; + } + } + return found; + } + + private boolean parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { + if (itr.isDone() || sts.isError()) { + return false; + } + + boolean found = false; + + String s = itr.current(); + if (isPrivateusePrefix(s)) { + int start = itr.currentStart(); + StringBuilder sb = new StringBuilder(s); + + itr.next(); + while (!itr.isDone()) { + s = itr.current(); + if (!isPrivateuseSubtag(s)) { + break; + } + sb.append(SEP).append(s); + sts._parseLength = itr.currentEnd(); + + itr.next(); + } + + if (sts._parseLength <= start) { + // need at least 1 private subtag + sts._errorIndex = start; + sts._errorMsg = "Incomplete privateuse"; + } else { + _privateuse = sb.toString(); + found = true; + } + } + + return found; + } + + public static LanguageTag parseLocale(BaseLocale baseLocale, LocaleExtensions localeExtensions) { + LanguageTag tag = new LanguageTag(); + + String language = baseLocale.getLanguage(); + String script = baseLocale.getScript(); + String region = baseLocale.getRegion(); + String variant = baseLocale.getVariant(); + + String privuseVar = null; // store ill-formed variant subtags + + if (language.length() == 0 || !isLanguage(language)) { + tag._language = UNDETERMINED; + } else { + // Convert a deprecated language code used by Java to + // a new code + if (language.equals("iw")) { + language = "he"; + } else if (language.equals("ji")) { + language = "yi"; + } else if (language.equals("in")) { + language = "id"; + } + tag._language = language; + } + + if (script.length() > 0 && isScript(script)) { + tag._script = canonicalizeScript(script); + } + + if (region.length() > 0 && isRegion(region)) { + tag._region = canonicalizeRegion(region); + } + + // Special handling for no_NO_NY - use nn_NO for language tag + if (tag._language.equals("no") && tag._region.equals("NO") && variant.equals("NY")) { + tag._language = "nn"; + 
variant = ""; + } + + if (variant.length() > 0) { + List variants = null; + StringTokenIterator varitr = new StringTokenIterator(variant, BaseLocale.SEP); + while (!varitr.isDone()) { + String var = varitr.current(); + if (!isVariant(var)) { + break; + } + if (variants == null) { + variants = new ArrayList(); + } + variants.add(var); // Do not canonicalize! + varitr.next(); + } + if (variants != null) { + tag._variants = variants; + } + if (!varitr.isDone()) { + // ill-formed variant subtags + StringBuilder buf = new StringBuilder(); + while (!varitr.isDone()) { + String prvv = varitr.current(); + if (!isPrivateuseSubtag(prvv)) { + // cannot use private use subtag - truncated + break; + } + if (buf.length() > 0) { + buf.append(SEP); + } + buf.append(prvv); + varitr.next(); + } + if (buf.length() > 0) { + privuseVar = buf.toString(); + } + } + } + + List extensions = null; + String privateuse = null; + + Set locextKeys = localeExtensions.getKeys(); + for (Character locextKey : locextKeys) { + Extension ext = localeExtensions.getExtension(locextKey); + if (isPrivateusePrefixChar(locextKey.charValue())) { + privateuse = ext.getValue(); + } else { + if (extensions == null) { + extensions = new ArrayList(); + } + extensions.add(locextKey.toString() + SEP + ext.getValue()); + } + } + + if (extensions != null) { + tag._extensions = extensions; + } + + // append ill-formed variant subtags to private use + if (privuseVar != null) { + if (privateuse == null) { + privateuse = PRIVUSE_VARIANT_PREFIX + SEP + privuseVar; + } else { + privateuse = privateuse + SEP + PRIVUSE_VARIANT_PREFIX + SEP + privuseVar.replace(BaseLocale.SEP, SEP); + } + } + + if (privateuse != null) { + tag._privateuse = privateuse; + } else if (tag._language.length() == 0) { + // use "und" if neither language nor privateuse is available + tag._language = UNDETERMINED; + } + + return tag; + } + + // + // Getter methods for language subtag fields + // + + public String getLanguage() { + return _language; + } 
+ + public List getExtlangs() { + return Collections.unmodifiableList(_extlangs); + } + + public String getScript() { + return _script; + } + + public String getRegion() { + return _region; + } + + public List getVariants() { + return Collections.unmodifiableList(_variants); + } + + public List getExtensions() { + return Collections.unmodifiableList(_extensions); + } + + public String getPrivateuse() { + return _privateuse; + } + + // + // Language subtag syntax checking methods + // + + public static boolean isLanguage(String s) { + // language = 2*3ALPHA ; shortest ISO 639 code + // ["-" extlang] ; sometimes followed by + // ; extended language subtags + // / 4ALPHA ; or reserved for future use + // / 5*8ALPHA ; or registered language subtag + return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s); + } + + public static boolean isExtlang(String s) { + // extlang = 3ALPHA ; selected ISO 639 codes + // *2("-" 3ALPHA) ; permanently reserved + return (s.length() == 3) && AsciiUtil.isAlphaString(s); + } + + public static boolean isScript(String s) { + // script = 4ALPHA ; ISO 15924 code + return (s.length() == 4) && AsciiUtil.isAlphaString(s); + } + + public static boolean isRegion(String s) { + // region = 2ALPHA ; ISO 3166-1 code + // / 3DIGIT ; UN M.49 code + return ((s.length() == 2) && AsciiUtil.isAlphaString(s)) + || ((s.length() == 3) && AsciiUtil.isNumericString(s)); + } + + public static boolean isVariant(String s) { + // variant = 5*8alphanum ; registered variants + // / (DIGIT 3alphanum) + int len = s.length(); + if (len >= 5 && len <= 8) { + return AsciiUtil.isAlphaNumericString(s); + } + if (len == 4) { + return AsciiUtil.isNumeric(s.charAt(0)) + && AsciiUtil.isAlphaNumeric(s.charAt(1)) + && AsciiUtil.isAlphaNumeric(s.charAt(2)) + && AsciiUtil.isAlphaNumeric(s.charAt(3)); + } + return false; + } + + public static boolean isExtensionSingleton(String s) { + // singleton = DIGIT ; 0 - 9 + // / %x41-57 ; A - W + // / %x59-5A ; Y - Z + // 
/ %x61-77 ; a - w + // / %x79-7A ; y - z + + return (s.length() == 1) + && AsciiUtil.isAlphaString(s) + && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); + } + + public static boolean isExtensionSingletonChar(char c) { + return isExtensionSingleton(String.valueOf(c)); + } + + public static boolean isExtensionSubtag(String s) { + // extension = singleton 1*("-" (2*8alphanum)) + return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isPrivateusePrefix(String s) { + // privateuse = "x" 1*("-" (1*8alphanum)) + return (s.length() == 1) + && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s); + } + + public static boolean isPrivateusePrefixChar(char c) { + return (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, String.valueOf(c))); + } + + public static boolean isPrivateuseSubtag(String s) { + // privateuse = "x" 1*("-" (1*8alphanum)) + return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + // + // Language subtag canonicalization methods + // + + public static String canonicalizeLanguage(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtlang(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeScript(String s) { + return AsciiUtil.toTitleString(s); + } + + public static String canonicalizeRegion(String s) { + return AsciiUtil.toUpperString(s); + } + + public static String canonicalizeVariant(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtension(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtensionSingleton(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizeExtensionSubtag(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizePrivateuse(String s) { + return AsciiUtil.toLowerString(s); + } + + public static String canonicalizePrivateuseSubtag(String s) { + 
return AsciiUtil.toLowerString(s);
+    }
+
+    public String toString() { // rebuilds the tag text: language[-extlang][-script][-region][-variant][-extension][-privateuse]
+        StringBuilder sb = new StringBuilder();
+
+        if (_language.length() > 0) {
+            sb.append(_language);
+
+            for (String extlang : _extlangs) {
+                sb.append(SEP).append(extlang);
+            }
+
+            if (_script.length() > 0) {
+                sb.append(SEP).append(_script);
+            }
+
+            if (_region.length() > 0) {
+                sb.append(SEP).append(_region);
+            }
+
+            for (String variant : _variants) { // was _extlangs: variants were dropped and extlangs emitted twice
+                sb.append(SEP).append(variant);
+            }
+
+            for (String extension : _extensions) {
+                sb.append(SEP).append(extension);
+            }
+        }
+        if (_privateuse.length() > 0) {
+            if (sb.length() > 0) { // separator only when something precedes the private use part
+                sb.append(SEP);
+            }
+            sb.append(_privateuse);
+        }
+
+        return sb.toString();
+    }
+}
diff --git a/src/share/classes/sun/util/locale/LocaleExtensions.java b/src/share/classes/sun/util/locale/LocaleExtensions.java
new file mode 100644
index 000000000..44016382b
--- /dev/null
+++ b/src/share/classes/sun/util/locale/LocaleExtensions.java
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + ******************************************************************************* + * Copyright (C) 2009-2010, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package sun.util.locale; + +import java.util.Collections; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.TreeSet; + +import sun.util.locale.InternalLocaleBuilder.CaseInsensitiveChar; +import sun.util.locale.InternalLocaleBuilder.CaseInsensitiveString; + + +public class LocaleExtensions { + + private SortedMap _map; + private String _id; + + private static final SortedMap EMPTY_MAP = + Collections.unmodifiableSortedMap(new TreeMap()); + + public static final LocaleExtensions EMPTY_EXTENSIONS; + public static final LocaleExtensions CALENDAR_JAPANESE; + public static final LocaleExtensions NUMBER_THAI; + + static { + EMPTY_EXTENSIONS = new LocaleExtensions(); + EMPTY_EXTENSIONS._id = ""; + EMPTY_EXTENSIONS._map = EMPTY_MAP; + + CALENDAR_JAPANESE = new LocaleExtensions(); + CALENDAR_JAPANESE._id = "u-ca-japanese"; + CALENDAR_JAPANESE._map = new TreeMap(); + CALENDAR_JAPANESE._map.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), UnicodeLocaleExtension.CA_JAPANESE); + + NUMBER_THAI = new LocaleExtensions(); + NUMBER_THAI._id = "u-nu-thai"; + NUMBER_THAI._map = new TreeMap(); + NUMBER_THAI._map.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), UnicodeLocaleExtension.NU_THAI); + } + 
+    private LocaleExtensions() {
+    }
+
+    /*
+     * Package-private constructor, only used by InternalLocaleBuilder.
+     */
+    LocaleExtensions(Map<CaseInsensitiveChar, String> extensions,
+            Set<CaseInsensitiveString> uattributes, Map<CaseInsensitiveString, String> ukeywords) {
+        boolean hasExtension = (extensions != null && extensions.size() > 0);
+        boolean hasUAttributes = (uattributes != null && uattributes.size() > 0);
+        boolean hasUKeywords = (ukeywords != null && ukeywords.size() > 0);
+
+        if (!hasExtension && !hasUAttributes && !hasUKeywords) {
+            _map = EMPTY_MAP;
+            _id = "";
+            return;
+        }
+
+        // Build extension map
+        _map = new TreeMap<Character, Extension>();
+        if (hasExtension) {
+            for (Entry<CaseInsensitiveChar, String> ext : extensions.entrySet()) {
+                char key = AsciiUtil.toLower(ext.getKey().value());
+                String value = ext.getValue();
+
+                if (LanguageTag.isPrivateusePrefixChar(key)) {
+                    // we need to exclude special variant in privateuse, e.g. "x-abc-lvariant-DEF"
+                    value = InternalLocaleBuilder.removePrivateuseVariant(value);
+                    if (value == null) {
+                        continue;
+                    }
+                }
+
+                Extension e = new Extension(key, AsciiUtil.toLowerString(value));
+                _map.put(Character.valueOf(key), e);
+            }
+        }
+
+        if (hasUAttributes || hasUKeywords) {
+            TreeSet<String> uaset = null;
+            TreeMap<String, String> ukmap = null;
+
+            if (hasUAttributes) {
+                uaset = new TreeSet<String>();
+                for (CaseInsensitiveString cis : uattributes) {
+                    uaset.add(AsciiUtil.toLowerString(cis.value()));
+                }
+            }
+
+            if (hasUKeywords) {
+                ukmap = new TreeMap<String, String>();
+                for (Entry<CaseInsensitiveString, String> kwd : ukeywords.entrySet()) {
+                    String key = AsciiUtil.toLowerString(kwd.getKey().value());
+                    String type = AsciiUtil.toLowerString(kwd.getValue());
+                    ukmap.put(key, type);
+                }
+            }
+
+            UnicodeLocaleExtension ule = new UnicodeLocaleExtension(uaset, ukmap);
+            _map.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), ule);
+        }
+
+        if (_map.size() == 0) {
+            // this could happen when only privateuse with special variant
+            _map = EMPTY_MAP;
+            _id = "";
+        } else {
+            _id = toID(_map);
+        }
+    }
+
+    public Set<Character> getKeys() {
+        return Collections.unmodifiableSet(_map.keySet());
+    }
+
+    public Extension
getExtension(Character key) { + return _map.get(Character.valueOf(AsciiUtil.toLower(key.charValue()))); + } + + public String getExtensionValue(Character key) { + Extension ext = _map.get(Character.valueOf(AsciiUtil.toLower(key.charValue()))); + if (ext == null) { + return null; + } + return ext.getValue(); + } + + public Set getUnicodeLocaleAttributes() { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return Collections.emptySet(); + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getUnicodeLocaleAttributes(); + } + + public Set getUnicodeLocaleKeys() { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return Collections.emptySet(); + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getUnicodeLocaleKeys(); + } + + public String getUnicodeLocaleType(String unicodeLocaleKey) { + Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); + if (ext == null) { + return null; + } + assert (ext instanceof UnicodeLocaleExtension); + return ((UnicodeLocaleExtension)ext).getUnicodeLocaleType(AsciiUtil.toLowerString(unicodeLocaleKey)); + } + + public boolean isEmpty() { + return _map.isEmpty(); + } + + public static boolean isValidKey(char c) { + return LanguageTag.isExtensionSingletonChar(c) || LanguageTag.isPrivateusePrefixChar(c); + } + + public static boolean isValidUnicodeLocaleKey(String ukey) { + return UnicodeLocaleExtension.isKey(ukey); + } + + private static String toID(SortedMap map) { + StringBuilder buf = new StringBuilder(); + Extension privuse = null; + for (Entry entry : map.entrySet()) { + char singleton = entry.getKey().charValue(); + Extension extension = entry.getValue(); + if (LanguageTag.isPrivateusePrefixChar(singleton)) { + privuse = extension; + } else { + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(extension); 
+ } + } + if (privuse != null) { + if (buf.length() > 0) { + buf.append(LanguageTag.SEP); + } + buf.append(privuse); + } + return buf.toString(); + } + + + public String toString() { + return _id; + } + + public String getID() { + return _id; + } + + public int hashCode() { + return _id.hashCode(); + } + + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof LocaleExtensions)) { + return false; + } + return this._id.equals(((LocaleExtensions)other)._id); + } +} diff --git a/src/share/classes/sun/util/locale/LocaleObjectCache.java b/src/share/classes/sun/util/locale/LocaleObjectCache.java new file mode 100644 index 000000000..e5e6810c0 --- /dev/null +++ b/src/share/classes/sun/util/locale/LocaleObjectCache.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
/**
 * A concurrent cache whose values are held through {@link SoftReference}s.
 *
 * <p>Subclasses supply {@link #createObject(Object)} to build the value for a
 * key and may override {@link #normalizeKey(Object)} to canonicalize keys
 * before they are stored. Entries whose referent has been reclaimed are
 * dropped from the map as their references show up on the internal
 * {@link ReferenceQueue}.
 *
 * @param <K> the key type
 * @param <V> the cached value type
 */
public abstract class LocaleObjectCache<K, V> {
    private final ConcurrentHashMap<K, CacheEntry<K, V>> _map;
    private final ReferenceQueue<V> _queue = new ReferenceQueue<V>();

    /** Creates a cache with capacity 16, load factor 0.75 and concurrency level 16. */
    public LocaleObjectCache() {
        this(16, 0.75f, 16);
    }

    /**
     * Creates a cache; the three arguments are passed straight to the
     * backing {@link ConcurrentHashMap} constructor.
     */
    public LocaleObjectCache(int initialCapacity, float loadFactor, int concurrencyLevel) {
        _map = new ConcurrentHashMap<K, CacheEntry<K, V>>(initialCapacity, loadFactor, concurrencyLevel);
    }

    /**
     * Returns the cached value for {@code key}, creating and caching it on a miss.
     *
     * <p>The first lookup uses {@code key} as given. Only on a miss is the key
     * normalized and a candidate value created; if another thread cached a
     * live value for the normalized key in the meantime, that value wins.
     *
     * @param key the lookup key
     * @return the cached or newly created value, or {@code null} when
     *         {@code normalizeKey} or {@code createObject} returned {@code null}
     */
    public V get(K key) {
        cleanStaleEntries();

        CacheEntry<K, V> entry = _map.get(key);
        V value = (entry == null) ? null : entry.get();
        if (value != null) {
            return value;
        }

        key = normalizeKey(key);
        V newVal = createObject(key);
        if (key == null || newVal == null) {
            // subclass must return non-null key/value object
            return null;
        }

        CacheEntry<K, V> newEntry = new CacheEntry<K, V>(key, newVal, _queue);
        for (;;) {
            cleanStaleEntries();
            entry = _map.putIfAbsent(key, newEntry);
            if (entry == null) {
                return newVal;
            }
            value = entry.get();
            if (value != null) {
                return value;
            }
            // The mapped entry's referent has been cleared but the reference
            // has not reached the queue yet. Swap it out atomically instead of
            // spinning until the collector enqueues it.
            if (_map.replace(key, entry, newEntry)) {
                return newVal;
            }
            // Another thread changed the mapping first; retry.
        }
    }

    /** Removes every entry whose cleared reference has been enqueued. */
    @SuppressWarnings("unchecked")
    private void cleanStaleEntries() {
        CacheEntry<K, V> stale;
        // Only CacheEntry instances are ever registered with _queue, so the cast is safe.
        while ((stale = (CacheEntry<K, V>) _queue.poll()) != null) {
            // Conditional remove: the key may already be mapped to a newer,
            // live entry, which must not be evicted.
            _map.remove(stale.getKey(), stale);
        }
    }

    /**
     * Builds the value to cache for {@code key}; called with the normalized key.
     *
     * @return the new value; returning {@code null} makes {@link #get} return {@code null}
     */
    protected abstract V createObject(K key);

    /**
     * Canonicalizes a key before it is used for creation and storage.
     * The default implementation returns {@code key} unchanged.
     */
    protected K normalizeKey(K key) {
        return key;
    }

    /** Soft reference to a cached value that remembers the key it is stored under. */
    private static class CacheEntry<K, V> extends SoftReference<V> {
        private final K _key;

        CacheEntry(K key, V value, ReferenceQueue<V> queue) {
            super(value, queue);
            _key = key;
        }

        K getKey() {
            return _key;
        }
    }
}
/**
 * Checked exception carrying a message and the index at which a locale
 * syntax error was detected.
 */
public class LocaleSyntaxException extends Exception {

    private static final long serialVersionUID = 1L;

    // Assigned exactly once, by the two-argument constructor.
    private final int _index;

    /**
     * Creates an exception with the given message and an error index of 0.
     *
     * @param msg the detail message
     */
    public LocaleSyntaxException(String msg) {
        this(msg, 0);
    }

    /**
     * Creates an exception with the given message and error index.
     *
     * @param msg the detail message
     * @param errorIndex the index to report from {@link #getErrorIndex()}
     */
    public LocaleSyntaxException(String msg, int errorIndex) {
        super(msg);
        _index = errorIndex;
    }

    /**
     * @return the error index supplied at construction
     *         (0 when the one-argument constructor was used)
     */
    public int getErrorIndex() {
        return _index;
    }
}
/**
 * Mutable record of a parse outcome: how much input was parsed and, when
 * something went wrong, where and why.
 *
 * <p>The class has no setters; the three fields have package access.
 * NOTE(review): presumably they are filled in by the parser in this
 * package; confirm against the callers.
 */
public class ParseStatus {
    int _parseLength;
    int _errorIndex = -1;   // negative means "no error"
    String _errorMsg;

    /** Restores the freshly constructed state: length 0, index -1, no message. */
    public void reset() {
        _errorMsg = null;
        _errorIndex = -1;
        _parseLength = 0;
    }

    /** @return {@code true} when an error index of zero or more is recorded */
    public boolean isError() {
        return _errorIndex >= 0;
    }

    /** @return the recorded parse length */
    public int getParseLength() {
        return _parseLength;
    }

    /** @return the recorded error index, or -1 when none is recorded */
    public int getErrorIndex() {
        return _errorIndex;
    }

    /** @return the recorded error message, or {@code null} when none is recorded */
    public String getErrorMessage() {
        return _errorMsg;
    }
}
/**
 * Walks the tokens of a string that are separated by any one of a set of
 * single-character delimiters.
 *
 * <p>The iterator is always positioned on a "current" token. Adjacent
 * delimiters produce empty tokens, and a trailing delimiter is followed by
 * one final empty token. Instances are mutable and not synchronized.
 */
public class StringTokenIterator {
    private String _text;
    private final String _dlms;

    private String _token;
    private int _start;
    private int _end;
    private boolean _done;

    /**
     * Creates an iterator positioned on the first token of {@code text}.
     *
     * @param text the string to tokenize
     * @param dlms the delimiter characters; each character is a separator
     */
    public StringTokenIterator(String text, String dlms) {
        _text = text;
        _dlms = dlms;
        setStart(0);
    }

    /** Repositions on the first token and returns it. */
    public String first() {
        setStart(0);
        return _token;
    }

    /** @return the current token, or {@code null} once iteration ran off the end */
    public String current() {
        return _token;
    }

    /** @return the index in the text where the current token begins */
    public int currentStart() {
        return _start;
    }

    /** @return the index just past the current token */
    public int currentEnd() {
        return _end;
    }

    /** @return {@code true} once {@link #next()} has been called with no token left */
    public boolean isDone() {
        return _done;
    }

    /**
     * Advances to the following token.
     *
     * @return the new current token, or {@code null} if there was none, in
     *         which case {@link #isDone()} becomes {@code true}
     */
    public String next() {
        if (hasNext()) {
            _start = _end + 1;   // step over the delimiter at _end
            _end = nextDelimiter(_start);
            _token = _text.substring(_start, _end);
        } else {
            _start = _end;
            _token = null;
            _done = true;
        }
        return _token;
    }

    /** @return {@code true} when a delimiter, and so another token, follows the current one */
    public boolean hasNext() {
        return (_end < _text.length());
    }

    /**
     * Repositions so that the current token starts at {@code offset}.
     *
     * @param offset an index in {@code [0, text.length()]}
     * @return this iterator
     * @throws IndexOutOfBoundsException if {@code offset} is outside that
     *         range; the iterator is left unchanged
     */
    public StringTokenIterator setStart(int offset) {
        // Validate before touching any field so a bad offset cannot leave the
        // iterator half-updated.
        if (offset < 0 || offset > _text.length()) {
            throw new IndexOutOfBoundsException(
                    "offset " + offset + " is outside [0, " + _text.length() + "]");
        }
        _start = offset;
        _end = nextDelimiter(_start);
        _token = _text.substring(_start, _end);
        _done = false;
        return this;
    }

    /**
     * Replaces the text and repositions on its first token.
     *
     * @param text the new string to tokenize
     * @return this iterator
     * @throws NullPointerException if {@code text} is {@code null}; the
     *         iterator is left unchanged
     */
    public StringTokenIterator setText(String text) {
        if (text == null) {
            throw new NullPointerException("text");
        }
        _text = text;
        setStart(0);
        return this;
    }

    /** Returns the index of the first delimiter at or after {@code start}, or the text length. */
    private int nextDelimiter(int start) {
        int idx = start;
        while (idx < _text.length() && _dlms.indexOf(_text.charAt(idx)) < 0) {
            idx++;
        }
        return idx;
    }
}
* + ******************************************************************************* + */ +package sun.util.locale; + +import java.util.Collections; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.TreeSet; + +public class UnicodeLocaleExtension extends Extension { + public static final char SINGLETON = 'u'; + + private static final SortedSet EMPTY_SORTED_SET = new TreeSet(); + private static final SortedMap EMPTY_SORTED_MAP = new TreeMap(); + + private SortedSet _attributes = EMPTY_SORTED_SET; + private SortedMap _keywords = EMPTY_SORTED_MAP; + + public static final UnicodeLocaleExtension CA_JAPANESE; + public static final UnicodeLocaleExtension NU_THAI; + + static { + CA_JAPANESE = new UnicodeLocaleExtension(); + CA_JAPANESE._keywords = new TreeMap(); + CA_JAPANESE._keywords.put("ca", "japanese"); + CA_JAPANESE._value = "ca-japanese"; + + NU_THAI = new UnicodeLocaleExtension(); + NU_THAI._keywords = new TreeMap(); + NU_THAI._keywords.put("nu", "thai"); + NU_THAI._value = "nu-thai"; + } + + private UnicodeLocaleExtension() { + super(SINGLETON); + } + + UnicodeLocaleExtension(SortedSet attributes, SortedMap keywords) { + this(); + if (attributes != null && attributes.size() > 0) { + _attributes = attributes; + } + if (keywords != null && keywords.size() > 0) { + _keywords = keywords; + } + + if (_attributes.size() > 0 || _keywords.size() > 0) { + StringBuilder sb = new StringBuilder(); + for (String attribute : _attributes) { + sb.append(LanguageTag.SEP).append(attribute); + } + for (Entry keyword : _keywords.entrySet()) { + String key = keyword.getKey(); + String value = keyword.getValue(); + + sb.append(LanguageTag.SEP).append(key); + if (value.length() > 0) { + sb.append(LanguageTag.SEP).append(value); + } + } + _value = sb.substring(1); // skip leading '-' + } + } + + public Set getUnicodeLocaleAttributes() { + return 
Collections.unmodifiableSet(_attributes); + } + + public Set getUnicodeLocaleKeys() { + return Collections.unmodifiableSet(_keywords.keySet()); + } + + public String getUnicodeLocaleType(String unicodeLocaleKey) { + return _keywords.get(unicodeLocaleKey); + } + + public static boolean isSingletonChar(char c) { + return (SINGLETON == AsciiUtil.toLower(c)); + } + + public static boolean isAttribute(String s) { + // 3*8alphanum + return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isKey(String s) { + // 2alphanum + return (s.length() == 2) && AsciiUtil.isAlphaNumericString(s); + } + + public static boolean isTypeSubtag(String s) { + // 3*8alphanum + return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s); + } +} diff --git a/src/share/classes/sun/util/resources/LocaleData.java b/src/share/classes/sun/util/resources/LocaleData.java index 291a2514a..9686abc8e 100644 --- a/src/share/classes/sun/util/resources/LocaleData.java +++ b/src/share/classes/sun/util/resources/LocaleData.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2005, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -177,6 +177,11 @@ public class LocaleData { for (Iterator l = candidates.iterator(); l.hasNext(); ) { String lstr = l.next().toString(); + /* truncate extra segment introduced by Java 7 for script and extensions */ + int idx = lstr.indexOf("_#"); + if (idx >= 0) { + lstr = lstr.substring(0, idx); + } /* Every locale string in the locale string list returned from the above getSupportedLocaleString is enclosed within two white spaces so that we could check some locale diff --git a/src/share/classes/sun/util/resources/LocaleNames.properties b/src/share/classes/sun/util/resources/LocaleNames.properties index 2dcc3b04c..5aae361c3 100644 --- a/src/share/classes/sun/util/resources/LocaleNames.properties +++ b/src/share/classes/sun/util/resources/LocaleNames.properties @@ -228,6 +228,157 @@ za=Zhuang zh=Chinese zu=Zulu +# script names +# key is ISO 15924 script code + +Arab=Arabic +Armi=Imperial Aramaic +Armn=Armenian +Avst=Avestan +Bali=Balinese +Bamu=Bamum +Bass=Bassa Vah +Batk=Batak +Beng=Bengali +Blis=Blissymbols +Bopo=Bopomofo +Brah=Brahmi +Brai=Braille +Bugi=Buginese +Buhd=Buhid +Cakm=Chakma +Cans=Unified Canadian Aboriginal Syllabics +Cari=Carian +Cham=Cham +Cher=Cherokee +Cirt=Cirth +Copt=Coptic +Cprt=Cypriot +Cyrl=Cyrillic +Cyrs=Old Church Slavonic Cyrillic +Deva=Devanagari +Dsrt=Deseret +Dupl=Duployan shorthand +Egyd=Egyptian demotic +Egyh=Egyptian hieratic +Egyp=Egyptian hieroglyphs +Elba=Elbasan +Ethi=Ethiopic +Geok=Khutsuri +Geor=Georgian +Glag=Glagolitic +Goth=Gothic +Gran=Grantha +Grek=Greek +Gujr=Gujarati +Guru=Gurmukhi +Hang=Hangul +Hani=Han +Hano=Hanunoo +Hans=Simplified Han +Hant=Traditional Han +Hebr=Hebrew +Hira=Hiragana +Hmng=Pahawh Hmong +Hrkt=Katakana or Hiragana +Hung=Old Hungarian +Inds=Indus +Ital=Old Italic +Java=Javanese +Jpan=Japanese +Kali=Kayah Li +Kana=Katakana +Khar=Kharoshthi +Khmr=Khmer +Knda=Kannada +Kore=Korean +Kpel=Kpelle +Kthi=Kaithi +Lana=Tai Tham +Laoo=Lao 
+Latf=Fraktur Latin +Latg=Gaelic Latin +Latn=Latin +Lepc=Lepcha +Limb=Limbu +Lina=Linear A +Linb=Linear B +Lisu=Lisu +Loma=Loma +Lyci=Lycian +Lydi=Lydian +Mand=Mandaic +Mani=Manichaean +Maya=Mayan hieroglyphs +Mend=Mende +Merc=Meroitic Cursive +Mero=Meroitic +Mlym=Malayalam +Mong=Mongolian +Moon=Moon +Mtei=Meitei Mayek +Mymr=Myanmar +Narb=Old North Arabian +Nbat=Nabataean +Nkgb=Nakhi Geba +Nkoo=N\u2019Ko +Ogam=Ogham +Olck=Ol Chiki +Orkh=Orkhon +Orya=Oriya +Osma=Osmanya +Palm=Palmyrene +Perm=Old Permic +Phag=Phags-pa +Phli=Inscriptional Pahlavi +Phlp=Psalter Pahlavi +Phlv=Book Pahlavi +Phnx=Phoenician +Plrd=Miao +Prti=Inscriptional Parthian +Rjng=Rejang +Roro=Rongorongo +Runr=Runic +Samr=Samaritan +Sara=Sarati +Sarb=Old South Arabian +Saur=Saurashtra +Sgnw=SignWriting +Shaw=Shavian +Sind=Sindhi +Sinh=Sinhala +Sund=Sundanese +Sylo=Syloti Nagri +Syrc=Syriac +Syre=Estrangelo Syriac +Syrj=Western Syriac +Syrn=Eastern Syriac +Tagb=Tagbanwa +Tale=Tai Le +Talu=New Tai Lue +Taml=Tamil +Tavt=Tai Viet +Telu=Telugu +Teng=Tengwar +Tfng=Tifinagh +Tglg=Tagalog +Thaa=Thaana +Thai=Thai +Tibt=Tibetan +Ugar=Ugaritic +Vaii=Vai +Visp=Visible Speech +Wara=Warang Citi +Xpeo=Old Persian +Xsux=Sumero-Akkadian Cuneiform +Yiii=Yi +Zinh=Inherited script +Zmth=Mathematical Notation +Zsym=Symbols +Zxxx=Unwritten +Zyyy=Undetermined script +Zzzz=Uncoded script + # country names # key is ISO 3166 country code @@ -479,6 +630,39 @@ ZA=South Africa ZM=Zambia ZW=Zimbabwe +# territory names +# key is UN M.49 country and area code + +001=World +002=Africa +003=North America +005=South America +009=Oceania +011=Western Africa +013=Central America +014=Eastern Africa +015=Northern Africa +017=Middle Africa +018=Southern Africa +019=Americas +021=Northern America +029=Caribbean +030=Eastern Asia +034=Southern Asia +035=South-Eastern Asia +039=Southern Europe +053=Australia and New Zealand +054=Melanesia +057=Micronesian Region +061=Polynesia +142=Asia +143=Central Asia +145=Western Asia +150=Europe 
+151=Eastern Europe +154=Northern Europe +155=Western Europe +419=Latin America and the Caribbean # variant names # key is %%variant diff --git a/src/share/classes/sun/util/resources/LocaleNames_zh.properties b/src/share/classes/sun/util/resources/LocaleNames_zh.properties index ae6c3edbd..eb58d8dd1 100644 --- a/src/share/classes/sun/util/resources/LocaleNames_zh.properties +++ b/src/share/classes/sun/util/resources/LocaleNames_zh.properties @@ -227,6 +227,12 @@ za=\u85cf\u6587 zh=\u4e2d\u6587 zu=\u7956\u9c81\u6587 +# script names +# key is ISO 15924 script code + +Hans=\u7b80\u4f53\u4e2d\u6587 +Hant=\u7e41\u4f53\u4e2d\u6587 + # country names # key is ISO 3166 country code diff --git a/src/share/classes/sun/util/resources/LocaleNames_zh_TW.properties b/src/share/classes/sun/util/resources/LocaleNames_zh_TW.properties index 34f797919..dae7ba5f3 100644 --- a/src/share/classes/sun/util/resources/LocaleNames_zh_TW.properties +++ b/src/share/classes/sun/util/resources/LocaleNames_zh_TW.properties @@ -227,6 +227,12 @@ za=\u58ef\u6587 zh=\u4e2d\u6587 zu=\u7956\u9b6f\u6587 +# script names +# key is ISO 15924 script code + +Hans=\u7c21\u9ad4\u4e2d\u6587 +Hant=\u7e41\u9ad4\u4e2d\u6587 + # country names # key is ISO 3166 country code diff --git a/test/java/util/Locale/LocaleEnhanceTest.java b/test/java/util/Locale/LocaleEnhanceTest.java new file mode 100644 index 000000000..a4416a478 --- /dev/null +++ b/test/java/util/Locale/LocaleEnhanceTest.java @@ -0,0 +1,1293 @@ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.IllformedLocaleException; +import java.util.List; +import java.util.Locale; +import java.util.Locale.Builder; +import java.util.Set; + +/** + * @test + * @bug 6875847 + * @summary test API changes to Locale + */ +public class LocaleEnhanceTest extends LocaleTestFmwk { + + public static void main(String[] args) throws Exception { + List argList = new ArrayList(); + argList.addAll(Arrays.asList(args)); + argList.add("-nothrow"); + new LocaleEnhanceTest().run(argList.toArray(new String[argList.size()])); + } + + public LocaleEnhanceTest() { + } + + /// + /// Generic sanity tests + /// + + /** A canonical language code. */ + private static final String l = "en"; + + /** A canonical script code.. */ + private static final String s = "Latn"; + + /** A canonical region code. */ + private static final String c = "US"; + + /** A canonical variant code. 
*/ + private static final String v = "NewYork"; + + /** + * Ensure that Builder builds locales that have the expected + * tag and java6 ID. Note the odd cases for the ID. + */ + public void testCreateLocaleCanonicalValid() { + String[] valids = { + "en-Latn-US-NewYork", "en_US_NewYork_#Latn", + "en-Latn-US", "en_US_#Latn", + "en-Latn-NewYork", "en__NewYork_#Latn", // double underscore + "en-Latn", "en_#Latn", + "en-US-NewYork", "en_US_NewYork", + "en-US", "en_US", + "en-NewYork", "en__NewYork", // double underscore + "en", "en", + "und-Latn-US-NewYork", "_US_NewYork_#Latn", + "und-Latn-US", "_US_#Latn", + "und-Latn-NewYork", "", // variant only not supported + "und-Latn", "", + "und-US-NewYork", "_US_NewYork", + "und-US", "_US", + "und-NewYork", "", // variant only not supported + "und", "" + }; + + Builder builder = new Builder(); + + for (int i = 0; i < valids.length; i += 2) { + String tag = valids[i]; + String id = valids[i+1]; + + String idl = (i & 16) == 0 ? l : ""; + String ids = (i & 8) == 0 ? s : ""; + String idc = (i & 4) == 0 ? c : ""; + String idv = (i & 2) == 0 ? v : ""; + + String msg = String.valueOf(i/2) + ": '" + tag + "' "; + + try { + Locale l = builder + .setLanguage(idl) + .setScript(ids) + .setRegion(idc) + .setVariant(idv) + .build(); + assertEquals(msg + "language", idl, l.getLanguage()); + assertEquals(msg + "script", ids, l.getScript()); + assertEquals(msg + "country", idc, l.getCountry()); + assertEquals(msg + "variant", idv, l.getVariant()); + assertEquals(msg + "tag", tag, l.toLanguageTag()); + assertEquals(msg + "id", id, l.toString()); + } + catch (IllegalArgumentException e) { + errln(msg + e.getMessage()); + } + } + } + + /** + * Test that locale construction works with 'multiple variants'. + *

    + * The string "Newer__Yorker" is treated as three subtags, + * "Newer", "", and "Yorker", and concatenated into one + * subtag by omitting empty subtags and joining the remainder + * with underscores. So the resulting variant tag is "Newer_Yorker". + * Note that 'New' and 'York' are invalid BCP47 variant subtags + * because they are too short. + */ + public void testCreateLocaleMultipleVariants() { + + String[] valids = { + "en-Latn-US-Newer-Yorker", "en_US_Newer_Yorker_#Latn", + "en-Latn-Newer-Yorker", "en__Newer_Yorker_#Latn", + "en-US-Newer-Yorker", "en_US_Newer_Yorker", + "en-Newer-Yorker", "en__Newer_Yorker", + "und-Latn-US-Newer-Yorker", "_US_Newer_Yorker_#Latn", + "und-Latn-Newer-Yorker", "", + "und-US-Newer-Yorker", "_US_Newer_Yorker", + "und-Newer-Yorker", "", + }; + + Builder builder = new Builder(); // lenient variant + + final String idv = "Newer_Yorker"; + for (int i = 0; i < valids.length; i += 2) { + String tag = valids[i]; + String id = valids[i+1]; + + String idl = (i & 8) == 0 ? l : ""; + String ids = (i & 4) == 0 ? s : ""; + String idc = (i & 2) == 0 ? c : ""; + + String msg = String.valueOf(i/2) + ": " + tag + " "; + try { + Locale l = builder + .setLanguage(idl) + .setScript(ids) + .setRegion(idc) + .setVariant(idv) + .build(); + + assertEquals(msg + " language", idl, l.getLanguage()); + assertEquals(msg + " script", ids, l.getScript()); + assertEquals(msg + " country", idc, l.getCountry()); + assertEquals(msg + " variant", idv, l.getVariant()); + + assertEquals(msg + "tag", tag, l.toLanguageTag()); + assertEquals(msg + "id", id, l.toString()); + } + catch (IllegalArgumentException e) { + errln(msg + e.getMessage()); + } + } + } + + /** + * Ensure that all these invalid formats are not recognized by + * forLanguageTag. 
+ */ + public void testCreateLocaleCanonicalInvalidSeparator() { + String[] invalids = { + // trailing separator + "en_Latn_US_NewYork_", + "en_Latn_US_", + "en_Latn_", + "en_", + "_", + + // double separator + "en_Latn_US__NewYork", + "_Latn_US__NewYork", + "en_US__NewYork", + "_US__NewYork", + + // are these OK? + // "en_Latn__US_NewYork", // variant is 'US_NewYork' + // "_Latn__US_NewYork", // variant is 'US_NewYork' + // "en__Latn_US_NewYork", // variant is 'Latn_US_NewYork' + // "en__US_NewYork", // variant is 'US_NewYork' + + // double separator without language or script + "__US", + "__NewYork", + + // triple separator anywhere except within variant + "en___NewYork", + "en_Latn___NewYork", + "_Latn___NewYork", + "___NewYork", + }; + + for (int i = 0; i < invalids.length; ++i) { + String id = invalids[i]; + Locale l = Locale.forLanguageTag(id); + assertEquals(id, "und", l.toLanguageTag()); + } + } + + /** + * Ensure that all current locale ids parse. Use DateFormat as a proxy + * for all current locale ids. + */ + public void testCurrentLocales() { + Locale[] locales = java.text.DateFormat.getAvailableLocales(); + Builder builder = new Builder(); + + for (Locale target : locales) { + String tag = target.toLanguageTag(); + + // the tag recreates the original locale, + // except no_NO_NY + Locale tagResult = Locale.forLanguageTag(tag); + if (!target.getVariant().equals("NY")) { + assertEquals("tagResult", target, tagResult); + } + + // the builder also recreates the original locale, + // except ja_JP_JP, th_TH_TH and no_NO_NY + Locale builderResult = builder.setLocale(target).build(); + if (target.getVariant().length() != 2) { + assertEquals("builderResult", target, builderResult); + } + } + } + + /** + * Ensure that all icu locale ids parse. 
+ */ + public void testIcuLocales() throws Exception { + BufferedReader br = new BufferedReader( + new InputStreamReader( + LocaleEnhanceTest.class.getResourceAsStream("icuLocales.txt"), + "UTF-8")); + String id = null; + while (null != (id = br.readLine())) { + Locale result = Locale.forLanguageTag(id); + assertEquals("ulocale", id, result.toLanguageTag()); + } + } + + /// + /// Compatibility tests + /// + + public void testConstructor() { + // all the old weirdness still holds, no new weirdness + String[][] tests = { + // language to lower case, region to upper, variant unchanged + // short + { "X", "y", "z", "x", "Y" }, + // long + { "xXxXxXxXxXxX", "yYyYyYyYyYyYyYyY", "zZzZzZzZzZzZzZzZ", + "xxxxxxxxxxxx", "YYYYYYYYYYYYYYYY" }, + // mapped language ids + { "he", "IW", "", "iw" }, + { "iw", "IW", "", "iw" }, + { "yi", "DE", "", "ji" }, + { "ji", "DE", "", "ji" }, + { "id", "ID", "", "in" }, + { "in", "ID", "", "in" }, + // special variants + { "ja", "JP", "JP" }, + { "th", "TH", "TH" }, + { "no", "NO", "NY" }, + { "no", "NO", "NY" }, + // no canonicalization of 3-letter language codes + { "eng", "US", "" } + }; + for (int i = 0; i < tests.length; ++ i) { + String[] test = tests[i]; + String id = String.valueOf(i); + Locale locale = new Locale(test[0], test[1], test[2]); + assertEquals(id + " lang", test.length > 3 ? test[3] : test[0], locale.getLanguage()); + assertEquals(id + " region", test.length > 4 ? test[4] : test[1], locale.getCountry()); + assertEquals(id + " variant", test.length > 5 ? test[5] : test[2], locale.getVariant()); + } + } + + /// + /// Locale API tests. 
+ /// + + public void testGetScript() { + // forLanguageTag normalizes case + Locale locale = Locale.forLanguageTag("und-latn"); + assertEquals("forLanguageTag", "Latn", locale.getScript()); + + // Builder normalizes case + locale = new Builder().setScript("LATN").build(); + assertEquals("builder", "Latn", locale.getScript()); + + // empty string is returned, not null, if there is no script + locale = Locale.forLanguageTag("und"); + assertEquals("script is empty string", "", locale.getScript()); + } + + public void testGetExtension() { + // forLanguageTag does NOT normalize to hyphen + Locale locale = Locale.forLanguageTag("und-a-some_ex-tension"); + assertEquals("some_ex-tension", null, locale.getExtension('a')); + + // regular extension + locale = new Builder().setExtension('a', "some-ex-tension").build(); + assertEquals("builder", "some-ex-tension", locale.getExtension('a')); + + // returns null if extension is not present + assertEquals("empty b", null, locale.getExtension('b')); + + // throws exception if extension tag is illegal + new ExpectIAE() { public void call() { Locale.forLanguageTag("").getExtension('\uD800'); }}; + + // 'x' is not an extension, it's a private use tag, but it's accessed through this API + locale = Locale.forLanguageTag("x-y-z-blork"); + assertEquals("x", "y-z-blork", locale.getExtension('x')); + } + + public void testGetExtensionKeys() { + Locale locale = Locale.forLanguageTag("und-a-xx-yy-b-zz-ww"); + Set result = locale.getExtensionKeys(); + assertEquals("result size", 2, result.size()); + assertTrue("'a','b'", result.contains('a') && result.contains('b')); + + // result is not mutable + try { + result.add('x'); + errln("expected exception on add to extension key set"); + } + catch (UnsupportedOperationException e) { + // ok + } + + // returns empty set if no extensions + locale = Locale.forLanguageTag("und"); + assertTrue("empty result", locale.getExtensionKeys().isEmpty()); + } + + public void testGetUnicodeLocaleAttributes() { + 
Locale locale = Locale.forLanguageTag("en-US-u-abc-def"); + Set attributes = locale.getUnicodeLocaleAttributes(); + assertEquals("number of attributes", 2, attributes.size()); + assertTrue("attribute abc", attributes.contains("abc")); + assertTrue("attribute def", attributes.contains("def")); + + locale = Locale.forLanguageTag("en-US-u-ca-gregory"); + attributes = locale.getUnicodeLocaleAttributes(); + assertTrue("empty attributes", attributes.isEmpty()); + } + + public void testGetUnicodeLocaleType() { + Locale locale = Locale.forLanguageTag("und-u-co-japanese-nu-thai"); + assertEquals("collation", "japanese", locale.getUnicodeLocaleType("co")); + assertEquals("numbers", "thai", locale.getUnicodeLocaleType("nu")); + + // Unicode locale extension key is case insensitive + assertEquals("key case", "japanese", locale.getUnicodeLocaleType("Co")); + + // if keyword is not present, returns null + assertEquals("locale keyword not present", null, locale.getUnicodeLocaleType("xx")); + + // if no locale extension is set, returns null + locale = Locale.forLanguageTag("und"); + assertEquals("locale extension not present", null, locale.getUnicodeLocaleType("co")); + + // typeless keyword + locale = Locale.forLanguageTag("und-u-kn"); + assertEquals("typeless keyword", "", locale.getUnicodeLocaleType("kn")); + + // invalid keys throw exception + new ExpectIAE() { public void call() { Locale.forLanguageTag("").getUnicodeLocaleType("q"); }}; + new ExpectIAE() { public void call() { Locale.forLanguageTag("").getUnicodeLocaleType("abcdefghi"); }}; + + // null argument throws exception + new ExpectNPE() { public void call() { Locale.forLanguageTag("").getUnicodeLocaleType(null); }}; + } + + public void testGetUnicodeLocaleKeys() { + Locale locale = Locale.forLanguageTag("und-u-co-japanese-nu-thai"); + Set result = locale.getUnicodeLocaleKeys(); + assertEquals("two keys", 2, result.size()); + assertTrue("co and nu", result.contains("co") && result.contains("nu")); + + // result is not 
modifiable + try { + result.add("frobozz"); + errln("expected exception when add to locale key set"); + } + catch (UnsupportedOperationException e) { + // ok + } + } + + public void testPrivateUseExtension() { + Locale locale = Locale.forLanguageTag("x-y-x-blork-"); + assertEquals("blork", "y-x-blork", locale.getExtension(Locale.PRIVATE_USE_EXTENSION)); + + locale = Locale.forLanguageTag("und"); + assertEquals("no privateuse", null, locale.getExtension(Locale.PRIVATE_USE_EXTENSION)); + } + + public void testToLanguageTag() { + // lots of normalization to test here + // test locales created using the constructor + String[][] tests = { + // empty locale canonicalizes to 'und' + { "", "", "", "und" }, + // variant alone is not a valid Locale, but has a valid language tag + { "", "", "NewYork", "und-NewYork" }, + // standard valid locales + { "", "Us", "", "und-US" }, + { "", "US", "NewYork", "und-US-NewYork" }, + { "EN", "", "", "en" }, + { "EN", "", "NewYork", "en-NewYork" }, + { "EN", "US", "", "en-US" }, + { "EN", "US", "NewYork", "en-US-NewYork" }, + // underscore in variant will be emitted as multiple variant subtags + { "en", "US", "Newer_Yorker", "en-US-Newer-Yorker" }, + // invalid variant subtags are appended as private use + { "en", "US", "new_yorker", "en-US-x-lvariant-new-yorker" }, + // the first invalid variant subtags and following variant subtags are appended as private use + { "en", "US", "Windows_XP_Home", "en-US-Windows-x-lvariant-XP-Home" }, + // too long variant and following variant subtags disappear + { "en", "US", "WindowsVista_SP2", "en-US" }, + // invalid region subtag disappears + { "en", "USA", "", "en" }, + // invalid language tag disappears + { "e", "US", "", "und-US" }, + // three-letter language tags are not canonicalized + { "Eng", "", "", "eng" }, + // legacy languages canonicalize to modern equivalents + { "he", "IW", "", "he-IW" }, + { "iw", "IW", "", "he-IW" }, + { "yi", "DE", "", "yi-DE" }, + { "ji", "DE", "", "yi-DE" }, + { "id", 
"ID", "", "id-ID" }, + { "in", "ID", "", "id-ID" }, + // special values are converted on output + { "ja", "JP", "JP", "ja-JP-u-ca-japanese-x-lvariant-JP" }, + { "th", "TH", "TH", "th-TH-u-nu-thai-x-lvariant-TH" }, + { "no", "NO", "NY", "nn-NO" } + }; + for (int i = 0; i < tests.length; ++i) { + String[] test = tests[i]; + Locale locale = new Locale(test[0], test[1], test[2]); + assertEquals("case " + i, test[3], locale.toLanguageTag()); + } + } + + public void testForLanguageTag() { + // forLanguageTag implements the 'Language-Tag' production of + // BCP47, so it handles private use and grandfathered tags, + // unlike locale builder. Tags listed below (except for the + // sample private use tags) come from 4646bis Feb 29, 2009. + + String[][] tests = { + // private use tags only + { "x-abc", "und-x-abc" }, + { "x-a-b-c", "und-x-a-b-c" }, + { "x-a-12345678", "und-x-a-12345678" }, + + // grandfathered tags with preferred mappings + { "i-ami", "ami" }, + { "i-bnn", "bnn" }, + { "i-hak", "hak" }, + { "i-klingon", "tlh" }, + { "i-lux", "lb" }, // two-letter tag + { "i-navajo", "nv" }, // two-letter tag + { "i-pwn", "pwn" }, + { "i-tao", "tao" }, + { "i-tay", "tay" }, + { "i-tsu", "tsu" }, + { "art-lojban", "jbo" }, + { "no-bok", "nb" }, + { "no-nyn", "nn" }, + { "sgn-BE-FR", "sfb" }, + { "sgn-BE-NL", "vgt" }, + { "sgn-CH-DE", "sgg" }, + { "zh-guoyu", "cmn" }, + { "zh-hakka", "hak" }, + { "zh-min-nan", "nan" }, + { "zh-xiang", "hsn" }, + + // grandfathered irregular tags, no preferred mappings, drop illegal fields + // from end. 
If no subtag is mappable, fallback to 'und' + { "i-default", "en-x-i-default" }, + { "i-enochian", "und-x-i-enochian" }, + { "i-mingo", "see-x-i-mingo" }, + { "en-GB-oed", "en-GB-x-oed" }, + { "zh-min", "nan-x-zh-min" }, + { "cel-gaulish", "xtg-x-cel-gaulish" }, + }; + for (int i = 0; i < tests.length; ++i) { + String[] test = tests[i]; + Locale locale = Locale.forLanguageTag(test[0]); + assertEquals("grandfathered case " + i, test[1], locale.toLanguageTag()); + } + + // forLanguageTag ignores everything past the first place it encounters + // a syntax error + tests = new String[][] { + { "valid", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x-y-12345678-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x-y-12345678-z" }, + { "segment of private use tag too long", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x-y-123456789-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x-y" }, + { "segment of private use tag is empty", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x-y--12345678-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x-y" }, + { "first segment of private use tag is empty", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-x--y-12345678-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def" }, + { "illegal extension tag", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def-\uD800-y-12345678-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-def" }, + { "locale subtag with no value", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-x-y-12345678-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-bb-x-y-12345678-z" }, + { "locale key subtag invalid", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc-123456789-def-x-y-12345678-z", + "en-US-Newer-Yorker-a-bb-cc-dd-u-aa-abc" }, + // locale key subtag invalid in earlier position, all following subtags + // dropped (and so the locale extension dropped as well) + { "locale key subtag invalid in earlier position", + "en-US-Newer-Yorker-a-bb-cc-dd-u-123456789-abc-bb-def-x-y-12345678-z", + 
"en-US-Newer-Yorker-a-bb-cc-dd" }, + }; + for (int i = 0; i < tests.length; ++i) { + String[] test = tests[i]; + String msg = "syntax error case " + i + " " + test[0]; + try { + Locale locale = Locale.forLanguageTag(test[1]); + assertEquals(msg, test[2], locale.toLanguageTag()); + } + catch (IllegalArgumentException e) { + errln(msg + " caught exception: " + e); + } + } + + // duplicated extension are just ignored + Locale locale = Locale.forLanguageTag("und-d-aa-00-bb-01-D-AA-10-cc-11-c-1234"); + assertEquals("extension", "aa-00-bb-01", locale.getExtension('d')); + assertEquals("extension c", "1234", locale.getExtension('c')); + + // redundant Unicode locale keys in an extension are ignored + locale = Locale.forLanguageTag("und-u-aa-000-bb-001-bB-002-cc-003-c-1234"); + assertEquals("Unicode keywords", "aa-000-bb-001-cc-003", locale.getExtension(Locale.UNICODE_LOCALE_EXTENSION)); + assertEquals("Duplicated Unicode locake key followed by an extension", "1234", locale.getExtension('c')); + } + + public void testGetDisplayScript() { + Locale latnLocale = Locale.forLanguageTag("und-latn"); + Locale hansLocale = Locale.forLanguageTag("und-hans"); + + Locale oldLocale = Locale.getDefault(); + + Locale.setDefault(Locale.US); + assertEquals("latn US", "Latin", latnLocale.getDisplayScript()); + assertEquals("hans US", "Simplified Han", hansLocale.getDisplayScript()); + + // note, no localization data yet other than US + // this should break when we have localization data for DE + Locale.setDefault(Locale.GERMANY); + assertEquals("latn DE", "Latin", latnLocale.getDisplayScript()); + assertEquals("hans DE", "Simplified Han", hansLocale.getDisplayScript()); + + Locale.setDefault(oldLocale); + } + + public void testGetDisplayScriptWithLocale() { + Locale latnLocale = Locale.forLanguageTag("und-latn"); + Locale hansLocale = Locale.forLanguageTag("und-hans"); + + assertEquals("latn US", "Latin", latnLocale.getDisplayScript(Locale.US)); + assertEquals("hans US", "Simplified Han", 
hansLocale.getDisplayScript(Locale.US)); + + // note, no localization data yet other than US + // this should break when we have localization data for DE + assertEquals("latn DE", "Latin", latnLocale.getDisplayScript(Locale.GERMANY)); + assertEquals("hans DE", "Simplified Han", hansLocale.getDisplayScript(Locale.GERMANY)); + } + + /// + /// Builder tests + /// + + public void testBuilderSetLocale() { + Builder builder = new Builder(); + Builder lenientBuilder = new Builder(); + + String languageTag = "en-Latn-US-NewYork-a-bb-ccc-u-co-japanese-x-y-z"; + String target = "en-Latn-US-NewYork-a-bb-ccc-u-co-japanese-x-y-z"; + + Locale locale = Locale.forLanguageTag(languageTag); + Locale result = lenientBuilder + .setLocale(locale) + .build(); + assertEquals("long tag", target, result.toLanguageTag()); + assertEquals("long tag", locale, result); + + // null is illegal + new BuilderNPE("locale") { + public void call() { b.setLocale(null); } + }; + + // builder canonicalizes the three legacy locales: + // ja_JP_JP, th_TH_TH, no_NO_NY. 
+ locale = builder.setLocale(new Locale("ja", "JP", "JP")).build(); + assertEquals("ja_JP_JP languagetag", "ja-JP-u-ca-japanese", locale.toLanguageTag()); + assertEquals("ja_JP_JP variant", "", locale.getVariant()); + + locale = builder.setLocale(new Locale("th", "TH", "TH")).build(); + assertEquals("th_TH_TH languagetag", "th-TH-u-nu-thai", locale.toLanguageTag()); + assertEquals("th_TH_TH variant", "", locale.getVariant()); + + locale = builder.setLocale(new Locale("no", "NO", "NY")).build(); + assertEquals("no_NO_NY languagetag", "nn-NO", locale.toLanguageTag()); + assertEquals("no_NO_NY language", "nn", locale.getLanguage()); + assertEquals("no_NO_NY variant", "", locale.getVariant()); + + // non-canonical, non-legacy locales are invalid + new BuilderILE("123_4567_89") { + public void call() { + b.setLocale(new Locale("123", "4567", "89")); + } + }; + } + + public void testBuilderSetLanguageTag() { + String source = "eN-LaTn-Us-NewYork-A-Xx-B-Yy-X-1-2-3"; + String target = "en-Latn-US-NewYork-a-xx-b-yy-x-1-2-3"; + Builder builder = new Builder(); + String result = builder + .setLanguageTag(source) + .build() + .toLanguageTag(); + assertEquals("language", target, result); + + // redundant extensions cause a failure + new BuilderILE() { public void call() { b.setLanguageTag("und-a-xx-yy-b-ww-A-00-11-c-vv"); }}; + + // redundant Unicode locale extension keys within an Unicode locale extension cause a failure + new BuilderILE() { public void call() { b.setLanguageTag("und-u-nu-thai-NU-chinese-xx-1234"); }}; + } + + public void testBuilderSetLanguage() { + // language is normalized to lower case + String source = "eN"; + String target = "en"; + String defaulted = ""; + Builder builder = new Builder(); + String result = builder + .setLanguage(source) + .build() + .getLanguage(); + assertEquals("en", target, result); + + // setting with empty resets + result = builder + .setLanguage(target) + .setLanguage("") + .build() + .getLanguage(); + assertEquals("empty", 
defaulted, result); + + // setting with null resets too + result = builder + .setLanguage(target) + .setLanguage(null) + .build() + .getLanguage(); + assertEquals("null", defaulted, result); + + // language codes must be 2-8 alpha + // for forwards compatibility, 4-alpha and 5-8 alpha (registered) + // languages are accepted syntax + new BuilderILE("q", "abcdefghi", "13") { public void call() { b.setLanguage(arg); }}; + + // language code validation is NOT performed, any 2-8-alpha passes + assertNotNull("2alpha", builder.setLanguage("zz").build()); + assertNotNull("8alpha", builder.setLanguage("abcdefgh").build()); + + // three-letter language codes are NOT canonicalized to two-letter + result = builder + .setLanguage("eng") + .build() + .getLanguage(); + assertEquals("eng", "eng", result); + } + + public void testBuilderSetScript() { + // script is normalized to title case + String source = "lAtN"; + String target = "Latn"; + String defaulted = ""; + Builder builder = new Builder(); + String result = builder + .setScript(source) + .build() + .getScript(); + assertEquals("script", target, result); + + // setting with empty resets + result = builder + .setScript(target) + .setScript("") + .build() + .getScript(); + assertEquals("empty", defaulted, result); + + // setting with null also resets + result = builder + .setScript(target) + .setScript(null) + .build() + .getScript(); + assertEquals("null", defaulted, result); + + // ill-formed script codes throw IAE + // must be 4alpha + new BuilderILE("abc", "abcde", "l3tn") { public void call() { b.setScript(arg); }}; + + // script code validation is NOT performed, any 4-alpha passes + assertEquals("4alpha", "Wxyz", builder.setScript("wxyz").build().getScript()); + } + + public void testBuilderSetRegion() { + // region is normalized to upper case + String source = "uS"; + String target = "US"; + String defaulted = ""; + Builder builder = new Builder(); + String result = builder + .setRegion(source) + .build() + 
.getCountry(); + assertEquals("us", target, result); + + // setting with empty resets + result = builder + .setRegion(target) + .setRegion("") + .build() + .getCountry(); + assertEquals("empty", defaulted, result); + + // setting with null also resets + result = builder + .setRegion(target) + .setRegion(null) + .build() + .getCountry(); + assertEquals("null", defaulted, result); + + // ill-formed region codes throw IAE + // 2 alpha or 3 numeric + new BuilderILE("q", "abc", "12", "1234", "a3", "12a") { public void call() { b.setRegion(arg); }}; + + // region code validation is NOT performed, any 2-alpha or 3-digit passes + assertEquals("2alpha", "ZZ", builder.setRegion("ZZ").build().getCountry()); + assertEquals("3digit", "000", builder.setRegion("000").build().getCountry()); + } + + public void testBuilderSetVariant() { + // Variant case is not normalized in lenient variant mode + String source = "NewYork"; + String target = source; + String defaulted = ""; + Builder builder = new Builder(); + String result = builder + .setVariant(source) + .build() + .getVariant(); + assertEquals("NewYork", target, result); + + result = builder + .setVariant("NeWeR_YoRkEr") + .build() + .toLanguageTag(); + assertEquals("newer yorker", "und-NeWeR-YoRkEr", result); + + // subtags of variant are NOT reordered + result = builder + .setVariant("zzzzz_yyyyy_xxxxx") + .build() + .getVariant(); + assertEquals("zyx", "zzzzz_yyyyy_xxxxx", result); + + // setting to empty resets + result = builder + .setVariant(target) + .setVariant("") + .build() + .getVariant(); + assertEquals("empty", defaulted, result); + + // setting to null also resets + result = builder + .setVariant(target) + .setVariant(null) + .build() + .getVariant(); + assertEquals("null", defaulted, result); + + // ill-formed variants throw IAE + // digit followed by 3-7 characters, or alpha followed by 4-8 characters. 
+ new BuilderILE("abcd", "abcdefghi", "1ab", "1abcdefgh") { public void call() { b.setVariant(arg); }}; + + // 4 characters is ok as long as the first is a digit + assertEquals("digit+3alpha", "1abc", builder.setVariant("1abc").build().getVariant()); + + // all subfields must conform + new BuilderILE("abcde-fg") { public void call() { b.setVariant(arg); }}; + } + + public void testBuilderSetExtension() { + // upper case characters are normalized to lower case + final char sourceKey = 'a'; + final String sourceValue = "aB-aBcdefgh-12-12345678"; + String target = "ab-abcdefgh-12-12345678"; + Builder builder = new Builder(); + String result = builder + .setExtension(sourceKey, sourceValue) + .build() + .getExtension(sourceKey); + assertEquals("extension", target, result); + + // setting with empty resets + result = builder + .setExtension(sourceKey, sourceValue) + .setExtension(sourceKey, "") + .build() + .getExtension(sourceKey); + assertEquals("empty", null, result); + + // setting with null also resets + result = builder + .setExtension(sourceKey, sourceValue) + .setExtension(sourceKey, null) + .build() + .getExtension(sourceKey); + assertEquals("null", null, result); + + // ill-formed extension keys throw IAE + // must be in [0-9a-zA-Z] + new BuilderILE("$") { public void call() { b.setExtension('$', sourceValue); }}; + + // each segment of value must be 2-8 alphanum + new BuilderILE("ab-cd-123456789") { public void call() { b.setExtension(sourceKey, arg); }}; + + // no multiple hyphens. 
+ new BuilderILE("ab--cd") { public void call() { b.setExtension(sourceKey, arg); }}; + + // locale extension key has special handling + Locale locale = builder + .setExtension('u', "co-japanese") + .build(); + assertEquals("locale extension", "japanese", locale.getUnicodeLocaleType("co")); + + // locale extension has same behavior with set locale keyword + Locale locale2 = builder + .setUnicodeLocaleKeyword("co", "japanese") + .build(); + assertEquals("locales with extension", locale, locale2); + + // setting locale extension overrides all previous calls to setLocaleKeyword + Locale locale3 = builder + .setExtension('u', "xxx-nu-thai") + .build(); + assertEquals("remove co", null, locale3.getUnicodeLocaleType("co")); + assertEquals("override thai", "thai", locale3.getUnicodeLocaleType("nu")); + assertEquals("override attribute", 1, locale3.getUnicodeLocaleAttributes().size()); + + // setting locale keyword extends values already set by the locale extension + Locale locale4 = builder + .setUnicodeLocaleKeyword("co", "japanese") + .build(); + assertEquals("extend", "japanese", locale4.getUnicodeLocaleType("co")); + assertEquals("extend", "thai", locale4.getUnicodeLocaleType("nu")); + + // locale extension subtags are reordered + result = builder + .clear() + .setExtension('u', "456-123-zz-123-yy-456-xx-789") + .build() + .toLanguageTag(); + assertEquals("reorder", "und-u-123-456-xx-789-yy-456-zz-123", result); + + // multiple keyword types + result = builder + .clear() + .setExtension('u', "nu-thai-foobar") + .build() + .getUnicodeLocaleType("nu"); + assertEquals("multiple types", "thai-foobar", result); + + // redundant locale extensions are ignored + result = builder + .clear() + .setExtension('u', "nu-thai-NU-chinese-xx-1234") + .build() + .toLanguageTag(); + assertEquals("duplicate keys", "und-u-nu-thai-xx-1234", result); + } + + public void testBuilderAddUnicodeLocaleAttribute() { + Builder builder = new Builder(); + Locale locale = builder + 
.addUnicodeLocaleAttribute("def") + .addUnicodeLocaleAttribute("abc") + .build(); + + Set uattrs = locale.getUnicodeLocaleAttributes(); + assertEquals("number of attributes", 2, uattrs.size()); + assertTrue("attribute abc", uattrs.contains("abc")); + assertTrue("attribute def", uattrs.contains("def")); + + // remove attribute + locale = builder.removeUnicodeLocaleAttribute("xxx") + .build(); + + assertEquals("remove bogus", 2, uattrs.size()); + + // add duplicate + locale = builder.addUnicodeLocaleAttribute("abc") + .build(); + assertEquals("add duplicate", 2, uattrs.size()); + + // null attribute throws NPE + new BuilderNPE("null attribute") { public void call() { b.addUnicodeLocaleAttribute(null); }}; + + // illformed attribute throws IllformedLocaleException + new BuilderILE("invalid attribute") { public void call() { b.addUnicodeLocaleAttribute("ca"); }}; + } + + public void testBuildersetUnicodeLocaleKeyword() { + // Note: most behavior is tested in testBuilderSetExtension + Builder builder = new Builder(); + Locale locale = builder + .setUnicodeLocaleKeyword("co", "japanese") + .setUnicodeLocaleKeyword("nu", "thai") + .build(); + assertEquals("co", "japanese", locale.getUnicodeLocaleType("co")); + assertEquals("nu", "thai", locale.getUnicodeLocaleType("nu")); + assertEquals("keys", 2, locale.getUnicodeLocaleKeys().size()); + + // can clear a keyword by setting to null, others remain + String result = builder + .setUnicodeLocaleKeyword("co", null) + .build() + .toLanguageTag(); + assertEquals("empty co", "und-u-nu-thai", result); + + // locale keyword extension goes when all keywords are gone + result = builder + .setUnicodeLocaleKeyword("nu", null) + .build() + .toLanguageTag(); + assertEquals("empty nu", "und", result); + + // locale keywords are ordered independent of order of addition + result = builder + .setUnicodeLocaleKeyword("zz", "012") + .setUnicodeLocaleKeyword("aa", "345") + .build() + .toLanguageTag(); + assertEquals("reordered", 
"und-u-aa-345-zz-012", result); + + // null keyword throws NPE + new BuilderNPE("keyword") { public void call() { b.setUnicodeLocaleKeyword(null, "thai"); }}; + + // well-formed keywords are two alphanum + new BuilderILE("a", "abc") { public void call() { b.setUnicodeLocaleKeyword(arg, "value"); }}; + + // well-formed values are 3-8 alphanum + new BuilderILE("ab", "abcdefghi") { public void call() { b.setUnicodeLocaleKeyword("ab", arg); }}; + } + + public void testBuilderPrivateUseExtension() { + // normalizes hyphens to underscore, case to lower + String source = "c-B-a"; + String target = "c-b-a"; + Builder builder = new Builder(); + String result = builder + .setExtension(Locale.PRIVATE_USE_EXTENSION, source) + .build() + .getExtension(Locale.PRIVATE_USE_EXTENSION); + assertEquals("abc", target, result); + + // multiple hyphens are ill-formed + new BuilderILE("a--b") { public void call() { b.setExtension(Locale.PRIVATE_USE_EXTENSION, arg); }}; + } + + public void testBuilderClear() { + String monster = "en-latn-US-NewYork-a-bb-cc-u-co-japanese-x-z-y-x-x"; + Builder builder = new Builder(); + Locale locale = Locale.forLanguageTag(monster); + String result = builder + .setLocale(locale) + .clear() + .build() + .toLanguageTag(); + assertEquals("clear", "und", result); + } + + public void testBuilderRemoveUnicodeAttribute() { + // tested in testBuilderAddUnicodeAttribute + } + + public void testBuilderBuild() { + // tested in other test methods + } + + public void testSerialize() { + final Locale[] testLocales = { + Locale.ROOT, + new Locale("en"), + new Locale("en", "US"), + new Locale("en", "US", "Win"), + new Locale("en", "US", "Win_XP"), + new Locale("ja", "JP"), + new Locale("ja", "JP", "JP"), + new Locale("th", "TH"), + new Locale("th", "TH", "TH"), + new Locale("no", "NO"), + new Locale("nb", "NO"), + new Locale("nn", "NO"), + new Locale("no", "NO", "NY"), + new Locale("nn", "NO", "NY"), + new Locale("he", "IL"), + new Locale("he", "IL", "var"), + new 
Locale("Language", "Country", "Variant"), + new Locale("", "US"), + new Locale("", "", "Java"), + Locale.forLanguageTag("en-Latn-US"), + Locale.forLanguageTag("zh-Hans"), + Locale.forLanguageTag("zh-Hant-TW"), + Locale.forLanguageTag("ja-JP-u-ca-japanese"), + Locale.forLanguageTag("und-Hant"), + Locale.forLanguageTag("und-a-123-456"), + Locale.forLanguageTag("en-x-java"), + Locale.forLanguageTag("th-TH-u-ca-buddist-nu-thai-x-lvariant-TH"), + }; + + for (Locale locale : testLocales) { + try { + // write + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(bos); + oos.writeObject(locale); + + // read + ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); + ObjectInputStream ois = new ObjectInputStream(bis); + Object o = ois.readObject(); + + assertEquals("roundtrip " + locale, locale, o); + } catch (Exception e) { + errln(locale + " encountered exception:" + e.getLocalizedMessage()); + } + } + } + + public void testDeserialize6() { + final String TESTFILEPREFIX = "java6locale_"; + + File dataDir = null; + String dataDirName = System.getProperty("serialized.data.dir"); + if (dataDirName == null) { + URL resdirUrl = getClass().getClassLoader().getResource("serialized"); + if (resdirUrl != null) { + try { + dataDir = new File(resdirUrl.toURI()); + } catch (URISyntaxException urie) { + } + } + } else { + dataDir = new File(dataDirName); + } + + if (dataDir == null || !dataDir.isDirectory()) { + errln("Could not locate the serialized test case data location"); + return; + } + + File[] files = dataDir.listFiles(); + for (File testfile : files) { + if (testfile.isDirectory()) { + continue; + } + String name = testfile.getName(); + if (!name.startsWith(TESTFILEPREFIX)) { + continue; + } + Locale locale; + String locStr = name.substring(TESTFILEPREFIX.length()); + if (locStr.equals("ROOT")) { + locale = Locale.ROOT; + } else { + String[] fields = locStr.split("_", 3); + String lang = fields[0]; + 
String country = (fields.length >= 2) ? fields[1] : ""; + String variant = (fields.length == 3) ? fields[2] : ""; + locale = new Locale(lang, country, variant); + } + + // deserialize + try { + FileInputStream fis = new FileInputStream(testfile); + ObjectInputStream ois = new ObjectInputStream(fis); + + Object o = ois.readObject(); + assertEquals("Deserialize Java 6 Locale " + locale, o, locale); + ois.close(); + } catch (Exception e) { + errln("Exception while reading " + testfile.getAbsolutePath() + " - " + e.getMessage()); + } + } + } + + /// + /// utility asserts + /// + + private void assertTrue(String msg, boolean v) { + if (!v) { + errln(msg + ": expected true"); + } + } + + private void assertFalse(String msg, boolean v) { + if (v) { + errln(msg + ": expected false"); + } + } + + private void assertEquals(String msg, Object e, Object v) { + if (e == null ? v != null : !e.equals(v)) { + if (e != null) { + e = "'" + e + "'"; + } + if (v != null) { + v = "'" + v + "'"; + } + errln(msg + ": expected " + e + " but got " + v); + } + } + + private void assertNotEquals(String msg, Object e, Object v) { + if (e == null ? 
v == null : e.equals(v)) { + if (e != null) { + e = "'" + e + "'"; + } + errln(msg + ": expected not equal " + e); + } + } + + private void assertNull(String msg, Object o) { + if (o != null) { + errln(msg + ": expected null but got '" + o + "'"); + } + } + + private void assertNotNull(String msg, Object o) { + if (o == null) { + errln(msg + ": expected non null"); + } + } + + // not currently used, might get rid of exceptions from the API + private abstract class ExceptionTest { + private final Class exceptionClass; + + ExceptionTest(Class exceptionClass) { + this.exceptionClass = exceptionClass; + } + + public void run() { + String failMsg = null; + try { + call(); + failMsg = "expected " + exceptionClass.getName() + " but no exception thrown."; + } + catch (Exception e) { + if (!exceptionClass.isAssignableFrom(e.getClass())) { + failMsg = "expected " + exceptionClass.getName() + " but caught " + e; + } + } + if (failMsg != null) { + String msg = message(); + msg = msg == null ? "" : msg + " "; + errln(msg + failMsg); + } + } + + public String message() { + return null; + } + + public abstract void call(); + } + + private abstract class ExpectNPE extends ExceptionTest { + ExpectNPE() { + super(NullPointerException.class); + run(); + } + } + + private abstract class BuilderNPE extends ExceptionTest { + protected final String msg; + protected final Builder b = new Builder(); + + BuilderNPE(String msg) { + super(NullPointerException.class); + + this.msg = msg; + + run(); + } + + public String message() { + return msg; + } + } + + private abstract class ExpectIAE extends ExceptionTest { + ExpectIAE() { + super(IllegalArgumentException.class); + run(); + } + } + + private abstract class BuilderILE extends ExceptionTest { + protected final String[] args; + protected final Builder b = new Builder(); + + protected String arg; // mutates during call + + BuilderILE(String... 
args) { + super(IllformedLocaleException.class); + + this.args = args; + + run(); + } + + public void run() { + for (String arg : args) { + this.arg = arg; + super.run(); + } + } + + public String message() { + return "arg: '" + arg + "'"; + } + } +} diff --git a/test/java/util/Locale/LocaleTestFmwk.java b/test/java/util/Locale/LocaleTestFmwk.java index ed7f21df2..a8d24ff09 100644 --- a/test/java/util/Locale/LocaleTestFmwk.java +++ b/test/java/util/Locale/LocaleTestFmwk.java @@ -1,10 +1,12 @@ /* - * Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or @@ -20,6 +22,7 @@ * or visit www.oracle.com if you need additional information or have any * questions. 
*/ + /* * * @@ -112,6 +115,8 @@ public class LocaleTestFmwk { prompt = true; } else if (args[i].equals("-nothrow")) { nothrow = true; + } else if (args[i].equals("-exitcode")) { + exitcode = true; } else { Object m = testMethods.get( args[i] ); if( m != null ) { @@ -165,7 +170,12 @@ public class LocaleTestFmwk { } } if (nothrow) { - System.exit(errorCount); + if (exitcode) { + System.exit(errorCount); + } + if (errorCount > 0) { + throw new IllegalArgumentException("encountered " + errorCount + " errors"); + } } } @@ -235,7 +245,7 @@ public class LocaleTestFmwk { */ void usage() { System.out.println(getClass().getName() + - ": [-verbose] [-nothrow] [-prompt] [test names]"); + ": [-verbose] [-nothrow] [-exitcode] [-prompt] [test names]"); System.out.println("test names:"); Enumeration methodNames = testMethods.keys(); @@ -246,6 +256,7 @@ public class LocaleTestFmwk { private boolean prompt = false; private boolean nothrow = false; + private boolean exitcode = false; protected boolean verbose = false; private PrintWriter log; diff --git a/test/java/util/Locale/icuLocales.txt b/test/java/util/Locale/icuLocales.txt new file mode 100644 index 000000000..8d3390a4d --- /dev/null +++ b/test/java/util/Locale/icuLocales.txt @@ -0,0 +1,292 @@ +af +af-NA +af-ZA +am +am-ET +ar +ar-AE +ar-BH +ar-DZ +ar-EG +ar-IQ +ar-JO +ar-KW +ar-LB +ar-LY +ar-MA +ar-OM +ar-QA +ar-SA +ar-SD +ar-SY +ar-TN +ar-YE +as +as-IN +az +az-Cyrl +az-Cyrl-AZ +az-Latn +az-Latn-AZ +be +be-BY +bg +bg-BG +bn +bn-BD +bn-IN +bo +bo-CN +bo-IN +ca +ca-ES +cs +cs-CZ +cy +cy-GB +da +da-DK +de +de-AT +de-BE +de-CH +de-DE +de-LI +de-LU +el +el-CY +el-GR +en +en-AU +en-BE +en-BW +en-BZ +en-CA +en-GB +en-HK +en-IE +en-IN +en-JM +en-MH +en-MT +en-NA +en-NZ +en-PH +en-PK +en-SG +en-TT +en-US +en-US-posix +en-VI +en-ZA +en-ZW +eo +es +es-AR +es-BO +es-CL +es-CO +es-CR +es-DO +es-EC +es-ES +es-GT +es-HN +es-MX +es-NI +es-PA +es-PE +es-PR +es-PY +es-SV +es-US +es-UY +es-VE +et +et-EE +eu +eu-ES +fa +fa-AF +fa-IR +fi +fi-FI 
+fo +fo-FO +fr +fr-BE +fr-CA +fr-CH +fr-FR +fr-LU +fr-MC +fr-SN +ga +ga-IE +gl +gl-ES +gsw +gsw-CH +gu +gu-IN +gv +gv-GB +ha +ha-Latn +ha-Latn-GH +ha-Latn-NE +ha-Latn-NG +haw +haw-US +he +he-IL +hi +hi-IN +hr +hr-HR +hu +hu-HU +hy +hy-AM +hy-AM-revised +id +id-ID +ii +ii-CN +is +is-IS +it +it-CH +it-IT +ja +ja-JP +ka +ka-GE +kk +kk-Cyrl +kk-Cyrl-KZ +kl +kl-GL +km +km-KH +kn +kn-IN +ko +ko-KR +kok +kok-IN +kw +kw-GB +lt +lt-LT +lv +lv-LV +mk +mk-MK +ml +ml-IN +mr +mr-IN +ms +ms-BN +ms-MY +mt +mt-MT +nb +nb-NO +ne +ne-IN +ne-NP +nl +nl-BE +nl-NL +nn +nn-NO +om +om-ET +om-KE +or +or-IN +pa +pa-Arab +pa-Arab-PK +pa-Guru +pa-Guru-IN +pl +pl-PL +ps +ps-AF +pt +pt-BR +pt-PT +ro +ro-MD +ro-RO +ru +ru-RU +ru-UA +si +si-LK +sk +sk-SK +sl +sl-SI +so +so-DJ +so-ET +so-KE +so-SO +sq +sq-AL +sr +sr-Cyrl +sr-Cyrl-BA +sr-Cyrl-ME +sr-Cyrl-RS +sr-Latn +sr-Latn-BA +sr-Latn-ME +sr-Latn-RS +sv +sv-FI +sv-SE +sw +sw-KE +sw-TZ +ta +ta-IN +te +te-IN +th +th-TH +ti +ti-ER +ti-ET +tr +tr-TR +uk +uk-UA +ur +ur-IN +ur-PK +uz +uz-Arab +uz-Arab-AF +uz-Cyrl +uz-Cyrl-UZ +uz-Latn +uz-Latn-UZ +vi +vi-VN +zh +zh-Hans +zh-Hans-CN +zh-Hans-HK +zh-Hans-MO +zh-Hans-SG +zh-Hant +zh-Hant-HK +zh-Hant-MO +zh-Hant-TW +zu +zu-ZA diff --git a/test/java/util/Locale/serialized/java6locale_ROOT b/test/java/util/Locale/serialized/java6locale_ROOT new file mode 100644 index 0000000000000000000000000000000000000000..1ab7cc49b7a7841bad18e8db6db6d8cc54341414 GIT binary patch literal 127 zcmZ4UmVvdnh(RDLu`E%qv?Mb}&nG`QF(