/* * lang-tag * * Copyright 2012-2016, Connect2id Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. */ package com.nimbusds.langtag; import java.util.LinkedList; import java.util.List; /** * Language tag according to RFC 5646. * *
Supports normal language tags. Special private language tags beginning * with "x" and grandfathered tags beginning with "i" are not supported. * *
To construct a new language tag from scratch: * *
* // English as used in the United States * LangTag tag = new LangTag("en"); * tag.setRegion("US"); * * // Returns "en-US" * tag.toString(); ** *
To parse a language tag: * *
* // Chinese, Mandarin, Simplified script, as used in China * LangTag tag = LangTag.parse("zh-cmn-Hans-CN"); * * // Returns "zh" * tag.getPrimaryLanguage(); * * // Returns "cmn" * tag.getExtendedLanguageSubtags()[0]; * * // Returns "zh-cmn" * tag.getLanguage(); * * // Returns "Hans" * tag.getScript(); * * // Returns "CN" * tag.getRegion(); ** *
See RFC 5646. */ public class LangTag implements ReadOnlyLangTag { /** * The primary language, as the shortest ISO 639 code (2*3ALPHA). Must * always be defined, unless sufficient language subtags exist. */ private String primaryLanguage; /** * Optional extended language subtags, as three-letter ISO-639-3 codes. */ private String[] languageSubtags; /** * Optional script, (4ALPHA) ISO 15924 code. */ private String script = null; /** * Optional region, (2ALPHA) ISO 3166-1 code or (3DIGIT) UN M.49 code. */ private String region = null; /** * Optional variants, (5*8alphanum) or (DIGIT 3alphanum). */ private String[] variants = null; /** * Optional extensions. */ private String[] extensions = null; /** * Optional private use subtag. */ private String privateUse = null; /** * Ensures the specified subtag has a valid maximum length of eight * characters. * * @param subtag The sub tag to check. Must not be {@code null}. * * @throws LangTagException If the subtag has length greater than eight * characters. */ private static void ensureMaxLength(final String subtag) throws LangTagException { if (subtag.length() > 8) // extension or private use subtag? if (subtag.charAt(1) != '-' && subtag.length() > 10) throw new LangTagException("Invalid subtag syntax: Max character length exceeded"); } /** * Creates a new simple language tag. * *
Use for simple language tags such as "en" (English), "fr" * (French) or "pt" (Portuguese). * * @param primaryLanguage The primary language, as the shortest two or * three-letter ISO 639 code. Must not be * {@code null}. * * @throws LangTagException If the primary language syntax is invalid. */ public LangTag(final String primaryLanguage) throws LangTagException { this(primaryLanguage, new String[]{}); } /** * Creates a new extended language tag. * *
Use for extended language tags such as "zh-cmn" (Mandarin * Chinese) or "zh-yue" (Cantonese Chinese). * * @param primaryLanguage The primary language, as the shortest two or * three-letter ISO 639 code. May be {@code null} * if the subtags are sufficient to identify the * language. * @param languageSubtags One or more extended language subtags, as * three-letter ISO 639-3 codes. {@code null} if * none. * * @throws LangTagException If the primary or extended language syntax * is invalid. */ public LangTag(final String primaryLanguage, final String... languageSubtags) throws LangTagException { if (primaryLanguage == null && (languageSubtags == null || languageSubtags.length == 0)) throw new LangTagException("Either the primary language or the extended language subtags, or both must be defined"); setPrimaryLanguage(primaryLanguage); setExtendedLanguageSubtags(languageSubtags); } @Override public String getLanguage() { StringBuilder sb = new StringBuilder(); if (primaryLanguage != null) sb.append(primaryLanguage); if (languageSubtags != null && languageSubtags.length > 0) { for (String tag: languageSubtags) { if (sb.length() > 0) sb.append('-'); sb.append(tag); } } return sb.toString(); } @Override public String getPrimaryLanguage() { return primaryLanguage; } /** * Checks if the specified string has a valid primary language subtag * syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isPrimaryLanguage(final String s) { return s.matches("[a-zA-Z]{2,3}"); } /** * Sets the primary language subtag. * *
See RFC 5646 section 2.2.1. * * @param primaryLanguage The primary language, as the shortest two or * three-letter ISO 639 code. May be * {@code null}. * * @throws LangTagException If the primary language syntax is invalid. */ private void setPrimaryLanguage(final String primaryLanguage) throws LangTagException { if (primaryLanguage == null) { this.primaryLanguage = null; return; } ensureMaxLength(primaryLanguage); if (! isPrimaryLanguage(primaryLanguage)) throw new LangTagException("Invalid primary language subtag: Must be a two or three-letter ISO 639 code"); this.primaryLanguage = primaryLanguage.toLowerCase(); } @Override public String[] getExtendedLanguageSubtags() { return languageSubtags; } /** * Checks if the specified string has a valid extended language subtag * syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isExtendedLanguageSubtag(final String s) { return s.matches("[a-zA-Z]{3}"); } /** * Sets the extended language subtags. * *
See RFC 5646 section 2.2.2. * * @param languageSubtags The extended language subtags, as three-letter * ISO 639-3 codes. {@code null} if none. */ private void setExtendedLanguageSubtags(final String... languageSubtags) throws LangTagException { if (languageSubtags == null || languageSubtags.length == 0) { this.languageSubtags = null; return; } this.languageSubtags = new String[languageSubtags.length]; for (int i=0; i < languageSubtags.length; i++) { ensureMaxLength(languageSubtags[i]); if (! isExtendedLanguageSubtag(languageSubtags[i])) throw new LangTagException("Invalid extended language subtag: Must be a three-letter ISO 639-3 code"); this.languageSubtags[i] = languageSubtags[i].toLowerCase(); } } @Override public String getScript() { return script; } /** * Checks if the specified string has a valid script subtag syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isScript(final String s) { return s.matches("[a-zA-Z]{4}"); } /** * Sets the script. * *
See RFC 5646 section 2.2.3. * * @param script The script, as a four-letter ISO 15924 code. * {@code null} if not defined. * * @throws LangTagException If the script syntax is invalid. */ public void setScript(final String script) throws LangTagException { if (script == null) { this.script = null; return; } ensureMaxLength(script); if (! isScript(script)) throw new LangTagException("Invalid script subtag: Must be a four-letter ISO 15924 code"); this.script = script.substring(0, 1).toUpperCase() + script.substring(1).toLowerCase(); } @Override public String getRegion() { return region; } /** * Checks if the specified string has a valid region subtag syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isRegion(final String s) { return s.matches("[a-zA-Z]{2}|\\d{3}"); } /** * Sets the region. * *
See RFC 5646 section 2.2.4. * * @param region The region, as a two-letter ISO 3166-1 code or a three- * digit UN M.49 code. {@code null} if not defined. * * @throws LangTagException If the region syntax is invalid. */ public void setRegion(final String region) throws LangTagException { if (region == null) { this.region = null; return; } ensureMaxLength(region); if (! isRegion(region)) throw new LangTagException("Invalid region subtag: Must be a two-letter ISO 3166-1 code or a three-digit UN M.49 code"); this.region = region.toUpperCase(); } @Override public String[] getVariants() { return variants; } /** * Checks if the specified string has a valid variant subtag syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isVariant(final String s) { return s.matches("[a-zA-Z][a-zA-Z0-9]{4,}|[0-9][a-zA-Z0-9]{3,}"); } /** * Sets the variants. * *
See RFC 5646 section 2.2.5. * * @param variants The variants. {@code null} if not defined. * * @throws LangTagException If the variant syntax is invalid. */ public void setVariants(final String... variants) throws LangTagException { if (variants == null || variants.length == 0) { this.variants = null; return; } this.variants = new String[variants.length]; for (int i=0; i < variants.length; i++) { ensureMaxLength(variants[i]); if (! isVariant(variants[i])) throw new LangTagException("Invalid variant subtag"); this.variants[i] = variants[i].toLowerCase(); } } @Override public String[] getExtensions() { return extensions; } /** * Checks if the specified string has a valid extension singleton * syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isExtensionSingleton(final String s) { return s.matches("[0-9a-wA-Wy-zY-Z]"); } /** * Checks if the specified string has a valid extension subtag syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isExtension(final String s) { return s.matches("[0-9a-wA-Wy-zY-Z]-[0-9a-zA-Z]+"); } /** * Sets the extensions. * *
See RFC 5646 section 2.2.6. * * @param extensions The extensions. {@code null} if not defined. * * @throws LangTagException If the extension syntax is invalid. */ public void setExtensions(final String... extensions) throws LangTagException { if (extensions == null || extensions.length == 0) { this.extensions = null; return; } this.extensions = new String[extensions.length]; for (int i=0; i < extensions.length; i++) { ensureMaxLength(extensions[i]); if (! isExtension(extensions[i])) throw new LangTagException("Invalid extension subtag"); this.extensions[i] = extensions[i].toLowerCase(); } } @Override public String getPrivateUse() { return privateUse; } /** * Checks if the specified string has a valid private use subtag syntax. * * @param s The string to check. Must not be {@code null}. * * @return {@code true} if the syntax is correct, else {@code false}. */ private static boolean isPrivateUse(final String s) { return s.matches("x-[0-9a-zA-Z]+"); } /** * Sets the private use. * *
See RFC 5646 section 2.2.7.
*
* @param privateUse The private use. {@code null} if not defined.
*
* @throws LangTagException If the extension syntax is invalid.
*/
public void setPrivateUse(final String privateUse)
throws LangTagException {
if (privateUse == null) {
this.privateUse = null;
return;
}
ensureMaxLength(privateUse);
if (! isPrivateUse(privateUse))
throw new LangTagException("Invalid private use subtag");
this.privateUse = privateUse.toLowerCase();
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder(getLanguage());
if (script != null) {
sb.append('-');
sb.append(script);
}
if (region != null) {
sb.append('-');
sb.append(region);
}
if (variants != null) {
for (String v: variants) {
sb.append('-');
sb.append(v);
}
}
if (extensions != null) {
for (String e: extensions) {
sb.append('-');
sb.append(e);
}
}
if (privateUse != null) {
sb.append('-');
sb.append(privateUse);
}
return sb.toString();
}
/**
* Overrides {@code Object.hashCode()}.
*
* @return The object hash code.
*/
@Override
public int hashCode() {
return toString().hashCode();
}
/**
* Overrides {@code Object.equals()}.
*
* @param object The object to compare to.
*
* @return {@code true} if the objects have the same value, otherwise
* {@code false}.
*/
@Override
public boolean equals(Object object) {
return object != null &&
object instanceof LangTag &&
this.toString().equals(object.toString());
}
/**
* Parses the specified string representation of a language tag.
*
* @param s The string to parse. May be {@code null}.
*
* @return The language tag. {@code null} if the string was empty or
* {@code null}.
*
* @throws LangTagException If the string has invalid language tag
* syntax.
*/
public static LangTag parse(final String s)
throws LangTagException {
if (s == null || s.trim().isEmpty())
return null;
final String[] subtags = s.split("-");
int pos = 0;
// Parse primary lang + ext lang subtags
String primaryLang = null;
List