提交 14d64015 编写于 作者: P peytoia

6842557: NumericShaper needs to be updated for Unicode 5.1 support

6843181: NumericShaper is not thread-safe
6900137: Typo in API Doc for NumericShaper
Reviewed-by: okutsu
上级 694d3108
/* /*
* Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved. * Copyright 2000-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
...@@ -25,6 +25,13 @@ ...@@ -25,6 +25,13 @@
package java.awt.font; package java.awt.font;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.Arrays;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.Set;
/** /**
* The <code>NumericShaper</code> class is used to convert Latin-1 (European) * The <code>NumericShaper</code> class is used to convert Latin-1 (European)
* digits to other Unicode decimal digits. Users of this class will * digits to other Unicode decimal digits. Users of this class will
...@@ -64,13 +71,261 @@ package java.awt.font; ...@@ -64,13 +71,261 @@ package java.awt.font;
* NumericShaper.getContextualShaper(NumericShaper.ARABIC | * NumericShaper.getContextualShaper(NumericShaper.ARABIC |
* NumericShaper.TAMIL, * NumericShaper.TAMIL,
* NumericShaper.EUROPEAN); * NumericShaper.EUROPEAN);
* shaper.shape(text. start, count); * shaper.shape(text, start, count);
* </pre></blockquote>
*
* <p><b>Bit mask- and enum-based Unicode ranges</b></p>
*
* <p>This class supports two different programming interfaces to
* represent Unicode ranges for script-specific digits: bit
* mask-based ones, such as {@link #ARABIC NumericShaper.ARABIC}, and
* enum-based ones, such as {@link NumericShaper.Range#ARABIC}.
* Multiple ranges can be specified by ORing bit mask-based constants,
* such as:
* <blockquote><pre>
* NumericShaper.ARABIC | NumericShaper.TAMIL
* </pre></blockquote>
* or creating a {@code Set} with the {@link NumericShaper.Range}
* constants, such as:
* <blockquote><pre>
* EnumSet.of(NumericShaper.Range.ARABIC, NumericShaper.Range.TAMIL)
* </pre></blockquote> * </pre></blockquote>
* The enum-based ranges are a superset of the bit mask-based ones.
*
* <p>If the two interfaces are mixed (including serialization),
* Unicode range values are mapped to their counterparts where such
* mapping is possible, such as {@code NumericShaper.Range.ARABIC}
* from/to {@code NumericShaper.ARABIC}. If any unmappable range
* values are specified, such as {@code NumericShaper.Range.BALINESE},
* those ranges are ignored.
* *
* @since 1.4 * @since 1.4
*/ */
public final class NumericShaper implements java.io.Serializable { public final class NumericShaper implements java.io.Serializable {
/**
* A {@code NumericShaper.Range} represents a Unicode range of a
* script having its own decimal digits. For example, the {@link
* NumericShaper.Range#THAI} range has the Thai digits, THAI DIGIT
* ZERO (U+0E50) to THAI DIGIT NINE (U+0E59).
*
* <p>The <code>Range</code> enum replaces the traditional bit
* mask-based values (e.g., {@link NumericShaper#ARABIC}), and
* supports more Unicode ranges than the bit mask-based ones. For
* example, the following code using the bit mask:
* <blockquote><pre>
* NumericShaper.getContextualShaper(NumericShaper.ARABIC |
* NumericShaper.TAMIL,
* NumericShaper.EUROPEAN);
* </pre></blockquote>
* can be written using this enum as:
* <blockquote><pre>
* NumericShaper.getContextualShaper(EnumSet.of(
* NumericShaper.Range.ARABIC,
* NumericShaper.Range.TAMIL),
* NumericShaper.Range.EUROPEAN);
* </pre></blockquote>
*
* @since 1.7
*/
public static enum Range {
    /**
     * The Latin (European) range with the Latin (ASCII) digits.
     */
    EUROPEAN        ('\u0030', '\u0000', '\u0300'),
    /**
     * The Arabic range with the Arabic-Indic digits.
     */
    ARABIC          ('\u0660', '\u0600', '\u0780'),
    /**
     * The Arabic range with the Eastern Arabic-Indic digits.
     */
    EASTERN_ARABIC  ('\u06f0', '\u0600', '\u0780'),
    /**
     * The Devanagari range with the Devanagari digits.
     */
    DEVANAGARI      ('\u0966', '\u0900', '\u0980'),
    /**
     * The Bengali range with the Bengali digits.
     */
    BENGALI         ('\u09e6', '\u0980', '\u0a00'),
    /**
     * The Gurmukhi range with the Gurmukhi digits.
     */
    GURMUKHI        ('\u0a66', '\u0a00', '\u0a80'),
    /**
     * The Gujarati range with the Gujarati digits.
     */
    // The Gujarati block is U+0A80..U+0AFF; it must contain its own digits
    // (U+0AE6..) and must not coincide with the Oriya block below.
    GUJARATI        ('\u0ae6', '\u0a80', '\u0b00'),
    /**
     * The Oriya range with the Oriya digits.
     */
    ORIYA           ('\u0b66', '\u0b00', '\u0b80'),
    /**
     * The Tamil range with the Tamil digits.
     */
    TAMIL           ('\u0be6', '\u0b80', '\u0c00'),
    /**
     * The Telugu range with the Telugu digits.
     */
    TELUGU          ('\u0c66', '\u0c00', '\u0c80'),
    /**
     * The Kannada range with the Kannada digits.
     */
    KANNADA         ('\u0ce6', '\u0c80', '\u0d00'),
    /**
     * The Malayalam range with the Malayalam digits.
     */
    MALAYALAM       ('\u0d66', '\u0d00', '\u0d80'),
    /**
     * The Thai range with the Thai digits.
     */
    THAI            ('\u0e50', '\u0e00', '\u0e80'),
    /**
     * The Lao range with the Lao digits.
     */
    LAO             ('\u0ed0', '\u0e80', '\u0f00'),
    /**
     * The Tibetan range with the Tibetan digits.
     */
    TIBETAN         ('\u0f20', '\u0f00', '\u1000'),
    /**
     * The Myanmar range with the Myanmar digits.
     */
    MYANMAR         ('\u1040', '\u1000', '\u1080'),
    /**
     * The Ethiopic range with the Ethiopic digits. Ethiopic
     * does not have a decimal digit 0 so Latin (European) 0 is
     * used.
     */
    ETHIOPIC        ('\u1369', '\u1200', '\u1380') {
        @Override
        char getNumericBase() { return 1; }
    },
    /**
     * The Khmer range with the Khmer digits.
     */
    KHMER           ('\u17e0', '\u1780', '\u1800'),
    /**
     * The Mongolian range with the Mongolian digits.
     */
    MONGOLIAN       ('\u1810', '\u1800', '\u1900'),
    /**
     * The N'Ko range with the N'Ko digits.
     */
    NKO             ('\u07c0', '\u07c0', '\u0800'),
    /**
     * The Myanmar range with the Myanmar Shan digits.
     */
    MYANMAR_SHAN    ('\u1090', '\u1000', '\u10a0'),
    /**
     * The Limbu range with the Limbu digits.
     */
    LIMBU           ('\u1946', '\u1900', '\u1950'),
    /**
     * The New Tai Lue range with the New Tai Lue digits.
     */
    NEW_TAI_LUE     ('\u19d0', '\u1980', '\u19e0'),
    /**
     * The Balinese range with the Balinese digits.
     */
    BALINESE        ('\u1b50', '\u1b00', '\u1b80'),
    /**
     * The Sundanese range with the Sundanese digits.
     */
    SUNDANESE       ('\u1bb0', '\u1b80', '\u1bc0'),
    /**
     * The Lepcha range with the Lepcha digits.
     */
    LEPCHA          ('\u1c40', '\u1c00', '\u1c50'),
    /**
     * The Ol Chiki range with the Ol Chiki digits.
     */
    OL_CHIKI        ('\u1c50', '\u1c50', '\u1c80'),
    /**
     * The Vai range with the Vai digits.
     */
    VAI             ('\ua620', '\ua500', '\ua640'),
    /**
     * The Saurashtra range with the Saurashtra digits.
     */
    SAURASHTRA      ('\ua8d0', '\ua880', '\ua8e0'),
    /**
     * The Kayah Li range with the Kayah Li digits.
     */
    KAYAH_LI        ('\ua900', '\ua900', '\ua930'),
    /**
     * The Cham range with the Cham digits.
     */
    CHAM            ('\uaa50', '\uaa00', '\uaa60');

    // Private copy of the constants, reordered by the static initializer below.
    private static final Range[] ranges = Range.class.getEnumConstants();
    static {
        // sort ranges[] by base for binary search
        Arrays.sort(ranges,
                    new Comparator<Range>() {
                        public int compare(Range s1, Range s2) {
                            return s1.base > s2.base ? 1 : s1.base == s2.base ? 0 : -1;
                        }
                    });
    }

    /**
     * Returns the ordinal of {@code script} when it is below
     * {@code NUM_KEYS} (i.e. the range has a bit mask-based counterpart),
     * otherwise -1.
     */
    private static int toRangeIndex(Range script) {
        int index = script.ordinal();
        return index < NUM_KEYS ? index : -1;
    }

    /**
     * Returns the constant whose ordinal is {@code index} when
     * {@code index} is below {@code NUM_KEYS}, otherwise {@code null}.
     */
    private static Range indexToRange(int index) {
        return index < NUM_KEYS ? Range.values()[index] : null;
    }

    /**
     * Converts a set of ranges to a bit mask with bit {@code ordinal()}
     * set for each member. Members whose ordinal is {@code NUM_KEYS} or
     * greater are ignored.
     */
    private static int toRangeMask(Set<Range> ranges) {
        int m = 0;
        for (Range range : ranges) {
            int index = range.ordinal();
            if (index < NUM_KEYS) {
                m |= 1 << index;
            }
        }
        return m;
    }

    /**
     * Converts a bit mask to the set of ranges whose ordinal bit is set.
     * Only the low {@code NUM_KEYS} bits of {@code mask} are examined.
     */
    private static Set<Range> maskToRangeSet(int mask) {
        Set<Range> set = EnumSet.noneOf(Range.class);
        Range[] a = Range.values();
        for (int i = 0; i < NUM_KEYS; i++) {
            if ((mask & (1 << i)) != 0) {
                set.add(a[i]);
            }
        }
        return set;
    }

    // base character of range digits, stored as the difference between the
    // script's first digit and the ASCII digit it corresponds to
    private final int base;
    // Unicode range
    private final int start, // inclusive
                      end;   // exclusive

    /**
     * @param base  the first digit character of the script
     * @param start the first code point of the range (inclusive)
     * @param end   the end of the range (exclusive)
     */
    private Range(int base, int start, int end) {
        // getNumericBase() is the numeric value of the script's first digit
        // (0 by default, 1 for ETHIOPIC, which overrides it).
        this.base = base - ('0' + getNumericBase());
        this.start = start;
        this.end = end;
    }

    /** Returns the stored digit offset (see {@code base}). */
    private int getDigitBase() {
        return base;
    }

    /**
     * Returns the numeric value of the first digit of this range; 0 unless
     * overridden by a constant.
     */
    char getNumericBase() {
        return 0;
    }

    /** Returns whether {@code start <= c < end}. */
    private boolean inRange(int c) {
        return start <= c && c < end;
    }
}
/** index of context for contextual shaping - values range from 0 to 18 */ /** index of context for contextual shaping - values range from 0 to 18 */
private int key; private int key;
...@@ -79,6 +334,25 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -79,6 +334,25 @@ public final class NumericShaper implements java.io.Serializable {
*/ */
private int mask; private int mask;
/**
* The context {@code Range} for contextual shaping or the {@code
* Range} for non-contextual shaping. {@code null} for the bit
* mask-based API.
*
* @since 1.7
*/
private Range shapingRange;
/**
* {@code Set<Range>} indicating which Unicode ranges to
* shape. {@code null} for the bit mask-based API.
*
* @since 1.7
*/
private transient Set<Range> rangeSet;
private static final long serialVersionUID = -8022764705923730308L;
/** Identifies the Latin-1 (European) and extended range, and /** Identifies the Latin-1 (European) and extended range, and
* Latin-1 (European) decimal base. * Latin-1 (European) decimal base.
*/ */
...@@ -105,9 +379,8 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -105,9 +379,8 @@ public final class NumericShaper implements java.io.Serializable {
/** Identifies the ORIYA range and decimal base. */ /** Identifies the ORIYA range and decimal base. */
public static final int ORIYA = 1<<7; public static final int ORIYA = 1<<7;
/** Identifies the TAMIL range and decimal base. Tamil does not have a /** Identifies the TAMIL range and decimal base. */
* decimal digit 0 so Latin-1 (European) 0 is used. // TAMIL DIGIT ZERO was added in Unicode 4.1
*/
public static final int TAMIL = 1<<8; public static final int TAMIL = 1<<8;
/** Identifies the TELUGU range and decimal base. */ /** Identifies the TELUGU range and decimal base. */
...@@ -140,7 +413,12 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -140,7 +413,12 @@ public final class NumericShaper implements java.io.Serializable {
/** Identifies the MONGOLIAN range and decimal base. */ /** Identifies the MONGOLIAN range and decimal base. */
public static final int MONGOLIAN = 1<<18; public static final int MONGOLIAN = 1<<18;
/** Identifies all ranges, for full contextual shaping. */ /** Identifies all ranges, for full contextual shaping.
*
* <p>This constant specifies all of the bit mask-based
* ranges. Use {@code EnumSet.allOf(NumericShaper.Range.class)} to
* specify all of the enum-based ranges.
*/
public static final int ALL_RANGES = 0x0007ffff; public static final int ALL_RANGES = 0x0007ffff;
private static final int EUROPEAN_KEY = 0; private static final int EUROPEAN_KEY = 0;
...@@ -163,42 +441,20 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -163,42 +441,20 @@ public final class NumericShaper implements java.io.Serializable {
private static final int KHMER_KEY = 17; private static final int KHMER_KEY = 17;
private static final int MONGOLIAN_KEY = 18; private static final int MONGOLIAN_KEY = 18;
private static final int NUM_KEYS = 19; private static final int NUM_KEYS = MONGOLIAN_KEY + 1; // fixed
private static final String[] keyNames = {
"EUROPEAN",
"ARABIC",
"EASTERN_ARABIC",
"DEVANAGARI",
"BENGALI",
"GURMUKHI",
"GUJARATI",
"ORIYA",
"TAMIL",
"TELUGU",
"KANNADA",
"MALAYALAM",
"THAI",
"LAO",
"TIBETAN",
"MYANMAR",
"ETHIOPIC",
"KHMER",
"MONGOLIAN"
};
private static final int CONTEXTUAL_MASK = 1<<31; private static final int CONTEXTUAL_MASK = 1<<31;
private static final char[] bases = { private static final char[] bases = {
'\u0030' - '\u0030', // EUROPEAN '\u0030' - '\u0030', // EUROPEAN
'\u0660' - '\u0030', // ARABIC '\u0660' - '\u0030', // ARABIC-INDIC
'\u06f0' - '\u0030', // EASTERN_ARABIC '\u06f0' - '\u0030', // EXTENDED ARABIC-INDIC (EASTERN_ARABIC)
'\u0966' - '\u0030', // DEVANAGARI '\u0966' - '\u0030', // DEVANAGARI
'\u09e6' - '\u0030', // BENGALI '\u09e6' - '\u0030', // BENGALI
'\u0a66' - '\u0030', // GURMUKHI '\u0a66' - '\u0030', // GURMUKHI
'\u0ae6' - '\u0030', // GUJARATI '\u0ae6' - '\u0030', // GUJARATI
'\u0b66' - '\u0030', // ORIYA '\u0b66' - '\u0030', // ORIYA
'\u0be7' - '\u0030', // TAMIL - note missing zero '\u0be6' - '\u0030', // TAMIL - zero was added in Unicode 4.1
'\u0c66' - '\u0030', // TELUGU '\u0c66' - '\u0030', // TELUGU
'\u0ce6' - '\u0030', // KANNADA '\u0ce6' - '\u0030', // KANNADA
'\u0d66' - '\u0030', // MALAYALAM '\u0d66' - '\u0030', // MALAYALAM
...@@ -206,7 +462,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -206,7 +462,7 @@ public final class NumericShaper implements java.io.Serializable {
'\u0ed0' - '\u0030', // LAO '\u0ed0' - '\u0030', // LAO
'\u0f20' - '\u0030', // TIBETAN '\u0f20' - '\u0030', // TIBETAN
'\u1040' - '\u0030', // MYANMAR '\u1040' - '\u0030', // MYANMAR
'\u1369' - '\u0030', // ETHIOPIC '\u1369' - '\u0031', // ETHIOPIC - no zero
'\u17e0' - '\u0030', // KHMER '\u17e0' - '\u0030', // KHMER
'\u1810' - '\u0030', // MONGOLIAN '\u1810' - '\u0030', // MONGOLIAN
}; };
...@@ -215,14 +471,14 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -215,14 +471,14 @@ public final class NumericShaper implements java.io.Serializable {
private static final char[] contexts = { private static final char[] contexts = {
'\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended) '\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
'\u0600', '\u0700', // ARABIC '\u0600', '\u0780', // ARABIC
'\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic '\u0600', '\u0780', // EASTERN_ARABIC -- note overlap with arabic
'\u0900', '\u0980', // DEVANAGARI '\u0900', '\u0980', // DEVANAGARI
'\u0980', '\u0a00', // BENGALI '\u0980', '\u0a00', // BENGALI
'\u0a00', '\u0a80', // GURMUKHI '\u0a00', '\u0a80', // GURMUKHI
'\u0a80', '\u0b00', // GUJARATI '\u0a80', '\u0b00', // GUJARATI
'\u0b00', '\u0b80', // ORIYA '\u0b00', '\u0b80', // ORIYA
'\u0b80', '\u0c00', // TAMIL - note missing zero '\u0b80', '\u0c00', // TAMIL
'\u0c00', '\u0c80', // TELUGU '\u0c00', '\u0c80', // TELUGU
'\u0c80', '\u0d00', // KANNADA '\u0c80', '\u0d00', // KANNADA
'\u0d00', '\u0d80', // MALAYALAM '\u0d00', '\u0d80', // MALAYALAM
...@@ -230,7 +486,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -230,7 +486,7 @@ public final class NumericShaper implements java.io.Serializable {
'\u0e80', '\u0f00', // LAO '\u0e80', '\u0f00', // LAO
'\u0f00', '\u1000', // TIBETAN '\u0f00', '\u1000', // TIBETAN
'\u1000', '\u1080', // MYANMAR '\u1000', '\u1080', // MYANMAR
'\u1200', '\u1380', // ETHIOPIC '\u1200', '\u1380', // ETHIOPIC - note missing zero
'\u1780', '\u1800', // KHMER '\u1780', '\u1800', // KHMER
'\u1800', '\u1900', // MONGOLIAN '\u1800', '\u1900', // MONGOLIAN
'\uffff', '\uffff',
...@@ -254,378 +510,290 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -254,378 +510,290 @@ public final class NumericShaper implements java.io.Serializable {
return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY; return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
} }
// cache for the NumericShaper.Range version
private transient volatile Range currentRange = Range.EUROPEAN;
/**
 * Finds the {@code Range} whose [start, end) interval contains
 * {@code codepoint}, consulting the single-entry cache
 * {@code currentRange} first and falling back to a binary search over
 * {@code Range.ranges}. A successful search updates the cache.
 *
 * @param codepoint the code point to classify
 * @return the containing range, or {@code Range.EUROPEAN} if the search
 *         finds none
 */
private Range rangeForCodePoint(int codepoint) {
    // Fast path: same range as the previous lookup.
    final Range cached = currentRange;
    if (cached.inRange(codepoint)) {
        return cached;
    }

    final Range[] table = Range.ranges;
    int low = 0;
    int high = table.length - 1;
    while (low <= high) {
        final int middle = (low + high) / 2;
        final Range candidate = table[middle];
        if (codepoint < candidate.start) {
            high = middle - 1;
            continue;
        }
        if (codepoint >= candidate.end) {
            low = middle + 1;
            continue;
        }
        // start <= codepoint < end: remember the hit for the next call.
        currentRange = candidate;
        return candidate;
    }
    // No range matched; treat as European.
    return Range.EUROPEAN;
}
/* /*
* A range table of strong directional characters (types L, R, AL). * A range table of strong directional characters (types L, R, AL).
* Even (left) indexes are starts of ranges of non-strong-directional (or undefined) * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
* characters, odd (right) indexes are starts of ranges of strong directional * characters, odd (right) indexes are starts of ranges of strong directional
* characters. * characters.
*/ */
private static char[] strongTable = { private static int[] strongTable = {
'\u0000', '\u0041', 0x0000, 0x0041,
'\u005b', '\u0061', 0x005b, 0x0061,
'\u007b', '\u00aa', 0x007b, 0x00aa,
'\u00ab', '\u00b5', 0x00ab, 0x00b5,
'\u00b6', '\u00ba', 0x00b6, 0x00ba,
'\u00bb', '\u00c0', 0x00bb, 0x00c0,
'\u00d7', '\u00d8', 0x00d7, 0x00d8,
'\u00f7', '\u00f8', 0x00f7, 0x00f8,
'\u0220', '\u0222', 0x02b9, 0x02bb,
'\u0234', '\u0250', 0x02c2, 0x02d0,
'\u02ae', '\u02b0', 0x02d2, 0x02e0,
'\u02b9', '\u02bb', 0x02e5, 0x02ee,
'\u02c2', '\u02d0', 0x02ef, 0x0370,
'\u02d2', '\u02e0', 0x0374, 0x0376,
'\u02e5', '\u02ee', 0x037e, 0x0386,
'\u02ef', '\u037a', 0x0387, 0x0388,
'\u037b', '\u0386', 0x03f6, 0x03f7,
'\u0387', '\u0388', 0x0483, 0x048a,
'\u038b', '\u038c', 0x058a, 0x05be,
'\u038d', '\u038e', 0x05bf, 0x05c0,
'\u03a2', '\u03a3', 0x05c1, 0x05c3,
'\u03cf', '\u03d0', 0x05c4, 0x05c6,
'\u03d8', '\u03da', 0x05c7, 0x05d0,
'\u03f4', '\u0400', 0x0600, 0x0608,
'\u0483', '\u048c', 0x0609, 0x060b,
'\u04c5', '\u04c7', 0x060c, 0x060d,
'\u04c9', '\u04cb', 0x060e, 0x061b,
'\u04cd', '\u04d0', 0x064b, 0x066d,
'\u04f6', '\u04f8', 0x0670, 0x0671,
'\u04fa', '\u0531', 0x06d6, 0x06e5,
'\u0557', '\u0559', 0x06e7, 0x06ee,
'\u0560', '\u0561', 0x06f0, 0x06fa,
'\u0588', '\u0589', 0x070f, 0x0710,
'\u058a', '\u05be', 0x0711, 0x0712,
'\u05bf', '\u05c0', 0x0730, 0x074d,
'\u05c1', '\u05c3', 0x07a6, 0x07b1,
'\u05c4', '\u05d0', 0x07eb, 0x07f4,
'\u05eb', '\u05f0', 0x07f6, 0x07fa,
'\u05f5', '\u061b', 0x0901, 0x0903,
'\u061c', '\u061f', 0x093c, 0x093d,
'\u0620', '\u0621', 0x0941, 0x0949,
'\u063b', '\u0640', 0x094d, 0x0950,
'\u064b', '\u066d', 0x0951, 0x0958,
'\u066e', '\u0671', 0x0962, 0x0964,
'\u06d6', '\u06e5', 0x0981, 0x0982,
'\u06e7', '\u06fa', 0x09bc, 0x09bd,
'\u06ff', '\u0700', 0x09c1, 0x09c7,
'\u070e', '\u0710', 0x09cd, 0x09ce,
'\u0711', '\u0712', 0x09e2, 0x09e6,
'\u072d', '\u0780', 0x09f2, 0x09f4,
'\u07a6', '\u0903', 0x0a01, 0x0a03,
'\u0904', '\u0905', 0x0a3c, 0x0a3e,
'\u093a', '\u093d', 0x0a41, 0x0a59,
'\u0941', '\u0949', 0x0a70, 0x0a72,
'\u094d', '\u0950', 0x0a75, 0x0a83,
'\u0951', '\u0958', 0x0abc, 0x0abd,
'\u0962', '\u0964', 0x0ac1, 0x0ac9,
'\u0971', '\u0982', 0x0acd, 0x0ad0,
'\u0984', '\u0985', 0x0ae2, 0x0ae6,
'\u098d', '\u098f', 0x0af1, 0x0b02,
'\u0991', '\u0993', 0x0b3c, 0x0b3d,
'\u09a9', '\u09aa', 0x0b3f, 0x0b40,
'\u09b1', '\u09b2', 0x0b41, 0x0b47,
'\u09b3', '\u09b6', 0x0b4d, 0x0b57,
'\u09ba', '\u09be', 0x0b62, 0x0b66,
'\u09c1', '\u09c7', 0x0b82, 0x0b83,
'\u09c9', '\u09cb', 0x0bc0, 0x0bc1,
'\u09cd', '\u09d7', 0x0bcd, 0x0bd0,
'\u09d8', '\u09dc', 0x0bf3, 0x0c01,
'\u09de', '\u09df', 0x0c3e, 0x0c41,
'\u09e2', '\u09e6', 0x0c46, 0x0c58,
'\u09f2', '\u09f4', 0x0c62, 0x0c66,
'\u09fb', '\u0a05', 0x0c78, 0x0c7f,
'\u0a0b', '\u0a0f', 0x0cbc, 0x0cbd,
'\u0a11', '\u0a13', 0x0ccc, 0x0cd5,
'\u0a29', '\u0a2a', 0x0ce2, 0x0ce6,
'\u0a31', '\u0a32', 0x0cf1, 0x0d02,
'\u0a34', '\u0a35', 0x0d41, 0x0d46,
'\u0a37', '\u0a38', 0x0d4d, 0x0d57,
'\u0a3a', '\u0a3e', 0x0d62, 0x0d66,
'\u0a41', '\u0a59', 0x0dca, 0x0dcf,
'\u0a5d', '\u0a5e', 0x0dd2, 0x0dd8,
'\u0a5f', '\u0a66', 0x0e31, 0x0e32,
'\u0a70', '\u0a72', 0x0e34, 0x0e40,
'\u0a75', '\u0a83', 0x0e47, 0x0e4f,
'\u0a84', '\u0a85', 0x0eb1, 0x0eb2,
'\u0a8c', '\u0a8d', 0x0eb4, 0x0ebd,
'\u0a8e', '\u0a8f', 0x0ec8, 0x0ed0,
'\u0a92', '\u0a93', 0x0f18, 0x0f1a,
'\u0aa9', '\u0aaa', 0x0f35, 0x0f36,
'\u0ab1', '\u0ab2', 0x0f37, 0x0f38,
'\u0ab4', '\u0ab5', 0x0f39, 0x0f3e,
'\u0aba', '\u0abd', 0x0f71, 0x0f7f,
'\u0ac1', '\u0ac9', 0x0f80, 0x0f85,
'\u0aca', '\u0acb', 0x0f86, 0x0f88,
'\u0acd', '\u0ad0', 0x0f90, 0x0fbe,
'\u0ad1', '\u0ae0', 0x0fc6, 0x0fc7,
'\u0ae1', '\u0ae6', 0x102d, 0x1031,
'\u0af0', '\u0b02', 0x1032, 0x1038,
'\u0b04', '\u0b05', 0x1039, 0x103b,
'\u0b0d', '\u0b0f', 0x103d, 0x103f,
'\u0b11', '\u0b13', 0x1058, 0x105a,
'\u0b29', '\u0b2a', 0x105e, 0x1061,
'\u0b31', '\u0b32', 0x1071, 0x1075,
'\u0b34', '\u0b36', 0x1082, 0x1083,
'\u0b3a', '\u0b3d', 0x1085, 0x1087,
'\u0b3f', '\u0b40', 0x108d, 0x108e,
'\u0b41', '\u0b47', 0x135f, 0x1360,
'\u0b49', '\u0b4b', 0x1390, 0x13a0,
'\u0b4d', '\u0b57', 0x1680, 0x1681,
'\u0b58', '\u0b5c', 0x169b, 0x16a0,
'\u0b5e', '\u0b5f', 0x1712, 0x1720,
'\u0b62', '\u0b66', 0x1732, 0x1735,
'\u0b71', '\u0b83', 0x1752, 0x1760,
'\u0b84', '\u0b85', 0x1772, 0x1780,
'\u0b8b', '\u0b8e', 0x17b7, 0x17be,
'\u0b91', '\u0b92', 0x17c6, 0x17c7,
'\u0b96', '\u0b99', 0x17c9, 0x17d4,
'\u0b9b', '\u0b9c', 0x17db, 0x17dc,
'\u0b9d', '\u0b9e', 0x17dd, 0x17e0,
'\u0ba0', '\u0ba3', 0x17f0, 0x1810,
'\u0ba5', '\u0ba8', 0x18a9, 0x18aa,
'\u0bab', '\u0bae', 0x1920, 0x1923,
'\u0bb6', '\u0bb7', 0x1927, 0x1929,
'\u0bba', '\u0bbe', 0x1932, 0x1933,
'\u0bc0', '\u0bc1', 0x1939, 0x1946,
'\u0bc3', '\u0bc6', 0x19de, 0x1a00,
'\u0bc9', '\u0bca', 0x1a17, 0x1a19,
'\u0bcd', '\u0bd7', 0x1b00, 0x1b04,
'\u0bd8', '\u0be7', 0x1b34, 0x1b35,
'\u0bf3', '\u0c01', 0x1b36, 0x1b3b,
'\u0c04', '\u0c05', 0x1b3c, 0x1b3d,
'\u0c0d', '\u0c0e', 0x1b42, 0x1b43,
'\u0c11', '\u0c12', 0x1b6b, 0x1b74,
'\u0c29', '\u0c2a', 0x1b80, 0x1b82,
'\u0c34', '\u0c35', 0x1ba2, 0x1ba6,
'\u0c3a', '\u0c41', 0x1ba8, 0x1baa,
'\u0c45', '\u0c60', 0x1c2c, 0x1c34,
'\u0c62', '\u0c66', 0x1c36, 0x1c3b,
'\u0c70', '\u0c82', 0x1dc0, 0x1e00,
'\u0c84', '\u0c85', 0x1fbd, 0x1fbe,
'\u0c8d', '\u0c8e', 0x1fbf, 0x1fc2,
'\u0c91', '\u0c92', 0x1fcd, 0x1fd0,
'\u0ca9', '\u0caa', 0x1fdd, 0x1fe0,
'\u0cb4', '\u0cb5', 0x1fed, 0x1ff2,
'\u0cba', '\u0cbe', 0x1ffd, 0x200e,
'\u0cbf', '\u0cc0', 0x2010, 0x2071,
'\u0cc5', '\u0cc7', 0x2074, 0x207f,
'\u0cc9', '\u0cca', 0x2080, 0x2090,
'\u0ccc', '\u0cd5', 0x20a0, 0x2102,
'\u0cd7', '\u0cde', 0x2103, 0x2107,
'\u0cdf', '\u0ce0', 0x2108, 0x210a,
'\u0ce2', '\u0ce6', 0x2114, 0x2115,
'\u0cf0', '\u0d02', 0x2116, 0x2119,
'\u0d04', '\u0d05', 0x211e, 0x2124,
'\u0d0d', '\u0d0e', 0x2125, 0x2126,
'\u0d11', '\u0d12', 0x2127, 0x2128,
'\u0d29', '\u0d2a', 0x2129, 0x212a,
'\u0d3a', '\u0d3e', 0x212e, 0x212f,
'\u0d41', '\u0d46', 0x213a, 0x213c,
'\u0d49', '\u0d4a', 0x2140, 0x2145,
'\u0d4d', '\u0d57', 0x214a, 0x214e,
'\u0d58', '\u0d60', 0x2153, 0x2160,
'\u0d62', '\u0d66', 0x2190, 0x2336,
'\u0d70', '\u0d82', 0x237b, 0x2395,
'\u0d84', '\u0d85', 0x2396, 0x249c,
'\u0d97', '\u0d9a', 0x24ea, 0x26ac,
'\u0db2', '\u0db3', 0x26ad, 0x2800,
'\u0dbc', '\u0dbd', 0x2900, 0x2c00,
'\u0dbe', '\u0dc0', 0x2ce5, 0x2d00,
'\u0dc7', '\u0dcf', 0x2de0, 0x3005,
'\u0dd2', '\u0dd8', 0x3008, 0x3021,
'\u0de0', '\u0df2', 0x302a, 0x3031,
'\u0df5', '\u0e01', 0x3036, 0x3038,
'\u0e31', '\u0e32', 0x303d, 0x3041,
'\u0e34', '\u0e40', 0x3099, 0x309d,
'\u0e47', '\u0e4f', 0x30a0, 0x30a1,
'\u0e5c', '\u0e81', 0x30fb, 0x30fc,
'\u0e83', '\u0e84', 0x31c0, 0x31f0,
'\u0e85', '\u0e87', 0x321d, 0x3220,
'\u0e89', '\u0e8a', 0x3250, 0x3260,
'\u0e8b', '\u0e8d', 0x327c, 0x327f,
'\u0e8e', '\u0e94', 0x32b1, 0x32c0,
'\u0e98', '\u0e99', 0x32cc, 0x32d0,
'\u0ea0', '\u0ea1', 0x3377, 0x337b,
'\u0ea4', '\u0ea5', 0x33de, 0x33e0,
'\u0ea6', '\u0ea7', 0x33ff, 0x3400,
'\u0ea8', '\u0eaa', 0x4dc0, 0x4e00,
'\u0eac', '\u0ead', 0xa490, 0xa500,
'\u0eb1', '\u0eb2', 0xa60d, 0xa610,
'\u0eb4', '\u0ebd', 0xa66f, 0xa680,
'\u0ebe', '\u0ec0', 0xa700, 0xa722,
'\u0ec5', '\u0ec6', 0xa788, 0xa789,
'\u0ec7', '\u0ed0', 0xa802, 0xa803,
'\u0eda', '\u0edc', 0xa806, 0xa807,
'\u0ede', '\u0f00', 0xa80b, 0xa80c,
'\u0f18', '\u0f1a', 0xa825, 0xa827,
'\u0f35', '\u0f36', 0xa828, 0xa840,
'\u0f37', '\u0f38', 0xa874, 0xa880,
'\u0f39', '\u0f3e', 0xa8c4, 0xa8ce,
'\u0f48', '\u0f49', 0xa926, 0xa92e,
'\u0f6b', '\u0f7f', 0xa947, 0xa952,
'\u0f80', '\u0f85', 0xaa29, 0xaa2f,
'\u0f86', '\u0f88', 0xaa31, 0xaa33,
'\u0f8c', '\u0fbe', 0xaa35, 0xaa40,
'\u0fc6', '\u0fc7', 0xaa43, 0xaa44,
'\u0fcd', '\u0fcf', 0xaa4c, 0xaa4d,
'\u0fd0', '\u1000', 0xfb1e, 0xfb1f,
'\u1022', '\u1023', 0xfb29, 0xfb2a,
'\u1028', '\u1029', 0xfd3e, 0xfd50,
'\u102b', '\u102c', 0xfdfd, 0xfe70,
'\u102d', '\u1031', 0xfeff, 0xff21,
'\u1032', '\u1038', 0xff3b, 0xff41,
'\u1039', '\u1040', 0xff5b, 0xff66,
'\u1058', '\u10a0', 0xffe0, 0x10000,
'\u10c6', '\u10d0', 0x10101, 0x10102,
'\u10f7', '\u10fb', 0x10140, 0x101d0,
'\u10fc', '\u1100', 0x101fd, 0x10280,
'\u115a', '\u115f', 0x1091f, 0x10920,
'\u11a3', '\u11a8', 0x10a01, 0x10a10,
'\u11fa', '\u1200', 0x10a38, 0x10a40,
'\u1207', '\u1208', 0x1d167, 0x1d16a,
'\u1247', '\u1248', 0x1d173, 0x1d183,
'\u1249', '\u124a', 0x1d185, 0x1d18c,
'\u124e', '\u1250', 0x1d1aa, 0x1d1ae,
'\u1257', '\u1258', 0x1d200, 0x1d360,
'\u1259', '\u125a', 0x1d7ce, 0x20000,
'\u125e', '\u1260', 0xe0001, 0xf0000,
'\u1287', '\u1288', 0x10fffe, 0x10ffff // sentinel
'\u1289', '\u128a',
'\u128e', '\u1290',
'\u12af', '\u12b0',
'\u12b1', '\u12b2',
'\u12b6', '\u12b8',
'\u12bf', '\u12c0',
'\u12c1', '\u12c2',
'\u12c6', '\u12c8',
'\u12cf', '\u12d0',
'\u12d7', '\u12d8',
'\u12ef', '\u12f0',
'\u130f', '\u1310',
'\u1311', '\u1312',
'\u1316', '\u1318',
'\u131f', '\u1320',
'\u1347', '\u1348',
'\u135b', '\u1361',
'\u137d', '\u13a0',
'\u13f5', '\u1401',
'\u1677', '\u1681',
'\u169b', '\u16a0',
'\u16f1', '\u1780',
'\u17b7', '\u17be',
'\u17c6', '\u17c7',
'\u17c9', '\u17d4',
'\u17db', '\u17dc',
'\u17dd', '\u17e0',
'\u17ea', '\u1810',
'\u181a', '\u1820',
'\u1878', '\u1880',
'\u18a9', '\u1e00',
'\u1e9c', '\u1ea0',
'\u1efa', '\u1f00',
'\u1f16', '\u1f18',
'\u1f1e', '\u1f20',
'\u1f46', '\u1f48',
'\u1f4e', '\u1f50',
'\u1f58', '\u1f59',
'\u1f5a', '\u1f5b',
'\u1f5c', '\u1f5d',
'\u1f5e', '\u1f5f',
'\u1f7e', '\u1f80',
'\u1fb5', '\u1fb6',
'\u1fbd', '\u1fbe',
'\u1fbf', '\u1fc2',
'\u1fc5', '\u1fc6',
'\u1fcd', '\u1fd0',
'\u1fd4', '\u1fd6',
'\u1fdc', '\u1fe0',
'\u1fed', '\u1ff2',
'\u1ff5', '\u1ff6',
'\u1ffd', '\u200e',
'\u2010', '\u207f',
'\u2080', '\u2102',
'\u2103', '\u2107',
'\u2108', '\u210a',
'\u2114', '\u2115',
'\u2116', '\u2119',
'\u211e', '\u2124',
'\u2125', '\u2126',
'\u2127', '\u2128',
'\u2129', '\u212a',
'\u212e', '\u212f',
'\u2132', '\u2133',
'\u213a', '\u2160',
'\u2184', '\u2336',
'\u237b', '\u2395',
'\u2396', '\u249c',
'\u24ea', '\u3005',
'\u3008', '\u3021',
'\u302a', '\u3031',
'\u3036', '\u3038',
'\u303b', '\u3041',
'\u3095', '\u309d',
'\u309f', '\u30a1',
'\u30fb', '\u30fc',
'\u30ff', '\u3105',
'\u312d', '\u3131',
'\u318f', '\u3190',
'\u31b8', '\u3200',
'\u321d', '\u3220',
'\u3244', '\u3260',
'\u327c', '\u327f',
'\u32b1', '\u32c0',
'\u32cc', '\u32d0',
'\u32ff', '\u3300',
'\u3377', '\u337b',
'\u33de', '\u33e0',
'\u33ff', '\u3400',
'\u4db6', '\u4e00',
'\u9fa6', '\ua000',
'\ua48d', '\uac00',
'\ud7a4', '\uf900',
'\ufa2e', '\ufb00',
'\ufb07', '\ufb13',
'\ufb18', '\ufb1d',
'\ufb1e', '\ufb1f',
'\ufb29', '\ufb2a',
'\ufb37', '\ufb38',
'\ufb3d', '\ufb3e',
'\ufb3f', '\ufb40',
'\ufb42', '\ufb43',
'\ufb45', '\ufb46',
'\ufbb2', '\ufbd3',
'\ufd3e', '\ufd50',
'\ufd90', '\ufd92',
'\ufdc8', '\ufdf0',
'\ufdfc', '\ufe70',
'\ufe73', '\ufe74',
'\ufe75', '\ufe76',
'\ufefd', '\uff21',
'\uff3b', '\uff41',
'\uff5b', '\uff66',
'\uffbf', '\uffc2',
'\uffc8', '\uffca',
'\uffd0', '\uffd2',
'\uffd8', '\uffda',
'\uffdd', '\uffff' // last entry is sentinel, actually never checked
}; };
// use a binary search with a cache // use a binary search with a cache
private static int stCache = 0; private transient volatile int stCache = 0;
// warning, synchronize access to this as it modifies state private boolean isStrongDirectional(char c) {
private static boolean isStrongDirectional(char c) { int cachedIndex = stCache;
if (c < strongTable[stCache]) { if (c < strongTable[cachedIndex]) {
stCache = search(c, strongTable, 0, stCache); cachedIndex = search(c, strongTable, 0, cachedIndex);
} else if (c >= strongTable[stCache + 1]) { } else if (c >= strongTable[cachedIndex + 1]) {
stCache = search(c, strongTable, stCache + 1, strongTable.length - stCache - 1); cachedIndex = search(c, strongTable, cachedIndex + 1,
strongTable.length - cachedIndex - 1);
} }
return (stCache & 0x1) == 1; boolean val = (cachedIndex & 0x1) == 1;
stCache = cachedIndex;
return val;
} }
static private int getKeyFromMask(int mask) { private static int getKeyFromMask(int mask) {
int key = 0; int key = 0;
while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) { while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
++key; ++key;
...@@ -644,11 +812,26 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -644,11 +812,26 @@ public final class NumericShaper implements java.io.Serializable {
* @return a non-contextual numeric shaper * @return a non-contextual numeric shaper
* @throws IllegalArgumentException if the range is not a single range * @throws IllegalArgumentException if the range is not a single range
*/ */
static public NumericShaper getShaper(int singleRange) { public static NumericShaper getShaper(int singleRange) {
int key = getKeyFromMask(singleRange); int key = getKeyFromMask(singleRange);
return new NumericShaper(key, singleRange); return new NumericShaper(key, singleRange);
} }
/**
 * Returns a non-contextual shaper for a single Unicode range. All
 * Latin-1 (EUROPEAN) digits are converted to the corresponding
 * decimal digits of that range.
 *
 * @param singleRange the Unicode range, given as a {@link
 *        NumericShaper.Range} constant
 * @return a non-contextual {@code NumericShaper}
 * @throws NullPointerException if {@code singleRange} is {@code null}
 * @since 1.7
 */
public static NumericShaper getShaper(Range singleRange) {
    // EnumSet.of rejects null, which yields the documented NPE.
    Set<Range> onlyRange = EnumSet.of(singleRange);
    return new NumericShaper(singleRange, onlyRange);
}
/** /**
* Returns a contextual shaper for the provided unicode range(s). * Returns a contextual shaper for the provided unicode range(s).
* Latin-1 (EUROPEAN) digits are converted to the decimal digits * Latin-1 (EUROPEAN) digits are converted to the decimal digits
...@@ -663,11 +846,33 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -663,11 +846,33 @@ public final class NumericShaper implements java.io.Serializable {
* @param ranges the specified Unicode ranges * @param ranges the specified Unicode ranges
* @return a shaper for the specified ranges * @return a shaper for the specified ranges
*/ */
static public NumericShaper getContextualShaper(int ranges) { public static NumericShaper getContextualShaper(int ranges) {
ranges |= CONTEXTUAL_MASK; ranges |= CONTEXTUAL_MASK;
return new NumericShaper(EUROPEAN_KEY, ranges); return new NumericShaper(EUROPEAN_KEY, ranges);
} }
/**
 * Returns a contextual shaper for the given Unicode range(s).
 * Latin-1 (EUROPEAN) digits are converted to the decimal digits of
 * the range of the preceding text, provided that range is one of
 * the given ranges.
 *
 * <p>The starting context is EUROPEAN: if EUROPEAN digits occur
 * before any strong directional text in the string, the context is
 * presumed to be EUROPEAN and those digits are not shaped.
 *
 * @param ranges the specified Unicode ranges
 * @return a contextual shaper for the specified ranges
 * @throws NullPointerException if {@code ranges} is {@code null}.
 * @since 1.7
 */
public static NumericShaper getContextualShaper(Set<Range> ranges) {
    final NumericShaper contextual = new NumericShaper(Range.EUROPEAN, ranges);
    // Mark the shaper as contextual.
    contextual.mask = CONTEXTUAL_MASK;
    return contextual;
}
/** /**
* Returns a contextual shaper for the provided unicode range(s). * Returns a contextual shaper for the provided unicode range(s).
* Latin-1 (EUROPEAN) digits will be converted to the decimal digits * Latin-1 (EUROPEAN) digits will be converted to the decimal digits
...@@ -683,12 +888,37 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -683,12 +888,37 @@ public final class NumericShaper implements java.io.Serializable {
* @throws IllegalArgumentException if the specified * @throws IllegalArgumentException if the specified
* <code>defaultContext</code> is not a single valid range. * <code>defaultContext</code> is not a single valid range.
*/ */
static public NumericShaper getContextualShaper(int ranges, int defaultContext) { public static NumericShaper getContextualShaper(int ranges, int defaultContext) {
int key = getKeyFromMask(defaultContext); int key = getKeyFromMask(defaultContext);
ranges |= CONTEXTUAL_MASK; ranges |= CONTEXTUAL_MASK;
return new NumericShaper(key, ranges); return new NumericShaper(key, ranges);
} }
/**
 * Creates a contextual shaper for the given Unicode range(s) that
 * starts out in {@code defaultContext}. Latin-1 (EUROPEAN) digits
 * are converted to the decimal digits of the range of the preceding
 * text, provided that range is one of the given ranges.
 *
 * @param ranges the specified Unicode ranges
 * @param defaultContext the starting context, such as
 *        {@code NumericShaper.Range.EUROPEAN}
 * @return a contextual shaper for the specified Unicode ranges.
 * @throws NullPointerException
 *         if {@code ranges} or {@code defaultContext} is {@code null}
 * @since 1.7
 */
public static NumericShaper getContextualShaper(Set<Range> ranges,
                                                Range defaultContext) {
    if (defaultContext != null) {
        // A null ranges argument is rejected inside the constructor.
        NumericShaper contextual = new NumericShaper(defaultContext, ranges);
        // Enum-based shapers keep only the contextual flag in the mask.
        contextual.mask = CONTEXTUAL_MASK;
        return contextual;
    }
    throw new NullPointerException();
}
/** /**
* Private constructor. * Private constructor.
*/ */
...@@ -697,6 +927,11 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -697,6 +927,11 @@ public final class NumericShaper implements java.io.Serializable {
this.mask = mask; this.mask = mask;
} }
private NumericShaper(Range defaultContext, Set<Range> ranges) {
this.shapingRange = defaultContext;
this.rangeSet = EnumSet.copyOf(ranges); // throws NPE if ranges is null.
}
/** /**
* Converts the digits in the text that occur between start and * Converts the digits in the text that occur between start and
* start + count. * start + count.
...@@ -710,19 +945,13 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -710,19 +945,13 @@ public final class NumericShaper implements java.io.Serializable {
* @throws NullPointerException if text is null * @throws NullPointerException if text is null
*/ */
public void shape(char[] text, int start, int count) { public void shape(char[] text, int start, int count) {
if (text == null) { checkParams(text, start, count);
throw new NullPointerException("text is null");
}
if ((start < 0)
|| (start > text.length)
|| ((start + count) < 0)
|| ((start + count) > text.length)) {
throw new IndexOutOfBoundsException(
"bad start or count for text of length " + text.length);
}
if (isContextual()) { if (isContextual()) {
shapeContextually(text, start, count, key); if (rangeSet == null) {
shapeContextually(text, start, count, key);
} else {
shapeContextually(text, start, count, shapingRange);
}
} else { } else {
shapeNonContextually(text, start, count); shapeNonContextually(text, start, count);
} }
...@@ -747,6 +976,60 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -747,6 +976,60 @@ public final class NumericShaper implements java.io.Serializable {
* range. * range.
*/ */
public void shape(char[] text, int start, int count, int context) { public void shape(char[] text, int start, int count, int context) {
checkParams(text, start, count);
if (isContextual()) {
int ctxKey = getKeyFromMask(context);
if (rangeSet == null) {
shapeContextually(text, start, count, ctxKey);
} else {
shapeContextually(text, start, count, Range.values()[ctxKey]);
}
} else {
shapeNonContextually(text, start, count);
}
}
/**
 * Converts the digits in the text that occur between {@code
 * start} and {@code start + count}, using the provided {@code
 * context}. {@code context} is ignored if the shaper is not a
 * contextual shaper.
 *
 * @param text  a {@code char} array
 * @param start the index into {@code text} to start converting
 * @param count the number of {@code char}s in {@code text}
 *              to convert
 * @param context the context to which to convert the characters,
 *                such as {@code NumericShaper.Range.EUROPEAN}
 * @throws IndexOutOfBoundsException
 *         if {@code start} or {@code start + count} is out of bounds
 * @throws NullPointerException
 *         if {@code text} or {@code context} is null
 * @since 1.7
 */
public void shape(char[] text, int start, int count, Range context) {
    checkParams(text, start, count);
    if (context == null) {
        throw new NullPointerException("context is null");
    }
    if (!isContextual()) {
        shapeNonContextually(text, start, count);
        return;
    }
    if (rangeSet != null) {
        // Enum-based shaper: the enum context can be used directly.
        shapeContextually(text, start, count, context);
        return;
    }

    // Bit mask-based shaper (rangeSet == null). The enum overload of
    // shapeContextually consults rangeSet whenever the context changes,
    // so it must not be called here: it would throw a
    // NullPointerException. Translate the context to a bit-mask key and
    // always use the int overload instead.
    int ctxKey = Range.toRangeIndex(context);
    if (ctxKey < 0 && shapingRange != null) {
        // The requested context has no bit-mask counterpart; fall back
        // to this shaper's own shaping range, as the original code did.
        ctxKey = Range.toRangeIndex(shapingRange);
    }
    if (ctxKey < 0) {
        // Nothing usable: start in the EUROPEAN context, which is what
        // the enum overload substitutes for a null context.
        ctxKey = EUROPEAN_KEY;
    }
    shapeContextually(text, start, count, ctxKey);
}
private void checkParams(char[] text, int start, int count) {
if (text == null) { if (text == null) {
throw new NullPointerException("text is null"); throw new NullPointerException("text is null");
} }
...@@ -757,13 +1040,6 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -757,13 +1040,6 @@ public final class NumericShaper implements java.io.Serializable {
throw new IndexOutOfBoundsException( throw new IndexOutOfBoundsException(
"bad start or count for text of length " + text.length); "bad start or count for text of length " + text.length);
} }
if (isContextual()) {
int ctxKey = getKeyFromMask(context);
shapeContextually(text, start, count, ctxKey);
} else {
shapeNonContextually(text, start, count);
}
} }
/** /**
...@@ -785,18 +1061,45 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -785,18 +1061,45 @@ public final class NumericShaper implements java.io.Serializable {
* <blockquote> * <blockquote>
* <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code> * <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
* </blockquote> * </blockquote>
*
* <p>Note that this method supports only the bit mask-based
* ranges. Call {@link #getRangeSet()} for the enum-based ranges.
*
* @return the values for all the ranges to be shaped. * @return the values for all the ranges to be shaped.
*/ */
public int getRanges() { public int getRanges() {
return mask & ~CONTEXTUAL_MASK; return mask & ~CONTEXTUAL_MASK;
} }
/**
 * Returns a {@code Set} of all the Unicode ranges that this
 * {@code NumericShaper} will shape. An enum-based shaper returns a
 * copy of its range set; a bit mask-based shaper returns the set
 * derived from its mask.
 *
 * @return all the Unicode ranges to be shaped.
 * @since 1.7
 */
public Set<Range> getRangeSet() {
    if (rangeSet == null) {
        // Bit mask-based shaper: translate the mask into enum constants.
        return Range.maskToRangeSet(mask);
    }
    // Hand out a copy so callers cannot modify the internal set.
    return EnumSet.copyOf(rangeSet);
}
/** /**
* Perform non-contextual shaping. * Perform non-contextual shaping.
*/ */
private void shapeNonContextually(char[] text, int start, int count) { private void shapeNonContextually(char[] text, int start, int count) {
int base = bases[key]; int base;
char minDigit = key == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero char minDigit = '0';
if (shapingRange != null) {
base = shapingRange.getDigitBase();
minDigit += shapingRange.getNumericBase();
} else {
base = bases[key];
if (key == ETHIOPIC_KEY) {
minDigit++; // Ethiopic doesn't use decimal zero
}
}
for (int i = start, e = start + count; i < e; ++i) { for (int i = start, e = start + count; i < e; ++i) {
char c = text[i]; char c = text[i];
if (c >= minDigit && c <= '\u0039') { if (c >= minDigit && c <= '\u0039') {
...@@ -807,7 +1110,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -807,7 +1110,7 @@ public final class NumericShaper implements java.io.Serializable {
/** /**
* Perform contextual shaping. * Perform contextual shaping.
* Synchronized to protect caches used in getContextKey and isStrongDirectional. * Synchronized to protect caches used in getContextKey.
*/ */
private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) { private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {
...@@ -818,29 +1121,64 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -818,29 +1121,64 @@ public final class NumericShaper implements java.io.Serializable {
int lastkey = ctxKey; int lastkey = ctxKey;
int base = bases[ctxKey]; int base = bases[ctxKey];
char minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero char minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
for (int i = start, e = start + count; i < e; ++i) { synchronized (NumericShaper.class) {
for (int i = start, e = start + count; i < e; ++i) {
char c = text[i];
if (c >= minDigit && c <= '\u0039') {
text[i] = (char)(c + base);
}
if (isStrongDirectional(c)) {
int newkey = getContextKey(c);
if (newkey != lastkey) {
lastkey = newkey;
ctxKey = newkey;
if (((mask & EASTERN_ARABIC) != 0) && (ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY)) {
ctxKey = EASTERN_ARABIC_KEY;
} else if ((mask & (1<<ctxKey)) == 0) {
ctxKey = EUROPEAN_KEY;
}
base = bases[ctxKey];
minDigit = ctxKey == ETHIOPIC_KEY ? '1' : '0'; // Ethiopic doesn't use decimal zero
}
}
}
}
}
private void shapeContextually(char[] text, int start, int count, Range ctxKey) {
if (ctxKey == null) {
ctxKey = Range.EUROPEAN;
}
Range lastKey = ctxKey;
int base = ctxKey.getDigitBase();
char minDigit = (char)('0' + ctxKey.getNumericBase());
for (int i = start, end = start + count; i < end; ++i) {
char c = text[i]; char c = text[i];
if (c >= minDigit && c <= '\u0039') { if (c >= minDigit && c <= '9') {
text[i] = (char)(c + base); text[i] = (char)(c + base);
continue;
} }
if (isStrongDirectional(c)) { if (isStrongDirectional(c)) {
int newkey = getContextKey(c); Range newKey = rangeForCodePoint(c);
if (newkey != lastkey) { if (newKey != lastKey) {
lastkey = newkey; lastKey = newKey;
ctxKey = newKey;
ctxKey = newkey; if (rangeSet.contains(Range.EUROPEAN)
if (((mask & EASTERN_ARABIC) != 0) && (ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY)) { && (ctxKey == Range.ARABIC || ctxKey == Range.EASTERN_ARABIC)) {
ctxKey = EASTERN_ARABIC_KEY; ctxKey = Range.EASTERN_ARABIC;
} else if ((mask & (1<<ctxKey)) == 0) { } else if (!rangeSet.contains(ctxKey)) {
ctxKey = EUROPEAN_KEY; ctxKey = Range.EUROPEAN;
} }
base = bases[ctxKey]; base = ctxKey.getDigitBase();
minDigit = (char)('0' + ctxKey.getNumericBase());
minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
} }
} }
} }
...@@ -852,12 +1190,28 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -852,12 +1190,28 @@ public final class NumericShaper implements java.io.Serializable {
* @see java.lang.Object#hashCode * @see java.lang.Object#hashCode
*/ */
public int hashCode() { public int hashCode() {
return mask; int hash = mask;
if (rangeSet != null) {
// Use the CONTEXTUAL_MASK bit only for the enum-based
// NumericShaper. A deserialized NumericShaper might have
// bit masks.
hash &= CONTEXTUAL_MASK;
hash ^= rangeSet.hashCode();
}
return hash;
} }
/** /**
* Returns true if the specified object is an instance of * Returns {@code true} if the specified object is an instance of
* <code>NumericShaper</code> and shapes identically to this one. * <code>NumericShaper</code> and shapes identically to this one,
* regardless of the range representations, the bit mask or the
* enum. For example, the following code produces {@code "true"}.
* <blockquote><pre>
* NumericShaper ns1 = NumericShaper.getShaper(NumericShaper.ARABIC);
* NumericShaper ns2 = NumericShaper.getShaper(NumericShaper.Range.ARABIC);
* System.out.println(ns1.equals(ns2));
* </pre></blockquote>
*
* @param o the specified object to compare to this * @param o the specified object to compare to this
* <code>NumericShaper</code> * <code>NumericShaper</code>
* @return <code>true</code> if <code>o</code> is an instance * @return <code>true</code> if <code>o</code> is an instance
...@@ -869,6 +1223,22 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -869,6 +1223,22 @@ public final class NumericShaper implements java.io.Serializable {
if (o != null) { if (o != null) {
try { try {
NumericShaper rhs = (NumericShaper)o; NumericShaper rhs = (NumericShaper)o;
if (rangeSet != null) {
if (rhs.rangeSet != null) {
return isContextual() == rhs.isContextual()
&& rangeSet.equals(rhs.rangeSet)
&& shapingRange == rhs.shapingRange;
}
return isContextual() == rhs.isContextual()
&& rangeSet.equals(Range.maskToRangeSet(rhs.mask))
&& shapingRange == Range.indexToRange(rhs.key);
} else if (rhs.rangeSet != null) {
Set<Range> rset = Range.maskToRangeSet(mask);
Range srange = Range.indexToRange(key);
return isContextual() == rhs.isContextual()
&& rset.equals(rhs.rangeSet)
&& srange == rhs.shapingRange;
}
return rhs.mask == mask && rhs.key == key; return rhs.mask == mask && rhs.key == key;
} }
catch (ClassCastException e) { catch (ClassCastException e) {
...@@ -885,23 +1255,29 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -885,23 +1255,29 @@ public final class NumericShaper implements java.io.Serializable {
public String toString() { public String toString() {
StringBuilder buf = new StringBuilder(super.toString()); StringBuilder buf = new StringBuilder(super.toString());
buf.append("[contextual:" + isContextual()); buf.append("[contextual:").append(isContextual());
String[] keyNames = null;
if (isContextual()) { if (isContextual()) {
buf.append(", context:" + keyNames[key]); buf.append(", context:");
buf.append(shapingRange == null ? Range.values()[key] : shapingRange);
} }
buf.append(", range(s): "); if (rangeSet == null) {
boolean first = true; buf.append(", range(s): ");
for (int i = 0; i < NUM_KEYS; ++i) { boolean first = true;
if ((mask & (1 << i)) != 0) { for (int i = 0; i < NUM_KEYS; ++i) {
if (first) { if ((mask & (1 << i)) != 0) {
first = false; if (first) {
} else { first = false;
buf.append(", "); } else {
buf.append(", ");
}
buf.append(Range.values()[i]);
} }
buf.append(keyNames[i]);
} }
} else {
buf.append(", range set: ").append(rangeSet);
} }
buf.append(']'); buf.append(']');
...@@ -940,7 +1316,6 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -940,7 +1316,6 @@ public final class NumericShaper implements java.io.Serializable {
} }
if (value >= 1 << 1) { if (value >= 1 << 1) {
value >>= 1;
bit += 1; bit += 1;
} }
...@@ -950,7 +1325,7 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -950,7 +1325,7 @@ public final class NumericShaper implements java.io.Serializable {
/** /**
* fast binary search over subrange of array. * fast binary search over subrange of array.
*/ */
private static int search(char value, char[] array, int start, int length) private static int search(int value, int[] array, int start, int length)
{ {
int power = 1 << getHighBit(length); int power = 1 << getHighBit(length);
int extra = length - power; int extra = length - power;
...@@ -971,4 +1346,27 @@ public final class NumericShaper implements java.io.Serializable { ...@@ -971,4 +1346,27 @@ public final class NumericShaper implements java.io.Serializable {
return index; return index;
} }
/**
 * Writes this object to {@code stream} after folding any
 * {@code NumericShaper.Range} enum-based state into the bit
 * mask-based fields. Enum constants without a bit mask-based
 * counterpart are left out of the conversion.
 *
 * @param stream the output stream to write to
 * @throws IOException if an I/O error occurs while writing to {@code stream}
 * @since 1.7
 */
private void writeObject(ObjectOutputStream stream) throws IOException {
    // A negative index means the shaping range has no bit-mask key,
    // in which case the existing key is kept.
    final int legacyKey =
        (shapingRange == null) ? -1 : Range.toRangeIndex(shapingRange);
    if (legacyKey >= 0) {
        key = legacyKey;
    }

    if (rangeSet != null) {
        // Merge the enum ranges into the mask; bits already set are kept.
        mask |= Range.toRangeMask(rangeSet);
    }

    stream.defaultWriteObject();
}
} }
/*
* Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* @test
* @bug 6842557
* @summary confirm that an instance which is created with new Enum ranges is
* equivalent to another instance which is created with equivalent traditional
* ranges or the same Enum ranges.
*/
import java.awt.font.NumericShaper;
import java.util.EnumSet;
import static java.awt.font.NumericShaper.*;
/**
 * Checks that a shaper created from enum ranges equals a shaper created
 * from the equivalent bit-mask ranges (in both directions) and from the
 * same enum ranges, and that shapers with different default contexts are
 * not equal.
 */
public class EqualsTest {

    public static void main(String[] args) {
        final EnumSet<Range> arabicAndTamil = EnumSet.of(Range.ARABIC, Range.TAMIL);

        // ns1: bit mask-based; ns2/ns3: enum-based with the same ranges and
        // context; ns4: same ranges but a different default context.
        final NumericShaper ns1 = getContextualShaper(ARABIC | TAMIL, TAMIL);
        final NumericShaper ns2 = getContextualShaper(arabicAndTamil, Range.TAMIL);
        final NumericShaper ns3 = getContextualShaper(arabicAndTamil, Range.TAMIL);
        final NumericShaper ns4 = getContextualShaper(arabicAndTamil, Range.ARABIC);

        if (!ns1.equals(ns2)) {
            fail("ns1 != ns2", "ns1", ns1, "ns2", ns2);
        }
        if (!ns2.equals(ns1)) {
            fail("ns2 != ns1", "ns1", ns1, "ns2", ns2);
        }
        if (!ns2.equals(ns3)) {
            fail("ns2 != ns3", "ns2", ns2, "ns3", ns3);
        }
        if (ns1.equals(ns4)) {
            fail("ns1 == ns4", "ns1", ns1, "ns4", ns4);
        }
        if (ns2.equals(ns4)) {
            fail("ns2 == ns4", "ns2", ns2, "ns4", ns4);
        }
    }

    /** Throws the failure exception, naming the relation and both shapers. */
    private static void fail(String relation,
                             String firstName, NumericShaper first,
                             String secondName, NumericShaper second) {
        throw new RuntimeException(relation + ": "
                                   + firstName + "=" + first + ", "
                                   + secondName + "=" + second);
    }
}
/*
* Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* @test
* @bug 6843181
* @summary Confirm that NumericShaper is thread-safe.
* @run main/timeout=300/othervm MTTest
*/
import java.awt.font.NumericShaper;
import java.util.Arrays;
import java.util.EnumSet;
import static java.awt.font.NumericShaper.*;
/**
 * Multi-threaded stress test: several threads share the same
 * {@code NumericShaper} instances and repeatedly shape the same text,
 * comparing each result with the output computed single-threaded up front.
 *
 * <p>An optional first argument gives the length of the run in seconds;
 * without it the test runs for {@value #DEFAULT_DURATION_SECONDS} seconds.
 */
public class MTTest {

    /** Length of the stress run, in seconds, when no argument is supplied. */
    private static final int DEFAULT_DURATION_SECONDS = 180;

    // Cleared to ask every worker to stop: by main when the run is over,
    // or by a worker that detected a failure.
    static volatile boolean runrun = true;
    // Set by a worker that saw an exception or an unexpected result.
    static volatile boolean err = false;

    final static String text = "-123 (English) 456.00 (Arabic) \u0641\u0642\u0643 -789 (Thai) \u0e01\u0e33 01.23";

    static char[] t1, t2;
    static NumericShaper ns1, ns2, ns3, ns4;

    public static void main(String[] args) {
        int durationSeconds = DEFAULT_DURATION_SECONDS;
        if (args.length > 0) {
            durationSeconds = Integer.parseInt(args[0]);
        }

        System.out.println(" original: " + text);

        // Expected results are produced once, single-threaded, by the
        // enum-based shapers.
        ns1 = getContextualShaper(EnumSet.of(Range.ARABIC), Range.ARABIC);
        t1 = text.toCharArray();
        ns1.shape(t1, 0, t1.length);
        System.out.println("expected t1: " + String.valueOf(t1));

        ns2 = getContextualShaper(EnumSet.of(Range.THAI), Range.THAI);
        t2 = text.toCharArray();
        ns2.shape(t2, 0, t2.length);
        System.out.println("expected t2: " + String.valueOf(t2));

        // The bit mask-based equivalents are checked against the same
        // expected text.
        ns3 = getContextualShaper(ARABIC, ARABIC);
        ns4 = getContextualShaper(THAI, THAI);

        // Each shaper is shared by two threads.
        Thread[] workers = {
            new Thread(new Work(ns1, t1)),
            new Thread(new Work(ns2, t2)),
            new Thread(new Work(ns1, t1)),
            new Thread(new Work(ns2, t2)),
            new Thread(new Work(ns3, t1)),
            new Thread(new Work(ns4, t2)),
            new Thread(new Work(ns3, t1)),
            new Thread(new Work(ns4, t2)),
        };
        for (Thread worker : workers) {
            worker.start();
        }

        boolean interrupted = false;
        try {
            for (int i = 0; runrun && i < durationSeconds; i++) {
                Thread.sleep(1000); // 1 second
            }
        } catch (InterruptedException e) {
            interrupted = true;
        } finally {
            // Always tell the workers to stop, even when the wait was
            // interrupted; otherwise they would spin forever and keep
            // the JVM alive.
            runrun = false;
        }

        for (Thread worker : workers) {
            try {
                worker.join();
            } catch (InterruptedException e) {
                // Keep joining the remaining workers; they are already
                // stopping because runrun is false.
                interrupted = true;
            }
        }
        if (interrupted) {
            // Restore the interrupt status for whoever called main.
            Thread.currentThread().interrupt();
        }

        if (err) {
            throw new RuntimeException("Thread-safe test failed.");
        }
    }

    /** Shapes a fresh copy of {@link #text} in a loop until told to stop. */
    private static class Work implements Runnable {
        final NumericShaper ns;
        final char[] expectedText;

        Work(NumericShaper ns, char[] expectedText) {
            this.ns = ns;
            this.expectedText = expectedText;
        }

        public void run() {
            int count = 0;
            while (runrun) {
                char[] t = text.toCharArray();
                try {
                    count++;
                    ns.shape(t, 0, t.length);
                } catch (Exception e) {
                    System.err.println("Error: Unexpected exception: " + e);
                    runrun = false;
                    err = true;
                    return;
                }
                if (!Arrays.equals(t, expectedText)) {
                    System.err.println("Error: shape() returned unexpected value: ");
                    System.err.println("count = " + count);
                    System.err.println(" expected: " + String.valueOf(expectedText));
                    System.err.println(" got: " + String.valueOf(t));
                    runrun = false;
                    err = true;
                    return;
                }
            }
        }
    }
}
/*
* Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* @test
* @bug 6842557
 * @summary confirm that shaping works as expected, mainly for the new
 * characters which were added in Unicode 5.
*/
import java.awt.font.NumericShaper;
import java.util.EnumSet;
import static java.awt.font.NumericShaper.*;
/**
 * Shapes a few sample strings with both a bit mask-based and an enum-based
 * contextual shaper and compares each result with the expected text.
 */
public class ShapingTest {

    public static void main(String[] args) {
        NumericShaper traditionalShaper =
            getContextualShaper(ARABIC | TAMIL | ETHIOPIC, EUROPEAN);
        NumericShaper enumShaper =
            getContextualShaper(EnumSet.of(Range.ARABIC, Range.TAMIL, Range.ETHIOPIC),
                                Range.EUROPEAN);

        // Each entry is { input text, expected shaped text }.
        String[][] data = {
            // Arabic "October 10"
            {"\u0623\u0643\u062a\u0648\u0628\u0631 10",
             "\u0623\u0643\u062a\u0648\u0628\u0631 \u0661\u0660"},

            // Tamil "Year 2009". Pre-JDK7 left the zeros unshaped because
            // Tamil zero was not included in Unicode 4.0.0.
            {"\u0b86\u0ba3\u0bcd\u0b9f\u0bc1 2009",
             "\u0b86\u0ba3\u0bcd\u0b9f\u0bc1 \u0be8\u0be6\u0be6\u0bef"},

            // Ethiopic "Syllable<HA> 2009". The zeros stay as they are:
            // Ethiopic zero doesn't exist even in Unicode 5.1.0.
            {"\u1200 2009",
             "\u1200 \u136a00\u1371"},
        };

        boolean err = false;
        for (String[] sample : data) {
            String input = sample[0];
            String expected = sample[1];
            if (!check(traditionalShaper, "traditional range", input, expected)) {
                err = true;
            }
            if (!check(enumShaper, "new Enum range", input, expected)) {
                err = true;
            }
        }

        if (err) {
            throw new RuntimeException("shape() returned unexpected value.");
        }
    }

    /**
     * Shapes {@code input} with {@code shaper}, reports the outcome on
     * {@code System.err}, and returns whether it matched {@code expected}.
     */
    private static boolean check(NumericShaper shaper, String label,
                                 String input, String expected) {
        char[] text = input.toCharArray();
        shaper.shape(text, 0, text.length);
        String got = new String(text);
        boolean matched = expected.equals(got);

        System.err.println((matched ? "OK" : "Error") + " with " + label + ".");
        System.err.println(" text = " + input);
        System.err.println(" got = " + got);
        System.err.println(" expected = " + expected);
        return matched;
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册