From fb406a7c3211fc97290965784f33c1376b6a1b86 Mon Sep 17 00:00:00 2001 From: peytoia Date: Sat, 13 Jun 2009 06:43:54 +0900 Subject: [PATCH] 6850113: Bidi class needs to be updated to support Unicode 5.1 Reviewed-by: okutsu --- make/java/text/FILES_java.gmk | 4 + make/sun/font/FILES_c.gmk | 4 - make/sun/font/Makefile | 2 - make/sun/font/mapfile-vers | 2 - make/sun/font/mapfile-vers.openjdk | 2 - src/share/classes/java/text/Bidi.java | 375 +- src/share/classes/sun/text/bidi/BidiBase.java | 3444 +++++++++++++++++ src/share/classes/sun/text/bidi/BidiLine.java | 849 ++++ src/share/classes/sun/text/bidi/BidiRun.java | 124 + .../sun/text/normalizer/UCharacter.java | 4 +- src/share/native/sun/font/bidi/cmemory.h | 66 - src/share/native/sun/font/bidi/jbidi.c | 167 - src/share/native/sun/font/bidi/jbidi.h | 69 - src/share/native/sun/font/bidi/ubidi.c | 1433 ------- src/share/native/sun/font/bidi/ubidi.h | 913 ----- src/share/native/sun/font/bidi/ubidiimp.h | 246 -- src/share/native/sun/font/bidi/ubidiln.c | 996 ----- src/share/native/sun/font/bidi/uchardir.c | 652 ---- src/share/native/sun/font/bidi/uchardir.h | 100 - src/share/native/sun/font/bidi/utypes.h | 177 - src/share/native/sun/font/layout/LETypes.h | 3 +- test/java/text/Bidi/BidiBug.java | 6 +- test/java/text/Bidi/BidiConformance.java | 2334 +++++++++++ test/java/text/Bidi/BidiEmbeddingTest.java | 10 +- test/java/text/Bidi/Bug6850113.java | 60 + 25 files changed, 6854 insertions(+), 5188 deletions(-) create mode 100644 src/share/classes/sun/text/bidi/BidiBase.java create mode 100644 src/share/classes/sun/text/bidi/BidiLine.java create mode 100644 src/share/classes/sun/text/bidi/BidiRun.java delete mode 100644 src/share/native/sun/font/bidi/cmemory.h delete mode 100644 src/share/native/sun/font/bidi/jbidi.c delete mode 100644 src/share/native/sun/font/bidi/jbidi.h delete mode 100644 src/share/native/sun/font/bidi/ubidi.c delete mode 100644 src/share/native/sun/font/bidi/ubidi.h delete mode 100644 
src/share/native/sun/font/bidi/ubidiimp.h delete mode 100644 src/share/native/sun/font/bidi/ubidiln.c delete mode 100644 src/share/native/sun/font/bidi/uchardir.c delete mode 100644 src/share/native/sun/font/bidi/uchardir.h delete mode 100644 src/share/native/sun/font/bidi/utypes.h create mode 100644 test/java/text/Bidi/BidiConformance.java create mode 100644 test/java/text/Bidi/Bug6850113.java diff --git a/make/java/text/FILES_java.gmk b/make/java/text/FILES_java.gmk index 88dc12903..ada9bde7f 100644 --- a/make/java/text/FILES_java.gmk +++ b/make/java/text/FILES_java.gmk @@ -76,6 +76,10 @@ FILES_java = \ sun/text/Normalizer.java \ sun/text/SupplementaryCharacterData.java \ sun/text/UCompactIntArray.java \ + sun/text/bidi/BidiBase.java \ + sun/text/bidi/BidiLine.java \ + sun/text/bidi/BidiRun.java \ + \ sun/text/normalizer/CharTrie.java \ sun/text/normalizer/CharacterIteratorWrapper.java \ sun/text/normalizer/ICUBinary.java \ diff --git a/make/sun/font/FILES_c.gmk b/make/sun/font/FILES_c.gmk index 2940ac78a..84a78c289 100644 --- a/make/sun/font/FILES_c.gmk +++ b/make/sun/font/FILES_c.gmk @@ -24,10 +24,6 @@ # FILES_c_shared = \ - jbidi.c \ - ubidi.c \ - ubidiln.c \ - uchardir.c \ DrawGlyphList.c \ sunFont.c diff --git a/make/sun/font/Makefile b/make/sun/font/Makefile index 2663144a5..b81918de5 100644 --- a/make/sun/font/Makefile +++ b/make/sun/font/Makefile @@ -145,7 +145,6 @@ include $(BUILDDIR)/common/Library.gmk # Add to the ambient vpath to pick up files in subdirectories # vpath %.c $(PLATFORM_SRC)/native/$(PKGDIR) -vpath %.c $(SHARE_SRC)/native/$(PKGDIR)/bidi vpath %.cpp $(SHARE_SRC)/native/$(PKGDIR)/layout vpath %.cpp $(SHARE_SRC)/native/$(PKGDIR) @@ -187,7 +186,6 @@ endif # PLATFORM CPPFLAGS += -I$(SHARE_SRC)/native/$(PKGDIR) \ -I$(SHARE_SRC)/native/$(PKGDIR)/layout \ - -I$(SHARE_SRC)/native/$(PKGDIR)/bidi \ -I$(SHARE_SRC)/native/sun/awt/image/cvutils \ -I$(PLATFORM_SRC)/native/sun/awt \ -I$(SHARE_SRC)/native/sun/awt/debug \ diff --git 
a/make/sun/font/mapfile-vers b/make/sun/font/mapfile-vers index 1d1f003bf..8d1b6af9d 100644 --- a/make/sun/font/mapfile-vers +++ b/make/sun/font/mapfile-vers @@ -31,8 +31,6 @@ SUNWprivate_1.1 { newLayoutTableCache; freeLayoutTableCache; isNullScalerContext; - Java_java_text_Bidi_nativeBidiChars; - Java_java_text_Bidi_nativeGetDirectionCode; Java_sun_font_NullFontScaler_getNullScalerContext; Java_sun_font_NullFontScaler_getGlyphImage; Java_sun_font_FontManager_getPlatformFontVar; diff --git a/make/sun/font/mapfile-vers.openjdk b/make/sun/font/mapfile-vers.openjdk index 55d5661bf..2977f35da 100644 --- a/make/sun/font/mapfile-vers.openjdk +++ b/make/sun/font/mapfile-vers.openjdk @@ -33,8 +33,6 @@ SUNWprivate_1.1 { newLayoutTableCache; freeLayoutTableCache; isNullScalerContext; - Java_java_text_Bidi_nativeBidiChars; - Java_java_text_Bidi_nativeGetDirectionCode; Java_sun_font_NullFontScaler_getNullScalerContext; Java_sun_font_NullFontScaler_getGlyphImage; Java_sun_font_FontManager_getPlatformFontVar; diff --git a/src/share/classes/java/text/Bidi.java b/src/share/classes/java/text/Bidi.java index 80d7f6fa8..e4db4b2d0 100644 --- a/src/share/classes/java/text/Bidi.java +++ b/src/share/classes/java/text/Bidi.java @@ -1,5 +1,5 @@ /* - * Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2000-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -35,10 +35,7 @@ package java.text; -import java.awt.Toolkit; -import java.awt.font.TextAttribute; -import java.awt.font.NumericShaper; -import sun.text.CodePointIterator; +import sun.text.bidi.BidiBase; /** * This class implements the Unicode Bidirectional Algorithm. 
@@ -62,15 +59,6 @@ import sun.text.CodePointIterator; * @since 1.4 */ public final class Bidi { - byte dir; - byte baselevel; - int length; - int[] runs; - int[] cws; - - static { - sun.font.FontManagerNativeLibrary.load(); - } /** Constant indicating base direction is left-to-right. */ public static final int DIRECTION_LEFT_TO_RIGHT = 0; @@ -94,7 +82,7 @@ public final class Bidi { */ public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1; - private static final int DIR_MIXED = 2; + private BidiBase bidiBase; /** * Create Bidi from the given paragraph of text and base direction. @@ -109,7 +97,7 @@ public final class Bidi { throw new IllegalArgumentException("paragraph is null"); } - nativeBidiChars(this, paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags); + bidiBase = new BidiBase(paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags); } /** @@ -142,67 +130,8 @@ public final class Bidi { throw new IllegalArgumentException("paragraph is null"); } - int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT; - byte[] embeddings = null; - - int start = paragraph.getBeginIndex(); - int limit = paragraph.getEndIndex(); - int length = limit - start; - int n = 0; - char[] text = new char[length]; - for (char c = paragraph.first(); c != paragraph.DONE; c = paragraph.next()) { - text[n++] = c; - } - - paragraph.first(); - try { - Boolean runDirection = (Boolean)paragraph.getAttribute(TextAttribute.RUN_DIRECTION); - if (runDirection != null) { - if (TextAttribute.RUN_DIRECTION_LTR.equals(runDirection)) { - flags = DIRECTION_LEFT_TO_RIGHT; // clears default setting - } else { - flags = DIRECTION_RIGHT_TO_LEFT; - } - } - } - catch (ClassCastException e) { - } - - try { - NumericShaper shaper = (NumericShaper)paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING); - if (shaper != null) { - shaper.shape(text, 0, text.length); - } - } - catch (ClassCastException e) { - } - - int pos = start; - do { - paragraph.setIndex(pos); - Object embeddingLevel = 
paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING); - int newpos = paragraph.getRunLimit(TextAttribute.BIDI_EMBEDDING); - - if (embeddingLevel != null) { - try { - int intLevel = ((Integer)embeddingLevel).intValue(); - if (intLevel >= -61 && intLevel < 61) { - byte level = (byte)(intLevel < 0 ? (-intLevel | 0x80) : intLevel); - if (embeddings == null) { - embeddings = new byte[length]; - } - for (int i = pos - start; i < newpos - start; ++i) { - embeddings[i] = level; - } - } - } - catch (ClassCastException e) { - } - } - pos = newpos; - } while (pos < limit); - - nativeBidiChars(this, text, 0, embeddings, 0, text.length, flags); + bidiBase = new BidiBase(0, 0); + bidiBase.setPara(paragraph); } /** @@ -240,46 +169,7 @@ public final class Bidi { " for embeddings of length: " + text.length); } - if (embeddings != null) { - // native uses high bit to indicate override, not negative value, sigh - - for (int i = embStart, embLimit = embStart + paragraphLength; i < embLimit; ++i) { - if (embeddings[i] < 0) { - byte[] temp = new byte[paragraphLength]; - System.arraycopy(embeddings, embStart, temp, 0, paragraphLength); - - for (i -= embStart; i < paragraphLength; ++i) { - if (temp[i] < 0) { - temp[i] = (byte)(-temp[i] | 0x80); - } - } - - embeddings = temp; - embStart = 0; - break; - } - } - } - - nativeBidiChars(this, text, textStart, embeddings, embStart, paragraphLength, flags); - } - - /** - * Private constructor used by line bidi. - */ - private Bidi(int dir, int baseLevel, int length, int[] data, int[] cws) { - reset(dir, baseLevel, length, data, cws); - } - - /** - * Private mutator used by native code. 
- */ - private void reset(int dir, int baselevel, int length, int[] data, int[] cws) { - this.dir = (byte)dir; - this.baselevel = (byte)baselevel; - this.length = length; - this.runs = data; - this.cws = cws; + bidiBase = new BidiBase(text, textStart, embeddings, embStart, paragraphLength, flags); } /** @@ -290,96 +180,10 @@ public final class Bidi { * @param lineLimit the offset from the start of the paragraph to the limit of the line. */ public Bidi createLineBidi(int lineStart, int lineLimit) { - if (lineStart == 0 && lineLimit == length) { - return this; - } - - int lineLength = lineLimit - lineStart; - if (lineStart < 0 || - lineLimit < lineStart || - lineLimit > length) { - throw new IllegalArgumentException("range " + lineStart + - " to " + lineLimit + - " is invalid for paragraph of length " + length); - } + AttributedString astr = new AttributedString(""); + Bidi newBidi = new Bidi(astr.getIterator()); - if (runs == null) { - return new Bidi(dir, baselevel, lineLength, null, null); - } else { - int cwspos = -1; - int[] ncws = null; - if (cws != null) { - int cwss = 0; - int cwsl = cws.length; - while (cwss < cwsl) { - if (cws[cwss] >= lineStart) { - cwsl = cwss; - while (cwsl < cws.length && cws[cwsl] < lineLimit) { - cwsl++; - } - int ll = lineLimit-1; - while (cwsl > cwss && cws[cwsl-1] == ll) { - cwspos = ll; // record start of counter-directional whitespace - --cwsl; - --ll; - } - - if (cwspos == lineStart) { // entire line is cws, so ignore - return new Bidi(dir, baselevel, lineLength, null, null); - } - - int ncwslen = cwsl - cwss; - if (ncwslen > 0) { - ncws = new int[ncwslen]; - for (int i = 0; i < ncwslen; ++i) { - ncws[i] = cws[cwss+i] - lineStart; - } - } - break; - } - ++cwss; - } - } - - int[] nruns = null; - int nlevel = baselevel; - int limit = cwspos == -1 ? 
lineLimit : cwspos; - int rs = 0; - int rl = runs.length; - int ndir = dir; - for (; rs < runs.length; rs += 2) { - if (runs[rs] > lineStart) { - rl = rs; - while (rl < runs.length && runs[rl] < limit) { - rl += 2; - } - if ((rl > rs) || (runs[rs+1] != baselevel)) { - rl += 2; - - if (cwspos != -1 && rl > rs && runs[rl-1] != baselevel) { // add level for cws - nruns = new int[rl - rs + 2]; - nruns[rl - rs] = lineLength; - nruns[rl - rs + 1] = baselevel; - } else { - limit = lineLimit; - nruns = new int[rl - rs]; - } - - int n = 0; - for (int i = rs; i < rl; i += 2) { - nruns[n++] = runs[i] - lineStart; - nruns[n++] = runs[i+1]; - } - nruns[n-2] = limit - lineStart; - } else { - ndir = (runs[rs+1] & 0x1) == 0 ? DIRECTION_LEFT_TO_RIGHT : DIRECTION_RIGHT_TO_LEFT; - } - break; - } - } - - return new Bidi(ndir, baselevel, lineLength, nruns, ncws); - } + return bidiBase.setLine(this, bidiBase, newBidi, newBidi.bidiBase,lineStart, lineLimit); } /** @@ -388,7 +192,7 @@ public final class Bidi { * @return true if the line is not left-to-right or right-to-left. 
*/ public boolean isMixed() { - return dir == DIR_MIXED; + return bidiBase.isMixed(); } /** @@ -396,7 +200,7 @@ public final class Bidi { * @return true if the line is all left-to-right text and the base direction is left-to-right */ public boolean isLeftToRight() { - return dir == DIRECTION_LEFT_TO_RIGHT; + return bidiBase.isLeftToRight(); } /** @@ -404,7 +208,7 @@ public final class Bidi { * @return true if the line is all right-to-left text, and the base direction is right-to-left */ public boolean isRightToLeft() { - return dir == DIRECTION_RIGHT_TO_LEFT; + return bidiBase.isRightToLeft(); } /** @@ -412,7 +216,7 @@ public final class Bidi { * @return the length of text in the line */ public int getLength() { - return length; + return bidiBase.getLength(); } /** @@ -420,7 +224,7 @@ public final class Bidi { * @return true if the base direction is left-to-right */ public boolean baseIsLeftToRight() { - return (baselevel & 0x1) == 0; + return bidiBase.baseIsLeftToRight(); } /** @@ -428,7 +232,7 @@ public final class Bidi { * @return the base level */ public int getBaseLevel() { - return baselevel; + return bidiBase.getParaLevel(); } /** @@ -438,17 +242,7 @@ public final class Bidi { * @return the resolved level of the character at offset */ public int getLevelAt(int offset) { - if (runs == null || offset < 0 || offset >= length) { - return baselevel; - } else { - int i = 0; - do { - if (offset < runs[i]) { - return runs[i+1]; - } - i += 2; - } while (true); - } + return bidiBase.getLevelAt(offset); } /** @@ -456,7 +250,7 @@ public final class Bidi { * @return the number of level runs */ public int getRunCount() { - return runs == null ? 1 : runs.length / 2; + return bidiBase.countRuns(); } /** @@ -465,7 +259,7 @@ public final class Bidi { * @return the level of the run */ public int getRunLevel(int run) { - return runs == null ? 
baselevel : runs[run * 2 + 1]; + return bidiBase.getRunLevel(run); } /** @@ -475,7 +269,7 @@ public final class Bidi { * @return the start of the run */ public int getRunStart(int run) { - return (runs == null || run == 0) ? 0 : runs[run * 2 - 2]; + return bidiBase.getRunStart(run); } /** @@ -486,7 +280,7 @@ public final class Bidi { * @return limit the limit of the run */ public int getRunLimit(int run) { - return runs == null ? length : runs[run * 2]; + return bidiBase.getRunLimit(run); } /** @@ -501,16 +295,7 @@ public final class Bidi { * @return true if the range of characters requires bidi analysis */ public static boolean requiresBidi(char[] text, int start, int limit) { - CodePointIterator cpi = CodePointIterator.create(text, start, limit); - for (int cp = cpi.next(); cp != CodePointIterator.DONE; cp = cpi.next()) { - if (cp > 0x0590) { - int dc = nativeGetDirectionCode(cp); - if ((RMASK & (1 << dc)) != 0) { - return true; - } - } - } - return false; + return BidiBase.requiresBidi(text, start, limit); } /** @@ -530,124 +315,14 @@ public final class Bidi { * @param count the number of objects to reorder */ public static void reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count) { - - if (count < 0) { - throw new IllegalArgumentException("count " + count + " must be >= 0"); - } - if (levelStart < 0 || levelStart + count > levels.length) { - throw new IllegalArgumentException("levelStart " + levelStart + " and count " + count + - " out of range [0, " + levels.length + "]"); - } - if (objectStart < 0 || objectStart + count > objects.length) { - throw new IllegalArgumentException("objectStart " + objectStart + " and count " + count + - " out of range [0, " + objects.length + "]"); - } - - byte lowestOddLevel = (byte)(NUMLEVELS + 1); - byte highestLevel = 0; - - // initialize mapping and levels - - int levelLimit = levelStart + count; - for (int i = levelStart; i < levelLimit; i++) { - byte level = levels[i]; - if (level > 
highestLevel) { - highestLevel = level; - } - - if ((level & 0x01) != 0 && level < lowestOddLevel) { - lowestOddLevel = level; - } - } - - int delta = objectStart - levelStart; - - while (highestLevel >= lowestOddLevel) { - int i = levelStart; - - for (;;) { - while (i < levelLimit && levels[i] < highestLevel) { - i++; - } - int begin = i++; - - if (begin == levelLimit) { - break; // no more runs at this level - } - - while (i < levelLimit && levels[i] >= highestLevel) { - i++; - } - int end = i - 1; - - begin += delta; - end += delta; - while (begin < end) { - Object temp = objects[begin]; - objects[begin] = objects[end]; - objects[end] = temp; - ++begin; - --end; - } - } - - --highestLevel; - } + BidiBase.reorderVisually(levels, levelStart, objects, objectStart, count); } - private static final char NUMLEVELS = 62; - - private static final int RMASK = - (1 << 1 /* U_RIGHT_TO_LEFT */) | - (1 << 5 /* U_ARABIC_NUMBER */) | - (1 << 13 /* U_RIGHT_TO_LEFT_ARABIC */) | - (1 << 14 /* U_RIGHT_TO_LEFT_EMBEDDING */) | - (1 << 15 /* U_RIGHT_TO_LEFT_OVERRIDE */); - - /** Access native bidi implementation. */ - private static native int nativeGetDirectionCode(int cp); - - /** Access native bidi implementation. */ - private static synchronized native void nativeBidiChars(Bidi bidi, char[] text, int textStart, - byte[] embeddings, int embeddingStart, - int length, int flags); - /** * Display the bidi internal state, used in debugging. 
*/ public String toString() { - StringBuffer buf = new StringBuffer(super.toString()); - buf.append("[dir: " + dir); - buf.append(" baselevel: " + baselevel); - buf.append(" length: " + length); - if (runs == null) { - buf.append(" runs: null"); - } else { - buf.append(" runs: ["); - for (int i = 0; i < runs.length; i += 2) { - if (i != 0) { - buf.append(' '); - } - buf.append(runs[i]); // limit - buf.append('/'); - buf.append(runs[i+1]); // level - } - buf.append(']'); - } - if (cws == null) { - buf.append(" cws: null"); - } else { - buf.append(" cws: ["); - for (int i = 0; i < cws.length; ++i) { - if (i != 0) { - buf.append(' '); - } - buf.append(Integer.toHexString(cws[i])); - } - buf.append(']'); - } - buf.append(']'); - - return buf.toString(); + return bidiBase.toString(); } + } diff --git a/src/share/classes/sun/text/bidi/BidiBase.java b/src/share/classes/sun/text/bidi/BidiBase.java new file mode 100644 index 000000000..be494ea3c --- /dev/null +++ b/src/share/classes/sun/text/bidi/BidiBase.java @@ -0,0 +1,3444 @@ +/* + * Portions Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ +/* + ******************************************************************************* + * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * + * * + * The original version of this source code and documentation is copyrighted * + * and owned by IBM, These materials are provided under terms of a License * + * Agreement between IBM and Sun. This technology is protected by multiple * + * US and International patents. This notice and attribution to IBM may not * + * to removed. * + ******************************************************************************* + */ + +/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of + * algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre + * concept of RUNS_ONLY which is a double operation. + * It could be advantageous to divide this into 3 concepts: + * a) Operation: direct / inverse / RUNS_ONLY + * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L + * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL + * This would allow combinations not possible today like RUNS_ONLY with + * NUMBERS_SPECIAL. + * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and + * REMOVE_CONTROLS for the inverse step. + * Not all combinations would be supported, and probably not all do make sense. + * This would need to document which ones are supported and what are the + * fallbacks for unsupported combinations. 
+ */ + +package sun.text.bidi; + +import java.awt.font.TextAttribute; +import java.awt.font.NumericShaper; +import java.io.IOException; +import java.lang.reflect.Array; +import java.text.AttributedCharacterIterator; +import java.text.Bidi; +import java.util.Arrays; +import java.util.MissingResourceException; +import sun.text.normalizer.UBiDiProps; +import sun.text.normalizer.UCharacter; +import sun.text.normalizer.UTF16; + +/** + * + *

Bidi algorithm for ICU

+ * + * This is an implementation of the Unicode Bidirectional Algorithm. The + * algorithm is defined in the Unicode Standard Annex #9, + * version 13, also described in The Unicode Standard, Version 4.0. + *

+ * + * Note: Libraries that perform a bidirectional algorithm and reorder strings + * accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and + * shaping (ArabicShaping) classes can be used at the core of such "Storage + * Layout Engines". + * + *

General remarks about the API:

+ * + * The "limit" of a sequence of characters is the position just after + * their last character, i.e., one more than the index of that last character. + *

+ * + * Some of the API methods provide access to "runs". Such a + * "run" is defined as a sequence of characters that are at the same + * embedding level after performing the Bidi algorithm. + *

+ * + *

Basic concept: paragraph

+ * A piece of text can be divided into several paragraphs by characters + * with the Bidi class Block Separator. For handling of + * paragraphs, see: + * + * + *

Basic concept: text direction

+ * The direction of a piece of text may be: + * + * + *

Basic concept: levels

+ * + * Levels in this API represent embedding levels according to the Unicode + * Bidirectional Algorithm. + * Their low-order bit (even/odd value) indicates the visual direction.

+ * + * Levels can be abstract values when used for the + * paraLevel and embeddingLevels + * arguments of setPara(); there: + *

+ * + *

The related constants are not real, valid level values. + * LEVEL_DEFAULT_XXX can be used to specify + * a default for the paragraph level, applied + * when the setPara() method + * must determine the level itself but there is no + * strong directional character in the input.

+ * + * Note that the value for LEVEL_DEFAULT_LTR is even + * and the one for LEVEL_DEFAULT_RTL is odd, + * just like with normal LTR and RTL level values - + * these special values are designed that way. Also, the implementation + * assumes that MAX_EXPLICIT_LEVEL is odd. + * + *

+ * + *

Basic concept: Reordering Mode

+ * Reordering mode values indicate which variant of the Bidi algorithm to + * use. + * + * + * + *

Basic concept: Reordering Options

+ * Reordering options can be applied during Bidi text transformations. + * + * + * + * @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer) + * @stable ICU 3.8 + * + * + *

Sample code for the ICU Bidi API

+ * + *
Rendering a paragraph with the ICU Bidi API
+ * + * This is (hypothetical) sample code that illustrates how the ICU Bidi API + * could be used to render a paragraph of text. Rendering code depends highly on + * the graphics system, therefore this sample code must make a lot of + * assumptions, which may or may not match any existing graphics system's + * properties. + * + *

+ * The basic assumptions are: + *

+ * + * + *
+ *
+ *  package com.ibm.icu.dev.test.bidi;
+ *
+ *  import com.ibm.icu.text.Bidi;
+ *  import com.ibm.icu.text.BidiRun;
+ *
+ *  public class Sample {
+ *
+ *      static final int styleNormal = 0;
+ *      static final int styleSelected = 1;
+ *      static final int styleBold = 2;
+ *      static final int styleItalics = 4;
+ *      static final int styleSuper=8;
+ *      static final int styleSub = 16;
+ *
+ *      static class StyleRun {
+ *          int limit;
+ *          int style;
+ *
+ *          public StyleRun(int limit, int style) {
+ *              this.limit = limit;
+ *              this.style = style;
+ *          }
+ *      }
+ *
+ *      static class Bounds {
+ *          int start;
+ *          int limit;
+ *
+ *          public Bounds(int start, int limit) {
+ *              this.start = start;
+ *              this.limit = limit;
+ *          }
+ *      }
+ *
+ *      static int getTextWidth(String text, int start, int limit,
+ *                              StyleRun[] styleRuns, int styleRunCount) {
+ *          // simplistic way to compute the width
+ *          return limit - start;
+ *      }
+ *
+ *      // set limit and StyleRun limit for a line
+ *      // from text[start] and from styleRuns[styleRunStart]
+ *      // using Bidi.getLogicalRun(...)
+ *      // returns line width
+ *      static int getLineBreak(String text, Bounds line, Bidi para,
+ *                              StyleRun styleRuns[], Bounds styleRun) {
+ *          // dummy return
+ *          return 0;
+ *      }
+ *
+ *      // render runs on a line sequentially, always from left to right
+ *
+ *      // prepare rendering a new line
+ *      static void startLine(byte textDirection, int lineWidth) {
+ *          System.out.println();
+ *      }
+ *
+ *      // render a run of text and advance to the right by the run width
+ *      // the text[start..limit-1] is always in logical order
+ *      static void renderRun(String text, int start, int limit,
+ *                            byte textDirection, int style) {
+ *      }
+ *
+ *      // We could compute a cross-product
+ *      // from the style runs with the directional runs
+ *      // and then reorder it.
+ *      // Instead, here we iterate over each run type
+ *      // and render the intersections -
+ *      // with shortcuts in simple (and common) cases.
+ *      // renderParagraph() is the main function.
+ *
+ *      // render a directional run with
+ *      // (possibly) multiple style runs intersecting with it
+ *      static void renderDirectionalRun(String text, int start, int limit,
+ *                                       byte direction, StyleRun styleRuns[],
+ *                                       int styleRunCount) {
+ *          int i;
+ *
+ *          // iterate over style runs
+ *          if (direction == Bidi.LTR) {
+ *              int styleLimit;
+ *              for (i = 0; i < styleRunCount; ++i) {
+ *                  styleLimit = styleRuns[i].limit;
+ *                  if (start < styleLimit) {
+ *                      if (styleLimit > limit) {
+ *                          styleLimit = limit;
+ *                      }
+ *                      renderRun(text, start, styleLimit,
+ *                                direction, styleRuns[i].style);
+ *                      if (styleLimit == limit) {
+ *                          break;
+ *                      }
+ *                      start = styleLimit;
+ *                  }
+ *              }
+ *          } else {
+ *              int styleStart;
+ *
+ *              for (i = styleRunCount-1; i >= 0; --i) {
+ *                  if (i > 0) {
+ *                      styleStart = styleRuns[i-1].limit;
+ *                  } else {
+ *                      styleStart = 0;
+ *                  }
+ *                  if (limit >= styleStart) {
+ *                      if (styleStart < start) {
+ *                          styleStart = start;
+ *                      }
+ *                      renderRun(text, styleStart, limit, direction,
+ *                                styleRuns[i].style);
+ *                      if (styleStart == start) {
+ *                          break;
+ *                      }
+ *                      limit = styleStart;
+ *                  }
+ *              }
+ *          }
+ *      }
+ *
+ *      // the line object represents text[start..limit-1]
+ *      static void renderLine(Bidi line, String text, int start, int limit,
+ *                             StyleRun styleRuns[], int styleRunCount) {
+ *          byte direction = line.getDirection();
+ *          if (direction != Bidi.MIXED) {
+ *              // unidirectional
+ *              if (styleRunCount <= 1) {
+ *                  renderRun(text, start, limit, direction, styleRuns[0].style);
+ *              } else {
+ *                  renderDirectionalRun(text, start, limit, direction,
+ *                                       styleRuns, styleRunCount);
+ *              }
+ *          } else {
+ *              // mixed-directional
+ *              int count, i;
+ *              BidiRun run;
+ *
+ *              try {
+ *                  count = line.countRuns();
+ *              } catch (IllegalStateException e) {
+ *                  e.printStackTrace();
+ *                  return;
+ *              }
+ *              if (styleRunCount <= 1) {
+ *                  int style = styleRuns[0].style;
+ *
+ *                  // iterate over directional runs
+ *                  for (i = 0; i < count; ++i) {
+ *                      run = line.getVisualRun(i);
+ *                      renderRun(text, run.getStart(), run.getLimit(),
+ *                                run.getDirection(), style);
+ *                  }
+ *              } else {
+ *                  // iterate over both directional and style runs
+ *                  for (i = 0; i < count; ++i) {
+ *                      run = line.getVisualRun(i);
+ *                      renderDirectionalRun(text, run.getStart(),
+ *                                           run.getLimit(), run.getDirection(),
+ *                                           styleRuns, styleRunCount);
+ *                  }
+ *              }
+ *          }
+ *      }
+ *
+ *      static void renderParagraph(String text, byte textDirection,
+ *                                  StyleRun styleRuns[], int styleRunCount,
+ *                                  int lineWidth) {
+ *          int length = text.length();
+ *          Bidi para = new Bidi();
+ *          try {
+ *              para.setPara(text,
+ *                           textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
+ *                                              : Bidi.LEVEL_DEFAULT_LTR,
+ *                           null);
+ *          } catch (Exception e) {
+ *              e.printStackTrace();
+ *              return;
+ *          }
+ *          byte paraLevel = (byte)(1 & para.getParaLevel());
+ *          StyleRun styleRun = new StyleRun(length, styleNormal);
+ *
+ *          if (styleRuns == null || styleRunCount <= 0) {
+ *              styleRuns = new StyleRun[1];
+ *              styleRunCount = 1;
+ *              styleRuns[0] = styleRun;
+ *          }
+ *          // assume styleRuns[styleRunCount-1].limit>=length
+ *
+ *          int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
+ *          if (width <= lineWidth) {
+ *              // everything fits onto one line
+ *
+ *              // prepare rendering a new line from either left or right
+ *              startLine(paraLevel, width);
+ *
+ *              renderLine(para, text, 0, length, styleRuns, styleRunCount);
+ *          } else {
+ *              // we need to render several lines
+ *              Bidi line = new Bidi(length, 0);
+ *              int start = 0, limit;
+ *              int styleRunStart = 0, styleRunLimit;
+ *
+ *              for (;;) {
+ *                  limit = length;
+ *                  styleRunLimit = styleRunCount;
+ *                  width = getLineBreak(text, new Bounds(start, limit),
+ *                                       para, styleRuns,
+ *                                       new Bounds(styleRunStart, styleRunLimit));
+ *                  try {
+ *                      line = para.setLine(start, limit);
+ *                  } catch (Exception e) {
+ *                      e.printStackTrace();
+ *                      return;
+ *                  }
+ *                  // prepare rendering a new line
+ *                  // from either left or right
+ *                  startLine(paraLevel, width);
+ *
+ *                  if (styleRunStart > 0) {
+ *                      int newRunCount = styleRuns.length - styleRunStart;
+ *                      StyleRun[] newRuns = new StyleRun[newRunCount];
+ *                      System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
+ *                                       newRunCount);
+ *                      renderLine(line, text, start, limit, newRuns,
+ *                                 styleRunLimit - styleRunStart);
+ *                  } else {
+ *                      renderLine(line, text, start, limit, styleRuns,
+ *                                 styleRunLimit - styleRunStart);
+ *                  }
+ *                  if (limit == length) {
+ *                      break;
+ *                  }
+ *                  start = limit;
+ *                  styleRunStart = styleRunLimit - 1;
+ *                  if (start >= styleRuns[styleRunStart].limit) {
+ *                      ++styleRunStart;
+ *                  }
+ *              }
+ *          }
+ *      }
+ *
+ *      public static void main(String[] args)
+ *      {
+ *          renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
+ *          renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
+ *      }
+ *  }
+ *
+ * 
+ */ + +public class BidiBase { + + /* Pairs a text position with a flag value (see the field comments). */ + class Point { + int pos; /* position in text */ + int flag; /* flag for LRM/RLM, before/after */ + } + + /* Groups a Point array with two int counters, size and confirmed. */ + class InsertPoints { + int size; + int confirmed; + Point[] points = new Point[0]; + } + + /** Paragraph level setting

+ * + * Constant indicating that the base direction depends on the first strong + * directional character in the text according to the Unicode Bidirectional + * Algorithm. If no strong directional character is present, + * then set the paragraph level to 0 (left-to-right).

+ * + * If this value is used in conjunction with reordering modes + * REORDER_INVERSE_LIKE_DIRECT or + * REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder + * is assumed to be visual LTR, and the text after reordering is required + * to be the corresponding logical string with appropriate contextual + * direction. The direction of the result string will be RTL if either + * the rightmost or leftmost strong character of the source text is RTL + * or Arabic Letter; the direction will be LTR otherwise.

+ * + * If reordering option OPTION_INSERT_MARKS is set, an RLM may + * be added at the beginning of the result string to ensure round trip + * (that the result string, when reordered back to visual, will produce + * the original source text). + * @see #REORDER_INVERSE_LIKE_DIRECT + * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 3.8 + */ + public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e; + + /** Paragraph level setting

+ * + * Constant indicating that the base direction depends on the first strong + * directional character in the text according to the Unicode Bidirectional + * Algorithm. If no strong directional character is present, + * then set the paragraph level to 1 (right-to-left).

+ * + * If this value is used in conjunction with reordering modes + * REORDER_INVERSE_LIKE_DIRECT or + * REORDER_INVERSE_FOR_NUMBERS_SPECIAL, the text to reorder + * is assumed to be visual LTR, and the text after reordering is required + * to be the corresponding logical string with appropriate contextual + * direction. The direction of the result string will be RTL if either + * the rightmost or leftmost strong character of the source text is RTL + * or Arabic Letter, or if the text contains no strong character; + * the direction will be LTR otherwise.

+ * + * If reordering option OPTION_INSERT_MARKS is set, an RLM may + * be added at the beginning of the result string to ensure round trip + * (that the result string, when reordered back to visual, will produce + * the original source text). + * @see #REORDER_INVERSE_LIKE_DIRECT + * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 3.8 + */ + public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f; + + /** + * Maximum explicit embedding level. + * (The maximum resolved level can be up to MAX_EXPLICIT_LEVEL+1). + * @stable ICU 3.8 + */ + public static final byte MAX_EXPLICIT_LEVEL = 61; + + /** + * Bit flag for level input. + * Overrides directional properties. + * @stable ICU 3.8 + */ + public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80; + + /** + * Special value which can be returned by the mapping methods when a + * logical index has no corresponding visual index or vice-versa. This may + * happen for the logical-to-visual mapping of a Bidi control when option + * OPTION_REMOVE_CONTROLS is + * specified. This can also happen for the visual-to-logical mapping of a + * Bidi mark (LRM or RLM) inserted by option + * OPTION_INSERT_MARKS. + * @see #getVisualIndex + * @see #getVisualMap + * @see #getLogicalIndex + * @see #getLogicalMap + * @see #OPTION_INSERT_MARKS + * @see #OPTION_REMOVE_CONTROLS + * @stable ICU 3.8 + */ + public static final int MAP_NOWHERE = -1; + + /** + * Mixed-directional text. + * @stable ICU 3.8 + */ + public static final byte MIXED = 2; + + /** + * option bit for writeReordered(): + * replace characters with the "mirrored" property in RTL runs + * by their mirror-image mappings + * + * @see #writeReordered + * @stable ICU 3.8 + */ + public static final short DO_MIRRORING = 2; + + /** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode. 
+ * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_DEFAULT = 0; + + /** Reordering mode: Logical to Visual algorithm which handles numbers in + * a way which mimics the behavior of Windows XP. + * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_NUMBERS_SPECIAL = 1; + + /** Reordering mode: Logical to Visual algorithm grouping numbers with + * adjacent R characters (reversible algorithm). + * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_GROUP_NUMBERS_WITH_R = 2; + + /** Reordering mode: Reorder runs only to transform a Logical LTR string + * to the logical RTL string with the same display, or vice-versa.
+ * If this mode is set together with option + * OPTION_INSERT_MARKS, some Bidi controls in the source + * text may be removed and other controls may be added to produce the + * minimum combination which has the required display. + * @see #OPTION_INSERT_MARKS + * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_RUNS_ONLY = 3; + + /** Reordering mode: Visual to Logical algorithm which handles numbers + * like L (same algorithm as selected by setInverse(true)). + * @see #setInverse + * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_INVERSE_NUMBERS_AS_L = 4; + + /** Reordering mode: Visual to Logical algorithm equivalent to the regular + * Logical to Visual algorithm. + * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_INVERSE_LIKE_DIRECT = 5; + + /** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the + * REORDER_NUMBERS_SPECIAL Bidi algorithm. + * @see #setReorderingMode + * @stable ICU 3.8 + */ + private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6; + + /* Reordering mode values must be ordered so that all the regular logical to + * visual modes come first, and all inverse Bidi modes come last. + */ + private static final short REORDER_LAST_LOGICAL_TO_VISUAL = + REORDER_NUMBERS_SPECIAL; + + /** + * Option bit for setReorderingOptions: + * insert Bidi marks (LRM or RLM) when needed to ensure correct result of + * a reordering to a Logical order + * + *

This option must be set or reset before calling + * setPara.

+ * + *

This option is significant only with reordering modes which generate + * a result with Logical order, specifically.

+ * + * + *

If this option is set in conjunction with reordering mode + * REORDER_INVERSE_NUMBERS_AS_L or with calling + * setInverse(true), it implies option + * INSERT_LRM_FOR_NUMERIC in calls to method + * writeReordered().

+ * + *

For other reordering modes, a minimum number of LRM or RLM characters + * will be added to the source text after reordering it so as to ensure + * round trip, i.e. when applying the inverse reordering mode on the + * resulting logical text with removal of Bidi marks + * (option OPTION_REMOVE_CONTROLS set before calling + * setPara() or option + * REMOVE_BIDI_CONTROLS in + * writeReordered), the result will be identical to the + * source text in the first transformation. + * + *

This option will be ignored if specified together with option + * OPTION_REMOVE_CONTROLS. It inhibits option + * REMOVE_BIDI_CONTROLS in calls to method + * writeReordered() and it implies option + * INSERT_LRM_FOR_NUMERIC in calls to method + * writeReordered() if the reordering mode is + * REORDER_INVERSE_NUMBERS_AS_L.

+ * + * @see #setReorderingMode + * @see #setReorderingOptions + * @see #INSERT_LRM_FOR_NUMERIC + * @see #REMOVE_BIDI_CONTROLS + * @see #OPTION_REMOVE_CONTROLS + * @see #REORDER_RUNS_ONLY + * @see #REORDER_INVERSE_NUMBERS_AS_L + * @see #REORDER_INVERSE_LIKE_DIRECT + * @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 3.8 + */ + private static final int OPTION_INSERT_MARKS = 1; + + /** + * Option bit for setReorderingOptions: + * remove Bidi control characters + * + *

This option must be set or reset before calling + * setPara.

+ * + *

This option nullifies option + * OPTION_INSERT_MARKS. It inhibits option + * INSERT_LRM_FOR_NUMERIC in calls to method + * writeReordered() and it implies option + * REMOVE_BIDI_CONTROLS in calls to that method.

+ * + * @see #setReorderingMode + * @see #setReorderingOptions + * @see #OPTION_INSERT_MARKS + * @see #INSERT_LRM_FOR_NUMERIC + * @see #REMOVE_BIDI_CONTROLS + * @stable ICU 3.8 + */ + private static final int OPTION_REMOVE_CONTROLS = 2; + + /** + * Option bit for setReorderingOptions: + * process the output as part of a stream to be continued + * + *

This option must be set or reset before calling + * setPara.

+ * + *

This option specifies that the caller is interested in processing + * large text object in parts. The results of the successive calls are + * expected to be concatenated by the caller. Only the call for the last + * part will have this option bit off.

+ * + *

When this option bit is on, setPara() may process + * less than the full source text in order to truncate the text at a + * meaningful boundary. The caller should call + * getProcessedLength() immediately after calling + * setPara() in order to determine how much of the source + * text has been processed. Source text beyond that length should be + * resubmitted in following calls to setPara. The + * processed length may be less than the length of the source text if a + * character preceding the last character of the source text constitutes a + * reasonable boundary (like a block separator) for text to be continued.
+ * If the last character of the source text constitutes a reasonable + * boundary, the whole text will be processed at once.
+ * If nowhere in the source text there exists + * such a reasonable boundary, the processed length will be zero.
+ * The caller should check for such an occurrence and do one of the following: + *

+ * In all cases, this option should be turned off before processing the last + * part of the text.

+ * + *

When the OPTION_STREAMING option is used, it is + * recommended to call orderParagraphsLTR() with argument + * orderParagraphsLTR set to true before calling + * setPara() so that later paragraphs may be concatenated to + * previous paragraphs on the right. + *

+ * + * @see #setReorderingMode + * @see #setReorderingOptions + * @see #getProcessedLength + * @see #orderParagraphsLTR + * @stable ICU 3.8 + */ + private static final int OPTION_STREAMING = 4; + + /* + * Comparing the description of the Bidi algorithm with this implementation + * is easier with the same names for the Bidi types in the code as there. + * See UCharacterDirection + */ + private static final byte L = 0; + private static final byte R = 1; + private static final byte EN = 2; + private static final byte ES = 3; + private static final byte ET = 4; + private static final byte AN = 5; + private static final byte CS = 6; + static final byte B = 7; + private static final byte S = 8; + private static final byte WS = 9; + private static final byte ON = 10; + private static final byte LRE = 11; + private static final byte LRO = 12; + private static final byte AL = 13; + private static final byte RLE = 14; + private static final byte RLO = 15; + private static final byte PDF = 16; + private static final byte NSM = 17; + private static final byte BN = 18; + + private static final int MASK_R_AL = (1 << R | 1 << AL); + + private static final char CR = '\r'; + private static final char LF = '\n'; + + static final int LRM_BEFORE = 1; + static final int LRM_AFTER = 2; + static final int RLM_BEFORE = 4; + static final int RLM_AFTER = 8; + + /* + * reference to parent paragraph object (reference to self if this object is + * a paragraph object); set to null in a newly opened object; set to a + * real value after a successful execution of setPara or setLine + */ + BidiBase paraBidi; + + final UBiDiProps bdp; + + /* character array representing the current text */ + char[] text; + + /* length of the current text */ + int originalLength; + + /* if the option OPTION_STREAMING is set, this is the length of + * text actually processed by setPara, which may be shorter + * than the original length. Otherwise, it is identical to the original + * length. 
+ */ + public int length; + + /* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi + * marks are allowed to be inserted in one of the reordering modes, the + * length of the result string may be different from the processed length. + */ + int resultLength; + + /* indicators for whether memory may be allocated after construction */ + boolean mayAllocateText; + boolean mayAllocateRuns; + + /* arrays with one value per text-character */ + byte[] dirPropsMemory = new byte[1]; + byte[] levelsMemory = new byte[1]; + byte[] dirProps; + byte[] levels; + + /* must block separators receive level 0? */ + boolean orderParagraphsLTR; + + /* the paragraph level */ + byte paraLevel; + + /* original paraLevel when contextual */ + /* must be one of DEFAULT_xxx or 0 if not contextual */ + byte defaultParaLevel; + + /* the following is set in setPara, used in processPropertySeq */ + + ImpTabPair impTabPair; /* reference to levels state table pair */ + + /* the overall paragraph or line directionality*/ + byte direction; + + /* flags is a bit set for which directional properties are in the text */ + int flags; + + /* lastArabicPos is index to the last AL in the text, -1 if none */ + int lastArabicPos; + + /* characters after trailingWSStart are WS and are */ + /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ + int trailingWSStart; + + /* fields for paragraph handling */ + int paraCount; /* set in getDirProps() */ + int[] parasMemory = new int[1]; + int[] paras; /* limits of paragraphs, filled in + ResolveExplicitLevels() or CheckExplicitLevels() */ + + /* for single paragraph text, we only need a tiny array of paras (no allocation) */ + int[] simpleParas = {0}; + + /* fields for line reordering */ + int runCount; /* ==-1: runs not set up yet */ + BidiRun[] runsMemory = new BidiRun[0]; + BidiRun[] runs; + + /* for non-mixed text, we only need a tiny array of runs (no allocation) */ + BidiRun[] simpleRuns = {new BidiRun()}; + + /* mapping of runs in logical 
order to visual order */ + int[] logicalToVisualRunsMap; + + /* flag to indicate that the map has been updated */ + boolean isGoodLogicalToVisualRunsMap; + + /* for inverse Bidi with insertion of directional marks */ + InsertPoints insertPoints = new InsertPoints(); + + /* for option OPTION_REMOVE_CONTROLS */ + int controlCount; + + /* + * Sometimes, bit values are more appropriate + * to deal with directionality properties. + * Abbreviations in these method names refer to names + * used in the Bidi algorithm. + */ + static int DirPropFlag(byte dir) { + return (1 << dir); + } + + /* + * The following bit is ORed to the property of characters in paragraphs + * with contextual RTL direction when paraLevel is contextual. + */ + static final byte CONTEXT_RTL_SHIFT = 6; + /* NOTE(review): the next declaration is damaged in this copy: "(byte)(1<>CONTEXT_RTL_SHIFT) : paraLevel;" is not valid Java, so text between the '<' and the '>' has evidently been lost (the tail looks like the end of a conditional expression inside another method). Restore this span from the original file. */ + static final byte CONTEXT_RTL = (byte)(1<>CONTEXT_RTL_SHIFT) : paraLevel; + } + + /* Reports whether c lies in 0x200c..0x200f or 0x202a..0x202e. */ + static boolean IsBidiControlChar(int c) + { + /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or + 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ + return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))); + } + + /* Throws IllegalStateException (with an empty message) unless this object is its own paraBidi. */ + public void verifyValidPara() + { + if (this != this.paraBidi) { + throw new IllegalStateException(""); + } + } + + /* Passes when this object is its own paraBidi, or when paraBidi is non-null and is itself its own paraBidi; otherwise throws IllegalStateException. 
*/ + public void verifyValidParaOrLine() + { + BidiBase para = this.paraBidi; + /* verify Para */ + if (this == para) { + return; + } + /* verify Line */ + if ((para == null) || (para != para.paraBidi)) { + throw new IllegalStateException(); + } + } + + /* Throws IllegalArgumentException unless start <= index < limit (limit is exclusive). */ + public void verifyRange(int index, int start, int limit) + { + if (index < start || index >= limit) { + throw new IllegalArgumentException("Value " + index + + " is out of range " + start + " to " + limit); + } + } + + /* Throws ArrayIndexOutOfBoundsException unless start <= index < limit (limit is exclusive). */ + public void verifyIndex(int index, int start, int limit) + { + if (index < start || index >= limit) { + throw new ArrayIndexOutOfBoundsException("Index " + index + + " is out of range " + start + " to " + limit); + } + } + + /** + * Allocate a Bidi object with preallocated memory + * for internal structures. 
+ * This method provides a Bidi object like the default constructor + * but it also preallocates memory for internal structures + * according to the sizings supplied by the caller.

+ * The preallocation can be limited to some of the internal memory + * by setting some values to 0 here. That means that if, e.g., + * maxRunCount cannot be reasonably predetermined and should not + * be set to maxLength (the only failproof value) to avoid + * wasting memory, then maxRunCount could be set to 0 here + * and the internal structures that are associated with it will be allocated + * on demand, just like with the default constructor. + * + * @param maxLength is the maximum text or line length that internal memory + * will be preallocated for. An attempt to associate this object with a + * longer text will fail, unless this value is 0, which leaves the allocation + * up to the implementation. + * + * @param maxRunCount is the maximum anticipated number of same-level runs + * that internal memory will be preallocated for. An attempt to access + * visual runs on an object that was not preallocated for as many runs + * as the text was actually resolved to will fail, + * unless this value is 0, which leaves the allocation up to the implementation.

+ * The number of runs depends on the actual text and may be anywhere between + * 1 and maxLength. It is typically small. + * + * @throws IllegalArgumentException if maxLength or maxRunCount is less than 0 + * @stable ICU 3.8 + */ + public BidiBase(int maxLength, int maxRunCount) + { + /* check the argument values */ + if (maxLength < 0 || maxRunCount < 0) { + throw new IllegalArgumentException(); + } + + /* reset the object, all reference variables null, all flags false, + all sizes 0. + In fact, we don't need to do anything, since class members are + initialized as zero when an instance is created. + */ + /* + mayAllocateText = false; + mayAllocateRuns = false; + orderParagraphsLTR = false; + paraCount = 0; + runCount = 0; + trailingWSStart = 0; + flags = 0; + paraLevel = 0; + defaultParaLevel = 0; + direction = 0; + */ + /* get Bidi properties */ + try { + bdp = UBiDiProps.getSingleton(); + } + /* the IOException is not chained as a cause; only its message is carried over */ + catch (IOException e) { + throw new MissingResourceException(e.getMessage(), "(BidiProps)", ""); + } + + /* allocate memory for arrays as requested */ + if (maxLength > 0) { + getInitialDirPropsMemory(maxLength); + getInitialLevelsMemory(maxLength); + } else { + /* maxLength == 0: nothing preallocated, so later getDirPropsMemory(int)/getLevelsMemory(int) calls are allowed to allocate */ + mayAllocateText = true; + } + + if (maxRunCount > 0) { + // if maxRunCount == 1, use simpleRuns[] + if (maxRunCount > 1) { + getInitialRunsMemory(maxRunCount); + } + } else { + /* maxRunCount == 0: later getRunsMemory(int) calls are allowed to allocate */ + mayAllocateRuns = true; + } + } + + /* + * We are allowed to allocate memory if object==null or + * mayAllocate==true for each array that we need. + * + * Assume sizeNeeded>0. + * If object != null, then assume size > 0. 
+ */ + private Object getMemory(String label, Object array, Class arrayClass, + boolean mayAllocate, int sizeNeeded) + { + /* NOTE(review): the header comment mentions object==null, but Array.getLength() is called on array unconditionally - confirm callers never pass null */ + int len = Array.getLength(array); + + /* we have at least enough memory and must not allocate */ + if (sizeNeeded == len) { + return array; + } + if (!mayAllocate) { + /* we must not allocate */ + if (sizeNeeded <= len) { + return array; + } + throw new OutOfMemoryError("Failed to allocate memory for " + + label); + } + /* we may try to grow or shrink */ + /* FOOD FOR THOUGHT: when shrinking it should be possible to avoid + the allocation altogether and rely on this.length */ + /* a fresh array of exactly sizeNeeded elements is returned; nothing is copied from the old array */ + try { + return Array.newInstance(arrayClass, sizeNeeded); + } catch (Exception e) { + /* only Exception subclasses are caught here and they are re-reported as OutOfMemoryError without the original cause */ + throw new OutOfMemoryError("Failed to allocate memory for " + + label); + } + } + + /* helper methods for each allocated array */ + /* each helper passes its backing field through getMemory() and stores the result back into that field */ + private void getDirPropsMemory(boolean mayAllocate, int len) + { + Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len); + dirPropsMemory = (byte[]) array; + } + + void getDirPropsMemory(int len) + { + getDirPropsMemory(mayAllocateText, len); + } + + private void getLevelsMemory(boolean mayAllocate, int len) + { + Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len); + levelsMemory = (byte[]) array; + } + + void getLevelsMemory(int len) + { + getLevelsMemory(mayAllocateText, len); + } + + private void getRunsMemory(boolean mayAllocate, int len) + { + Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len); + 
runsMemory = (BidiRun[]) array; + } + + void getRunsMemory(int len) + { + getRunsMemory(mayAllocateRuns, len); + } + + /* additional methods used by constructor - always allow allocation */ + private void getInitialDirPropsMemory(int len) + { + getDirPropsMemory(true, len); + } + + private void getInitialLevelsMemory(int len) + { + getLevelsMemory(true, len); + } + + private void getInitialParasMemory(int len) + { + Object array = getMemory("Paras", parasMemory, Integer.TYPE, 
true, len); + parasMemory = (int[]) array; + } + + private void getInitialRunsMemory(int len) + { + getRunsMemory(true, len); + } + +/* perform (P2)..(P3) ------------------------------------------------------- */ + + /* Scans text[0..originalLength) one code point at a time, fills dirProps[] (OR-ing in the contextual paragraph bit), accumulates flags, increments paraCount at paragraph separators, tracks lastArabicPos and, for a default level, sets paraLevel from GetParaLevelAt(0). */ + private void getDirProps() + { + int i = 0, i0, i1; + flags = 0; /* collect all directionalities in the text */ + int uchar; + byte dirProp; + byte paraDirDefault = 0; /* initialize to avoid compiler warnings */ + boolean isDefaultLevel = IsDefaultLevel(paraLevel); + /* for inverse Bidi, the default para level is set to RTL if there is a + strong R or AL character at either end of the text */ + lastArabicPos = -1; + controlCount = 0; + + final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */ + final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */ + final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */ + + int state; + int paraStart = 0; /* index of first char in paragraph */ + byte paraDir; /* == CONTEXT_RTL within paragraphs + starting with strong R char */ + byte lastStrongDir=0; /* for default level & inverse Bidi */ + int lastStrongLTR=0; /* for STREAMING option */ + /* NOTE(review): lastStrongDir and lastStrongLTR are only assigned in this method, never read here */ + + if (isDefaultLevel) { + paraDirDefault = ((paraLevel & 1) != 0) ? 
CONTEXT_RTL : 0; + paraDir = paraDirDefault; + lastStrongDir = paraDirDefault; + state = LOOKING_FOR_STRONG; + } else { + state = NOT_CONTEXTUAL; + paraDir = 0; + } + /* count paragraphs and determine the paragraph level (P2..P3) */ + /* + * see comment on constant fields: + * the LEVEL_DEFAULT_XXX values are designed so that + * their low-order bit alone yields the intended default + */ + + for (i = 0; i < originalLength; /* i is incremented in the loop */) { + i0 = i; /* index of first code unit */ + uchar = UTF16.charAt(text, 0, originalLength, i); + i += Character.charCount(uchar); + i1 = i - 1; /* index of last code unit, gets the directional property */ + + dirProp = (byte)bdp.getClass(uchar); + + flags |= DirPropFlag(dirProp); + dirProps[i1] = (byte)(dirProp | paraDir); + if (i1 > i0) { /* set previous code units' properties to BN */ + flags |= DirPropFlag(BN); + do { + dirProps[--i1] = (byte)(BN | paraDir); + } while (i1 > i0); + } + /* NOTE(review): both branches inside this block end in continue, so the first strong character of a contextual paragraph skips the lastStrongDir/lastStrongLTR/lastArabicPos updates further down - confirm this is intended for AL */ + if (state == LOOKING_FOR_STRONG) { + if (dirProp == L) { + state = FOUND_STRONG_CHAR; + if (paraDir != 0) { + paraDir = 0; + for (i1 = paraStart; i1 < i; i1++) { + dirProps[i1] &= ~CONTEXT_RTL; + } + } + continue; + } + if (dirProp == R || dirProp == AL) { + state = FOUND_STRONG_CHAR; + if (paraDir == 0) { + paraDir = CONTEXT_RTL; + for (i1 = paraStart; i1 < i; i1++) { + dirProps[i1] |= CONTEXT_RTL; + } + } + continue; + } + } + if (dirProp == L) { + lastStrongDir = 0; + lastStrongLTR = i; /* i is index to next character */ + } + else if (dirProp == R) { + lastStrongDir = CONTEXT_RTL; + } + else if (dirProp == AL) { + lastStrongDir = CONTEXT_RTL; + lastArabicPos = i-1; + } + else if (dirProp == B) { + if (i < originalLength) { /* B not last char in text */ + /* a CR immediately followed by LF does not 
start a new paragraph by itself */ + if (!((uchar == (int)CR) && (text[i] == (int)LF))) { + paraCount++; + } + if (isDefaultLevel) { + state=LOOKING_FOR_STRONG; + paraStart = i; /* i is index to next character */ + paraDir = paraDirDefault; + lastStrongDir = paraDirDefault; + } + } + } + } + if 
(isDefaultLevel) { + paraLevel = GetParaLevelAt(0); + } + + /* The following line does nothing new for contextual paraLevel, but is + needed for absolute paraLevel. */ + flags |= DirPropFlagLR(paraLevel); + + if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) { + flags |= DirPropFlag(L); + } + } + + /* perform (X1)..(X9) ------------------------------------------------------- */ + + /* determine if the text is mixed-directional or single-directional */ + /* Returns Bidi.DIRECTION_LEFT_TO_RIGHT, Bidi.DIRECTION_RIGHT_TO_LEFT or MIXED, based only on the bits currently set in flags. */ + private byte directionFromFlags() { + /* if the text contains AN and neutrals, then some neutrals may become RTL */ + if (!((flags & MASK_RTL) != 0 || + ((flags & DirPropFlag(AN)) != 0 && + (flags & MASK_POSSIBLE_N) != 0))) { + return Bidi.DIRECTION_LEFT_TO_RIGHT; + } else if ((flags & MASK_LTR) == 0) { + return Bidi.DIRECTION_RIGHT_TO_LEFT; + } else { + return MIXED; + } + } + + /* + * Resolve the explicit levels as specified by explicit embedding codes. + * Recalculate the flags to have them reflect the real properties + * after taking the explicit embeddings into account. + * + * The Bidi algorithm is designed to result in the same behavior whether embedding + * levels are externally specified (from "styled text", supposedly the preferred + * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. + * That is why (X9) instructs to remove all explicit codes (and BN). + * However, in a real implementation, this removal of these codes and their index + * positions in the plain text is undesirable since it would result in + * reallocated, reindexed text. + * Instead, this implementation leaves the codes in there and just ignores them + * in the subsequent processing. + * In order to get the same reordering behavior, positions with a BN or an + * explicit embedding code just get the same level assigned as the last "real" + * character. 
+ * + * Some implementations, not this one, then overwrite some of these + * directionality properties at "real" same-level-run boundaries by + * L or R codes so that the resolution of weak types can be performed on the + * entire paragraph at once instead of having to parse it once more and + * perform that resolution on same-level-runs. + * This limits the scope of the implicit rules in effectively + * the same way as the run limits. + * + * Instead, this implementation does not modify these codes. + * On one hand, the paragraph has to be scanned for same-level-runs, but + * on the other hand, this saves another loop to reset these codes, + * or saves making and modifying a copy of dirProps[]. + * + * + * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm. + * + * + * Handling the stack of explicit levels (Xn): + * + * With the Bidi stack of explicit levels, + * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, + * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61. + * + * In order to have a correct push-pop semantics even in the case of overflows, + * there are two overflow counters: + * - countOver60 is incremented with each LRx at level 60 + * - from level 60, one RLx increases the level to 61 + * - countOver61 is incremented with each LRx and RLx at level 61 + * + * Popping levels with PDF must work in the opposite order so that level 61 + * is correct at the correct point. Underflows (too many PDFs) must be checked. + * + * This implementation assumes that MAX_EXPLICIT_LEVEL is odd. 
+     */
+    private byte resolveExplicitLevels() { /* Assigns levels[] from the explicit embedding codes (nothing is
+                                              written when the text is one non-mixed paragraph) and returns
+                                              the direction reported by directionFromFlags(). */
+        int i = 0;
+        byte dirProp;
+        byte level = GetParaLevelAt(0); /* level stored into levels[]; starts at the level of index 0 */
+
+        byte dirct;
+        int paraIndex = 0; /* next slot of paras[] to fill */
+
+        /* determine if the text is mixed-directional or single-directional;
+           on the two shortcut branches below this first result is what gets returned */
+        dirct = directionFromFlags();
+
+        /* we may not need to resolve any explicit levels, but for multiple
+           paragraphs we want to loop on all chars to set the para boundaries */
+        if ((dirct != MIXED) && (paraCount == 1)) {
+            /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
+        } else if ((paraCount == 1) &&
+                   ((flags & MASK_EXPLICIT) == 0)) {
+            /* mixed, but all characters are at the same embedding level */
+            /* or we are in "inverse Bidi" */
+            /* and we don't have contextual multiple paragraphs with some B char */
+            /* set all levels to the paragraph level */
+            for (i = 0; i < length; ++i) {
+                levels[i] = level;
+            }
+        } else {
+            /* continue to perform (Xn) */
+
+            /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
+            /* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
+            byte embeddingLevel = level;
+            byte newLevel;
+            byte stackTop = 0; /* number of saved levels in stack[] */
+
+            byte[] stack = new byte[MAX_EXPLICIT_LEVEL];    /* we never push anything >=MAX_EXPLICIT_LEVEL */
+            int countOver60 = 0;
+            int countOver61 = 0;  /* count overflows of explicit levels */
+
+            /* recalculate the flags: they are rebuilt from scratch while walking the text */
+            flags = 0;
+
+            for (i = 0; i < length; ++i) {
+                dirProp = NoContextRTL(dirProps[i]);
+                switch(dirProp) {
+                case LRE:
+                case LRO:
+                    /* (X3, X5): push and move to the least greater even level,
+                       or count an overflow when that level would exceed MAX_EXPLICIT_LEVEL */
+                    newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */
+                    if (newLevel <= MAX_EXPLICIT_LEVEL) {
+                        stack[stackTop] = embeddingLevel;
+                        ++stackTop;
+                        embeddingLevel = newLevel;
+                        if (dirProp == LRO) {
+                            embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
+                        }
+                        /* we don't need to set LEVEL_OVERRIDE off for LRE
+                           since this has already been done for newLevel which is
+                           the source for embeddingLevel.
+                         */
+                    } else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) {
+                        ++countOver61;
+                    } else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ {
+                        ++countOver60;
+                    }
+                    flags |= DirPropFlag(BN);
+                    break;
+                case RLE:
+                case RLO:
+                    /* (X2, X4): push and move to the least greater odd level,
+                       or count an overflow when that level would exceed MAX_EXPLICIT_LEVEL */
+                    newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */
+                    if (newLevel<=MAX_EXPLICIT_LEVEL) {
+                        stack[stackTop] = embeddingLevel;
+                        ++stackTop;
+                        embeddingLevel = newLevel;
+                        if (dirProp == RLO) {
+                            embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
+                        }
+                        /* we don't need to set LEVEL_OVERRIDE off for RLE
+                           since this has already been done for newLevel which is
+                           the source for embeddingLevel.
+                         */
+                    } else {
+                        ++countOver61;
+                    }
+                    flags |= DirPropFlag(BN);
+                    break;
+                case PDF:
+                    /* (X7): undo the most recent overflow or, failing that, the most recent push */
+                    /* handle all the overflow cases first */
+                    if (countOver61 > 0) {
+                        --countOver61;
+                    } else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) {
+                        /* handle LRx overflows from level 60 */
+                        --countOver60;
+                    } else if (stackTop > 0) {
+                        /* this is the pop operation; it also pops level 61 while countOver60>0 */
+                        --stackTop;
+                        embeddingLevel = stack[stackTop];
+                    /* } else { (underflow): a PDF with nothing to pop is ignored */
+                    }
+                    flags |= DirPropFlag(BN);
+                    break;
+                case B: /* paragraph separator: drop every pending embedding and overflow */
+                    stackTop = 0;
+                    countOver60 = 0;
+                    countOver61 = 0;
+                    level = GetParaLevelAt(i); /* the separator itself is stored at this level below */
+                    if ((i + 1) < length) {
+                        embeddingLevel = GetParaLevelAt(i+1); /* following text restarts from the level at i+1 */
+                        if (!((text[i] == CR) && (text[i + 1] == LF))) { /* no boundary between CR and LF */
+                            paras[paraIndex++] = i+1; /* record the paragraph boundary */
+                        }
+                    }
+                    flags |= DirPropFlag(B);
+                    break;
+                case BN:
+                    /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
+                    /* they will get their levels set correctly in adjustWSLevels() */
+                    flags |= DirPropFlag(BN);
+                    break;
+                default:
+                    /* all other types get the "real" level; a change of level is also noted in flags
+                       (DirPropFlagO for an override level, DirPropFlagE otherwise, plus DirPropFlagMultiRuns) */
+                    if (level != embeddingLevel) {
+                        level = embeddingLevel;
+                        if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
+                            flags |= DirPropFlagO(level) | DirPropFlagMultiRuns;
+                        } else {
+                            flags |= DirPropFlagE(level) | DirPropFlagMultiRuns;
+                        }
+                    }
+                    if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) { /* own property only counts when not overridden */
+                        flags |= DirPropFlag(dirProp);
+                    }
+                    break;
+                }
+
+                /*
+                 * We need to set reasonable levels even on BN codes and
+                 * explicit codes because we will later look at same-level runs (X10).
+                 */
+                levels[i] = level;
+            }
+            if ((flags & MASK_EMBEDDING) != 0) {
+                flags |= DirPropFlagLR(paraLevel); /* NOTE(review): presumably adds the L or R flag matching paraLevel - confirm in DirPropFlagLR() */
+            }
+            if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
+                flags |= DirPropFlag(L);
+            }
+
+            /* subsequently, ignore the explicit codes and BN (X9) */
+
+            /* again, determine if the text is mixed-directional or single-directional,
+               this time from the flags rebuilt above */
+            dirct = directionFromFlags();
+        }
+
+        return dirct;
+    }
+
+    /*
+     * Use a pre-specified embedding levels array:
+     *
+     * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
+     * ignore all explicit codes (X9),
+     * and check all the preset levels.
+     *
+     * Recalculate the flags to have them reflect the real properties
+     * after taking the explicit embeddings into account.
+ */ + private byte checkExplicitLevels() { + byte dirProp; + int i; + this.flags = 0; /* collect all directionalities in the text */ + byte level; + int paraIndex = 0; + + for (i = 0; i < length; ++i) { + if (levels[i] == 0) { + levels[i] = paraLevel; + } + if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) { + if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) { + levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE); + } else { + levels[i] = paraLevel; + } + } + level = levels[i]; + dirProp = NoContextRTL(dirProps[i]); + if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) { + /* keep the override flag in levels[i] but adjust the flags */ + level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */ + flags |= DirPropFlagO(level); + } else { + /* set the flags */ + flags |= DirPropFlagE(level) | DirPropFlag(dirProp); + } + + if ((level < GetParaLevelAt(i) && + !((0 == level) && (dirProp == B))) || + (MAX_EXPLICIT_LEVEL 0) */ + /* */ + /* Cells may be of format "n" where n represents the next state */ + /* (except for the rightmost column). */ + /* Cells may also be of format "_(x,y)" where x represents an action */ + /* to perform and y represents the next state. 
*/
+    /*                                                                   */
+    /*********************************************************************/
+    /* Definitions and type for properties state tables                  */
+    /*********************************************************************/
+    /* Column count of impTabProps and index of its last (Res) column. */
+    private static final int IMPTABPROPS_COLUMNS = 14;
+    private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
+
+    /* Next state held in a properties-table cell: its low five bits. */
+    private static short GetStateProps(short cell) {
+        final int nextState = cell & 0x1f;
+        return (short) nextState;
+    }
+
+    /* Action held in a properties-table cell: whatever lies above the low five bits. */
+    private static short GetActionProps(short cell) {
+        final int action = cell >> 5;
+        return (short) action;
+    }
+
+    /* dirProp regrouped: indexed by dirProp, yields the column to use in impTabProps */
+    private static final short[] groupProp = {
+        /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
+            0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
+    };
+
+    /* reduced dirProp values (the Res column of impTabProps) */
+    private static final short _L  = 0;
+    private static final short _R  = 1;
+    private static final short _EN = 2;
+    private static final short _AN = 3;
+    private static final short _ON = 4;
+    private static final short _S  = 5;
+    private static final short _B  = 6;
+
+    /*********************************************************************/
+    /*                                                                   */
+    /*                  PROPERTIES STATE TABLE                           */
+    /*                                                                   */
+    /* In table impTabProps,                                             */
+    /*      - the ON column regroups ON and WS                           */
+    /*      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF         */
+    /*      - the Res column is the reduced property assigned to a run   */
+    /*                                                                   */
+    /* Action 1: process current run1, init new run1                     */
+    /*        2: init new run2                                           */
+    /*        3: process run1, process run2, init new run1               */
+    /*        4: process run1, set run1=run2, init new run2              */
+    /*                                                                   */
+    /* Notes:                                                            */
+    /*  1) This table is used in resolveImplicitLevels().                */
+    /*  2) This table triggers actions when there is a change in the Bidi*/
+    /*     property of incoming characters (action 1).                   */
+    /*  3) Most such property sequences are processed immediately (in    */
+    /*     fact, passed to processPropertySeq()).                        */
+    /*  4) However, numbers are assembled as one sequence.
This means    */
+    /*     that undefined situations (like CS following digits, until    */
+    /*     it is known if the next char will be a digit) are held until  */
+    /*     following chars define them.                                  */
+    /*     Example: digits followed by CS, then comes another CS or ON;  */
+    /*              the digits will be processed, then the CS assigned   */
+    /*              as the start of an ON sequence (action 3).           */
+    /*  5) There are cases where more than one sequence must be          */
+    /*     processed, for instance digits followed by CS followed by L:  */
+    /*     the digits must be processed as one sequence, and the CS      */
+    /*     must be processed as an ON sequence, all this before starting */
+    /*     assembling chars for the opening L sequence.                  */
+    /*                                                                   */
+    /* Cell layout: the low five bits are the next state (see            */
+    /* GetStateProps) and the bits above them are the action (see
+       GetActionProps). So a bare n means "no action, go to state n",
+       32+n is action 1, 64+n action 2, 96+n action 3 and 128+n action 4,
+       each followed by a move to state n. Rows are indexed by the current
+       state, columns by the groupProp[] value of the incoming property. */
+    private static final short impTabProps[][] =
+    {
+/*                        L,     R,    EN,    AN,    ON,     S,     B,    ES,    ET,    CS,    BN,   NSM,    AL,  Res */
+/* 0 Init        */ {     1,     2,     4,     5,     7,    15,    17,     7,     9,     7,     0,     7,     3,  _ON },
+/* 1 L           */ {     1,  32+2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     1,     1,  32+3,   _L },
+/* 2 R           */ {  32+1,     2,  32+4,  32+5,  32+7, 32+15, 32+17,  32+7,  32+9,  32+7,     2,     2,  32+3,   _R },
+/* 3 AL          */ {  32+1,  32+2,  32+6,  32+6,  32+8, 32+16, 32+17,  32+8,  32+8,  32+8,     3,     3,     3,   _R },
+/* 4 EN          */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17, 64+10,    11, 64+10,     4,     4,  32+3,  _EN },
+/* 5 AN          */ {  32+1,  32+2,  32+4,     5,  32+7, 32+15, 32+17,  32+7,  32+9, 64+12,     5,     5,  32+3,  _AN },
+/* 6 AL:EN/AN    */ {  32+1,  32+2,     6,     6,  32+8, 32+16, 32+17,  32+8,  32+8, 64+13,     6,     6,  32+3,  _AN },
+/* 7 ON          */ {  32+1,  32+2,  32+4,  32+5,     7, 32+15, 32+17,     7, 64+14,     7,     7,     7,  32+3,  _ON },
+/* 8 AL:ON       */ {  32+1,  32+2,  32+6,  32+6,     8, 32+16, 32+17,     8,     8,     8,     8,     8,  32+3,  _ON },
+/* 9 ET          */ {  32+1,  32+2,     4,  32+5,     7, 32+15, 32+17,     7,     9,     7,     9,     9,  32+3,  _ON },
+/*10 EN+ES/CS    */ {  96+1,  96+2,     4,  96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    10, 128+7,  96+3,  _EN },
+/*11 EN+ET       */ {  32+1,  32+2,     4,  32+5,  32+7, 32+15, 32+17,  32+7,    11,  32+7,    11,    11,  32+3,  _EN },
+/*12 AN+CS       */ {  96+1,  96+2,  96+4,     5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7,    12, 128+7,  96+3,  _AN },
+/*13 AL:EN/AN+CS */ {  96+1,  96+2,     6,     6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8,    13, 128+8,  96+3,  _AN },
+/*14 ON+ET       */ {  32+1,  32+2, 128+4,  32+5,     7, 32+15, 32+17,     7,    14,     7,    14,    14,  32+3,  _ON },
+/*15 S           */ {  32+1,  32+2,  32+4,  32+5,  32+7,    15, 32+17,  32+7,  32+9,  32+7,    15,  32+7,  32+3,   _S },
+/*16 AL:S        */ {  32+1,  32+2,  32+6,  32+6,  32+8,    16, 32+17,  32+8,  32+8,  32+8,    16,  32+8,  32+3,   _S },
+/*17 B           */ {  32+1,  32+2,  32+4,  32+5,  32+7, 32+15,    17,  32+7,  32+9,  32+7,    17,  32+7,  32+3,   _B }
+    };
+
+    /*********************************************************************/
+    /* The levels state machine tables                                   */
+    /*********************************************************************/
+    /*                                                                   */
+    /* All table cells are 8 bits:                                       */
+    /*      bits 0..3:  next state                                       */
+    /*      bits 4..7:  action to perform (if > 0)                       */
+    /*                                                                   */
+    /* Cells may be of format "n" where n represents the next state      */
+    /* (except for the rightmost column).                                */
+    /* Cells may also be of format "_(x,y)" where x represents an action */
+    /* to perform and y represents the next state.
*/
+    /*                                                                   */
+    /* This format limits each table to 16 states each and to 15 actions.*/
+    /*                                                                   */
+    /*********************************************************************/
+    /* Definitions and type for levels state tables                      */
+    /*********************************************************************/
+    /* Column count of the levels tables and index of their last (Res) column. */
+    private static final int IMPTABLEVELS_COLUMNS = _B + 2;
+    private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;
+
+    /* Next state held in a levels-table cell: bits 0..3. */
+    private static short GetState(byte cell) { return (short)(cell & 0x0f); }
+
+    /*
+     * Action held in a levels-table cell: bits 4..7.
+     * A byte is sign-extended when it is promoted to int, so the cell is
+     * masked to its unsigned value before shifting; without the mask a cell
+     * with bit 7 set (actions 8..15) would produce a negative action number.
+     * Cells below 0x80 give the same result as before.
+     */
+    private static short GetAction(byte cell) { return (short)((cell & 0xff) >> 4); }
+
+    /*
+     * A pair of levels state tables with their action maps.
+     * Entry [0] of imptab/impact is built from table1/act1 and entry [1]
+     * from table2/act2; resolveImplicitLevels() selects the entry with
+     * (runLevel & 1), i.e. [0] for even and [1] for odd run levels.
+     */
+    private static class ImpTabPair {
+        byte[][][] imptab;
+        short[][] impact;
+
+        ImpTabPair(byte[][] table1, byte[][] table2,
+                   short[] act1, short[] act2) {
+            imptab = new byte[][][] {table1, table2};
+            impact = new short[][] {act1, act2};
+        }
+    }
+
+    /*********************************************************************/
+    /*                                                                   */
+    /*                  LEVELS STATE TABLES                              */
+    /*                                                                   */
+    /* In all levels state tables,                                       */
+    /*      - state 0 is the initial state                               */
+    /*      - the Res column is the increment to add to the text level   */
+    /*        for this property sequence.                                */
+    /*                                                                   */
+    /* The impact arrays for each table of a pair map the local action   */
+    /* numbers of the table to the total list of actions. For instance,  */
+    /* action 2 in a given table corresponds to the action number which  */
+    /* appears in entry [2] of the impact array for that table.          */
+    /* The first entry of all impact arrays must be 0.                   */
+    /*                                                                   */
+    /* Action 1: init conditional sequence                               */
+    /*        2: prepend conditional sequence to current sequence        */
+    /*        3: set ON sequence to new level - 1                        */
+    /*        4: init EN/AN/ON sequence                                  */
+    /*        5: fix EN/AN/ON sequence followed by R                     */
+    /*        6: set previous level sequence to level 2                  */
+    /*                                                                   */
+    /* Notes:                                                            */
+    /*  1) These tables are used in processPropertySeq(). The input      */
+    /*     is property sequences as determined by resolveImplicitLevels.
*/ + /* 2) Most such property sequences are processed immediately */ + /* (levels are assigned). */ + /* 3) However, some sequences cannot be assigned a final level till */ + /* one or more following sequences are received. For instance, */ + /* ON following an R sequence within an even-level paragraph. */ + /* If the following sequence is R, the ON sequence will be */ + /* assigned basic run level+1, and so will the R sequence. */ + /* 4) S is generally handled like ON, since its level will be fixed */ + /* to paragraph level in adjustWSLevels(). */ + /* */ + + private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */ + /* In this table, conditional sequences receive the higher possible level + until proven otherwise. + */ + { + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 }, + /* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 }, + /* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 }, + /* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 }, + /* 4 : R+ON */ { 0x20, 1, 3, 3, 4, 4, 0x20, 1 }, + /* 5 : AN+ON */ { 0x20, 1, 0x20, 2, 5, 5, 0x20, 1 } + }; + + private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */ + /* In this table, conditional sequences receive the lower possible level + until proven otherwise. + */ + { + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, + /* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 }, + /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, + /* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 }, + /* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 }, + /* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 } + }; + + private static final short[] impAct0 = {0,1,2,3,4,5,6}; + + private static final ImpTabPair impTab_DEFAULT = new ImpTabPair( + impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0); + + private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */ + /* In this table, conditional sequences receive the higher possible + level until proven otherwise. 
+ */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 0, 2, 1, 1, 0, 0, 0, 0 }, + /* 1 : L+EN/AN */ { 0, 2, 1, 1, 0, 0, 0, 2 }, + /* 2 : R */ { 0, 2, 4, 4, 0x13, 0, 0, 1 }, + /* 3 : R+ON */ { 0x20, 2, 4, 4, 3, 3, 0x20, 1 }, + /* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 } + }; + private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair( + impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0); + + private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = { + /* In this table, EN/AN+ON sequences receive levels as if associated with R + until proven that there is L or sor/eor on both sides. AN is handled like EN. + */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 }, + /* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 }, + /* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 }, + /* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 }, + /* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 }, + /* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 } + }; + private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = { + /* In this table, EN/AN+ON sequences receive levels as if associated with R + until proven that there is L on both sides. AN is handled like EN. + */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 }, + /* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 }, + /* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 }, + /* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 }, + /* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 } + }; + private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new + ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R, + impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0); + + private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = { + /* This table is identical to the Default LTR table except that EN and AN + are handled like L. 
+ */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 }, + /* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 }, + /* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 }, + /* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 }, + /* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 }, + /* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 } + }; + private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = { + /* This table is identical to the Default RTL table except that EN and AN + are handled like L. + */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 }, + /* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 }, + /* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 }, + /* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 }, + /* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 }, + /* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 } + }; + private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair + (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L, + impAct0, impAct0); + + private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */ + /* In this table, conditional sequences receive the lower possible level + until proven otherwise. 
+ */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 }, + /* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 }, + /* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 }, + /* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 }, + /* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 }, + /* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 }, + /* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 } + }; + private static final short[] impAct1 = {0,1,11,12}; + private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair( + impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); + + private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { + /* The case handled in this table is (visually): R EN L + */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 }, + /* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 }, + /* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 }, + /* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 }, + /* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 }, + /* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 }, + /* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 } + }; + private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = { + /* The cases handled in this table are (visually): R EN L + R L AN L + */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 }, + /* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 }, + /* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 }, + /* 3 : L */ { 3 , 0, 3, 0x36, 0x14, 0x40, 0, 1 }, + /* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 }, + /* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 }, + /* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 } + }; + private static final short impAct2[] = {0,1,7,8,9,10}; + private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = + new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, + 
impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); + + private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair( + impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1); + + private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = { + /* The case handled in this table is (visually): R EN L + */ + /* L, R, EN, AN, ON, S, B, Res */ + /* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 }, + /* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 }, + /* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 }, + /* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 }, + /* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 } + }; + private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new + ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, + impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2); + + private class LevState { + byte[][] impTab; /* level table pointer */ + short[] impAct; /* action map array */ + int startON; /* start of ON sequence */ + int startL2EN; /* start of level 2 sequence */ + int lastStrongRTL; /* index of last found R or AL */ + short state; /* current state */ + byte runLevel; /* run level before implicit solving */ + } + + /*------------------------------------------------------------------------*/ + + static final int FIRSTALLOC = 10; + /* + * param pos: position where to insert + * param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER + */ + private void addPoint(int pos, int flag) + { + Point point = new Point(); + + int len = insertPoints.points.length; + if (len == 0) { + insertPoints.points = new Point[FIRSTALLOC]; + len = FIRSTALLOC; + } + if (insertPoints.size >= len) { /* no room for new point */ + Point[] savePoints = insertPoints.points; + insertPoints.points = new Point[len * 2]; + System.arraycopy(savePoints, 0, insertPoints.points, 0, len); + } + point.pos = pos; + point.flag = flag; + 
insertPoints.points[insertPoints.size] = point; + insertPoints.size++; + } + + /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ + + /* + * This implementation of the (Wn) rules applies all rules in one pass. + * In order to do so, it needs a look-ahead of typically 1 character + * (except for W5: sequences of ET) and keeps track of changes + * in a rule Wp that affect a later Wq (p= 0) { + addPoint(levState.startL2EN, LRM_BEFORE); + } + levState.startL2EN = -1; /* not within previous if since could also be -2 */ + /* check if we had any relevant EN/AN after R/AL */ + if ((insertPoints.points.length == 0) || + (insertPoints.size <= insertPoints.confirmed)) { + /* nothing, just clean up */ + levState.lastStrongRTL = -1; + /* check if we have a pending conditional segment */ + level = (byte)impTab[oldStateSeq][IMPTABLEVELS_RES]; + if ((level & 1) != 0 && levState.startON > 0) { /* after ON */ + start = levState.startON; /* reset to basic run level */ + } + if (_prop == _S) { /* add LRM before S */ + addPoint(start0, LRM_BEFORE); + insertPoints.confirmed = insertPoints.size; + } + break; + } + /* reset previous RTL cont to level for LTR text */ + for (k = levState.lastStrongRTL + 1; k < start0; k++) { + /* reset odd level, leave runLevel+2 as is */ + levels[k] = (byte)((levels[k] - 2) & ~1); + } + /* mark insert points as confirmed */ + insertPoints.confirmed = insertPoints.size; + levState.lastStrongRTL = -1; + if (_prop == _S) { /* add LRM before S */ + addPoint(start0, LRM_BEFORE); + insertPoints.confirmed = insertPoints.size; + } + break; + + case 4: /* R/AL after possible relevant EN/AN */ + /* just clean up */ + if (insertPoints.points.length > 0) + /* remove all non confirmed insert points */ + insertPoints.size = insertPoints.confirmed; + levState.startON = -1; + levState.startL2EN = -1; + levState.lastStrongRTL = limit - 1; + break; + + case 5: /* EN/AN after R/AL + possible cont */ + /* check for real AN */ + if ((_prop == _AN) 
&& (NoContextRTL(dirProps[start0]) == AN)) { + /* real AN */ + if (levState.startL2EN == -1) { /* if no relevant EN already found */ + /* just note the righmost digit as a strong RTL */ + levState.lastStrongRTL = limit - 1; + break; + } + if (levState.startL2EN >= 0) { /* after EN, no AN */ + addPoint(levState.startL2EN, LRM_BEFORE); + levState.startL2EN = -2; + } + /* note AN */ + addPoint(start0, LRM_BEFORE); + break; + } + /* if first EN/AN after R/AL */ + if (levState.startL2EN == -1) { + levState.startL2EN = start0; + } + break; + + case 6: /* note location of latest R/AL */ + levState.lastStrongRTL = limit - 1; + levState.startON = -1; + break; + + case 7: /* L after R+ON/EN/AN */ + /* include possible adjacent number on the left */ + for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) { + } + if (k >= 0) { + addPoint(k, RLM_BEFORE); /* add RLM before */ + insertPoints.confirmed = insertPoints.size; /* confirm it */ + } + levState.startON = start0; + break; + + case 8: /* AN after L */ + /* AN numbers between L text on both sides may be trouble. 
*/ + /* tentatively bracket with LRMs; will be confirmed if followed by L */ + addPoint(start0, LRM_BEFORE); /* add LRM before */ + addPoint(start0, LRM_AFTER); /* add LRM after */ + break; + + case 9: /* R after L+ON/EN/AN */ + /* false alert, infirm LRMs around previous AN */ + insertPoints.size=insertPoints.confirmed; + if (_prop == _S) { /* add RLM before S */ + addPoint(start0, RLM_BEFORE); + insertPoints.confirmed = insertPoints.size; + } + break; + + case 10: /* L after L+ON/AN */ + level = (byte)(levState.runLevel + addLevel); + for (k=levState.startON; k < start0; k++) { + if (levels[k] < level) { + levels[k] = level; + } + } + insertPoints.confirmed = insertPoints.size; /* confirm inserts */ + levState.startON = start0; + break; + + case 11: /* L after L+ON+EN/AN/ON */ + level = (byte)levState.runLevel; + for (k = start0-1; k >= levState.startON; k--) { + if (levels[k] == level+3) { + while (levels[k] == level+3) { + levels[k--] -= 2; + } + while (levels[k] == level) { + k--; + } + } + if (levels[k] == level+2) { + levels[k] = level; + continue; + } + levels[k] = (byte)(level+1); + } + break; + + case 12: /* R after L+ON+EN/AN/ON */ + level = (byte)(levState.runLevel+1); + for (k = start0-1; k >= levState.startON; k--) { + if (levels[k] > level) { + levels[k] -= 2; + } + } + break; + + default: /* we should never get here */ + throw new IllegalStateException("Internal ICU error in processPropertySeq"); + } + } + if ((addLevel) != 0 || (start < start0)) { + level = (byte)(levState.runLevel + addLevel); + for (k = start; k < limit; k++) { + levels[k] = level; + } + } + } + + private void resolveImplicitLevels(int start, int limit, short sor, short eor) + { + LevState levState = new LevState(); + int i, start1, start2; + short oldStateImp, stateImp, actionImp; + short gprop, resProp, cell; + short nextStrongProp = R; + int nextStrongPos = -1; + + + /* check for RTL inverse Bidi mode */ + /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense 
to + * loop on the text characters from end to start. + * This would need a different properties state table (at least different + * actions) and different levels state tables (maybe very similar to the + * LTR corresponding ones. + */ + /* initialize for levels state table */ + levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ + levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ + levState.state = 0; + levState.runLevel = levels[start]; + levState.impTab = impTabPair.imptab[levState.runLevel & 1]; + levState.impAct = impTabPair.impact[levState.runLevel & 1]; + processPropertySeq(levState, (short)sor, start, start); + /* initialize for property state table */ + if (dirProps[start] == NSM) { + stateImp = (short)(1 + sor); + } else { + stateImp = 0; + } + start1 = start; + start2 = 0; + + for (i = start; i <= limit; i++) { + if (i >= limit) { + gprop = eor; + } else { + short prop, prop1; + prop = NoContextRTL(dirProps[i]); + gprop = groupProp[prop]; + } + oldStateImp = stateImp; + cell = impTabProps[oldStateImp][gprop]; + stateImp = GetStateProps(cell); /* isolate the new state */ + actionImp = GetActionProps(cell); /* isolate the action */ + if ((i == limit) && (actionImp == 0)) { + /* there is an unprocessed sequence if its property == eor */ + actionImp = 1; /* process the last sequence */ + } + if (actionImp != 0) { + resProp = impTabProps[oldStateImp][IMPTABPROPS_RES]; + switch (actionImp) { + case 1: /* process current seq1, init new seq1 */ + processPropertySeq(levState, resProp, start1, i); + start1 = i; + break; + case 2: /* init new seq2 */ + start2 = i; + break; + case 3: /* process seq1, process seq2, init new seq1 */ + processPropertySeq(levState, resProp, start1, start2); + processPropertySeq(levState, _ON, start2, i); + start1 = i; + break; + case 4: /* process seq1, set seq1=seq2, init new seq2 */ + processPropertySeq(levState, resProp, start1, start2); + start1 = start2; + start2 = i; + break; + default: 
/* we should never get here */ + throw new IllegalStateException("Internal ICU error in resolveImplicitLevels"); + } + } + } + /* flush possible pending sequence, e.g. ON */ + processPropertySeq(levState, (short)eor, limit, limit); + } + + /* perform (L1) and (X9) ---------------------------------------------------- */ + + /* + * Reset the embedding levels for some non-graphic characters (L1). + * This method also sets appropriate levels for BN, and + * explicit embedding types that are supposed to have been removed + * from the paragraph in (X9). + */ + private void adjustWSLevels() { + int i; + + if ((flags & MASK_WS) != 0) { + int flag; + i = trailingWSStart; + while (i > 0) { + /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ + while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) { + if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { + levels[i] = 0; + } else { + levels[i] = GetParaLevelAt(i); + } + } + + /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ + /* here, i+1 is guaranteed to be 0) { + flag = DirPropFlagNC(dirProps[--i]); + if ((flag & MASK_BN_EXPLICIT) != 0) { + levels[i] = levels[i + 1]; + } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) { + levels[i] = 0; + break; + } else if ((flag & MASK_B_S) != 0){ + levels[i] = GetParaLevelAt(i); + break; + } + } + } + } + } + + private int Bidi_Min(int x, int y) { + return x < y ? x : y; + } + + private int Bidi_Abs(int x) { + return x >= 0 ? x : -x; + } + + /** + * Perform the Unicode Bidi algorithm. It is defined in the + * Unicode Standard Annex #9, + * version 13, + * also described in The Unicode Standard, Version 4.0 .

+ * + * This method takes a piece of plain text containing one or more paragraphs, + * with or without externally specified embedding levels from styled + * text and computes the left-right-directionality of each character.

+ * + * If the entire text is all of the same directionality, then + * the method may not perform all the steps described by the algorithm, + * i.e., some levels may not be the same as if all steps were performed. + * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get + * a resolved level of 2 higher than the surrounding text according to + * the algorithm. This implementation may set all resolved levels to + * the same value in such a case.

+ * + * The text can be composed of multiple paragraphs. Occurrence of a block + * separator in the text terminates a paragraph, and whatever comes next starts + * a new paragraph. The exception to this rule is when a Carriage Return (CR) + * is followed by a Line Feed (LF). Both CR and LF are block separators, but + * in that case, the pair of characters is considered as terminating the + * preceding paragraph, and a new paragraph will be started by a character + * coming after the LF. + * + * Although the text is passed here as a String, it is + * stored internally as an array of characters. Therefore the + * documentation will refer to indexes of the characters in the text. + * + * @param text contains the text that the Bidi algorithm will be performed + * on. This text can be retrieved with getText() or + * getTextAsString.
+ * + * @param paraLevel specifies the default level for the text; + * it is typically 0 (LTR) or 1 (RTL). + * If the method shall determine the paragraph level from the text, + * then paraLevel can be set to + * either LEVEL_DEFAULT_LTR + * or LEVEL_DEFAULT_RTL; if the text contains multiple + * paragraphs, the paragraph level shall be determined separately for + * each paragraph; if a paragraph does not include any strongly typed + * character, then the desired default is used (0 for LTR or 1 for RTL). + * Any other value between 0 and MAX_EXPLICIT_LEVEL + * is also valid, with odd levels indicating RTL. + * + * @param embeddingLevels (in) may be used to preset the embedding and override levels, + * ignoring characters like LRE and PDF in the text. + * A level overrides the directional property of its corresponding + * (same index) character if the level has the + * LEVEL_OVERRIDE bit set.

+ * Except for that bit, it must be + * paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL, + * with one exception: a level of zero may be specified for a + * paragraph separator even if paraLevel>0 when multiple + * paragraphs are submitted in the same call to setPara().

+ * Caution: A reference to this array, not a copy + * of the levels, will be stored in the Bidi object; + * the embeddingLevels + * should not be modified to avoid unexpected results on subsequent + * Bidi operations. However, the setPara() and + * setLine() methods may modify some or all of the + * levels.

+     * Note: the embeddingLevels array must
+     * have one entry for each character in text.
+     *
+     * @throws IllegalArgumentException if the values in embeddingLevels are
+     *         not within the allowed range
+     *
+     * @see #LEVEL_DEFAULT_LTR
+     * @see #LEVEL_DEFAULT_RTL
+     * @see #LEVEL_OVERRIDE
+     * @see #MAX_EXPLICIT_LEVEL
+     * @stable ICU 3.8
+     */
+    void setPara(String text, byte paraLevel, byte[] embeddingLevels)
+    {
+        /* a null String is handled as empty text; everything else is
+           delegated to the char[] variant of setPara() */
+        final char[] chars = (text == null) ? new char[0] : text.toCharArray();
+        setPara(chars, paraLevel, embeddingLevels);
+    }
+
+    /**
+     * Perform the Unicode Bidi algorithm. It is defined in the
+     * Unicode Standard Annex #9,
+     * version 13,
+     * also described in The Unicode Standard, Version 4.0 .

+ * + * This method takes a piece of plain text containing one or more paragraphs, + * with or without externally specified embedding levels from styled + * text and computes the left-right-directionality of each character.

+ * + * If the entire text is all of the same directionality, then + * the method may not perform all the steps described by the algorithm, + * i.e., some levels may not be the same as if all steps were performed. + * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get + * a resolved level of 2 higher than the surrounding text according to + * the algorithm. This implementation may set all resolved levels to + * the same value in such a case.

+ * + * The text can be composed of multiple paragraphs. Occurrence of a block + * separator in the text terminates a paragraph, and whatever comes next starts + * a new paragraph. The exception to this rule is when a Carriage Return (CR) + * is followed by a Line Feed (LF). Both CR and LF are block separators, but + * in that case, the pair of characters is considered as terminating the + * preceding paragraph, and a new paragraph will be started by a character + * coming after the LF. + * + * The text is stored internally as an array of characters. Therefore the + * documentation will refer to indexes of the characters in the text. + * + * @param chars contains the text that the Bidi algorithm will be performed + * on. This text can be retrieved with getText() or + * getTextAsString.
+ * + * @param paraLevel specifies the default level for the text; + * it is typically 0 (LTR) or 1 (RTL). + * If the method shall determine the paragraph level from the text, + * then paraLevel can be set to + * either LEVEL_DEFAULT_LTR + * or LEVEL_DEFAULT_RTL; if the text contains multiple + * paragraphs, the paragraph level shall be determined separately for + * each paragraph; if a paragraph does not include any strongly typed + * character, then the desired default is used (0 for LTR or 1 for RTL). + * Any other value between 0 and MAX_EXPLICIT_LEVEL + * is also valid, with odd levels indicating RTL. + * + * @param embeddingLevels (in) may be used to preset the embedding and + * override levels, ignoring characters like LRE and PDF in the text. + * A level overrides the directional property of its corresponding + * (same index) character if the level has the + * LEVEL_OVERRIDE bit set.

+ * Except for that bit, it must be + * paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL, + * with one exception: a level of zero may be specified for a + * paragraph separator even if paraLevel>0 when multiple + * paragraphs are submitted in the same call to setPara().

+ * Caution: A reference to this array, not a copy + * of the levels, will be stored in the Bidi object; + * the embeddingLevels + * should not be modified to avoid unexpected results on subsequent + * Bidi operations. However, the setPara() and + * setLine() methods may modify some or all of the + * levels.

+     * Note: the embeddingLevels array must
+     * have one entry for each character in text.
+     *
+     * @throws IllegalArgumentException if the values in embeddingLevels are
+     *      not within the allowed range
+     *
+     * @see #LEVEL_DEFAULT_LTR
+     * @see #LEVEL_DEFAULT_RTL
+     * @see #LEVEL_OVERRIDE
+     * @see #MAX_EXPLICIT_LEVEL
+     * @stable ICU 3.8
+     */
+    public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
+    {
+        /* check the argument values: a level below INTERNAL_LEVEL_DEFAULT_LTR
+         * is handed to verifyRange with the bounds 0 and MAX_EXPLICIT_LEVEL + 1;
+         * other values skip that check. A null text is replaced by an empty array.
+         */
+        if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) {
+            verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
+        }
+        if (chars == null) {
+            chars = new char[0];
+        }
+
+        /* initialize the Bidi object; note that the caller's array is stored, not copied */
+        this.paraBidi = null;          /* mark unfinished setPara */
+        this.text = chars;
+        this.length = this.originalLength = this.resultLength = text.length;
+        this.paraLevel = paraLevel;
+        this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
+        this.paraCount = 1;
+
+        /* Allocate zero-length arrays instead of setting to null here; then
+         * checks for null in various places can be eliminated.
+         */
+        dirProps = new byte[0];
+        levels = new byte[0];
+        runs = new BidiRun[0];
+        isGoodLogicalToVisualRunsMap = false;
+        insertPoints.size = 0;          /* clean up from last call */
+        insertPoints.confirmed = 0;     /* clean up from last call */
+
+        /*
+         * Save the original paraLevel if contextual; otherwise, set to 0.
+         */
+        if (IsDefaultLevel(paraLevel)) {
+            defaultParaLevel = paraLevel;
+        } else {
+            defaultParaLevel = 0;
+        }
+
+        if (length == 0) {
+            /*
+             * For an empty paragraph, set the paraLevel, the flags and the
+             * direction and return early. The zero-length dirProps, levels
+             * and runs arrays assigned above stay in place.
+             * A default (contextual) level is reduced to its low bit.
+             * There is nothing more to do.
+             */
+            if (IsDefaultLevel(paraLevel)) {
+                this.paraLevel &= 1;
+                defaultParaLevel = 0;
+            }
+            if ((this.paraLevel & 1) != 0) {
+                flags = DirPropFlag(R);
+                direction = Bidi.DIRECTION_RIGHT_TO_LEFT;
+            } else {
+                flags = DirPropFlag(L);
+                direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
+            }
+
+            runCount = 0;
+            paraCount = 0;
+            paraBidi = this;         /* mark successful setPara */
+            return;
+        }
+
+        runCount = -1; /* negative count; presumably read by BidiLine.getRuns as "runs not computed yet" - confirm there */
+
+        /*
+         * Get the directional properties,
+         * the flags bit-set, and
+         * determine the paragraph level if necessary.
+         */
+        getDirPropsMemory(length);
+        dirProps = dirPropsMemory;
+        getDirProps();
+
+        /* the processed length may have changed if OPTION_STREAMING is set */
+        trailingWSStart = length;  /* the levels[] will reflect the WS run */
+
+        /* allocate paras memory; the last entry is set to the text length */
+        if (paraCount > 1) {
+            getInitialParasMemory(paraCount);
+            paras = parasMemory;
+            paras[paraCount - 1] = length;
+        } else {
+            /* initialize paras for single paragraph */
+            paras = simpleParas;
+            simpleParas[0] = length;
+        }
+
+        /* are explicit levels specified? */
+        if (embeddingLevels == null) {
+            /* no: determine explicit levels according to the (Xn) rules */
+            getLevelsMemory(length);
+            levels = levelsMemory;
+            direction = resolveExplicitLevels();
+        } else {
+            /* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL;
+             * the caller's array itself becomes the levels array (no copy is made) */
+            levels = embeddingLevels;
+            direction = checkExplicitLevels();
+        }
+
+        /*
+         * The steps after (X9) in the Bidi algorithm are performed only if
+         * the paragraph text has mixed directionality!
+         */
+        switch (direction) {
+        case Bidi.DIRECTION_LEFT_TO_RIGHT:
+            /* make sure paraLevel is even.
+             * NOTE(review): the unqualified name is the method parameter, which
+             * shadows the field; this.paraLevel is not changed by this statement
+             * and the parameter is not read again afterwards. */
+            paraLevel = (byte)((paraLevel + 1) & ~1);
+
+            /* all levels are implicitly at paraLevel (important for getLevels()) */
+            trailingWSStart = 0;
+            break;
+        case Bidi.DIRECTION_RIGHT_TO_LEFT:
+            /* make sure paraLevel is odd.
+             * NOTE(review): as above, this assigns the parameter, not the field. */
+            paraLevel |= 1;
+
+            /* all levels are implicitly at paraLevel (important for getLevels()) */
+            trailingWSStart = 0;
+            break;
+        default:
+            this.impTabPair = impTab_DEFAULT;
+
+            /*
+             * If there are no external levels specified and there
+             * are no significant explicit level codes in the text,
+             * then we can treat the entire paragraph as one run.
+             * Otherwise, we need to perform the following rules on runs of
+             * the text with the same embedding levels. (X10)
+             * "Significant" explicit level codes are ones that actually
+             * affect non-BN characters.
+             * Examples for "insignificant" ones are empty embeddings
+             * LRE-PDF, LRE-RLE-PDF-PDF, etc.
+             */
+            if (embeddingLevels == null && paraCount <= 1 &&
+                (flags & DirPropFlagMultiRuns) == 0) {
+                resolveImplicitLevels(0, length,
+                        GetLRFromLevel(GetParaLevelAt(0)),
+                        GetLRFromLevel(GetParaLevelAt(length - 1)));
+            } else {
+                /* sor, eor: start and end types of same-level-run */
+                int start, limit = 0;
+                byte level, nextLevel;
+                short sor, eor;
+
+                /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
+                level = GetParaLevelAt(0);
+                nextLevel = levels[0];
+                if (level < nextLevel) {
+                    eor = GetLRFromLevel(nextLevel);
+                } else {
+                    eor = GetLRFromLevel(level);
+                }
+
+                do {
+                    /* determine start and limit of the run (end points just behind the run) */
+
+                    /* the values for this run's start are the same as for the previous run's end */
+                    start = limit;
+                    level = nextLevel;
+                    if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) {
+                        /* except if this is a new paragraph, then set sor = para level */
+                        sor = GetLRFromLevel(GetParaLevelAt(start));
+                    } else {
+                        sor = eor;
+                    }
+
+                    /* search for the limit of this run: advance while the stored level is unchanged */
+                    while (++limit < length && levels[limit] == level) {}
+
+                    /* get the correct level of the next run */
+                    if (limit < length) {
+                        nextLevel = levels[limit];
+                    } else {
+                        nextLevel = GetParaLevelAt(length - 1);
+                    }
+
+                    /* determine eor from max(level, nextLevel); sor is last run's eor;
+                     * the override bit is masked out for the comparison only */
+                    if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) {
+                        eor = GetLRFromLevel(nextLevel);
+                    } else {
+                        eor = GetLRFromLevel(level);
+                    }
+
+                    /* if the run consists of overridden directional types, then there
+                       are no implicit types to be resolved */
+                    if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
+                        resolveImplicitLevels(start, limit, sor, eor);
+                    } else {
+                        /* remove the LEVEL_OVERRIDE flags */
+                        do {
+                            levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE;
+                        } while (start < limit);
+                    }
+                } while (limit < length);
+            }
+
+            /* reset the embedding levels for some non-graphic characters (L1), (X9) */
+            adjustWSLevels();
+
+            break;
+        }
+
+        resultLength += insertPoints.size;
+        paraBidi = this;             /* mark successful setPara */
+    }
+
+    /**
+     * Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
+     * Unicode Standard Annex #9,
+     * version 13,
+     * also described in The Unicode Standard, Version 4.0 .

+ * + * This method takes a paragraph of text and computes the + * left-right-directionality of each character. The text should not + * contain any Unicode block separators.

+ * + * The RUN_DIRECTION attribute in the text, if present, determines the base + * direction (left-to-right or right-to-left). If not present, the base + * direction is computed using the Unicode Bidirectional Algorithm, + * defaulting to left-to-right if there are no strong directional characters + * in the text. This attribute, if present, must be applied to all the text + * in the paragraph.

+ * + * The BIDI_EMBEDDING attribute in the text, if present, represents + * embedding level information. Negative values from -1 to -62 indicate + * overrides at the absolute value of the level. Positive values from 1 to + * 62 indicate embeddings. Where values are zero or not defined, the base + * embedding level as determined by the base direction is assumed.

+ * + * The NUMERIC_SHAPING attribute in the text, if present, converts European + * digits to other decimal digits before running the bidi algorithm. This + * attribute, if present, must be applied to all the text in the paragraph. + * + * If the entire text is all of the same directionality, then + * the method may not perform all the steps described by the algorithm, + * i.e., some levels may not be the same as if all steps were performed. + * This is not relevant for unidirectional text.
+ * For example, in pure LTR text with numbers the numbers would get + * a resolved level of 2 higher than the surrounding text according to + * the algorithm. This implementation may set all resolved levels to + * the same value in such a case.

+ * + * @param paragraph a paragraph of text with optional character and + * paragraph attribute information + * @stable ICU 3.8 + */ + public void setPara(AttributedCharacterIterator paragraph) + { + byte paraLvl; + Boolean runDirection = (Boolean) paragraph.getAttribute(TextAttribute.RUN_DIRECTION); + NumericShaper shaper = (NumericShaper) paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING); + if (runDirection == null) { + paraLvl = INTERNAL_LEVEL_DEFAULT_LTR; + } else { + paraLvl = (runDirection.equals(TextAttribute.RUN_DIRECTION_LTR)) ? + (byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT; + } + + byte[] lvls = null; + int len = paragraph.getEndIndex() - paragraph.getBeginIndex(); + byte[] embeddingLevels = new byte[len]; + char[] txt = new char[len]; + int i = 0; + char ch = paragraph.first(); + while (ch != AttributedCharacterIterator.DONE) { + txt[i] = ch; + Integer embedding = (Integer) paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING); + if (embedding != null) { + byte level = embedding.byteValue(); + if (level == 0) { + /* no-op */ + } else if (level < 0) { + lvls = embeddingLevels; + embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE); + } else { + lvls = embeddingLevels; + embeddingLevels[i] = level; + } + } + ch = paragraph.next(); + ++i; + } + + if (shaper != null) { + shaper.shape(txt, 0, len); + } + setPara(txt, paraLvl, lvls); + } + + /** + * Specify whether block separators must be allocated level zero, + * so that successive paragraphs will progress from left to right. + * This method must be called before setPara(). + * Paragraph separators (B) may appear in the text. Setting them to level zero + * means that all paragraph separators (including one possibly appearing + * in the last text position) are kept in the reordered text after the text + * that they follow in the source text. 
+     * When this feature is not enabled, a paragraph separator at the last
+     * position of the text before reordering will go to the first position
+     * of the reordered text when the paragraph level is odd.
+     *
+     * @param ordarParaLTR specifies whether paragraph separators (B) must
+     * receive level 0, so that successive paragraphs progress from left to right.
+     *
+     * @see #setPara
+     * @stable ICU 3.8
+     */
+    private void orderParagraphsLTR(boolean ordarParaLTR) {
+        orderParagraphsLTR = ordarParaLTR; /* only records the flag in the field of the same name */
+    }
+
+    /**
+     * Get the directionality of the text.
+     * This returns the <code>direction</code> field after checking that a
+     * paragraph or line has been set.
+     *
+     * @return the stored direction value. <code>setPara()</code> treats
+     * <code>Bidi.DIRECTION_LEFT_TO_RIGHT</code> and
+     * <code>Bidi.DIRECTION_RIGHT_TO_LEFT</code> as the unidirectional
+     * cases and any other value as mixed-directional.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to <code>setPara</code> or <code>setLine</code>
+     *
+     * @see #LTR
+     * @see #RTL
+     * @see #MIXED
+     * @stable ICU 3.8
+     */
+    private byte getDirection()
+    {
+        verifyValidParaOrLine();
+        return direction;
+    }
+
+    /**
+     * Get the length of the text.
+     * This returns the <code>originalLength</code> field, which
+     * <code>setPara(char[], byte, byte[])</code> sets to the length of the
+     * character array it stores.
+     *
+     * @return The length of the text that the Bidi object was
+     *         created for.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to <code>setPara</code> or <code>setLine</code>
+     * @stable ICU 3.8
+     */
+    public int getLength()
+    {
+        verifyValidParaOrLine();
+        return originalLength;
+    }
+
+    /* paragraphs API methods ------------------------------------------------- */
+
+    /**
+     * Get the paragraph level of the text.
+     *
+     * @return The paragraph level. If there are multiple paragraphs, their
+     *         level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
+     *         LEVEL_DEFAULT_RTL.  In that case, the level of the first paragraph
+     *         is returned.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     *
+     * @see #LEVEL_DEFAULT_LTR
+     * @see #LEVEL_DEFAULT_RTL
+     * @see #getParagraph
+     * @see #getParagraphByIndex
+     * @stable ICU 3.8
+     */
+    public byte getParaLevel()
+    {
+        verifyValidParaOrLine(); /* state check first; the field is only read afterwards */
+        return paraLevel; /* the stored field as-is; no masking or per-paragraph lookup happens here */
+    }
+
+    /**
+     * Get the index of a paragraph, given a position within the text.

+ * + * @param charIndex is the index of a character within the text, in the + * range [0..getProcessedLength()-1]. + * + * @return The index of the paragraph containing the specified position, + * starting from 0. + * + * @throws IllegalStateException if this call is not preceded by a successful + * call to setPara or setLine + * @throws IllegalArgumentException if charIndex is not within the legal range + * + * @see com.ibm.icu.text.BidiRun + * @see #getProcessedLength + * @stable ICU 3.8 + */ + public int getParagraphIndex(int charIndex) + { + verifyValidParaOrLine(); + BidiBase bidi = paraBidi; /* get Para object if Line object */ + verifyRange(charIndex, 0, bidi.length); + int paraIndex; + for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) { + } + return paraIndex; + } + + /** + * setLine() returns a Bidi object to + * contain the reordering information, especially the resolved levels, + * for all the characters in a line of text. This line of text is + * specified by referring to a Bidi object representing + * this information for a piece of text containing one or more paragraphs, + * and by specifying a range of indexes in this text.

+ * In the new line object, the indexes will range from 0 to limit-start-1.

+ * + * This is used after calling setPara() + * for a piece of text, and after line-breaking on that text. + * It is not necessary if each paragraph is treated as a single line.

+ * + * After line-breaking, rules (L1) and (L2) for the treatment of + * trailing WS and for reordering are performed on + * a Bidi object that represents a line.

+     * Important: the line Bidi object may
+     * reference data within the global text Bidi object.
+     * You should not alter the content of the global text object until
+     * you are finished using the line object.
+     *
+     * @param start is the line's first index into the text.
+     *
+     * @param limit is just behind the line's last index into the text
+     *        (its last index +1).
+     *
+     * @return a Bidi object that will now represent a line of the text.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara
+     * @throws IllegalArgumentException if start and limit are not in the range
+     *         0<=start<limit<=getProcessedLength(),
+     *         or if the specified line crosses a paragraph boundary
+     *
+     * @see #setPara
+     * @see #getProcessedLength
+     * @stable ICU 3.8
+     */
+    public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit)
+    { /* bidiBase is never read in this method; this object is what gets passed on below */
+        verifyValidPara();
+        verifyRange(start, 0, limit);
+        verifyRange(limit, 0, length+1);
+        if (getParagraphIndex(start) != getParagraphIndex(limit - 1)) {
+            /* the line crosses a paragraph boundary; the exception carries no message */
+            throw new IllegalArgumentException();
+        }
+
+        /* the line object itself is built by BidiLine; bidi, newBidi and newBidiBase are forwarded unchanged */
+        return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit);
+    }
+
+    /**
+     * Get the level for one character.
+     *
+     * @param charIndex the index of a character.
+     *
+     * @return The level for the character at charIndex. When charIndex is
+     *         negative or not less than the text length, the value of
+     *         <code>getBaseLevel()</code> is returned instead; no
+     *         IllegalArgumentException is raised for such an index.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     *
+     * @see #getBaseLevel
+     * @see #getProcessedLength
+     * @stable ICU 3.8
+     */
+    public byte getLevelAt(int charIndex)
+    {
+        if (charIndex < 0 || charIndex >= length) {
+            return (byte)getBaseLevel(); /* out-of-range index: answer with the base level */
+        }
+        verifyValidParaOrLine();
+        verifyRange(charIndex, 0, length); /* charIndex is already within 0..length-1 when this line is reached */
+        return BidiLine.getLevelAt(this, charIndex);
+    }
+
+    /**
+     * Get an array of levels for each character.

+     *
+     * Note that this method may allocate memory under some
+     * circumstances, unlike getLevelAt().
+     *
+     * @return The levels array for the text,
+     *         or null if an error occurs.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     * @stable ICU 3.8
+     */
+    private byte[] getLevels()
+    {
+        verifyValidParaOrLine();
+        if (length <= 0) {
+            return new byte[0]; /* empty text: a fresh zero-length array, never null, on this path */
+        }
+        return BidiLine.getLevels(this);
+    }
+
+    /**
+     * Get the number of runs.
+     * This method may invoke the actual reordering on the
+     * Bidi object, after setPara()
+     * may have resolved only the levels of the text. Therefore,
+     * countRuns() may have to allocate memory,
+     * and may throw an exception if it fails to do so.
+     * The count returned is the <code>runCount</code> field as it stands
+     * after <code>BidiLine.getRuns(this)</code> has been called.
+     *
+     * @return The number of runs.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     * @stable ICU 3.8
+     */
+    public int countRuns()
+    {
+        verifyValidParaOrLine();
+        BidiLine.getRuns(this);
+        return runCount;
+    }
+
+    /**
+     * Get a visual-to-logical index map (array) for the characters in the
+     * Bidi (paragraph or line) object.
+     *

+ * Some values in the map may be MAP_NOWHERE if the + * corresponding text characters are Bidi marks inserted in the visual + * output by the option OPTION_INSERT_MARKS. + *

+ * When the visual output is altered by using options of + * writeReordered() such as INSERT_LRM_FOR_NUMERIC, + * KEEP_BASE_COMBINING, OUTPUT_REVERSE, + * REMOVE_BIDI_CONTROLS, the logical positions returned may not + * be correct. It is advised to use, when possible, reordering options + * such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}. + * + * @return an array of getResultLength() + * indexes which will reflect the reordering of the characters.

+     * The index map will result in
+     *        indexMap[visualIndex]==logicalIndex, where
+     *        indexMap represents the returned array.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     *
+     * @see #getLogicalMap
+     * @see #getLogicalIndex
+     * @see #getResultLength
+     * @see #MAP_NOWHERE
+     * @see #OPTION_INSERT_MARKS
+     * @see #writeReordered
+     * @stable ICU 3.8
+     */
+    private int[] getVisualMap()
+    {
+        /* countRuns() checks successful call to setPara/setLine; it is
+         * called for that check and for computing the runs, its return
+         * value is not used here */
+        countRuns();
+        if (resultLength <= 0) {
+            return new int[0]; /* nothing to map: fresh zero-length array */
+        }
+        return BidiLine.getVisualMap(this);
+    }
+
+    /**
+     * This is a convenience method that does not use a Bidi object.
+     * It is intended to be used for when an application has determined the levels
+     * of objects (character sequences) and just needs to have them reordered (L2).
+     * This is equivalent to using getVisualMap() on a
+     * Bidi object.
+     *
+     * @param levels is an array of levels that have been determined by
+     *        the application.
+     *
+     * @return an array of levels.length
+     *        indexes which will reflect the reordering of the characters.

+     * The index map will result in
+     *        indexMap[visualIndex]==logicalIndex, where
+     *        indexMap represents the returned array.
+     *
+     * @stable ICU 3.8
+     */
+    private static int[] reorderVisual(byte[] levels)
+    {
+        return BidiLine.reorderVisual(levels); /* pure delegation; no checks are made here */
+    }
+
+    /**
+     * Constant indicating that the base direction depends on the first strong
+     * directional character in the text according to the Unicode Bidirectional
+     * Algorithm. If no strong directional character is present, the base
+     * direction is left-to-right.
+     * The value is 0x7e.
+     * @stable ICU 3.8
+     */
+    private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;
+
+    /**
+     * Constant indicating that the base direction depends on the first strong
+     * directional character in the text according to the Unicode Bidirectional
+     * Algorithm. If no strong directional character is present, the base
+     * direction is right-to-left.
+     * The value is 0x7f.
+     * NOTE(review): the identifier is spelled "INTERMAL" (sic), unlike its
+     * left-to-right counterpart above; any rename has to cover every use in
+     * this class.
+     * @stable ICU 3.8
+     */
+    private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;
+
+    /**
+     * Create Bidi from the given text, embedding, and direction information.
+     * The embeddings array may be null. If present, the values represent
+     * embedding level information. Negative values from -1 to -61 indicate
+     * overrides at the absolute value of the level. Positive values from 1 to
+     * 61 indicate embeddings. Where values are zero, the base embedding level
+     * as determined by the base direction is assumed.

+     *
+     * Note: this constructor calls setPara() internally.
+     *
+     * @param text an array containing the paragraph of text to process.
+     * @param textStart the index into the text array of the start of the
+     *        paragraph.
+     * @param embeddings an array containing embedding values for each character
+     *        in the paragraph. This can be null, in which case it is assumed
+     *        that there is no external embedding information.
+     * @param embStart the index into the embedding array of the start of the
+     *        paragraph.
+     * @param paragraphLength the length of the paragraph in the text and
+     *        embeddings arrays.
+     * @param flags a collection of flags that control the algorithm. The
+     *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
+     *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
+     *        DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
+     *
+     * @throws IllegalArgumentException if the values in embeddings are
+     *         not within the allowed range
+     *
+     * @see #DIRECTION_LEFT_TO_RIGHT
+     * @see #DIRECTION_RIGHT_TO_LEFT
+     * @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
+     * @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
+     * @stable ICU 3.8
+     */
+    public BidiBase(char[] text,
+             int textStart,
+             byte[] embeddings,
+             int embStart,
+             int paragraphLength,
+             int flags)
+     {
+        this(0, 0);
+        byte paraLvl;
+        switch (flags) { /* any flags value not listed below is handled like DIRECTION_LEFT_TO_RIGHT */
+        case Bidi.DIRECTION_LEFT_TO_RIGHT:
+        default:
+            paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT;
+            break;
+        case Bidi.DIRECTION_RIGHT_TO_LEFT:
+            paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT;
+            break;
+        case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
+            paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
+            break;
+        case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
+            paraLvl = INTERNAL_LEVEL_DEFAULT_RTL;
+            break;
+        }
+        byte[] paraEmbeddings;
+        if (embeddings == null) {
+            paraEmbeddings = null;
+        } else {
+            paraEmbeddings = new byte[paragraphLength]; /* the caller's embeddings array is never modified */
+            byte lev;
+            for (int i = 0; i < paragraphLength; i++) {
+                lev = embeddings[i + embStart];
+                if (lev < 0) {
+                    lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE); /* negative value: override at the absolute level */
+                } else if (lev == 0) {
+                    lev = paraLvl; /* zero: take the paragraph level ... */
+                    if (paraLvl > MAX_EXPLICIT_LEVEL) {
+                        lev &= 1; /* ... reduced to its low bit when paraLvl exceeds MAX_EXPLICIT_LEVEL */
+                    }
+                }
+                paraEmbeddings[i] = lev;
+            }
+        }
+        if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {
+            setPara(text, paraLvl, paraEmbeddings); /* whole array requested: the caller's text array is passed on without copying */
+        } else {
+            char[] paraText = new char[paragraphLength];
+            System.arraycopy(text, textStart, paraText, 0, paragraphLength);
+            setPara(paraText, paraLvl, paraEmbeddings);
+        }
+    }
+
+    /**
+     * Return true if the line is not left-to-right or right-to-left. This means
+     * it either has mixed runs of left-to-right and right-to-left text, or the
+     * base direction differs from the direction of the only run of text.
+     * The result is computed as the negation of both
+     * <code>isLeftToRight()</code> and <code>isRightToLeft()</code>.
+     *
+     * @return true if the line is not left-to-right or right-to-left.
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara
+     * @stable ICU 3.8
+     */
+    public boolean isMixed()
+    {
+        return (!isLeftToRight() && !isRightToLeft());
+    }
+
+    /**
+     * Return true if the line is all left-to-right text and the base direction
+     * is left-to-right.
+     *
+     * @return true if the line is all left-to-right text and the base direction
+     *         is left-to-right.
+ * + * @throws IllegalStateException if this call is not preceded by a successful + * call to setPara + * @stable ICU 3.8 + */ + public boolean isLeftToRight() + { + return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0); + } + + /** + * Return true if the line is all right-to-left text, and the base direction + * is right-to-left + * + * @return true if the line is all right-to-left text, and the base + * direction is right-to-left + * + * @throws IllegalStateException if this call is not preceded by a successful + * call to setPara + * @stable ICU 3.8 + */ + public boolean isRightToLeft() + { + return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1); + } + + /** + * Return true if the base direction is left-to-right + * + * @return true if the base direction is left-to-right + * + * @throws IllegalStateException if this call is not preceded by a successful + * call to setPara or setLine + * + * @stable ICU 3.8 + */ + public boolean baseIsLeftToRight() + { + return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT); + } + + /** + * Return the base level (0 if left-to-right, 1 if right-to-left). 
+     *
+     * @return the base level
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     *
+     * @stable ICU 3.8
+     */
+    public int getBaseLevel()
+    {
+        return getParaLevel(); /* the stored paragraph level, widened to int; not reduced to 0 or 1 here */
+    }
+
+    /**
+     * Compute the logical to visual run mapping.
+     * The result is cached in <code>logicalToVisualRunsMap</code> and guarded
+     * by <code>isGoodLogicalToVisualRunsMap</code>, which
+     * <code>setPara()</code> resets to false. After this method returns,
+     * <code>logicalToVisualRunsMap[k]</code> is the index into
+     * <code>runs</code> of the run with the k-th smallest
+     * <code>start</code> value.
+     */
+    private void getLogicalToVisualRunsMap()
+    {
+        if (isGoodLogicalToVisualRunsMap) {
+            return; /* cached map is still valid */
+        }
+        int count = countRuns();
+        if ((logicalToVisualRunsMap == null) ||
+            (logicalToVisualRunsMap.length < count)) {
+            logicalToVisualRunsMap = new int[count]; /* an existing array is reused when it is large enough */
+        }
+        int i;
+        long[] keys = new long[count];
+        for (i = 0; i < count; i++) {
+            keys[i] = ((long)(runs[i].start)<<32) + i; /* high 32 bits: run start; low 32 bits: index in runs[] */
+        }
+        Arrays.sort(keys); /* orders the packed keys by run start */
+        for (i = 0; i < count; i++) {
+            logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF); /* the literal is an int (-1), so the mask changes nothing; the (int) cast keeps the low 32 bits */
+        }
+        keys = null;
+        isGoodLogicalToVisualRunsMap = true;
+    }
+
+    /**
+     * Return the level of the nth logical run in this line.
+     * When there is exactly one run, the paragraph level is returned and
+     * <code>run</code> is not checked.
+     *
+     * @param run the index of the run, between 0 and countRuns()-1
+     *
+     * @return the level of the run
+     *
+     * @throws IllegalStateException if this call is not preceded by a successful
+     *         call to setPara or setLine
+     * @throws IllegalArgumentException if there is more than one run and
+     *         run is not in the range 0 &lt;= run &lt; countRuns()
+     * @stable ICU 3.8
+     */
+    public int getRunLevel(int run)
+    {
+        verifyValidParaOrLine();
+        BidiLine.getRuns(this);
+        if (runCount == 1) {
+            return getParaLevel(); /* single run: answered before run is validated */
+        }
+        verifyIndex(run, 0, runCount);
+        getLogicalToVisualRunsMap();
+        return runs[logicalToVisualRunsMap[run]].level;
+    }
+
+    /**
+     * Return the index of the character at the start of the nth logical run in
+     * this line, as an offset from the start of the line.
+ * + * @param run the index of the run, between 0 and countRuns() + * + * @return the start of the run + * + * @throws IllegalStateException if this call is not preceded by a successful + * call to setPara or setLine + * @throws IllegalArgumentException if run is not in + * the range 0<=run<countRuns() + * @stable ICU 3.8 + */ + public int getRunStart(int run) + { + verifyValidParaOrLine(); + BidiLine.getRuns(this); + if (runCount == 1) { + return 0; + } else if (run == runCount) { + return length; + } + verifyIndex(run, 0, runCount); + getLogicalToVisualRunsMap(); + return runs[logicalToVisualRunsMap[run]].start; + } + + /** + * Return the index of the character past the end of the nth logical run in + * this line, as an offset from the start of the line. For example, this + * will return the length of the line for the last run on the line. + * + * @param run the index of the run, between 0 and countRuns() + * + * @return the limit of the run + * + * @throws IllegalStateException if this call is not preceded by a successful + * call to setPara or setLine + * @throws IllegalArgumentException if run is not in + * the range 0<=run<countRuns() + * @stable ICU 3.8 + */ + public int getRunLimit(int run) + { + verifyValidParaOrLine(); + BidiLine.getRuns(this); + if (runCount == 1) { + return length; + } + verifyIndex(run, 0, runCount); + getLogicalToVisualRunsMap(); + int idx = logicalToVisualRunsMap[run]; + int len = idx == 0 ? runs[idx].limit : + runs[idx].limit - runs[idx-1].limit; + return runs[idx].start + len; + } + + /** + * Return true if the specified text requires bidi analysis. If this returns + * false, the text will display left-to-right. Clients can then avoid + * constructing a Bidi object. Text in the Arabic Presentation Forms area of + * Unicode is presumed to already be shaped and ordered for display, and so + * will not cause this method to return true. 
+ * + * @param text the text containing the characters to test + * @param start the start of the range of characters to test + * @param limit the limit of the range of characters to test + * + * @return true if the range of characters requires bidi analysis + * + * @stable ICU 3.8 + */ + public static boolean requiresBidi(char[] text, + int start, + int limit) + { + final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT | + 1 << AL | + 1 << RLE | + 1 << RLO | + 1 << AN); + + if (0 > start || start > limit || limit > text.length) { + throw new IllegalArgumentException("Value start " + start + + " is out of range 0 to " + limit); + } + for (int i = start; i < limit; ++i) { + if (Character.isHighSurrogate(text[i]) && i < (limit-1) && + Character.isLowSurrogate(text[i+1])) { + if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) { + return true; + } + } else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) { + return true; + } + } + return false; + } + + /** + * Reorder the objects in the array into visual order based on their levels. + * This is a utility method to use when you have a collection of objects + * representing runs of text in logical order, each run containing text at a + * single level. The elements at index from + * objectStart up to objectStart + count in the + * objects array will be reordered into visual order assuming + * each run of text has the level indicated by the corresponding element in + * the levels array (at index - objectStart + levelStart). 
+     *
+     * @param levels an array representing the bidi level of each object
+     * @param levelStart the start position in the levels array
+     * @param objects the array of objects to be reordered into visual order
+     * @param objectStart the start position in the objects array
+     * @param count the number of objects to reorder
+     * @throws IllegalArgumentException if levelStart or objectStart is not a
+     *         valid index into its array, or if count is negative or extends
+     *         past the end of the objects array
+     * @stable ICU 3.8
+     */
+    public static void reorderVisually(byte[] levels,
+                                       int levelStart,
+                                       Object[] objects,
+                                       int objectStart,
+                                       int count)
+    {
+        if (0 > levelStart || levels.length <= levelStart) {
+            throw new IllegalArgumentException("Value levelStart " +
+                      levelStart + " is out of range 0 to " +
+                      (levels.length-1));
+        }
+        if (0 > objectStart || objects.length <= objectStart) {
+            /* report the offending value itself, not levelStart */
+            throw new IllegalArgumentException("Value objectStart " +
+                      objectStart + " is out of range 0 to " +
+                      (objects.length-1));
+        }
+        /* objectStart is known to be valid here, so the subtraction cannot
+           overflow the way objectStart+count could for a huge count */
+        if (0 > count || count > (objects.length - objectStart)) {
+            throw new IllegalArgumentException("Value count " +
+                      count + " is out of range 0 to " +
+                      (objects.length - objectStart));
+        }
+        /* compute the permutation from a private copy of the levels */
+        byte[] reorderLevels = new byte[count];
+        System.arraycopy(levels, levelStart, reorderLevels, 0, count);
+        int[] indexMap = reorderVisual(reorderLevels);
+        /* permute from a snapshot so that already-moved slots are not re-read */
+        Object[] temp = new Object[count];
+        System.arraycopy(objects, objectStart, temp, 0, count);
+        for (int i = 0; i < count; ++i) {
+            objects[objectStart + i] = temp[indexMap[i]];
+        }
+    }
+
+    /**
+     * Display the bidi internal state, used in debugging.
+     */
+    public String toString() {
+        /* Output shape: <super.toString()>[dir: D baselevel: P length: N
+           runs: [l0 l1 ...] text: [0xh0 0xh1 ...]]; a null array prints as
+           "null" and an empty one as "[]". */
+        StringBuilder buf = new StringBuilder(super.toString());
+
+        buf.append("[dir: " + direction);
+        buf.append(" baselevel: " + paraLevel);
+        buf.append(" length: " + length);
+        buf.append(" runs: ");
+        if (levels == null) {
+            buf.append("null");
+        } else {
+            buf.append('[');
+            /* emit each level exactly once, separated by single spaces */
+            for (int i = 0; i < levels.length; i++) {
+                if (i > 0) {
+                    buf.append(' ');
+                }
+                buf.append(levels[i]);
+            }
+            buf.append(']');
+        }
+        buf.append(" text: ");
+        if (text == null) {
+            buf.append("null");
+        } else {
+            buf.append('[');
+            /* emit each UTF-16 unit exactly once as 0x-prefixed hex */
+            for (int i = 0; i < text.length; i++) {
+                if (i > 0) {
+                    buf.append(' ');
+                }
+                buf.append("0x");
+                buf.append(Integer.toHexString(text[i]));
+            }
+            buf.append(']');
+        }
+        buf.append(']');
+
+        return buf.toString();
+    }
+
+}
diff --git a/src/share/classes/sun/text/bidi/BidiLine.java b/src/share/classes/sun/text/bidi/BidiLine.java
new file mode 100644
index 000000000..311cf3349
--- /dev/null
+++ b/src/share/classes/sun/text/bidi/BidiLine.java
@@ -0,0 +1,849 @@
+/*
+ * Portions Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Sun designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Sun in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ +/* + ******************************************************************************* + * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * + * * + * The original version of this source code and documentation is copyrighted * + * and owned by IBM, These materials are provided under terms of a License * + * Agreement between IBM and Sun. This technology is protected by multiple * + * US and International patents. This notice and attribution to IBM may not * + * to removed. * + ******************************************************************************* + */ +/* Written by Simon Montagu, Matitiahu Allouche + * (ported from C code written by Markus W. Scherer) + */ + +package sun.text.bidi; + +import java.text.Bidi; +import java.util.Arrays; + +public final class BidiLine { + + /* + * General remarks about the functions in this file: + * + * These functions deal with the aspects of potentially mixed-directional + * text in a single paragraph or in a line of a single paragraph + * which has already been processed according to + * the Unicode 3.0 Bidi algorithm as defined in + * http://www.unicode.org/unicode/reports/tr9/ , version 13, + * also described in The Unicode Standard, Version 4.0.1 . + * + * This means that there is a Bidi object with a levels + * and a dirProps array. + * paraLevel and direction are also set. + * Only if the length of the text is zero, then levels==dirProps==NULL. + * + * The overall directionality of the paragraph + * or line is used to bypass the reordering steps if possible. + * Even purely RTL text does not need reordering there because + * the getLogical/VisualIndex() methods can compute the + * index on the fly in such a case. 
+ * + * The implementation of the access to same-level-runs and of the reordering + * do attempt to provide better performance and less memory usage compared to + * a direct implementation of especially rule (L2) with an array of + * one (32-bit) integer per text character. + * + * Here, the levels array is scanned as soon as necessary, and a vector of + * same-level-runs is created. Reordering then is done on this vector. + * For each run of text positions that were resolved to the same level, + * only 8 bytes are stored: the first text position of the run and the visual + * position behind the run after reordering. + * One sign bit is used to hold the directionality of the run. + * This is inefficient if there are many very short runs. If the average run + * length is <2, then this uses more memory. + * + * In a further attempt to save memory, the levels array is never changed + * after all the resolution rules (Xn, Wn, Nn, In). + * Many methods have to consider the field trailingWSStart: + * if it is less than length, then there is an implicit trailing run + * at the paraLevel, + * which is not reflected in the levels array. + * This allows a line Bidi object to use the same levels array as + * its paragraph parent object. + * + * When a Bidi object is created for a line of a paragraph, then the + * paragraph's levels and dirProps arrays are reused by way of setting + * a pointer into them, not by copying. This again saves memory and forbids to + * change the now shared levels for (L1). + */ + + /* handle trailing WS (L1) -------------------------------------------------- */ + + /* + * setTrailingWSStart() sets the start index for a trailing + * run of WS in the line. This is necessary because we do not modify + * the paragraph's levels array that we just point into. + * Using trailingWSStart is another form of performing (L1). 
+ * + * To make subsequent operations easier, we also include the run + * before the WS if it is at the paraLevel - we merge the two here. + * + * This method is called only from setLine(), so paraLevel is + * set correctly for the line even when contextual multiple paragraphs. + */ + + static void setTrailingWSStart(BidiBase bidiBase) + { + byte[] dirProps = bidiBase.dirProps; + byte[] levels = bidiBase.levels; + int start = bidiBase.length; + byte paraLevel = bidiBase.paraLevel; + + /* If the line is terminated by a block separator, all preceding WS etc... + are already set to paragraph level. + Setting trailingWSStart to pBidi->length will avoid changing the + level of B chars from 0 to paraLevel in getLevels when + orderParagraphsLTR==TRUE + */ + if (BidiBase.NoContextRTL(dirProps[start - 1]) == BidiBase.B) { + bidiBase.trailingWSStart = start; /* currently == bidiBase.length */ + return; + } + /* go backwards across all WS, BN, explicit codes */ + while (start > 0 && + (BidiBase.DirPropFlagNC(dirProps[start - 1]) & BidiBase.MASK_WS) != 0) { + --start; + } + + /* if the WS run can be merged with the previous run then do so here */ + while (start > 0 && levels[start - 1] == paraLevel) { + --start; + } + + bidiBase.trailingWSStart=start; + } + + public static Bidi setLine(Bidi bidi, BidiBase paraBidi, + Bidi newBidi, BidiBase newBidiBase, + int start, int limit) { + int length; + + BidiBase lineBidi = newBidiBase; + + /* set the values in lineBidi from its paraBidi parent */ + /* class members are already initialized to 0 */ + // lineBidi.paraBidi = null; /* mark unfinished setLine */ + // lineBidi.flags = 0; + // lineBidi.controlCount = 0; + + length = lineBidi.length = lineBidi.originalLength = + lineBidi.resultLength = limit - start; + + lineBidi.text = new char[length]; + System.arraycopy(paraBidi.text, start, lineBidi.text, 0, length); + lineBidi.paraLevel = paraBidi.GetParaLevelAt(start); + lineBidi.paraCount = paraBidi.paraCount; + lineBidi.runs = new 
BidiRun[0]; + if (paraBidi.controlCount > 0) { + int j; + for (j = start; j < limit; j++) { + if (BidiBase.IsBidiControlChar(paraBidi.text[j])) { + lineBidi.controlCount++; + } + } + lineBidi.resultLength -= lineBidi.controlCount; + } + /* copy proper subset of DirProps */ + lineBidi.getDirPropsMemory(length); + lineBidi.dirProps = lineBidi.dirPropsMemory; + System.arraycopy(paraBidi.dirProps, start, lineBidi.dirProps, 0, + length); + /* copy proper subset of Levels */ + lineBidi.getLevelsMemory(length); + lineBidi.levels = lineBidi.levelsMemory; + System.arraycopy(paraBidi.levels, start, lineBidi.levels, 0, + length); + lineBidi.runCount = -1; + + if (paraBidi.direction != BidiBase.MIXED) { + /* the parent is already trivial */ + lineBidi.direction = paraBidi.direction; + + /* + * The parent's levels are all either + * implicitly or explicitly ==paraLevel; + * do the same here. + */ + if (paraBidi.trailingWSStart <= start) { + lineBidi.trailingWSStart = 0; + } else if (paraBidi.trailingWSStart < limit) { + lineBidi.trailingWSStart = paraBidi.trailingWSStart - start; + } else { + lineBidi.trailingWSStart = length; + } + } else { + byte[] levels = lineBidi.levels; + int i, trailingWSStart; + byte level; + + setTrailingWSStart(lineBidi); + trailingWSStart = lineBidi.trailingWSStart; + + /* recalculate lineBidi.direction */ + if (trailingWSStart == 0) { + /* all levels are at paraLevel */ + lineBidi.direction = (byte)(lineBidi.paraLevel & 1); + } else { + /* get the level of the first character */ + level = (byte)(levels[0] & 1); + + /* if there is anything of a different level, then the line + is mixed */ + if (trailingWSStart < length && + (lineBidi.paraLevel & 1) != level) { + /* the trailing WS is at paraLevel, which differs from + levels[0] */ + lineBidi.direction = BidiBase.MIXED; + } else { + /* see if levels[1..trailingWSStart-1] have the same + direction as levels[0] and paraLevel */ + for (i = 1; ; i++) { + if (i == trailingWSStart) { + /* the direction 
values match those in level */ + lineBidi.direction = level; + break; + } else if ((levels[i] & 1) != level) { + lineBidi.direction = BidiBase.MIXED; + break; + } + } + } + } + + switch(lineBidi.direction) { + case Bidi.DIRECTION_LEFT_TO_RIGHT: + /* make sure paraLevel is even */ + lineBidi.paraLevel = (byte) + ((lineBidi.paraLevel + 1) & ~1); + + /* all levels are implicitly at paraLevel (important for + getLevels()) */ + lineBidi.trailingWSStart = 0; + break; + case Bidi.DIRECTION_RIGHT_TO_LEFT: + /* make sure paraLevel is odd */ + lineBidi.paraLevel |= 1; + + /* all levels are implicitly at paraLevel (important for + getLevels()) */ + lineBidi.trailingWSStart = 0; + break; + default: + break; + } + } + + newBidiBase.paraBidi = paraBidi; /* mark successful setLine */ + return newBidi; + } + + static byte getLevelAt(BidiBase bidiBase, int charIndex) + { + /* return paraLevel if in the trailing WS run, otherwise the real level */ + if (bidiBase.direction != BidiBase.MIXED || charIndex >= bidiBase.trailingWSStart) { + return bidiBase.GetParaLevelAt(charIndex); + } else { + return bidiBase.levels[charIndex]; + } + } + + static byte[] getLevels(BidiBase bidiBase) + { + int start = bidiBase.trailingWSStart; + int length = bidiBase.length; + + if (start != length) { + /* the current levels array does not reflect the WS run */ + /* + * After the previous if(), we know that the levels array + * has an implicit trailing WS run and therefore does not fully + * reflect itself all the levels. + * This must be a Bidi object for a line, and + * we need to create a new levels array. 
+ */ + /* bidiBase.paraLevel is ok even if contextual multiple paragraphs, + since bidiBase is a line object */ + Arrays.fill(bidiBase.levels, start, length, bidiBase.paraLevel); + + /* this new levels array is set for the line and reflects the WS run */ + bidiBase.trailingWSStart = length; + } + if (length < bidiBase.levels.length) { + byte[] levels = new byte[length]; + System.arraycopy(bidiBase.levels, 0, levels, 0, length); + return levels; + } + return bidiBase.levels; + } + + static BidiRun getLogicalRun(BidiBase bidiBase, int logicalPosition) + { + /* this is done based on runs rather than on levels since levels have + a special interpretation when REORDER_RUNS_ONLY + */ + BidiRun newRun = new BidiRun(), iRun; + getRuns(bidiBase); + int runCount = bidiBase.runCount; + int visualStart = 0, logicalLimit = 0; + iRun = bidiBase.runs[0]; + + for (int i = 0; i < runCount; i++) { + iRun = bidiBase.runs[i]; + logicalLimit = iRun.start + iRun.limit - visualStart; + if ((logicalPosition >= iRun.start) && + (logicalPosition < logicalLimit)) { + break; + } + visualStart = iRun.limit; + } + newRun.start = iRun.start; + newRun.limit = logicalLimit; + newRun.level = iRun.level; + return newRun; + } + + /* in trivial cases there is only one trivial run; called by getRuns() */ + private static void getSingleRun(BidiBase bidiBase, byte level) { + /* simple, single-run case */ + bidiBase.runs = bidiBase.simpleRuns; + bidiBase.runCount = 1; + + /* fill and reorder the single run */ + bidiBase.runs[0] = new BidiRun(0, bidiBase.length, level); + } + + /* reorder the runs array (L2) ---------------------------------------------- */ + + /* + * Reorder the same-level runs in the runs array. + * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. + * All the visualStart fields=logical start before reordering. + * The "odd" bits are not set yet. 
+ * + * Reordering with this data structure lends itself to some handy shortcuts: + * + * Since each run is moved but not modified, and since at the initial maxLevel + * each sequence of same-level runs consists of only one run each, we + * don't need to do anything there and can predecrement maxLevel. + * In many simple cases, the reordering is thus done entirely in the + * index mapping. + * Also, reordering occurs only down to the lowest odd level that occurs, + * which is minLevel|1. However, if the lowest level itself is odd, then + * in the last reordering the sequence of the runs at this level or higher + * will be all runs, and we don't need the elaborate loop to search for them. + * This is covered by ++minLevel instead of minLevel|=1 followed + * by an extra reorder-all after the reorder-some loop. + * About a trailing WS run: + * Such a run would need special treatment because its level is not + * reflected in levels[] if this is not a paragraph object. + * Instead, all characters from trailingWSStart on are implicitly at + * paraLevel. + * However, for all maxLevel>paraLevel, this run will never be reordered + * and does not need to be taken into account. maxLevel==paraLevel is only reordered + * if minLevel==paraLevel is odd, which is done in the extra segment. + * This means that for the main reordering loop we don't need to consider + * this run and can --runCount. If it is later part of the all-runs + * reordering, then runCount is adjusted accordingly. + */ + private static void reorderLine(BidiBase bidiBase, byte minLevel, byte maxLevel) { + + /* nothing to do? */ + if (maxLevel<=(minLevel|1)) { + return; + } + + BidiRun[] runs; + BidiRun tempRun; + byte[] levels; + int firstRun, endRun, limitRun, runCount; + + /* + * Reorder only down to the lowest odd level + * and reorder at an odd minLevel in a separate, simpler loop. + * See comments above for why minLevel is always incremented. 
+ */ + ++minLevel; + + runs = bidiBase.runs; + levels = bidiBase.levels; + runCount = bidiBase.runCount; + + /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ + if (bidiBase.trailingWSStart < bidiBase.length) { + --runCount; + } + + while (--maxLevel >= minLevel) { + firstRun = 0; + + /* loop for all sequences of runs */ + for ( ; ; ) { + /* look for a sequence of runs that are all at >=maxLevel */ + /* look for the first run of such a sequence */ + while (firstRun < runCount && levels[runs[firstRun].start] < maxLevel) { + ++firstRun; + } + if (firstRun >= runCount) { + break; /* no more such runs */ + } + + /* look for the limit run of such a sequence (the run behind it) */ + for (limitRun = firstRun; ++limitRun < runCount && + levels[runs[limitRun].start]>=maxLevel; ) {} + + /* Swap the entire sequence of runs from firstRun to limitRun-1. */ + endRun = limitRun - 1; + while (firstRun < endRun) { + tempRun = runs[firstRun]; + runs[firstRun] = runs[endRun]; + runs[endRun] = tempRun; + ++firstRun; + --endRun; + } + + if (limitRun == runCount) { + break; /* no more such runs */ + } else { + firstRun = limitRun + 1; + } + } + } + + /* now do maxLevel==old minLevel (==odd!), see above */ + if ((minLevel & 1) == 0) { + firstRun = 0; + + /* include the trailing WS run in this complete reordering */ + if (bidiBase.trailingWSStart == bidiBase.length) { + --runCount; + } + + /* Swap the entire sequence of all runs. 
(endRun==runCount) */ + while (firstRun < runCount) { + tempRun = runs[firstRun]; + runs[firstRun] = runs[runCount]; + runs[runCount] = tempRun; + ++firstRun; + --runCount; + } + } + } + + /* compute the runs array --------------------------------------------------- */ + + static int getRunFromLogicalIndex(BidiBase bidiBase, int logicalIndex) { + BidiRun[] runs = bidiBase.runs; + int runCount = bidiBase.runCount, visualStart = 0, i, length, logicalStart; + + for (i = 0; i < runCount; i++) { + length = runs[i].limit - visualStart; + logicalStart = runs[i].start; + if ((logicalIndex >= logicalStart) && (logicalIndex < (logicalStart+length))) { + return i; + } + visualStart += length; + } + /* we should never get here */ + throw new IllegalStateException("Internal ICU error in getRunFromLogicalIndex"); + } + + /* + * Compute the runs array from the levels array. + * After getRuns() returns true, runCount is guaranteed to be >0 + * and the runs are reordered. + * Odd-level runs have visualStart on their visual right edge and + * they progress visually to the left. + * If option OPTION_INSERT_MARKS is set, insertRemove will contain the + * sum of appropriate LRM/RLM_BEFORE/AFTER flags. + * If option OPTION_REMOVE_CONTROLS is set, insertRemove will contain the + * negative number of BiDi control characters within this run. + */ + static void getRuns(BidiBase bidiBase) { + /* + * This method returns immediately if the runs are already set. This + * includes the case of length==0 (handled in setPara).. 
+         */
+        if (bidiBase.runCount >= 0) {
+            return;
+        }
+        if (bidiBase.direction != BidiBase.MIXED) {
+            /* simple, single-run case - this covers length==0 */
+            /* bidiBase.paraLevel is ok even for contextual multiple paragraphs */
+            getSingleRun(bidiBase, bidiBase.paraLevel);
+        } else /* BidiBase.MIXED, length>0 */ {
+            /* mixed directionality */
+            int length = bidiBase.length, limit;
+            byte[] levels = bidiBase.levels;
+            int i, runCount;
+            byte level = BidiBase.INTERNAL_LEVEL_DEFAULT_LTR;   /* initialize with no valid level */
+            /*
+             * If there are WS characters at the end of the line
+             * and the run preceding them has a level different from
+             * paraLevel, then they will form their own run at paraLevel (L1).
+             * Count them separately.
+             * We need some special treatment for this in order to not
+             * modify the levels array which a line Bidi object shares
+             * with its paragraph parent and its other line siblings.
+             * In other words, for the trailing WS, it may be
+             * levels[]!=paraLevel but we have to treat it like it were so.
+             */
+            limit = bidiBase.trailingWSStart;
+            /* count the runs, there is at least one non-WS run, and limit>0 */
+            runCount = 0;
+            for (i = 0; i < limit; ++i) {
+                /* increment runCount at the start of each run */
+                if (levels[i] != level) {
+                    ++runCount;
+                    level = levels[i];
+                }
+            }
+
+            /*
+             * We don't need to see if the last run can be merged with a trailing
+             * WS run because setTrailingWSStart() would have done that.
+             */
+            if (runCount == 1 && limit == length) {
+                /* There is only one non-WS run and no trailing WS-run. */
+                getSingleRun(bidiBase, levels[0]);
+            } else /* runCount>1 || limit<length */ {
+                /* allocate and set the runs */
+                BidiRun[] runs;
+                int runIndex, start;
+                /* start outside the valid range so the first run sets both bounds */
+                byte minLevel = BidiBase.MAX_EXPLICIT_LEVEL + 1;
+                byte maxLevel = 0;
+
+                /* now, count a (non-mergeable) WS run; it gets its own slot
+                   in runs[] after the non-WS runs */
+                if (limit < length) {
+                    ++runCount;
+                }
+
+                /* runCount > 1 */
+                bidiBase.getRunsMemory(runCount);
+                runs = bidiBase.runsMemory;
+
+                /* set the runs */
+                /* FOOD FOR THOUGHT: this could be optimized, e.g.:
+                 * 464->444, 484->444, 575->555, 595->555
+                 * However, that would take longer. Check also how it would
+                 * interact with BiDi control removal and inserting Marks.
+ */ + runIndex = 0; + + /* search for the run limits and initialize visualLimit values with the run lengths */ + i = 0; + do { + /* prepare this run */ + start = i; + level = levels[i]; + if (level < minLevel) { + minLevel = level; + } + if (level > maxLevel) { + maxLevel = level; + } + + /* look for the run limit */ + while (++i < limit && levels[i] == level) {} + + /* i is another run limit */ + runs[runIndex] = new BidiRun(start, i - start, level); + ++runIndex; + } while (i < limit); + + if (limit < length) { + /* there is a separate WS run */ + runs[runIndex] = new BidiRun(limit, length - limit, bidiBase.paraLevel); + /* For the trailing WS run, bidiBase.paraLevel is ok even + if contextual multiple paragraphs. */ + if (bidiBase.paraLevel < minLevel) { + minLevel = bidiBase.paraLevel; + } + } + + /* set the object fields */ + bidiBase.runs = runs; + bidiBase.runCount = runCount; + + reorderLine(bidiBase, minLevel, maxLevel); + + /* now add the direction flags and adjust the visualLimit's to be just that */ + /* this loop will also handle the trailing WS run */ + limit = 0; + for (i = 0; i < runCount; ++i) { + runs[i].level = levels[runs[i].start]; + limit = (runs[i].limit += limit); + } + + /* Set the embedding level for the trailing WS run. */ + /* For a RTL paragraph, it will be the *first* run in visual order. */ + /* For the trailing WS run, bidiBase.paraLevel is ok even if + contextual multiple paragraphs. */ + if (runIndex < runCount) { + int trailingRun = ((bidiBase.paraLevel & 1) != 0)? 
0 : runIndex; + runs[trailingRun].level = bidiBase.paraLevel; + } + } + } + + /* handle insert LRM/RLM BEFORE/AFTER run */ + if (bidiBase.insertPoints.size > 0) { + BidiBase.Point point; + int runIndex, ip; + for (ip = 0; ip < bidiBase.insertPoints.size; ip++) { + point = bidiBase.insertPoints.points[ip]; + runIndex = getRunFromLogicalIndex(bidiBase, point.pos); + bidiBase.runs[runIndex].insertRemove |= point.flag; + } + } + + /* handle remove BiDi control characters */ + if (bidiBase.controlCount > 0) { + int runIndex, ic; + char c; + for (ic = 0; ic < bidiBase.length; ic++) { + c = bidiBase.text[ic]; + if (BidiBase.IsBidiControlChar(c)) { + runIndex = getRunFromLogicalIndex(bidiBase, ic); + bidiBase.runs[runIndex].insertRemove--; + } + } + } + } + + static int[] prepareReorder(byte[] levels, byte[] pMinLevel, byte[] pMaxLevel) + { + int start; + byte level, minLevel, maxLevel; + + if (levels == null || levels.length <= 0) { + return null; + } + + /* determine minLevel and maxLevel */ + minLevel = BidiBase.MAX_EXPLICIT_LEVEL + 1; + maxLevel = 0; + for (start = levels.length; start>0; ) { + level = levels[--start]; + if (level > BidiBase.MAX_EXPLICIT_LEVEL + 1) { + return null; + } + if (level < minLevel) { + minLevel = level; + } + if (level > maxLevel) { + maxLevel = level; + } + } + pMinLevel[0] = minLevel; + pMaxLevel[0] = maxLevel; + + /* initialize the index map */ + int[] indexMap = new int[levels.length]; + for (start = levels.length; start > 0; ) { + --start; + indexMap[start] = start; + } + + return indexMap; + } + + static int[] reorderVisual(byte[] levels) + { + byte[] aMinLevel = new byte[1]; + byte[] aMaxLevel = new byte[1]; + int start, end, limit, temp; + byte minLevel, maxLevel; + + int[] indexMap = prepareReorder(levels, aMinLevel, aMaxLevel); + if (indexMap == null) { + return null; + } + + minLevel = aMinLevel[0]; + maxLevel = aMaxLevel[0]; + + /* nothing to do? 
*/ + if (minLevel == maxLevel && (minLevel & 1) == 0) { + return indexMap; + } + + /* reorder only down to the lowest odd level */ + minLevel |= 1; + + /* loop maxLevel..minLevel */ + do { + start = 0; + + /* loop for all sequences of levels to reorder at the current maxLevel */ + for ( ; ; ) { + /* look for a sequence of levels that are all at >=maxLevel */ + /* look for the first index of such a sequence */ + while (start < levels.length && levels[start] < maxLevel) { + ++start; + } + if (start >= levels.length) { + break; /* no more such runs */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for (limit = start; ++limit < levels.length && levels[limit] >= maxLevel; ) {} + + /* + * Swap the entire interval of indexes from start to limit-1. + * We don't need to swap the levels for the purpose of this + * algorithm: the sequence of levels that we look at does not + * move anyway. + */ + end = limit - 1; + while (start < end) { + temp = indexMap[start]; + indexMap[start] = indexMap[end]; + indexMap[end] = temp; + + ++start; + --end; + } + + if (limit == levels.length) { + break; /* no more such sequences */ + } else { + start = limit + 1; + } + } + } while (--maxLevel >= minLevel); + + return indexMap; + } + + static int[] getVisualMap(BidiBase bidiBase) + { + /* fill a visual-to-logical index map using the runs[] */ + BidiRun[] runs = bidiBase.runs; + int logicalStart, visualStart, visualLimit; + int allocLength = bidiBase.length > bidiBase.resultLength ? 
bidiBase.length + : bidiBase.resultLength; + int[] indexMap = new int[allocLength]; + + visualStart = 0; + int idx = 0; + for (int j = 0; j < bidiBase.runCount; ++j) { + logicalStart = runs[j].start; + visualLimit = runs[j].limit; + if (runs[j].isEvenRun()) { + do { /* LTR */ + indexMap[idx++] = logicalStart++; + } while (++visualStart < visualLimit); + } else { + logicalStart += visualLimit - visualStart; /* logicalLimit */ + do { /* RTL */ + indexMap[idx++] = --logicalStart; + } while (++visualStart < visualLimit); + } + /* visualStart==visualLimit; */ + } + + if (bidiBase.insertPoints.size > 0) { + int markFound = 0, runCount = bidiBase.runCount; + int insertRemove, i, j, k; + runs = bidiBase.runs; + /* count all inserted marks */ + for (i = 0; i < runCount; i++) { + insertRemove = runs[i].insertRemove; + if ((insertRemove & (BidiBase.LRM_BEFORE|BidiBase.RLM_BEFORE)) > 0) { + markFound++; + } + if ((insertRemove & (BidiBase.LRM_AFTER|BidiBase.RLM_AFTER)) > 0) { + markFound++; + } + } + /* move back indexes by number of preceding marks */ + k = bidiBase.resultLength; + for (i = runCount - 1; i >= 0 && markFound > 0; i--) { + insertRemove = runs[i].insertRemove; + if ((insertRemove & (BidiBase.LRM_AFTER|BidiBase.RLM_AFTER)) > 0) { + indexMap[--k] = BidiBase.MAP_NOWHERE; + markFound--; + } + visualStart = i > 0 ? 
runs[i-1].limit : 0; + for (j = runs[i].limit - 1; j >= visualStart && markFound > 0; j--) { + indexMap[--k] = indexMap[j]; + } + if ((insertRemove & (BidiBase.LRM_BEFORE|BidiBase.RLM_BEFORE)) > 0) { + indexMap[--k] = BidiBase.MAP_NOWHERE; + markFound--; + } + } + } + else if (bidiBase.controlCount > 0) { + int runCount = bidiBase.runCount, logicalEnd; + int insertRemove, length, i, j, k, m; + char uchar; + boolean evenRun; + runs = bidiBase.runs; + visualStart = 0; + /* move forward indexes by number of preceding controls */ + k = 0; + for (i = 0; i < runCount; i++, visualStart += length) { + length = runs[i].limit - visualStart; + insertRemove = runs[i].insertRemove; + /* if no control found yet, nothing to do in this run */ + if ((insertRemove == 0) && (k == visualStart)) { + k += length; + continue; + } + /* if no control in this run */ + if (insertRemove == 0) { + visualLimit = runs[i].limit; + for (j = visualStart; j < visualLimit; j++) { + indexMap[k++] = indexMap[j]; + } + continue; + } + logicalStart = runs[i].start; + evenRun = runs[i].isEvenRun(); + logicalEnd = logicalStart + length - 1; + for (j = 0; j < length; j++) { + m = evenRun ? logicalStart + j : logicalEnd - j; + uchar = bidiBase.text[m]; + if (!BidiBase.IsBidiControlChar(uchar)) { + indexMap[k++] = m; + } + } + } + } + if (allocLength == bidiBase.resultLength) { + return indexMap; + } + int[] newMap = new int[bidiBase.resultLength]; + System.arraycopy(indexMap, 0, newMap, 0, bidiBase.resultLength); + return newMap; + } + +} diff --git a/src/share/classes/sun/text/bidi/BidiRun.java b/src/share/classes/sun/text/bidi/BidiRun.java new file mode 100644 index 000000000..8ff6cc2f8 --- /dev/null +++ b/src/share/classes/sun/text/bidi/BidiRun.java @@ -0,0 +1,124 @@ +/* + * Portions Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ +/* + ******************************************************************************* + * (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved * + * * + * The original version of this source code and documentation is copyrighted * + * and owned by IBM, These materials are provided under terms of a License * + * Agreement between IBM and Sun. This technology is protected by multiple * + * US and International patents. This notice and attribution to IBM may not * + * to removed. * + ******************************************************************************* + */ +/* Written by Simon Montagu, Matitiahu Allouche + * (ported from C code written by Markus W. Scherer) + */ + +package sun.text.bidi; + +/** + * A BidiRun represents a sequence of characters at the same embedding level. 
+ * The Bidi algorithm decomposes a piece of text into sequences of characters
+ * at the same embedding level, each such sequence is called a run.
+ *
+ * <p>A BidiRun represents such a run by storing its essential properties,
+ * but does not duplicate the characters which form the run.
+ *
+ * <p>The "limit" of the run is the position just after the
+ * last character, i.e., one more than that position.
+ *
+ * <p>
This class has no public constructor, and its members cannot be + * modified by users. + * + * @see com.ibm.icu.text.Bidi + */ +public class BidiRun { + + int start; /* first logical position of the run */ + int limit; /* last visual position of the run +1 */ + int insertRemove; /* if >0, flags for inserting LRM/RLM before/after run, + if <0, count of bidi controls within run */ + byte level; + + /* + * Default constructor + * + * Note that members start and limit of a run instance have different + * meanings depending whether the run is part of the runs array of a Bidi + * object, or if it is a reference returned by getVisualRun() or + * getLogicalRun(). + * For a member of the runs array of a Bidi object, + * - start is the first logical position of the run in the source text. + * - limit is one after the last visual position of the run. + * For a reference returned by getLogicalRun() or getVisualRun(), + * - start is the first logical position of the run in the source text. + * - limit is one after the last logical position of the run. + */ + BidiRun() + { + this(0, 0, (byte)0); + } + + /* + * Constructor + */ + BidiRun(int start, int limit, byte embeddingLevel) + { + this.start = start; + this.limit = limit; + this.level = embeddingLevel; + } + + /* + * Copy the content of a BidiRun instance + */ + void copyFrom(BidiRun run) + { + this.start = run.start; + this.limit = run.limit; + this.level = run.level; + this.insertRemove = run.insertRemove; + } + + /** + * Get level of run + */ + public byte getEmbeddingLevel() + { + return level; + } + + /** + * Check if run level is even + * @return true if the embedding level of this run is even, i.e. it is a + * left-to-right run. 
+ */ + boolean isEvenRun() + { + return (level & 1) == 0; + } + +} diff --git a/src/share/classes/sun/text/normalizer/UCharacter.java b/src/share/classes/sun/text/normalizer/UCharacter.java index 8225517f0..4ff5695dd 100644 --- a/src/share/classes/sun/text/normalizer/UCharacter.java +++ b/src/share/classes/sun/text/normalizer/UCharacter.java @@ -1,5 +1,5 @@ /* - * Portions Copyright 2005-2009 Sun Microsystems, Inc. All Rights Reserved. + * Portions Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -355,7 +355,7 @@ public final class UCharacter private static int getEuropeanDigit(int ch) { if ((ch > 0x7a && ch < 0xff21) || ch < 0x41 || (ch > 0x5a && ch < 0x61) - || ch > 0xff5a || (ch > 0xff31 && ch < 0xff41)) { + || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) { return -1; } if (ch <= 0x7a) { diff --git a/src/share/native/sun/font/bidi/cmemory.h b/src/share/native/sun/font/bidi/cmemory.h deleted file mode 100644 index 65245f7fc..000000000 --- a/src/share/native/sun/font/bidi/cmemory.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Portions Copyright 2000 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -/* - * (C) Copyright IBM Corp. 1998, 1999 - All Rights Reserved - * - * The original version of this source code and documentation is - * copyrighted and owned by IBM. These materials are provided - * under terms of a License Agreement between IBM and Sun. - * This technology is protected by multiple US and International - * patents. This notice and attribution to IBM may not be removed. - */ - -/* -* File CMEMORY.H -* -* Contains stdlib.h/string.h memory functions -* -* @author Bertrand A. Damiba -* -* Modification History: -* -* Date Name Description -* 6/20/98 Bertrand Created. -* 05/03/99 stephen Changed from functions to macros. 
-* -******************************************************************************* -*/ - -#ifndef CMEMORY_H -#define CMEMORY_H - -#include -#include - -#define icu_malloc(size) malloc(size) -#define icu_realloc(buffer, size) realloc(buffer, size) -#define icu_free(buffer) free(buffer) -#define icu_memcpy(dst, src, size) memcpy(dst, src, size) -#define icu_memmove(dst, src, size) memmove(dst, src, size) -#define icu_memset(buffer, mark, size) memset(buffer, mark, size) -#define icu_memcmp(buffer1, buffer2, size) memcmp(buffer1, buffer2,size) - -#endif diff --git a/src/share/native/sun/font/bidi/jbidi.c b/src/share/native/sun/font/bidi/jbidi.c deleted file mode 100644 index 2b2342aee..000000000 --- a/src/share/native/sun/font/bidi/jbidi.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Portions Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -/* - * (C) Copyright IBM Corp. 2000 - 2003 - All Rights Reserved - * - * The original version of this source code and documentation is - * copyrighted and owned by IBM. These materials are provided - * under terms of a License Agreement between IBM and Sun. - * This technology is protected by multiple US and International - * patents. This notice and attribution to IBM may not be removed. - */ - -// jni interface to native bidi from java - -#include -#include "jbidi.h" - -#define U_COMMON_IMPLEMENTATION -#include "ubidi.h" -#include "ubidiimp.h" -#include "uchardir.h" - -static jclass g_bidi_class = 0; -static jmethodID g_bidi_reset = 0; - -static void resetBidi(JNIEnv *env, jclass cls, jobject bidi, jint dir, jint level, jint len, jintArray runs, jintArray cws) { - if (!g_bidi_class) { - g_bidi_class = (*env)->NewGlobalRef(env, cls); - g_bidi_reset = (*env)->GetMethodID(env, g_bidi_class, "reset", "(III[I[I)V"); - } - - (*env)->CallVoidMethod(env, bidi, g_bidi_reset, dir, level, len, runs, cws); -} - -JNIEXPORT jint JNICALL Java_java_text_Bidi_nativeGetDirectionCode - (JNIEnv *env, jclass cls, jint cp) -{ - return (jint)u_getDirection((uint32_t)cp); -} - -JNIEXPORT void JNICALL Java_java_text_Bidi_nativeBidiChars - (JNIEnv *env, jclass cls, jobject jbidi, jcharArray text, jint tStart, jbyteArray embs, jint eStart, jint length, jint dir) -{ - UErrorCode err = U_ZERO_ERROR; - UBiDi* bidi = ubidi_openSized(length, length, &err); - if (!U_FAILURE(err)) { - jchar *cText = (jchar*)(*env)->GetPrimitiveArrayCritical(env, text, NULL); - if (cText) { - UBiDiLevel baseLevel = (UBiDiLevel)dir; - jbyte *cEmbs = 0; - uint8_t *cEmbsAdj = 0; - if (embs != NULL) { - cEmbs = (jbyte*)(*env)->GetPrimitiveArrayCritical(env, embs, NULL); - if (cEmbs) { - cEmbsAdj = (uint8_t*)(cEmbs + eStart); - } - } 
- ubidi_setPara(bidi, cText + tStart, length, baseLevel, cEmbsAdj, &err); - if (cEmbs) { - (*env)->ReleasePrimitiveArrayCritical(env, embs, cEmbs, JNI_ABORT); - } - - (*env)->ReleasePrimitiveArrayCritical(env, text, cText, JNI_ABORT); - - if (!U_FAILURE(err)) { - jint resDir = (jint)ubidi_getDirection(bidi); - jint resLevel = (jint)ubidi_getParaLevel(bidi); - jint resRunCount = 0; - jintArray resRuns = 0; - jintArray resCWS = 0; - if (resDir == UBIDI_MIXED) { - resRunCount = (jint)ubidi_countRuns(bidi, &err); - if (!U_FAILURE(err)) { - if (resRunCount) { - jint* cResRuns = (jint*)calloc(resRunCount * 2, sizeof(jint)); - if (cResRuns) { - int32_t limit = 0; - UBiDiLevel level; - jint *p = cResRuns; - while (limit < length) { - ubidi_getLogicalRun(bidi, limit, &limit, &level); - *p++ = (jint)limit; - *p++ = (jint)level; - } - - { - const DirProp *dp = bidi->dirProps; - jint ccws = 0; - jint n = 0; - p = cResRuns; - do { - if ((*(p+1) ^ resLevel) & 0x1) { - while (n < *p) { - if (dp[n++] == WS) { - ++ccws; - } - } - } else { - n = *p; - } - p += 2; - } while (n < length); - - resCWS = (*env)->NewIntArray(env, ccws); - if (resCWS) { - jint* cResCWS = (jint*)(*env)->GetPrimitiveArrayCritical(env, resCWS, NULL); - if (cResCWS) { - jint ccws = 0; - jint n = 0; - p = cResRuns; - do { - if ((*(p+1) ^ resLevel) & 0x1) { - while (n < *p) { - if (dp[n] == WS) { - cResCWS[ccws++] = n; - } - ++n; - } - } else { - n = *p; - } - p += 2; - } while (n < length); - (*env)->ReleasePrimitiveArrayCritical(env, resCWS, cResCWS, 0); - } - } - } - - resRuns = (*env)->NewIntArray(env, resRunCount * 2); - if (resRuns) { - (*env)->SetIntArrayRegion(env, resRuns, 0, resRunCount * 2, cResRuns); - } - free(cResRuns); - } - } - } - } - - resetBidi(env, cls, jbidi, resDir, resLevel, length, resRuns, resCWS); - } - } - ubidi_close(bidi); - } -} diff --git a/src/share/native/sun/font/bidi/jbidi.h b/src/share/native/sun/font/bidi/jbidi.h deleted file mode 100644 index 38855e12e..000000000 --- 
a/src/share/native/sun/font/bidi/jbidi.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Portions Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -/* - * (C) Copyright IBM Corp. 2000 - 2003 - All Rights Reserved - * - * The original version of this source code and documentation is - * copyrighted and owned by IBM. These materials are provided - * under terms of a License Agreement between IBM and Sun. - * This technology is protected by multiple US and International - * patents. This notice and attribution to IBM may not be removed. 
- */ - -/* DO NOT EDIT THIS FILE - it is machine generated */ -#include -/* Header for class Bidi */ - -#ifndef _Included_Bidi -#define _Included_Bidi -#ifdef __cplusplus -extern "C" { -#endif -#undef Bidi_DIR_LTR -#define Bidi_DIR_LTR 0L -#undef Bidi_DIR_RTL -#define Bidi_DIR_RTL 1L -#undef Bidi_DIR_DEFAULT_LTR -#define Bidi_DIR_DEFAULT_LTR -2L -#undef Bidi_DIR_DEFAULT_RTL -#define Bidi_DIR_DEFAULT_RTL -1L -#undef Bidi_DIR_MIXED -#define Bidi_DIR_MIXED -1L -#undef Bidi_DIR_MIN -#define Bidi_DIR_MIN -2L -#undef Bidi_DIR_MAX -#define Bidi_DIR_MAX 1L - -JNIEXPORT jint JNICALL Java_java_text_Bidi_nativeGetDirectionCode - (JNIEnv *, jclass, jint); - -JNIEXPORT void JNICALL Java_java_text_Bidi_nativeBidiChars - (JNIEnv *, jclass, jobject, jcharArray, jint, jbyteArray, jint, jint, jint); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/share/native/sun/font/bidi/ubidi.c b/src/share/native/sun/font/bidi/ubidi.c deleted file mode 100644 index cd4e3fec1..000000000 --- a/src/share/native/sun/font/bidi/ubidi.c +++ /dev/null @@ -1,1433 +0,0 @@ -/* - * Portions Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Sun designates this - * particular file as subject to the "Classpath" exception as provided - * by Sun in the LICENSE file that accompanied this code. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - */ - -/* - * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved - * - * The original version of this source code and documentation is - * copyrighted and owned by IBM. These materials are provided - * under terms of a License Agreement between IBM and Sun. - * This technology is protected by multiple US and International - * patents. This notice and attribution to IBM may not be removed. - */ - -/* -* -****************************************************************************** -* file name: ubidi.c -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999jul27 -* created by: Markus W. Scherer -*/ - -/* set import/export definitions */ -#ifndef U_COMMON_IMPLEMENTATION -# define U_COMMON_IMPLEMENTATION -#endif - -#include "cmemory.h" -#include "utypes.h" -#include "uchardir.h" -#include "ubidi.h" -#include "ubidiimp.h" - -/* - * General implementation notes: - * - * Throughout the implementation, there are comments like (W2) that refer to - * rules of the BiDi algorithm in its version 5, in this example to the second - * rule of the resolution of weak types. - * - * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) - * character according to UTF-16, the second UChar gets the directional property of - * the entire character assigned, while the first one gets a BN, a boundary - * neutral, type, which is ignored by most of the algorithm according to - * rule (X9) and the implementation suggestions of the BiDi algorithm. 
- * - * Later, adjustWSLevels() will set the level for each BN to that of the - * following character (UChar), which results in surrogate pairs getting the - * same level on each of their surrogates. - * - * In a UTF-8 implementation, the same thing could be done: the last byte of - * a multi-byte sequence would get the "real" property, while all previous - * bytes of that sequence would get BN. - * - * It is not possible to assign all those parts of a character the same real - * property because this would fail in the resolution of weak types with rules - * that look at immediately surrounding types. - * - * As a related topic, this implementation does not remove Boundary Neutral - * types from the input, but ignores them whereever this is relevant. - * For example, the loop for the resolution of the weak types reads - * types until it finds a non-BN. - * Also, explicit embedding codes are neither changed into BN nor removed. - * They are only treated the same way real BNs are. - * As stated before, adjustWSLevels() takes care of them at the end. - * For the purpose of conformance, the levels of all these codes - * do not matter. - * - * Note that this implementation never modifies the dirProps - * after the initial setup. - * - * - * In this implementation, the resolution of weak types (Wn), - * neutrals (Nn), and the assignment of the resolved level (In) - * are all done in one single loop, in resolveImplicitLevels(). - * Changes of dirProp values are done on the fly, without writing - * them back to the dirProps array. - * - * - * This implementation contains code that allows to bypass steps of the - * algorithm that are not needed on the specific paragraph - * in order to speed up the most common cases considerably, - * like text that is entirely LTR, or RTL text without numbers. 
- * - * Most of this is done by setting a bit for each directional property - * in a flags variable and later checking for whether there are - * any LTR characters or any RTL characters, or both, whether - * there are any explicit embedding codes, etc. - * - * If the (Xn) steps are performed, then the flags are re-evaluated, - * because they will then not contain the embedding codes any more - * and will be adjusted for override codes, so that subsequently - * more bypassing may be possible than what the initial flags suggested. - * - * If the text is not mixed-directional, then the - * algorithm steps for the weak type resolution are not performed, - * and all levels are set to the paragraph level. - * - * If there are no explicit embedding codes, then the (Xn) steps - * are not performed. - * - * If embedding levels are supplied as a parameter, then all - * explicit embedding codes are ignored, and the (Xn) steps - * are not performed. - * - * White Space types could get the level of the run they belong to, - * and are checked with a test of (flags&MASK_EMBEDDING) to - * consider if the paragraph direction should be considered in - * the flags variable. - * - * If there are no White Space types in the paragraph, then - * (L1) is not necessary in adjustWSLevels(). 
- */ - -/* prototypes --------------------------------------------------------------- */ - -static void -getDirProps(UBiDi *pBiDi, const UChar *text); - -static UBiDiDirection -resolveExplicitLevels(UBiDi *pBiDi); - -static UBiDiDirection -checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); - -static UBiDiDirection -directionFromFlags(Flags flags); - -static void -resolveImplicitLevels(UBiDi *pBiDi, - int32_t start, int32_t limit, - DirProp sor, DirProp eor); - -static void -adjustWSLevels(UBiDi *pBiDi); - -/* to avoid some conditional statements, use tiny constant arrays */ -static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; -static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; -static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; - -#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] -#define DIRPROP_FLAG_E(level) flagE[(level)&1] -#define DIRPROP_FLAG_O(level) flagO[(level)&1] - -/* UBiDi object management -------------------------------------------------- */ - -U_CAPI UBiDi * U_EXPORT2 -ubidi_open(void) -{ - UErrorCode errorCode=U_ZERO_ERROR; - return ubidi_openSized(0, 0, &errorCode); -} - -U_CAPI UBiDi * U_EXPORT2 -ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { - UBiDi *pBiDi; - - /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return NULL; - } else if(maxLength<0 || maxRunCount<0) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; /* invalid arguments */ - } - - /* allocate memory for the object */ - pBiDi=(UBiDi *)icu_malloc(sizeof(UBiDi)); - if(pBiDi==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - - /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ - icu_memset(pBiDi, 0, sizeof(UBiDi)); - - /* allocate memory for arrays as requested */ - if(maxLength>0) { - if( !getInitialDirPropsMemory(pBiDi, maxLength) || - !getInitialLevelsMemory(pBiDi, maxLength) - ) { - 
*pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - pBiDi->mayAllocateText=TRUE; - } - - if(maxRunCount>0) { - if(maxRunCount==1) { - /* use simpleRuns[] */ - pBiDi->runsSize=sizeof(Run); - } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - } - } else { - pBiDi->mayAllocateRuns=TRUE; - } - - if(U_SUCCESS(*pErrorCode)) { - return pBiDi; - } else { - ubidi_close(pBiDi); - return NULL; - } -} - -/* - * We are allowed to allocate memory if memory==NULL or - * mayAllocate==TRUE for each array that we need. - * We also try to grow and shrink memory as needed if we - * allocate it. - * - * Assume sizeNeeded>0. - * If *pMemory!=NULL, then assume *pSize>0. - * - * ### this realloc() may unnecessarily copy the old data, - * which we know we don't need any more; - * is this the best way to do this?? - */ -extern bool_t -ubidi_getMemory(void **pMemory, int32_t *pSize, bool_t mayAllocate, int32_t sizeNeeded) { - /* check for existing memory */ - if(*pMemory==NULL) { - /* we need to allocate memory */ - if(mayAllocate && (*pMemory=icu_malloc(sizeNeeded))!=NULL) { - *pSize=sizeNeeded; - return TRUE; - } else { - return FALSE; - } - } else { - /* there is some memory, is it enough or too much? 
*/ - if(sizeNeeded>*pSize && !mayAllocate) { - /* not enough memory, and we must not allocate */ - return FALSE; - } else if(sizeNeeded!=*pSize && mayAllocate) { - /* we may try to grow or shrink */ - void *memory; - - if((memory=icu_realloc(*pMemory, sizeNeeded))!=NULL) { - *pMemory=memory; - *pSize=sizeNeeded; - return TRUE; - } else { - /* we failed to grow */ - return FALSE; - } - } else { - /* we have at least enough memory and must not allocate */ - return TRUE; - } - } -} - -U_CAPI void U_EXPORT2 -ubidi_close(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - if(pBiDi->dirPropsMemory!=NULL) { - icu_free(pBiDi->dirPropsMemory); - } - if(pBiDi->levelsMemory!=NULL) { - icu_free(pBiDi->levelsMemory); - } - if(pBiDi->runsMemory!=NULL) { - icu_free(pBiDi->runsMemory); - } - icu_free(pBiDi); - } -} - -/* set to approximate "inverse BiDi" ---------------------------------------- */ - -U_CAPI void U_EXPORT2 -ubidi_setInverse(UBiDi *pBiDi, bool_t isInverse) { - if(pBiDi!=NULL) { - pBiDi->isInverse=isInverse; - } -} - -U_CAPI bool_t U_EXPORT2 -ubidi_isInverse(UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->isInverse; - } else { - return FALSE; - } -} - -/* ubidi_setPara ------------------------------------------------------------ */ - -U_CAPI void U_EXPORT2 -ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, - UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, - UErrorCode *pErrorCode) { - UBiDiDirection direction; - - /* check the argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } else if(pBiDi==NULL || text==NULL || - ((UBIDI_MAX_EXPLICIT_LEVELtext=text; - pBiDi->length=length; - pBiDi->paraLevel=paraLevel; - pBiDi->direction=UBIDI_LTR; - pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ - - pBiDi->dirProps=NULL; - pBiDi->levels=NULL; - pBiDi->runs=NULL; - - if(length==0) { - /* - * For an empty paragraph, create a UBiDi object with the paraLevel and - * the flags and the direction set but without allocating 
zero-length arrays. - * There is nothing more to do. - */ - if(IS_DEFAULT_LEVEL(paraLevel)) { - pBiDi->paraLevel&=1; - } - if(paraLevel&1) { - pBiDi->flags=DIRPROP_FLAG(R); - pBiDi->direction=UBIDI_RTL; - } else { - pBiDi->flags=DIRPROP_FLAG(L); - pBiDi->direction=UBIDI_LTR; - } - - pBiDi->runCount=0; - return; - } - - pBiDi->runCount=-1; - - /* - * Get the directional properties, - * the flags bit-set, and - * determine the partagraph level if necessary. - */ - if(getDirPropsMemory(pBiDi, length)) { - pBiDi->dirProps=pBiDi->dirPropsMemory; - getDirProps(pBiDi, text); - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - if (getLevelsMemory(pBiDi, length)) { - pBiDi->levels=pBiDi->levelsMemory; - /* are explicit levels specified? */ - if(embeddingLevels==NULL) { - /* no: determine explicit levels according to the (Xn) rules */ - direction=resolveExplicitLevels(pBiDi); - } else { - /* set BN for all explicit codes, check that all levels are paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ - icu_memcpy(pBiDi->levels, embeddingLevels, length); - direction=checkExplicitLevels(pBiDi, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return; - } - } - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - - /* - * The steps after (X9) in the UBiDi algorithm are performed only if - * the paragraph text has mixed directionality! 
- */ - pBiDi->direction=direction; - switch(direction) { - case UBIDI_LTR: - /* make sure paraLevel is even */ - pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pBiDi->trailingWSStart=0; - break; - case UBIDI_RTL: - /* make sure paraLevel is odd */ - pBiDi->paraLevel|=1; - - /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ - pBiDi->trailingWSStart=0; - break; - default: - /* - * If there are no external levels specified and there - * are no significant explicit level codes in the text, - * then we can treat the entire paragraph as one run. - * Otherwise, we need to perform the following rules on runs of - * the text with the same embedding levels. (X10) - * "Significant" explicit level codes are ones that actually - * affect non-BN characters. - * Examples for "insignificant" ones are empty embeddings - * LRE-PDF, LRE-RLE-PDF-PDF, etc. - */ - if(embeddingLevels==NULL && !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { - resolveImplicitLevels(pBiDi, 0, length, - GET_LR_FROM_LEVEL(pBiDi->paraLevel), - GET_LR_FROM_LEVEL(pBiDi->paraLevel)); - } else { - /* sor, eor: start and end types of same-level-run */ - UBiDiLevel *levels=pBiDi->levels; - int32_t start, limit=0; - UBiDiLevel level, nextLevel; - DirProp sor, eor; - - /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ - level=pBiDi->paraLevel; - nextLevel=levels[0]; - if(levelparaLevel; - } - - /* determine eor from max(level, nextLevel); sor is last run's eor */ - if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { - eor=GET_LR_FROM_LEVEL(nextLevel); - } else { - eor=GET_LR_FROM_LEVEL(level); - } - - /* if the run consists of overridden directional types, then there - are no implicit types to be resolved */ - if(!(level&UBIDI_LEVEL_OVERRIDE)) { - resolveImplicitLevels(pBiDi, start, limit, sor, eor); - } else { - /* remove the 
UBIDI_LEVEL_OVERRIDE flags */ - do { - levels[start++]&=~UBIDI_LEVEL_OVERRIDE; - } while(startisInverse) { - if(!ubidi_getRuns(pBiDi)) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } - break; - } -} - -/* perform (P2)..(P3) ------------------------------------------------------- */ - -/* - * Get the directional properties for the text, - * calculate the flags bit-set, and - * determine the partagraph level if necessary. - */ -static void -getDirProps(UBiDi *pBiDi, const UChar *text) { - DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ - - int32_t i=0, i0, i1, length=pBiDi->length; - Flags flags=0; /* collect all directionalities in the text */ - UChar uchar; - DirProp dirProp; - - if(IS_DEFAULT_LEVEL(pBiDi->paraLevel)) { - /* determine the paragraph level (P2..P3) */ - for(;;) { - uchar=text[i]; - if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(text[i+1])) { - /* not a surrogate pair */ - flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_charDirection(uchar)); - } else { - /* a surrogate pair */ - dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */ - flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_surrogatePairDirection(uchar, text[i]))|DIRPROP_FLAG(BN); - } - ++i; - if(dirProp==L) { - pBiDi->paraLevel=0; - break; - } else if(dirProp==R || dirProp==AL) { - pBiDi->paraLevel=1; - break; - } else if(i>=length) { - /* - * see comment in ubidi.h: - * the DEFAULT_XXX values are designed so that - * their bit 0 alone yields the intended default - */ - pBiDi->paraLevel&=1; - break; - } - } - } else { - flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); - } - - /* get the rest of the directional properties and the flags bits */ - while(iparaLevel); - } - - pBiDi->flags=flags; -} - -/* perform (X1)..(X9) ------------------------------------------------------- */ - -/* - * Resolve the explicit levels as specified by explicit embedding codes. 
- * Recalculate the flags to have them reflect the real properties - * after taking the explicit embeddings into account. - * - * The BiDi algorithm is designed to result in the same behavior whether embedding - * levels are externally specified (from "styled text", supposedly the preferred - * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. - * That is why (X9) instructs to remove all explicit codes (and BN). - * However, in a real implementation, this removal of these codes and their index - * positions in the plain text is undesirable since it would result in - * reallocated, reindexed text. - * Instead, this implementation leaves the codes in there and just ignores them - * in the subsequent processing. - * In order to get the same reordering behavior, positions with a BN or an - * explicit embedding code just get the same level assigned as the last "real" - * character. - * - * Some implementations, not this one, then overwrite some of these - * directionality properties at "real" same-level-run boundaries by - * L or R codes so that the resolution of weak types can be performed on the - * entire paragraph at once instead of having to parse it once more and - * perform that resolution on same-level-runs. - * This limits the scope of the implicit rules in effectively - * the same way as the run limits. - * - * Instead, this implementation does not modify these codes. - * On one hand, the paragraph has to be scanned for same-level-runs, but - * on the other hand, this saves another loop to reset these codes, - * or saves making and modifying a copy of dirProps[]. - * - * - * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. - * - * - * Handling the stack of explicit levels (Xn): - * - * With the BiDi stack of explicit levels, - * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, - * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. 
- * - * In order to have a correct push-pop semantics even in the case of overflows, - * there are two overflow counters: - * - countOver60 is incremented with each LRx at level 60 - * - from level 60, one RLx increases the level to 61 - * - countOver61 is incremented with each LRx and RLx at level 61 - * - * Popping levels with PDF must work in the opposite order so that level 61 - * is correct at the correct point. Underflows (too many PDFs) must be checked. - * - * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. - */ - -static UBiDiDirection -resolveExplicitLevels(UBiDi *pBiDi) { - const DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - - int32_t i=0, length=pBiDi->length; - Flags flags=pBiDi->flags; /* collect all directionalities in the text */ - DirProp dirProp; - UBiDiLevel level=pBiDi->paraLevel; - - UBiDiDirection direction; - - /* determine if the text is mixed-directional or single-directional */ - direction=directionFromFlags(flags); - - /* we may not need to resolve any explicit levels */ - if(direction!=UBIDI_MIXED) { - /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ - } else if(!(flags&MASK_EXPLICIT) || pBiDi->isInverse) { - /* mixed, but all characters are at the same embedding level */ - /* or we are in "inverse BiDi" */ - /* set all levels to the paragraph level */ - for(i=0; i=UBIDI_MAX_EXPLICIT_LEVEL */ - uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ - - /* recalculate the flags */ - flags=0; - - /* since we assume that this is a single paragraph, we ignore (X8) */ - for(i=0; i0) { - --countOver61; - } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) { - /* handle LRx overflows from level 60 */ - --countOver60; - } else if(stackTop>0) { - /* this is the pop operation; it also pops level 61 while countOver60>0 */ - --stackTop; - embeddingLevel=stack[stackTop]; - /* } else { (underflow) */ - } - 
flags|=DIRPROP_FLAG(BN); - break; - case B: - /* - * We do not really expect to see a paragraph separator (B), - * but we should do something reasonable with it, - * especially at the end of the text. - */ - stackTop=0; - countOver60=countOver61=0; - embeddingLevel=level=pBiDi->paraLevel; - flags|=DIRPROP_FLAG(B); - break; - case BN: - /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ - /* they will get their levels set correctly in adjustWSLevels() */ - flags|=DIRPROP_FLAG(BN); - break; - default: - /* all other types get the "real" level */ - if(level!=embeddingLevel) { - level=embeddingLevel; - if(level&UBIDI_LEVEL_OVERRIDE) { - flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; - } else { - flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; - } - } - if(!(level&UBIDI_LEVEL_OVERRIDE)) { - flags|=DIRPROP_FLAG(dirProp); - } - break; - } - - /* - * We need to set reasonable levels even on BN codes and - * explicit codes because we will later look at same-level runs (X10). - */ - levels[i]=level; - } - if(flags&MASK_EMBEDDING) { - flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); - } - - /* subsequently, ignore the explicit codes and BN (X9) */ - - /* again, determine if the text is mixed-directional or single-directional */ - pBiDi->flags=flags; - direction=directionFromFlags(flags); - } - return direction; -} - -/* - * Use a pre-specified embedding levels array: - * - * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), - * ignore all explicit codes (X9), - * and check all the preset levels. - * - * Recalculate the flags to have them reflect the real properties - * after taking the explicit embeddings into account. 
- */ -static UBiDiDirection -checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - const DirProp *dirProps=pBiDi->dirProps; - UBiDiLevel *levels=pBiDi->levels; - - int32_t i, length=pBiDi->length; - Flags flags=0; /* collect all directionalities in the text */ - UBiDiLevel level, paraLevel=pBiDi->paraLevel; - - for(i=0; iparaLevel); - } - - /* determine if the text is mixed-directional or single-directional */ - pBiDi->flags=flags; - return directionFromFlags(flags); -} - -/* determine if the text is mixed-directional or single-directional */ -static UBiDiDirection -directionFromFlags(Flags flags) { - /* if the text contains AN and neutrals, then some neutrals may become RTL */ - if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { - return UBIDI_LTR; - } else if(!(flags&MASK_LTR)) { - return UBIDI_RTL; - } else { - return UBIDI_MIXED; - } -} - -/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ - -/* - * This implementation of the (Wn) rules applies all rules in one pass. - * In order to do so, it needs a look-ahead of typically 1 character - * (except for W5: sequences of ET) and keeps track of changes - * in a rule Wp that affect a later Wq (pdirProps; - UBiDiLevel *levels=pBiDi->levels; - - int32_t i, next, neutralStart=-1; - DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral=L; - UBiDiLevel numberLevel; - uint8_t historyOfEN; - - /* initialize: current at sor, next at start (it is startisInverse) { - /* - * For "inverse BiDi", we set the levels of numbers just like for - * regular L characters, plus a flag that ubidi_getRuns() will use - * to set a similar flag on the corresponding output run. 
- */ - numberLevel=levels[start]; - if(numberLevel&1) { - ++numberLevel; - } - } else { - /* normal BiDi: least greater even level */ - numberLevel=(UBiDiLevel)((levels[start]+2)&~1); - } - - /* - * In all steps of this implementation, BN and explicit embedding codes - * must be treated as if they didn't exist (X9). - * They will get levels set before a non-neutral character, and remain - * undefined before a neutral one, but adjustWSLevels() will take care - * of all of them. - */ - while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) { - if(++next>=EN_SHIFT; - /* - * Technically, this should be done before the switch() in the form - * if(nextDirProp==NSM) { - * dirProps[next]=nextDirProp=dirProp; - * } - * - * - effectively one iteration ahead. - * However, whether the next dirProp is NSM or is equal to the current dirProp - * does not change the outcome of any condition in (W2)..(W7). - */ - break; - default: - break; - } - - /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */ - - /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */ - /* for "inverse BiDi", treat neutrals like L */ - /* this is one iteration late for the neutrals */ - if(DIRPROP_FLAG(dirProp)&MASK_N) { - if(neutralStart<0) { - /* start of a sequence of neutrals */ - neutralStart=i; - beforeNeutral=prevDirProp; - } - } else /* not a neutral, can be only one of { L, R, EN, AN } */ { - /* - * Note that all levels[] values are still the same at this - * point because this function is called for an entire - * same-level run. - * Therefore, we need to read only one actual level. 
- */ - UBiDiLevel level=levels[i]; - - if(neutralStart>=0) { - UBiDiLevel final; - /* end of a sequence of neutrals (dirProp is "afterNeutral") */ - if(!(pBiDi->isInverse)) { - if(beforeNeutral==L) { - if(dirProp==L) { - final=0; /* make all neutrals L (N1) */ - } else { - final=level; /* make all neutrals "e" (N2) */ - } - } else /* beforeNeutral is one of { R, EN, AN } */ { - if(dirProp==L) { - final=level; /* make all neutrals "e" (N2) */ - } else { - final=1; /* make all neutrals R (N1) */ - } - } - } else { - /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */ - if(beforeNeutral!=R) { - if(dirProp!=R) { - final=0; /* make all neutrals L (N1) */ - } else { - final=level; /* make all neutrals "e" (N2) */ - } - } else /* beforeNeutral is one of { R, EN, AN } */ { - if(dirProp!=R) { - final=level; /* make all neutrals "e" (N2) */ - } else { - final=1; /* make all neutrals R (N1) */ - } - } - } - /* perform (In) on the sequence of neutrals */ - if((level^final)&1) { - /* do something only if we need to _change_ the level */ - do { - ++levels[neutralStart]; - } while(++neutralStart=0) { - /* - * Note that all levels[] values are still the same at this - * point because this function is called for an entire - * same-level run. - * Therefore, we need to read only one actual level. 
- */ - UBiDiLevel level=levels[neutralStart], final; - - /* end of a sequence of neutrals (eor is "afterNeutral") */ - if(!(pBiDi->isInverse)) { - if(beforeNeutral==L) { - if(eor==L) { - final=0; /* make all neutrals L (N1) */ - } else { - final=level; /* make all neutrals "e" (N2) */ - } - } else /* beforeNeutral is one of { R, EN, AN } */ { - if(eor==L) { - final=level; /* make all neutrals "e" (N2) */ - } else { - final=1; /* make all neutrals R (N1) */ - } - } - } else { - /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */ - if(beforeNeutral!=R) { - if(eor!=R) { - final=0; /* make all neutrals L (N1) */ - } else { - final=level; /* make all neutrals "e" (N2) */ - } - } else /* beforeNeutral is one of { R, EN, AN } */ { - if(eor!=R) { - final=level; /* make all neutrals "e" (N2) */ - } else { - final=1; /* make all neutrals R (N1) */ - } - } - } - /* perform (In) on the sequence of neutrals */ - if((level^final)&1) { - /* do something only if we need to _change_ the level */ - do { - ++levels[neutralStart]; - } while(++neutralStartdirProps; - UBiDiLevel *levels=pBiDi->levels; - int32_t i; - - if(pBiDi->flags&MASK_WS) { - UBiDiLevel paraLevel=pBiDi->paraLevel; - Flags flag; - - i=pBiDi->trailingWSStart; - while(i>0) { - /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ - while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) { - levels[i]=paraLevel; - } - - /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ - /* here, i+1 is guaranteed to be 0) { - flag=DIRPROP_FLAG(dirProps[--i]); - if(flag&MASK_BN_EXPLICIT) { - levels[i]=levels[i+1]; - } else if(flag&MASK_B_S) { - levels[i]=paraLevel; - break; - } - } - } - } - - /* now remove the UBIDI_LEVEL_OVERRIDE flags, if any */ - /* (a separate loop can be optimized more easily by a compiler) */ - if(pBiDi->flags&MASK_OVERRIDE) { - for(i=pBiDi->trailingWSStart; i>0;) { - levels[--i]&=~UBIDI_LEVEL_OVERRIDE; - } - } -} - -/* 
-------------------------------------------------------------------------- */ - -U_CAPI UBiDiDirection U_EXPORT2 -ubidi_getDirection(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->direction; - } else { - return UBIDI_LTR; - } -} - -U_CAPI const UChar * U_EXPORT2 -ubidi_getText(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->text; - } else { - return NULL; - } -} - -U_CAPI int32_t U_EXPORT2 -ubidi_getLength(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->length; - } else { - return 0; - } -} - -U_CAPI UBiDiLevel U_EXPORT2 -ubidi_getParaLevel(const UBiDi *pBiDi) { - if(pBiDi!=NULL) { - return pBiDi->paraLevel; - } else { - return 0; - } -} - -/* statetable prototype ----------------------------------------------------- */ - -/* - * This is here for possible future - * performance work and is not compiled right now. - */ - -#if 0 -/* - * This is a piece of code that could be part of ubidi.c/resolveImplicitLevels(). - * It replaces in the (Wn) state machine the switch()-if()-cascade with - * just a few if()s and a state table. 
- */ - -/* use the state table only for the following dirProp's */ -#define MASK_W_TABLE (FLAG(L)|FLAG(R)|FLAG(AL)|FLAG(EN)|FLAG(ES)|FLAG(CS)|FLAG(ET)|FLAG(AN)) - -/* - * inputs: - * - * 0..1 historyOfEN - 2b - * 2 prevDirProp==AN - 1b - * 3..4 lastStrong, one of { L, R, AL, none } - 2b - * 5..7 dirProp, one of { L, R, AL, EN, ES, CS, ET, AN } - 3b - * 8..9 nextDirProp, one of { EN, AN, other } - * - * total: 10b=1024 states - */ -enum { _L, _R, _AL, _EN, _ES, _CS, _ET, _AN, _OTHER }; /* lastStrong, dirProp */ -enum { __EN, __AN, __OTHER }; /* nextDirProp */ - -#define LAST_STRONG_SHIFT 3 -#define DIR_PROP_SHIFT 5 -#define NEXT_DIR_PROP_SHIFT 8 - -/* masks after shifting */ -#define LAST_STRONG_MASK 3 -#define DIR_PROP_MASK 7 -#define STATE_MASK 0x1f - -/* convert dirProp into _dirProp (above enum) */ -static DirProp inputDirProp[dirPropCount]={ _X<>DIR_PROP_SHIFT]; - state&=STATE_MASK; - } else if(dirProp==ET) { - /* get sequence of ET; advance only next, not current, previous or historyOfEN */ - while(next>DIR_PROP_SHIFT]; - state&=STATE_MASK; - - /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */ - } else if(dirProp==NSM) { - /* (W1) */ - dirProp=prevDirProp; - /* keep prevDirProp's EN and AN states! */ - } else /* other */ { - /* set EN and AN states to 0 */ - state&=LAST_STRONG_MASK<BiDi algorithm for ICU - * - * This is an implementation of the Unicode Bidirectional algorithm. - * The algorithm is defined in the - * Unicode Technical Report 9, - * version 5, also described in The Unicode Standard, Version 3.0 .

- * - *

General remarks about the API:

- * - * In functions with an error code parameter, - * the pErrorCode pointer must be valid - * and the value that it points to must not indicate a failure before - * the function call. Otherwise, the function returns immediately. - * After the function call, the value indicates success or failure.

- * - * The limit of a sequence of characters is the position just after their - * last character, i.e., one more than that position.

- * - * Some of the API functions provide access to runs. - * Such a run is defined as a sequence of characters - * that are at the same embedding level - * after performing the BiDi algorithm.

- * - * @author Markus W. Scherer - */ -DOCXX_TAG -/*@{*/ - -/** - * UBiDiLevel is the type of the level values in this - * BiDi implementation. - * It holds an embedding level and indicates the visual direction - * by its bit 0 (even/odd value).

- * - * It can also hold non-level values for the - * paraLevel and embeddingLevels - * arguments of ubidi_setPara(); there: - *