From bbdd73ec077a1bb6801b25a639834be16dfd78cb Mon Sep 17 00:00:00 2001 From: Seigo Nonaka Date: Thu, 1 Oct 2015 15:59:38 +0900 Subject: [PATCH] Support Variation Selector in font selection. This CL contains the following changes: - Add a variation selector argument into getFamilyForChar to be able to select fonts which support variation selector. - In case no fonts support the codepoint and variation selector pair, add a fallback rule which selects font family with ignoring variation selector. - Change FontCollection::itemize to not change the font family immediately preceding a variation selector. - Introduce unit tests for variation selectors. With this CL, TextView can render the variation selectors correctly. Bug: 11256006 Change-Id: I22ce0e9eadc941f84e3a9b23462f194e51dd7180 --- include/minikin/FontCollection.h | 2 +- libs/minikin/FontCollection.cpp | 79 ++++++--- tests/FontCollectionItemizeTest.cpp | 250 +++++++++++++++++++++++++++- 3 files changed, 306 insertions(+), 25 deletions(-) diff --git a/include/minikin/FontCollection.h b/include/minikin/FontCollection.h index c4daf98fa6..ca24e386a0 100644 --- a/include/minikin/FontCollection.h +++ b/include/minikin/FontCollection.h @@ -60,7 +60,7 @@ private: size_t end; }; - FontFamily* getFamilyForChar(uint32_t ch, FontLanguage lang, int variant) const; + FontFamily* getFamilyForChar(uint32_t ch, uint32_t vs, FontLanguage lang, int variant) const; // static for allocating unique id's static uint32_t sNextId; diff --git a/libs/minikin/FontCollection.cpp b/libs/minikin/FontCollection.cpp index 36d47ded9d..4c0070c2b1 100644 --- a/libs/minikin/FontCollection.cpp +++ b/libs/minikin/FontCollection.cpp @@ -103,7 +103,7 @@ FontCollection::~FontCollection() { // 3. If a font matches just language, it gets a score of 2. // 4. Matching the "compact" or "elegant" variant adds one to the score. // 5. Highest score wins, with ties resolved to the first font. 
-FontFamily* FontCollection::getFamilyForChar(uint32_t ch, +FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs, FontLanguage lang, int variant) const { if (ch >= mMaxChar) { return NULL; @@ -112,11 +112,11 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch, #ifdef VERBOSE_DEBUG ALOGD("querying range %d:%d\n", range.start, range.end); #endif - FontFamily* bestFamily = NULL; + FontFamily* bestFamily = nullptr; int bestScore = -1; for (size_t i = range.start; i < range.end; i++) { FontFamily* family = mFamilyVec[i]; - if (family->getCoverage()->get(ch)) { + if (vs == 0 ? family->getCoverage()->get(ch) : family->hasVariationSelector(ch, vs)) { // First font family in collection always matches if (mFamilies[0] == family) { return family; @@ -131,7 +131,13 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch, } } } - if (bestFamily == NULL && !mFamilyVec.empty()) { + if (bestFamily == nullptr && vs != 0) { + // If no fonts support the codepoint and variation selector pair, + // fallback to select a font family that supports just the base + // character, ignoring the variation selector. 
+ return getFamilyForChar(ch, 0, lang, variant); + } + if (bestFamily == nullptr && !mFamilyVec.empty()) { UErrorCode errorCode = U_ZERO_ERROR; const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); if (U_SUCCESS(errorCode)) { @@ -140,7 +146,7 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch, if (U_SUCCESS(errorCode) && len > 0) { int off = 0; U16_NEXT_UNSAFE(decomposed, off, ch); - return getFamilyForChar(ch, lang, variant); + return getFamilyForChar(ch, vs, lang, variant); } } bestFamily = mFamilies[0]; @@ -167,35 +173,61 @@ static bool isStickyWhitelisted(uint32_t c) { return false; } +static bool isVariationSelector(uint32_t c) { + return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF); +} + void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, vector* result) const { FontLanguage lang = style.getLanguage(); int variant = style.getVariant(); FontFamily* lastFamily = NULL; Run* run = NULL; - int nShorts; - for (size_t i = 0; i < string_size; i += nShorts) { - nShorts = 1; - uint32_t ch = string[i]; - // sigh, decode UTF-16 by hand here - if ((ch & 0xfc00) == 0xd800) { - if ((i + 1) < string_size) { - ch = 0x10000 + ((ch & 0x3ff) << 10) + (string[i + 1] & 0x3ff); - nShorts = 2; + + if (string_size == 0) { + return; + } + + const uint32_t kEndOfString = 0xFFFFFFFF; + + uint32_t nextCh = 0; + uint32_t prevCh = 0; + size_t nextUtf16Pos = 0; + size_t readLength = 0; + U16_NEXT(string, readLength, string_size, nextCh); + + do { + const uint32_t ch = nextCh; + const size_t utf16Pos = nextUtf16Pos; + nextUtf16Pos = readLength; + if (readLength < string_size) { + U16_NEXT(string, readLength, string_size, nextCh); + } else { + nextCh = kEndOfString; + } + + bool shouldContinueRun = false; + if (lastFamily != nullptr) { + if (isStickyWhitelisted(ch)) { + // Continue using existing font as long as it has coverage and is whitelisted + shouldContinueRun = lastFamily->getCoverage()->get(ch); + } else if 
(isVariationSelector(ch)) { + // Always continue if the character is a variation selector. + shouldContinueRun = true; } } - // Continue using existing font as long as it has coverage and is whitelisted - if (lastFamily == NULL - || !(isStickyWhitelisted(ch) && lastFamily->getCoverage()->get(ch))) { - FontFamily* family = getFamilyForChar(ch, lang, variant); - if (i == 0 || family != lastFamily) { - size_t start = i; + + if (!shouldContinueRun) { + FontFamily* family = + getFamilyForChar(ch, isVariationSelector(nextCh) ? nextCh : 0, lang, variant); + if (utf16Pos == 0 || family != lastFamily) { + size_t start = utf16Pos; // Workaround for Emoji keycap until we implement per-cluster font // selection: if keycap is found in a different font that also // supports previous char, attach previous char to the new run. // Only handles non-surrogate characters. // Bug 7557244. - if (ch == KEYCAP && i && family && family->getCoverage()->get(string[i - 1])) { + if (ch == KEYCAP && utf16Pos != 0 && family && family->getCoverage()->get(prevCh)) { run->end--; if (run->start == run->end) { result->pop_back(); @@ -214,8 +246,9 @@ void FontCollection::itemize(const uint16_t *string, size_t string_size, FontSty run->start = start; } } - run->end = i + nShorts; - } + prevCh = ch; + run->end = nextUtf16Pos; // exclusive + } while (nextCh != kEndOfString); } MinikinFont* FontCollection::baseFont(FontStyle style) { diff --git a/tests/FontCollectionItemizeTest.cpp b/tests/FontCollectionItemizeTest.cpp index cabc967984..3c453f3062 100644 --- a/tests/FontCollectionItemizeTest.cpp +++ b/tests/FontCollectionItemizeTest.cpp @@ -277,6 +277,255 @@ TEST(FontCollectionItemizeTest, itemize_mixed) { EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeItalic()); } +TEST(FontCollectionItemizeTest, itemize_variationSelector) { + std::unique_ptr collection = getFontCollection(); + std::vector runs; + + // A glyph for U+4FAE is provided by both Japanese font and Simplified + // Chinese font. 
Also a glyph for U+242EE is provided by both Japanese and + // Traditional Chinese font. To avoid effects of device default locale, + // explicitly specify the locale. + FontStyle kZH_HansStyle = FontStyle(FontLanguage("zh_Hans", 7)); + FontStyle kZH_HantStyle = FontStyle(FontLanguage("zh_Hant", 7)); + + // U+4FAE is available in both zh_Hans and ja font, but U+4FAE,U+FE00 is + // only available in ja font. + itemize(collection.get(), "U+4FAE", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+4FAE U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+4FAE U+4FAE U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(3, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection.get(), "U+4FAE U+4FAE U+FE00 U+4FAE", kZH_HansStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(3, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(3, runs[2].start); + EXPECT_EQ(4, runs[2].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[2])); + + // Variation selector after variation selector. + itemize(collection.get(), "U+4FAE U+FE00 U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+4FAE U+FE0E.
+ itemize(collection.get(), "U+4FAE U+FE0E", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + // Surrogate pairs handling. + // U+242EE is available in ja font and zh_Hant font. + // U+242EE U+FE00 is available only in ja font. + itemize(collection.get(), "U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+242EE U+FE00", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+242EE U+242EE U+FE00", kZH_HantStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(5, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection.get(), "U+242EE U+242EE U+FE00 U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(5, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(5, runs[2].start); + EXPECT_EQ(7, runs[2].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[2])); + + // Variation selector after variation selector.
+ itemize(collection.get(), "U+242EE U+FE00 U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+242EE U+FE0E. + itemize(collection.get(), "U+242EE U+FE0E", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + // Isolated variation selector. + itemize(collection.get(), "U+FE00", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+FE00", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); +} + +TEST(FontCollectionItemizeTest, itemize_variationSelectorSupplement) { + std::unique_ptr collection = getFontCollection(); + std::vector runs; + + // A glyph for U+845B is provided by both Japanese font and Simplified + // Chinese font. Also a glyph for U+242EE is provided by both Japanese and + // Traditional Chinese font. To avoid effects of device default locale, + // explicitly specify the locale. + FontStyle kZH_HansStyle = FontStyle(FontLanguage("zh_Hans", 7)); + FontStyle kZH_HantStyle = FontStyle(FontLanguage("zh_Hant", 7)); + + // U+845B is available in both zh_Hans and ja font, but U+845B,U+E0100 is + // only available in ja font.
+ itemize(collection.get(), "U+845B", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+845B U+E0100", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+845B U+845B U+E0100", kZH_HansStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(4, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection.get(), "U+845B U+845B U+E0100 U+845B", kZH_HansStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(4, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(4, runs[2].start); + EXPECT_EQ(5, runs[2].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[2])); + + // Variation selector after variation selector. + itemize(collection.get(), "U+845B U+E0100 U+E0100", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+845B U+E01E0. + itemize(collection.get(), "U+845B U+E01E0", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + // Variation selector supplement after a non-BMP base character. + // Surrogate pairs handling. + // U+242EE is available in ja font and zh_Hant font. + // U+242EE U+E0100 is available only in ja font.
+ itemize(collection.get(), "U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+242EE U+E0101", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+242EE U+242EE U+E0101", kZH_HantStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(6, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection.get(), "U+242EE U+242EE U+E0101 U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(6, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(6, runs[2].start); + EXPECT_EQ(8, runs[2].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[2])); + + // Variation selector after variation selector. + itemize(collection.get(), "U+242EE U+E0100 U+E0100", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(6, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+242EE U+E01E0. + itemize(collection.get(), "U+242EE U+E01E0", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + // Isolated variation selector supplement.
+ itemize(collection.get(), "U+E0100", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + + itemize(collection.get(), "U+E0100", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); +} + TEST(FontCollectionItemizeTest, itemize_no_crash) { std::unique_ptr collection = getFontCollection(); std::vector runs; @@ -340,4 +589,3 @@ TEST(FontCollectionItemizeTest, itemize_fakery) { EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic()); } -// TODO(11256006): Add Variation Selector test cases once it is supported. -- GitLab