提交 6f9966ea 编写于 作者: S Seigo Nonaka

Introduce multiple language based font fallback.

The motivation of this CL is to enhance the font fallback score design
to support multiple language font fallback.

This CL contains following changes:
- Break language based font score into two: script-based score and
  primary-language-based score.
- The primary-language-based score is 0 if the script-based score is 0.
  If the script-based score is not 0 and the primary language is the
  same as the requested one, the font gets an extra score of 1.
- The language score gets a higher multiplier for languages higher in
  the locale list.

Bug: 25122318
Bug: 26168983
Change-Id: Ib999997a88e6977e341f4c325e2a1b41a59db2d5
上级 a69ca2e1
......@@ -67,6 +67,16 @@ private:
FontFamily* getFamilyForChar(uint32_t ch, uint32_t vs, uint32_t langListId, int variant) const;
uint32_t calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
FontFamily* fontFamily) const;
uint32_t calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const;
static uint32_t calcLanguageMatchingScore(uint32_t userLangListId,
const FontFamily& fontFamily);
static uint32_t calcVariantMatchingScore(int variant, const FontFamily& fontFamily);
// static for allocating unique id's
static uint32_t sNextId;
......
......@@ -18,6 +18,7 @@
#define LOG_TAG "Minikin"
#include <cutils/log.h>
#include <algorithm>
#include "unicode/unistr.h"
#include "unicode/unorm2.h"
......@@ -103,29 +104,135 @@ FontCollection::~FontCollection() {
}
}
// Special scores for the font fallback.
// These two values are returned as-is by the scoring functions instead of a packed subscore.
//
// The font family supports neither the variation sequence nor its base character. This is the
// smallest uint32_t value.
const uint32_t kUnsupportedFontScore = 0;
// The font family is the first one in the collection and supports the requested character or
// variation sequence. This is the largest uint32_t value.
const uint32_t kFirstFontScore = UINT32_MAX;
// Calculates a font score.
// The score of the font family is based on three subscores.
// - Coverage Score: How well the font family covers the given character or variation sequence.
// - Language Score: How well the font family is appropriate for the language.
// - Variant Score: Whether the font family matches the variant. Note that this variant is not the
// one in BCP47. This is our own font variant (e.g., elegant, compact).
//
// Then, there is a priority for these three subscores as follow:
// Coverage Score > Language Score > Variant Score
// The returned score reflects this priority order.
//
// Note that there are two special scores.
// - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
// base character.
// - kFirstFontScore: When the font is the first font family in the collection and it supports the
// given character or variation sequence.
uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
FontFamily* fontFamily) const {
const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
// No need to calculate other scores.
return coverageScore;
}
const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
// Subscores are encoded into 31 bits representation to meet the subscore priority.
// The highest 2 bits are for coverage score, then following 28 bits are for language score,
// then the last 1 bit is for variant score.
return coverageScore << 29 | languageScore << 1 | variantScore;
}
// Calculates a font score based on variation sequence coverage.
// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
// character.
// - Returns kFirstFontScore if the font family is the first font family in the collection and it
// supports the given character or variation sequence.
// - Returns 3 if the font family supports the variation sequence.
// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
// - Returns 1 if the variation selector is not specified or if the font family only supports the
// variation sequence's base character.
uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const {
const bool hasVSGlyph = (vs != 0) && fontFamily->hasVariationSelector(ch, vs);
if (!hasVSGlyph && !fontFamily->getCoverage()->get(ch)) {
// The font doesn't support either variation sequence or even the base character.
return kUnsupportedFontScore;
}
if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
// If the first font family supports the given character or variation sequence, always use
// it.
return kFirstFontScore;
}
if (vs == 0) {
return 1;
}
if (hasVSGlyph) {
return 3;
}
if (vs == 0xFE0F || vs == 0xFE0E) {
// TODO use all language in the list.
const FontLanguage lang = FontLanguageListCache::getById(fontFamily->langId())[0];
const bool hasEmojiFlag = lang.hasEmojiFlag();
if (vs == 0xFE0F) {
return hasEmojiFlag ? 2 : 1;
} else { // vs == 0xFE0E
return hasEmojiFlag ? 1 : 2;
}
}
return 1;
}
// Calculates font scores based on the script matching and primary langauge matching.
//
// If the font's script doesn't support the requested script, the font gets a score of 0. If the
// font's script supports the requested script and the font has the same primary language as the
// requested one, the font gets a score of 2. If the font's script supports the requested script
// but the primary language is different from the requested one, the font gets a score of 1.
//
// If two languages in the requested list have the same language score, the font matching with
// higher priority language gets a higher score. For example, in the case the user requested
// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
// than the font of "en-Latn".
//
// To achieve the above two conditions, the language score is determined as follows:
// LanguageScore = s(0) * 3^(m - 1) + s(1) * 3^(m - 2) + ... + s(m - 2) * 3 + s(m - 1)
// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
// matching score. The possible values of s(i) are 0, 1 and 2.
uint32_t FontCollection::calcLanguageMatchingScore(
uint32_t userLangListId, const FontFamily& fontFamily) {
const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
// TODO use all language in the list.
FontLanguage fontLanguage = FontLanguageListCache::getById(fontFamily.langId())[0];
const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
uint32_t score = fontLanguage.getScoreFor(langList[0]); // maxCompareNum can't be zero.
for (size_t i = 1; i < maxCompareNum; ++i) {
score = score * 3u + fontLanguage.getScoreFor(langList[i]);
}
return score;
}
// Scores the variant ("compact" or "elegant") match between the text style and |fontFamily|.
// - Returns 1 if the family has no variant (variant() == 0) or its variant equals |variant|.
// - Returns 0 if the family has a variant that differs from |variant|.
uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
    const auto familyVariant = fontFamily.variant();
    if (familyVariant == 0) {
        // A family without a variant is acceptable for any text style.
        return 1;
    }
    return (familyVariant == variant) ? 1 : 0;
}
// Implement heuristic for choosing best-match font. Here are the rules:
// 1. If first font in the collection has the character, it wins.
// 2. If a font matches language, it gets a score of 2.
// 3. Matching the "compact" or "elegant" variant adds one to the score.
// 4. If there is a variation selector and a font supports the complete variation sequence, we add
// 8 to the score.
// 5. If there is a color variation selector (U+FE0F), we add 4 to the score if the font is an emoji
// font. This additional score of 4 is only given if the base character is supported in the font,
// but not the whole variation sequence.
// 6. If there is a text variation selector (U+FE0E), we add 4 to the score if the font is not an
// emoji font. This additional score of 4 is only given if the base character is supported in the
// font, but not the whole variation sequence.
// 7. Highest score wins, with ties resolved to the first font.
// 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
// 3. Highest score wins, with ties resolved to the first font.
FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
uint32_t langListId, int variant) const {
if (ch >= mMaxChar) {
return NULL;
}
const FontLanguages& langList = FontLanguageListCache::getById(langListId);
// TODO: use all languages in langList.
const FontLanguage lang = (langList.size() == 0) ? FontLanguage() : langList[0];
// Even if the font supports variation sequence, mRanges isn't aware of the base character of
// the sequence. Search all FontFamilies if variation sequence is specified.
// TODO: Always use mRanges for font search.
......@@ -141,40 +248,19 @@ FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
ALOGD("querying range %zd:%zd\n", range.start, range.end);
#endif
FontFamily* bestFamily = nullptr;
int bestScore = -1;
uint32_t bestScore = kUnsupportedFontScore;
for (size_t i = range.start; i < range.end; i++) {
FontFamily* family = familyVec[i];
const bool hasVSGlyph = (vs != 0) && family->hasVariationSelector(ch, vs);
if (hasVSGlyph || family->getCoverage()->get(ch)) {
if ((vs == 0 || hasVSGlyph) && mFamilies[0] == family) {
// If the first font family in collection supports the given character or sequence,
// always use it.
return family;
}
// TODO use all language in the list.
FontLanguage fontLang = FontLanguageListCache::getById(family->langId())[0];
int score = lang.match(fontLang) * 2;
if (family->variant() == 0 || family->variant() == variant) {
score++;
}
if (hasVSGlyph) {
score += 8;
} else if (((vs == 0xFE0F) && fontLang.hasEmojiFlag()) ||
((vs == 0xFE0E) && !fontLang.hasEmojiFlag())) {
score += 4;
}
if (score > bestScore) {
bestScore = score;
bestFamily = family;
}
const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
if (score == kFirstFontScore) {
// If the first font family supports the given character or variation sequence, always
// use it.
return family;
}
if (score > bestScore) {
bestScore = score;
bestFamily = family;
}
}
if (bestFamily == nullptr && vs != 0) {
// If no fonts support the codepoint and variation selector pair,
// fallback to select a font family that supports just the base
// character, ignoring the variation selector.
return getFamilyForChar(ch, 0, langListId, variant);
}
if (bestFamily == nullptr && !mFamilyVec.empty()) {
UErrorCode errorCode = U_ZERO_ERROR;
......
......@@ -115,21 +115,29 @@ std::string FontLanguage::getString() const {
return std::string(buf, i);
}
bool FontLanguage::isEqualScript(const FontLanguage other) const {
bool FontLanguage::isEqualScript(const FontLanguage& other) const {
return other.mScript == mScript;
}
// Returns true when |requestedBits| is non-zero and every bit set in it is also set in this
// language's sub-script bits.
bool FontLanguage::supportsScript(uint8_t requestedBits) const {
    if (requestedBits == 0) {
        // An empty request never counts as supported.
        return false;
    }
    const uint8_t commonBits = mSubScriptBits & requestedBits;
    return commonBits == requestedBits;
}
bool FontLanguage::supportsHbScript(hb_script_t script) const {
static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
"The Minikin script and HarfBuzz hb_script_t have different encodings.");
if (script == mScript) return true;
uint8_t requestedBits = scriptToSubScriptBits(script);
return requestedBits != 0 && (mSubScriptBits & requestedBits) == requestedBits;
return supportsScript(scriptToSubScriptBits(script));
}
int FontLanguage::match(const FontLanguage other) const {
// TODO: Use script for matching.
return *this == other;
// Scores how well this language matches |other|.
// - 0: either language is unsupported, or this language's script neither equals nor supports
//   the script of |other|.
// - 1: the script matches but the language differs.
// - 2: both the script and the language match.
int FontLanguage::getScoreFor(const FontLanguage other) const {
    if (isUnsupported() || other.isUnsupported()) {
        return 0;
    }

    const bool scriptMatches = isEqualScript(other) || supportsScript(other.mSubScriptBits);
    if (!scriptMatches) {
        return 0;
    }

    // Script is compatible; an identical language earns the extra point.
    return (mLanguage == other.mLanguage) ? 2 : 1;
}
#undef SCRIPT_TAG
......
......@@ -36,7 +36,7 @@ public:
FontLanguage(const char* buf, size_t length);
bool operator==(const FontLanguage other) const {
return !isUnsupported() && isEqualScript(other) && isEqualLanguage(other);
return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage;
}
bool operator!=(const FontLanguage other) const {
......@@ -46,8 +46,7 @@ public:
bool isUnsupported() const { return mLanguage == 0ul; }
bool hasEmojiFlag() const { return mSubScriptBits & kEmojiFlag; }
bool isEqualLanguage(const FontLanguage other) const { return mLanguage == other.mLanguage; }
bool isEqualScript(const FontLanguage other) const;
bool isEqualScript(const FontLanguage& other) const;
// Returns true if this script supports the given script. For example, ja-Jpan supports Hira,
// ja-Hira doesn't support Jpan.
......@@ -55,8 +54,8 @@ public:
std::string getString() const;
// 0 = no match, 1 = language matches
int match(const FontLanguage other) const;
// 0 = no match, 1 = script match, 2 = script and primary language match.
int getScoreFor(const FontLanguage other) const;
uint64_t getIdentifier() const { return (uint64_t)mScript << 32 | (uint64_t)mLanguage; }
......@@ -80,8 +79,11 @@ private:
uint8_t mSubScriptBits;
static uint8_t scriptToSubScriptBits(uint32_t script);
bool supportsScript(uint8_t requestedBits) const;
};
// Due to the limit of font fallback cost calculation, we can't use anything more than 17 languages.
const size_t FONT_LANGUAGES_LIMIT = 17;
typedef std::vector<FontLanguage> FontLanguages;
} // namespace android
......
......@@ -92,15 +92,20 @@ static FontLanguages constructFontLanguages(const std::string& input) {
uint64_t identifier = lang.getIdentifier();
if (!lang.isUnsupported() && seen.count(identifier) == 0) {
result.push_back(lang);
if (result.size() == FONT_LANGUAGES_LIMIT) {
break;
}
seen.insert(identifier);
}
}
locale.assign(input, currentIdx, input.size() - currentIdx);
size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale);
FontLanguage lang(langTag, length);
uint64_t identifier = lang.getIdentifier();
if (!lang.isUnsupported() && seen.count(identifier) == 0) {
result.push_back(lang);
if (result.size() < FONT_LANGUAGES_LIMIT) {
locale.assign(input, currentIdx, input.size() - currentIdx);
size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale);
FontLanguage lang(langTag, length);
uint64_t identifier = lang.getIdentifier();
if (!lang.isUnsupported() && seen.count(identifier) == 0) {
result.push_back(lang);
}
}
return result;
}
......
此差异已折叠。
......@@ -14,6 +14,9 @@
* limitations under the License.
*/
#ifndef MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
#define MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
#include <minikin/MinikinFont.h>
class SkTypeface;
......@@ -35,3 +38,5 @@ private:
SkTypeface *mTypeface;
const std::string mFontPath;
};
#endif // MINIKIN_TEST_MINIKIN_FONT_FOR_TEST_H
无法预览此类型文件
......@@ -30,6 +30,10 @@
<GlyphID id="10" name="U+82B1"/>
<GlyphID id="11" name="U+242EE"/>
<GlyphID id="12" name="U+1F470"/>
<GlyphID id="13" name="U+9AA8"/>
<GlyphID id="14" name="U+35A8"/>
<GlyphID id="15" name="U+35B6"/>
<GlyphID id="16" name="U+35C5"/>
</GlyphOrder>
<head>
......@@ -159,6 +163,10 @@
<mtx name="U+82B1" width="500" lsb="93"/>
<mtx name="U+242EE" width="500" lsb="93"/>
<mtx name="U+1F470" width="500" lsb="93"/>
<mtx name="U+9AA8" width="500" lsb="93"/>
<mtx name="U+35A8" width="500" lsb="93"/>
<mtx name="U+35B6" width="500" lsb="93"/>
<mtx name="U+35C5" width="500" lsb="93"/>
</hmtx>
<cmap>
......@@ -176,6 +184,10 @@
<map code="0x82B1" name="U+82B1" />
<map code="0x242EE" name="U+242EE" />
<map code="0x1F470" name="U+242EE" />
<map code="0x9AA8" name="U+9AA8" />
<map code="0x35A8" name="U+35A8" />
<map code="0x35B6" name="U+35B6" />
<map code="0x35C5" name="U+35C5" />
</cmap_format_12>
<cmap_format_14 format="14" platformID="0" platEncID="5" length="40" numVarSelectorRecords="3">
<map uvs="0xFE00" uv="0x4FAE" name="None" />
......@@ -183,6 +195,9 @@
<map uvs="0xE0100" uv="0x845B" name="None" />
<map uvs="0xE0100" uv="0x242EE" name="None" />
<map uvs="0xE0101" uv="0x242EE" name="None" />
<map uvs="0xE0100" uv="0x35A8" name="None" />
<map uvs="0xE0100" uv="0x35B6" name="None" />
<map uvs="0xE0100" uv="0x35C5" name="None" />
</cmap_format_14>
</cmap>
......@@ -235,6 +250,18 @@
<TTGlyph name="U+1F470" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+9AA8" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35A8" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35B6" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35C5" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
</glyf>
<name>
......
......@@ -26,6 +26,11 @@
<GlyphID id="6" name="U+4F60"/>
<GlyphID id="7" name="U+4FAE"/>
<GlyphID id="8" name="U+845B"/>
<GlyphID id="9" name="U+3402"/>
<GlyphID id="10" name="U+9AA8"/>
<GlyphID id="11" name="U+35A8"/>
<GlyphID id="12" name="U+35B6_VS17"/>
<GlyphID id="13" name="U+35C5_VS17"/>
</GlyphOrder>
<head>
......@@ -151,6 +156,11 @@
<mtx name="U+4F60" width="500" lsb="93"/>
<mtx name="U+4FAE" width="500" lsb="93"/>
<mtx name="U+845B" width="500" lsb="93"/>
<mtx name="U+3402" width="500" lsb="93"/>
<mtx name="U+9AA8" width="500" lsb="93"/>
<mtx name="U+35A8" width="500" lsb="93"/>
<mtx name="U+35B6_VS17" width="500" lsb="93"/>
<mtx name="U+35C5_VS17" width="500" lsb="93"/>
</hmtx>
<cmap>
......@@ -164,7 +174,16 @@
<map code="0x4F60" name="U+4F60" />
<map code="0x4FAE" name="U+4FAE" />
<map code="0x845B" name="U+845B" />
<map code="0x3402" name="U+3402" />
<map code="0x9AA8" name="U+9AA8" />
<map code="0x35A8" name="U+35A8" />
</cmap_format_12>
<cmap_format_14 format="14" platformID="0" platEncID="5" length="40" numVarSelectorRecords="3">
<map uvs="0xE0100" uv="0x3402" name="None"/>
<map uvs="0xE0100" uv="0x35A8" name="None"/>
<map uvs="0xE0100" uv="0x35B6" name="U+35B6_VS17"/>
<map uvs="0xE0100" uv="0x35C5" name="U+35C5_VS17"/>
</cmap_format_14>
</cmap>
<loca>
......@@ -204,6 +223,21 @@
<TTGlyph name="U+845B" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+3402" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+9AA8" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35A8" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35B6_VS17" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35C5_VS17" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
</glyf>
<name>
......
......@@ -19,6 +19,11 @@
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
<GlyphID id="0" name=".notdef"/>
<GlyphID id="1" name="U+242EE"/>
<GlyphID id="2" name="U+3402"/>
<GlyphID id="3" name="U+9AA8"/>
<GlyphID id="4" name="U+35A8"/>
<GlyphID id="5" name="U+35B6"/>
<GlyphID id="6" name="U+35C5_VS17"/>
</GlyphOrder>
<head>
......@@ -137,13 +142,28 @@
<hmtx>
<mtx name=".notdef" width="500" lsb="93"/>
<mtx name="U+242EE" width="500" lsb="93"/>
<mtx name="U+3402" width="500" lsb="93"/>
<mtx name="U+9AA8" width="500" lsb="93"/>
<mtx name="U+35A8" width="500" lsb="93"/>
<mtx name="U+35B6" width="500" lsb="93"/>
<mtx name="U+35C5_VS17" width="500" lsb="93"/>
</hmtx>
<cmap>
<tableVersion version="0"/>
<cmap_format_12 format="12" reserved="0" length="10" nGroups="1" platformID="3" platEncID="1" language="0">
<map code="0x242EE" name="U+242EE" />
<map code="0x3402" name="U+3402" />
<map code="0x9AA8" name="U+9AA8" />
<map code="0x35A8" name="U+35A8" />
<map code="0x35B6" name="U+35B6" />
</cmap_format_12>
<cmap_format_14 format="14" platformID="0" platEncID="5" length="40" numVarSelectorRecords="3">
<map uvs="0xE0100" uv="0x3402" name="None" />
<map uvs="0xE0100" uv="0x35A8" name="None" />
<map uvs="0xE0100" uv="0x35B6" name="None" />
<map uvs="0xE0100" uv="0x35C5" name="U+35C5_VS17" />
</cmap_format_14>
</cmap>
<loca>
......@@ -161,6 +181,21 @@
<TTGlyph name="U+242EE" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+3402" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+9AA8" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35A8" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35B6" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
<TTGlyph name="U+35C5_VS17" xMin="0" yMin="0" xMax="0" yMax="0">
<contour></contour><instructions><assembly></assembly></instructions>
</TTGlyph>
</glyf>
<name>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册