diff --git a/third_party/txt/benchmarks/txt_run_all_benchmarks.cc b/third_party/txt/benchmarks/txt_run_all_benchmarks.cc index cc5bfc364574cb89ff490b9f5e9a1661c41a4483..a5af92546a7355ce075b6b6050312f96bdc34e3a 100644 --- a/third_party/txt/benchmarks/txt_run_all_benchmarks.cc +++ b/third_party/txt/benchmarks/txt_run_all_benchmarks.cc @@ -38,4 +38,4 @@ int main(int argc, char** argv) { fml::icu::InitializeICU(); ::benchmark::RunSpecifiedBenchmarks(); -} \ No newline at end of file +} diff --git a/third_party/txt/src/minikin/CmapCoverage.cpp b/third_party/txt/src/minikin/CmapCoverage.cpp index a953304e5bc9fa5cad19d78992411399c7149178..6de0618aed0edebc28220ec592ec75491c3e2e80 100644 --- a/third_party/txt/src/minikin/CmapCoverage.cpp +++ b/third_party/txt/src/minikin/CmapCoverage.cpp @@ -23,266 +23,283 @@ using std::vector; #include -#include #include +#include #include "MinikinInternal.h" namespace minikin { // These could perhaps be optimized to use __builtin_bswap16 and friends. static uint32_t readU16(const uint8_t* data, size_t offset) { - return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]); + return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]); } static uint32_t readU32(const uint8_t* data, size_t offset) { - return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | - ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); + return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | + ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); } -static void addRange(vector &coverage, uint32_t start, uint32_t end) { +static void addRange(vector& coverage, uint32_t start, uint32_t end) { #ifdef VERBOSE_DEBUG - ALOGD("adding range %d-%d\n", start, end); + ALOGD("adding range %d-%d\n", start, end); #endif - if (coverage.empty() || coverage.back() < start) { - coverage.push_back(start); - coverage.push_back(end); - } else { - coverage.back() = end; - } + if (coverage.empty() || 
coverage.back() < start) { + coverage.push_back(start); + coverage.push_back(end); + } else { + coverage.back() = end; + } } -// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector -static bool getCoverageFormat4(vector& coverage, const uint8_t* data, size_t size) { - const size_t kSegCountOffset = 6; - const size_t kEndCountOffset = 14; - const size_t kHeaderSize = 16; - const size_t kSegmentSize = 8; // total size of array elements for one segment - if (kEndCountOffset > size) { - return false; +// Get the coverage information out of a Format 4 subtable, storing it in the +// coverage vector +static bool getCoverageFormat4(vector& coverage, + const uint8_t* data, + size_t size) { + const size_t kSegCountOffset = 6; + const size_t kEndCountOffset = 14; + const size_t kHeaderSize = 16; + const size_t kSegmentSize = + 8; // total size of array elements for one segment + if (kEndCountOffset > size) { + return false; + } + size_t segCount = readU16(data, kSegCountOffset) >> 1; + if (kHeaderSize + segCount * kSegmentSize > size) { + return false; + } + for (size_t i = 0; i < segCount; i++) { + uint32_t end = readU16(data, kEndCountOffset + 2 * i); + uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i)); + if (end < start) { + // invalid segment range: size must be positive + android_errorWriteLog(0x534e4554, "26413177"); + return false; } - size_t segCount = readU16(data, kSegCountOffset) >> 1; - if (kHeaderSize + segCount * kSegmentSize > size) { - return false; - } - for (size_t i = 0; i < segCount; i++) { - uint32_t end = readU16(data, kEndCountOffset + 2 * i); - uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i)); - if (end < start) { - // invalid segment range: size must be positive - android_errorWriteLog(0x534e4554, "26413177"); - return false; + uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i)); + if (rangeOffset == 0) { + uint32_t delta = readU16(data, kHeaderSize + 2 * (2 
* segCount + i)); + if (((end + delta) & 0xffff) > end - start) { + addRange(coverage, start, end + 1); + } else { + for (uint32_t j = start; j < end + 1; j++) { + if (((j + delta) & 0xffff) != 0) { + addRange(coverage, j, j + 1); + } + } + } + } else { + for (uint32_t j = start; j < end + 1; j++) { + uint32_t actualRangeOffset = + kHeaderSize + 6 * segCount + rangeOffset + (i + j - start) * 2; + if (actualRangeOffset + 2 > size) { + // invalid rangeOffset is considered a "warning" by OpenType Sanitizer + continue; } - uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i)); - if (rangeOffset == 0) { - uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i)); - if (((end + delta) & 0xffff) > end - start) { - addRange(coverage, start, end + 1); - } else { - for (uint32_t j = start; j < end + 1; j++) { - if (((j + delta) & 0xffff) != 0) { - addRange(coverage, j, j + 1); - } - } - } - } else { - for (uint32_t j = start; j < end + 1; j++) { - uint32_t actualRangeOffset = kHeaderSize + 6 * segCount + rangeOffset + - (i + j - start) * 2; - if (actualRangeOffset + 2 > size) { - // invalid rangeOffset is considered a "warning" by OpenType Sanitizer - continue; - } - uint32_t glyphId = readU16(data, actualRangeOffset); - if (glyphId != 0) { - addRange(coverage, j, j + 1); - } - } + uint32_t glyphId = readU16(data, actualRangeOffset); + if (glyphId != 0) { + addRange(coverage, j, j + 1); } + } } - return true; + } + return true; } -// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector -static bool getCoverageFormat12(vector& coverage, const uint8_t* data, size_t size) { - const size_t kNGroupsOffset = 12; - const size_t kFirstGroupOffset = 16; - const size_t kGroupSize = 12; - const size_t kStartCharCodeOffset = 0; - const size_t kEndCharCodeOffset = 4; - const size_t kMaxNGroups = 0xfffffff0 / kGroupSize; // protection against overflow - // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * 
kGroupSize fits in 32 bits. - if (kFirstGroupOffset > size) { - return false; - } - uint32_t nGroups = readU32(data, kNGroupsOffset); - if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) { - android_errorWriteLog(0x534e4554, "25645298"); - return false; +// Get the coverage information out of a Format 12 subtable, storing it in the +// coverage vector +static bool getCoverageFormat12(vector& coverage, + const uint8_t* data, + size_t size) { + const size_t kNGroupsOffset = 12; + const size_t kFirstGroupOffset = 16; + const size_t kGroupSize = 12; + const size_t kStartCharCodeOffset = 0; + const size_t kEndCharCodeOffset = 4; + const size_t kMaxNGroups = + 0xfffffff0 / kGroupSize; // protection against overflow + // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits + // in 32 bits. + if (kFirstGroupOffset > size) { + return false; + } + uint32_t nGroups = readU32(data, kNGroupsOffset); + if (nGroups >= kMaxNGroups || + kFirstGroupOffset + nGroups * kGroupSize > size) { + android_errorWriteLog(0x534e4554, "25645298"); + return false; + } + for (uint32_t i = 0; i < nGroups; i++) { + uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize; + uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset); + uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset); + if (end < start) { + // invalid group range: size must be positive + android_errorWriteLog(0x534e4554, "26413177"); + return false; } - for (uint32_t i = 0; i < nGroups; i++) { - uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize; - uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset); - uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset); - if (end < start) { - // invalid group range: size must be positive - android_errorWriteLog(0x534e4554, "26413177"); - return false; - } - // No need to read outside of Unicode code point range. 
- if (start > MAX_UNICODE_CODE_POINT) { - return true; - } - if (end > MAX_UNICODE_CODE_POINT) { - // file is inclusive, vector is exclusive - addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1); - return true; - } - addRange(coverage, start, end + 1); // file is inclusive, vector is exclusive + // No need to read outside of Unicode code point range. + if (start > MAX_UNICODE_CODE_POINT) { + return true; + } + if (end > MAX_UNICODE_CODE_POINT) { + // file is inclusive, vector is exclusive + addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1); + return true; } - return true; + addRange(coverage, start, + end + 1); // file is inclusive, vector is exclusive + } + return true; } // Lower value has higher priority. 0 for the highest priority table. // kLowestPriority for unsupported tables. -// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it. +// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync +// with it. constexpr uint8_t kLowestPriority = 255; uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) { - if (platformId == 3 && encodingId == 10) { - return 0; - } - if (platformId == 0 && encodingId == 6) { - return 1; - } - if (platformId == 0 && encodingId == 4) { - return 2; - } - if (platformId == 3 && encodingId == 1) { - return 3; - } - if (platformId == 0 && encodingId == 3) { - return 4; - } - if (platformId == 0 && encodingId == 2) { - return 5; - } - if (platformId == 0 && encodingId == 1) { - return 6; - } - if (platformId == 0 && encodingId == 0) { - return 7; - } - // Tables other than above are not supported. 
- return kLowestPriority; + if (platformId == 3 && encodingId == 10) { + return 0; + } + if (platformId == 0 && encodingId == 6) { + return 1; + } + if (platformId == 0 && encodingId == 4) { + return 2; + } + if (platformId == 3 && encodingId == 1) { + return 3; + } + if (platformId == 0 && encodingId == 3) { + return 4; + } + if (platformId == 0 && encodingId == 2) { + return 5; + } + if (platformId == 0 && encodingId == 1) { + return 6; + } + if (platformId == 0 && encodingId == 0) { + return 7; + } + // Tables other than above are not supported. + return kLowestPriority; } -SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size, - bool* has_cmap_format14_subtable) { - constexpr size_t kHeaderSize = 4; - constexpr size_t kNumTablesOffset = 2; - constexpr size_t kTableSize = 8; - constexpr size_t kPlatformIdOffset = 0; - constexpr size_t kEncodingIdOffset = 2; - constexpr size_t kOffsetOffset = 4; - constexpr size_t kFormatOffset = 0; - constexpr uint32_t kInvalidOffset = UINT32_MAX; +SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, + size_t cmap_size, + bool* has_cmap_format14_subtable) { + constexpr size_t kHeaderSize = 4; + constexpr size_t kNumTablesOffset = 2; + constexpr size_t kTableSize = 8; + constexpr size_t kPlatformIdOffset = 0; + constexpr size_t kEncodingIdOffset = 2; + constexpr size_t kOffsetOffset = 4; + constexpr size_t kFormatOffset = 0; + constexpr uint32_t kInvalidOffset = UINT32_MAX; - if (kHeaderSize > cmap_size) { - return SparseBitSet(); - } - uint32_t numTables = readU16(cmap_data, kNumTablesOffset); - if (kHeaderSize + numTables * kTableSize > cmap_size) { - return SparseBitSet(); - } + if (kHeaderSize > cmap_size) { + return SparseBitSet(); + } + uint32_t numTables = readU16(cmap_data, kNumTablesOffset); + if (kHeaderSize + numTables * kTableSize > cmap_size) { + return SparseBitSet(); + } - uint32_t bestTableOffset = kInvalidOffset; - uint16_t bestTableFormat = 0; - uint8_t bestTablePriority = 
kLowestPriority; - *has_cmap_format14_subtable = false; - for (uint32_t i = 0; i < numTables; ++i) { - const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize; - const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset); - const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset); - const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset); - - if (offset > cmap_size - 2) { - continue; // Invalid table: not enough space to read. - } - const uint16_t format = readU16(cmap_data, offset + kFormatOffset); + uint32_t bestTableOffset = kInvalidOffset; + uint16_t bestTableFormat = 0; + uint8_t bestTablePriority = kLowestPriority; + *has_cmap_format14_subtable = false; + for (uint32_t i = 0; i < numTables; ++i) { + const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize; + const uint16_t platformId = + readU16(cmap_data, tableHeadOffset + kPlatformIdOffset); + const uint16_t encodingId = + readU16(cmap_data, tableHeadOffset + kEncodingIdOffset); + const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset); - if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) { - if (!(*has_cmap_format14_subtable) && format == 14) { - *has_cmap_format14_subtable = true; - } else { - // Ignore the (0, 5) table if we have already seen another valid one or it's in a - // format we don't understand. - } - } else { - uint32_t length; - uint32_t language; + if (offset > cmap_size - 2) { + continue; // Invalid table: not enough space to read. + } + const uint16_t format = readU16(cmap_data, offset + kFormatOffset); - if (format == 4) { - constexpr size_t lengthOffset = 2; - constexpr size_t languageOffset = 4; - constexpr size_t minTableSize = languageOffset + 2; - if (offset > cmap_size - minTableSize) { - continue; // Invalid table: not enough space to read. 
- } - length = readU16(cmap_data, offset + lengthOffset); - language = readU16(cmap_data, offset + languageOffset); - } else if (format == 12) { - constexpr size_t lengthOffset = 4; - constexpr size_t languageOffset = 8; - constexpr size_t minTableSize = languageOffset + 4; - if (offset > cmap_size - minTableSize) { - continue; // Invalid table: not enough space to read. - } - length = readU32(cmap_data, offset + lengthOffset); - language = readU32(cmap_data, offset + languageOffset); - } else { - continue; - } + if (platformId == 0 /* Unicode */ && + encodingId == 5 /* Variation Sequences */) { + if (!(*has_cmap_format14_subtable) && format == 14) { + *has_cmap_format14_subtable = true; + } else { + // Ignore the (0, 5) table if we have already seen another valid one or + // it's in a format we don't understand. + } + } else { + uint32_t length; + uint32_t language; - if (length > cmap_size - offset) { - continue; // Invalid table: table length is larger than whole cmap data size. - } - if (language != 0) { - // Unsupported or invalid table: this is either a subtable for the Macintosh - // platform (which we don't support), or an invalid subtable since language field - // should be zero for non-Macintosh subtables. - continue; - } - const uint8_t priority = getTablePriority(platformId, encodingId); - if (priority < bestTablePriority) { - bestTableOffset = offset; - bestTablePriority = priority; - bestTableFormat = format; - } + if (format == 4) { + constexpr size_t lengthOffset = 2; + constexpr size_t languageOffset = 4; + constexpr size_t minTableSize = languageOffset + 2; + if (offset > cmap_size - minTableSize) { + continue; // Invalid table: not enough space to read. } - if (*has_cmap_format14_subtable && bestTablePriority == 0 /* highest priority */) { - // Already found the highest priority table and variation sequences table. No need to - // look at remaining tables. 
- break; + length = readU16(cmap_data, offset + lengthOffset); + language = readU16(cmap_data, offset + languageOffset); + } else if (format == 12) { + constexpr size_t lengthOffset = 4; + constexpr size_t languageOffset = 8; + constexpr size_t minTableSize = languageOffset + 4; + if (offset > cmap_size - minTableSize) { + continue; // Invalid table: not enough space to read. } + length = readU32(cmap_data, offset + lengthOffset); + language = readU32(cmap_data, offset + languageOffset); + } else { + continue; + } + + if (length > cmap_size - offset) { + continue; // Invalid table: table length is larger than whole cmap data + // size. + } + if (language != 0) { + // Unsupported or invalid table: this is either a subtable for the + // Macintosh platform (which we don't support), or an invalid subtable + // since language field should be zero for non-Macintosh subtables. + continue; + } + const uint8_t priority = getTablePriority(platformId, encodingId); + if (priority < bestTablePriority) { + bestTableOffset = offset; + bestTablePriority = priority; + bestTableFormat = format; + } } - if (bestTableOffset == kInvalidOffset) { - return SparseBitSet(); - } - const uint8_t* tableData = cmap_data + bestTableOffset; - const size_t tableSize = cmap_size - bestTableOffset; - vector coverageVec; - bool success; - if (bestTableFormat == 4) { - success = getCoverageFormat4(coverageVec, tableData, tableSize); - } else { - success = getCoverageFormat12(coverageVec, tableData, tableSize); - } - if (success) { - return SparseBitSet(&coverageVec.front(), coverageVec.size() >> 1); - } else { - return SparseBitSet(); + if (*has_cmap_format14_subtable && + bestTablePriority == 0 /* highest priority */) { + // Already found the highest priority table and variation sequences table. + // No need to look at remaining tables. 
+ break; } - + } + if (bestTableOffset == kInvalidOffset) { + return SparseBitSet(); + } + const uint8_t* tableData = cmap_data + bestTableOffset; + const size_t tableSize = cmap_size - bestTableOffset; + vector coverageVec; + bool success; + if (bestTableFormat == 4) { + success = getCoverageFormat4(coverageVec, tableData, tableSize); + } else { + success = getCoverageFormat12(coverageVec, tableData, tableSize); + } + if (success) { + return SparseBitSet(&coverageVec.front(), coverageVec.size() >> 1); + } else { + return SparseBitSet(); + } } } // namespace minikin diff --git a/third_party/txt/src/minikin/CmapCoverage.h b/third_party/txt/src/minikin/CmapCoverage.h index 5136d8692a516553f3e792d5e79ae4cb0e515715..9ca72715a6719d3875857a2cb309b1c7266abf4c 100644 --- a/third_party/txt/src/minikin/CmapCoverage.h +++ b/third_party/txt/src/minikin/CmapCoverage.h @@ -22,9 +22,10 @@ namespace minikin { class CmapCoverage { -public: - static SparseBitSet getCoverage(const uint8_t* cmap_data, size_t cmap_size, - bool* has_cmap_format14_subtable); + public: + static SparseBitSet getCoverage(const uint8_t* cmap_data, + size_t cmap_size, + bool* has_cmap_format14_subtable); }; } // namespace minikin diff --git a/third_party/txt/src/minikin/Emoji.cpp b/third_party/txt/src/minikin/Emoji.cpp index df43c75b36fd38aeb5ab89129561f2f57ced931f..da61c8c27aa2c4e4115e78e7dacc5634bc44237d 100644 --- a/third_party/txt/src/minikin/Emoji.cpp +++ b/third_party/txt/src/minikin/Emoji.cpp @@ -19,71 +19,65 @@ namespace minikin { bool isNewEmoji(uint32_t c) { - // Emoji characters new in Unicode emoji 5.0. - // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt - // TODO: Remove once emoji-data.text 5.0 is in ICU or update to 6.0. - if (c < 0x1F6F7 || c > 0x1F9E6) { - // Optimization for characters outside the new emoji range. 
- return false; - } - return (0x1F6F7 <= c && c <= 0x1F6F8) - || c == 0x1F91F - || (0x1F928 <= c && c <= 0x1F92F) - || (0x1F931 <= c && c <= 0x1F932) - || c == 0x1F94C - || (0x1F95F <= c && c <= 0x1F96B) - || (0x1F992 <= c && c <= 0x1F997) - || (0x1F9D0 <= c && c <= 0x1F9E6); + // Emoji characters new in Unicode emoji 5.0. + // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt + // TODO: Remove once emoji-data.text 5.0 is in ICU or update to 6.0. + if (c < 0x1F6F7 || c > 0x1F9E6) { + // Optimization for characters outside the new emoji range. + return false; + } + return (0x1F6F7 <= c && c <= 0x1F6F8) || c == 0x1F91F || + (0x1F928 <= c && c <= 0x1F92F) || (0x1F931 <= c && c <= 0x1F932) || + c == 0x1F94C || (0x1F95F <= c && c <= 0x1F96B) || + (0x1F992 <= c && c <= 0x1F997) || (0x1F9D0 <= c && c <= 0x1F9E6); } bool isEmoji(uint32_t c) { #if WIP_NEEDS_ICU_UPDATE - return false; -#else // WIP_NEEDS_ICU_UPDATE - return isNewEmoji(c) || u_hasBinaryProperty(c, UCHAR_EMOJI); -#endif // WIP_NEEDS_ICU_UPDATE + return false; +#else // WIP_NEEDS_ICU_UPDATE + return isNewEmoji(c) || u_hasBinaryProperty(c, UCHAR_EMOJI); +#endif // WIP_NEEDS_ICU_UPDATE } bool isEmojiModifier(uint32_t c) { #if WIP_NEEDS_ICU_UPDATE - return false; -#else // WIP_NEEDS_ICU_UPDATE - // Emoji modifier are not expected to change, so there's a small change we need to customize - // this. - return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER); -#endif // WIP_NEEDS_ICU_UPDATE + return false; +#else // WIP_NEEDS_ICU_UPDATE + // Emoji modifier are not expected to change, so there's a small change we + // need to customize this. 
+ return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER); +#endif // WIP_NEEDS_ICU_UPDATE } bool isEmojiBase(uint32_t c) { #if WIP_NEEDS_ICU_UPDATE - return false; -#else // WIP_NEEDS_ICU_UPDATE - // These two characters were removed from Emoji_Modifier_Base in Emoji 4.0, but we need to keep - // them as emoji modifier bases since there are fonts and user-generated text out there that - // treats these as potential emoji bases. - if (c == 0x1F91D || c == 0x1F93C) { - return true; - } - // Emoji Modifier Base characters new in Unicode emoji 5.0. - // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt - // TODO: Remove once emoji-data.text 5.0 is in ICU or update to 6.0. - if (c == 0x1F91F - || (0x1F931 <= c && c <= 0x1F932) - || (0x1F9D1 <= c && c <= 0x1F9DD)) { - return true; - } - return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER_BASE); -#endif // WIP_NEEDS_ICU_UPDATE + return false; +#else // WIP_NEEDS_ICU_UPDATE + // These two characters were removed from Emoji_Modifier_Base in Emoji 4.0, + // but we need to keep them as emoji modifier bases since there are fonts and + // user-generated text out there that treats these as potential emoji bases. + if (c == 0x1F91D || c == 0x1F93C) { + return true; + } + // Emoji Modifier Base characters new in Unicode emoji 5.0. + // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt + // TODO: Remove once emoji-data.text 5.0 is in ICU or update to 6.0. + if (c == 0x1F91F || (0x1F931 <= c && c <= 0x1F932) || + (0x1F9D1 <= c && c <= 0x1F9DD)) { + return true; + } + return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER_BASE); +#endif // WIP_NEEDS_ICU_UPDATE } UCharDirection emojiBidiOverride(const void* /* context */, UChar32 c) { - if (isNewEmoji(c)) { - // All new emoji characters in Unicode 10.0 are of the bidi class ON. - return U_OTHER_NEUTRAL; - } else { - return u_charDirection(c); - } + if (isNewEmoji(c)) { + // All new emoji characters in Unicode 10.0 are of the bidi class ON. 
+ return U_OTHER_NEUTRAL; + } else { + return u_charDirection(c); + } } } // namespace minikin - diff --git a/third_party/txt/src/minikin/Emoji.h b/third_party/txt/src/minikin/Emoji.h index 28261735aaa297928aab16a2526d7d8277aea8d3..046a9d60283e63ef3813d2f05e89e51660c9a040 100644 --- a/third_party/txt/src/minikin/Emoji.h +++ b/third_party/txt/src/minikin/Emoji.h @@ -31,4 +31,3 @@ bool isEmojiModifier(uint32_t c); UCharDirection emojiBidiOverride(const void* context, UChar32 c); } // namespace minikin - diff --git a/third_party/txt/src/minikin/FontCollection.cpp b/third_party/txt/src/minikin/FontCollection.cpp index ddb85720dee71015c7ccab731a869f81d4098f1d..1e7744953ccfbf792a08e42a49950694ea24a0c5 100644 --- a/third_party/txt/src/minikin/FontCollection.cpp +++ b/third_party/txt/src/minikin/FontCollection.cpp @@ -24,11 +24,11 @@ #include "unicode/unistr.h" #include "unicode/unorm2.h" +#include +#include #include "FontLanguage.h" #include "FontLanguageListCache.h" #include "MinikinInternal.h" -#include -#include using std::vector; @@ -36,7 +36,7 @@ namespace minikin { template static inline T max(T a, T b) { - return a>b ? a : b; + return a > b ? 
a : b; } const uint32_t EMOJI_STYLE_VS = 0xFE0F; @@ -44,76 +44,80 @@ const uint32_t TEXT_STYLE_VS = 0xFE0E; uint32_t FontCollection::sNextId = 0; -FontCollection::FontCollection(std::shared_ptr&& typeface) : mMaxChar(0) { - std::vector> typefaces; - typefaces.push_back(typeface); - init(typefaces); +FontCollection::FontCollection(std::shared_ptr&& typeface) + : mMaxChar(0) { + std::vector> typefaces; + typefaces.push_back(typeface); + init(typefaces); } -FontCollection::FontCollection(const vector>& typefaces) : - mMaxChar(0) { - init(typefaces); +FontCollection::FontCollection( + const vector>& typefaces) + : mMaxChar(0) { + init(typefaces); } -void FontCollection::init(const vector>& typefaces) { - std::lock_guard _l(gMinikinLock); - mId = sNextId++; - vector lastChar; - size_t nTypefaces = typefaces.size(); +void FontCollection::init( + const vector>& typefaces) { + std::lock_guard _l(gMinikinLock); + mId = sNextId++; + vector lastChar; + size_t nTypefaces = typefaces.size(); #ifdef VERBOSE_DEBUG - ALOGD("nTypefaces = %zd\n", nTypefaces); + ALOGD("nTypefaces = %zd\n", nTypefaces); #endif - const FontStyle defaultStyle; - for (size_t i = 0; i < nTypefaces; i++) { - const std::shared_ptr& family = typefaces[i]; - if (family->getClosestMatch(defaultStyle).font == nullptr) { - continue; - } - const SparseBitSet& coverage = family->getCoverage(); - mFamilies.push_back(family); // emplace_back would be better - if (family->hasVSTable()) { - mVSFamilyVec.push_back(family); - } - mMaxChar = max(mMaxChar, coverage.length()); - lastChar.push_back(coverage.nextSetBit(0)); - - const std::unordered_set& supportedAxes = family->supportedAxes(); - mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); + const FontStyle defaultStyle; + for (size_t i = 0; i < nTypefaces; i++) { + const std::shared_ptr& family = typefaces[i]; + if (family->getClosestMatch(defaultStyle).font == nullptr) { + continue; } - nTypefaces = mFamilies.size(); - LOG_ALWAYS_FATAL_IF(nTypefaces 
== 0, - "Font collection must have at least one valid typeface"); - LOG_ALWAYS_FATAL_IF(nTypefaces > 254, - "Font collection may only have up to 254 font families."); - size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage; - // TODO: Use variation selector map for mRanges construction. - // A font can have a glyph for a base code point and variation selector pair but no glyph for - // the base code point without variation selector. The family won't be listed in the range in - // this case. - for (size_t i = 0; i < nPages; i++) { - Range dummy; - mRanges.push_back(dummy); - Range* range = &mRanges.back(); + const SparseBitSet& coverage = family->getCoverage(); + mFamilies.push_back(family); // emplace_back would be better + if (family->hasVSTable()) { + mVSFamilyVec.push_back(family); + } + mMaxChar = max(mMaxChar, coverage.length()); + lastChar.push_back(coverage.nextSetBit(0)); + + const std::unordered_set& supportedAxes = family->supportedAxes(); + mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); + } + nTypefaces = mFamilies.size(); + LOG_ALWAYS_FATAL_IF(nTypefaces == 0, + "Font collection must have at least one valid typeface"); + LOG_ALWAYS_FATAL_IF(nTypefaces > 254, + "Font collection may only have up to 254 font families."); + size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage; + // TODO: Use variation selector map for mRanges construction. + // A font can have a glyph for a base code point and variation selector pair + // but no glyph for the base code point without variation selector. The family + // won't be listed in the range in this case. 
+ for (size_t i = 0; i < nPages; i++) { + Range dummy; + mRanges.push_back(dummy); + Range* range = &mRanges.back(); #ifdef VERBOSE_DEBUG - ALOGD("i=%zd: range start = %zd\n", i, offset); + ALOGD("i=%zd: range start = %zd\n", i, offset); #endif - range->start = mFamilyVec.size(); - for (size_t j = 0; j < nTypefaces; j++) { - if (lastChar[j] < (i + 1) << kLogCharsPerPage) { - const std::shared_ptr& family = mFamilies[j]; - mFamilyVec.push_back(static_cast(j)); - uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage); + range->start = mFamilyVec.size(); + for (size_t j = 0; j < nTypefaces; j++) { + if (lastChar[j] < (i + 1) << kLogCharsPerPage) { + const std::shared_ptr& family = mFamilies[j]; + mFamilyVec.push_back(static_cast(j)); + uint32_t nextChar = + family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage); #ifdef VERBOSE_DEBUG - ALOGD("nextChar = %d (j = %zd)\n", nextChar, j); + ALOGD("nextChar = %d (j = %zd)\n", nextChar, j); #endif - lastChar[j] = nextChar; - } - } - range->end = mFamilyVec.size(); + lastChar[j] = nextChar; + } } - // See the comment in Range for more details. - LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF, - "Exceeded the maximum indexable cmap coverage."); + range->end = mFamilyVec.size(); + } + // See the comment in Range for more details. + LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF, + "Exceeded the maximum indexable cmap coverage."); } // Special scores for the font fallback. @@ -122,9 +126,11 @@ const uint32_t kFirstFontScore = UINT32_MAX; // Calculates a font score. // The score of the font family is based on three subscores. -// - Coverage Score: How well the font family covers the given character or variation sequence. +// - Coverage Score: How well the font family covers the given character or +// variation sequence. // - Language Score: How well the font family is appropriate for the language. -// - Variant Score: Whether the font family matches the variant. 
Note that this variant is not the +// - Variant Score: Whether the font family matches the variant. Note that this +// variant is not the // one in BCP47. This is our own font variant (e.g., elegant, compact). // // Then, there is a priority for these three subscores as follow: @@ -132,168 +138,205 @@ const uint32_t kFirstFontScore = UINT32_MAX; // The returned score reflects this priority order. // // Note that there are two special scores. -// - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its +// - kUnsupportedFontScore: When the font family doesn't support the variation +// sequence or even its // base character. -// - kFirstFontScore: When the font is the first font family in the collection and it supports the +// - kFirstFontScore: When the font is the first font family in the collection +// and it supports the // given character or variation sequence. -uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId, - const std::shared_ptr& fontFamily) const { - - const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily); - if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) { - // No need to calculate other scores. - return coverageScore; - } - - const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily); - const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily); - - // Subscores are encoded into 31 bits representation to meet the subscore priority. - // The highest 2 bits are for coverage score, then following 28 bits are for language score, - // then the last 1 bit is for variant score. 
- return coverageScore << 29 | languageScore << 1 | variantScore; +uint32_t FontCollection::calcFamilyScore( + uint32_t ch, + uint32_t vs, + int variant, + uint32_t langListId, + const std::shared_ptr& fontFamily) const { + const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily); + if (coverageScore == kFirstFontScore || + coverageScore == kUnsupportedFontScore) { + // No need to calculate other scores. + return coverageScore; + } + + const uint32_t languageScore = + calcLanguageMatchingScore(langListId, *fontFamily); + const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily); + + // Subscores are encoded into 31 bits representation to meet the subscore + // priority. The highest 2 bits are for coverage score, then following 28 bits + // are for language score, then the last 1 bit is for variant score. + return coverageScore << 29 | languageScore << 1 | variantScore; } // Calculates a font score based on variation sequence coverage. -// - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base +// - Returns kUnsupportedFontScore if the font doesn't support the variation +// sequence or its base // character. -// - Returns kFirstFontScore if the font family is the first font family in the collection and it +// - Returns kFirstFontScore if the font family is the first font family in the +// collection and it // supports the given character or variation sequence. // - Returns 3 if the font family supports the variation sequence. -// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font. -// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font. -// - Returns 1 if the variation selector is not specified or if the font family only supports the +// - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font +// is an emoji font. 
+// - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font +// is not an emoji font. +// - Returns 1 if the variation selector is not specified or if the font family +// only supports the // variation sequence's base character. -uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, - const std::shared_ptr& fontFamily) const { - const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs); - if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) { - // The font doesn't support either variation sequence or even the base character. - return kUnsupportedFontScore; - } - - if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) { - // If the first font family supports the given character or variation sequence, always use - // it. - return kFirstFontScore; - } - - if (vs == 0) { - return 1; +uint32_t FontCollection::calcCoverageScore( + uint32_t ch, + uint32_t vs, + const std::shared_ptr& fontFamily) const { + const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs); + if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) { + // The font doesn't support either variation sequence or even the base + // character. + return kUnsupportedFontScore; + } + + if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) { + // If the first font family supports the given character or variation + // sequence, always use it. + return kFirstFontScore; + } + + if (vs == 0) { + return 1; + } + + if (hasVSGlyph) { + return 3; + } + + if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) { + const FontLanguages& langs = + FontLanguageListCache::getById(fontFamily->langId()); + bool hasEmojiFlag = false; + for (size_t i = 0; i < langs.size(); ++i) { + if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) { + hasEmojiFlag = true; + break; + } } - if (hasVSGlyph) { - return 3; + if (vs == EMOJI_STYLE_VS) { + return hasEmojiFlag ? 2 : 1; + } else { // vs == TEXT_STYLE_VS + return hasEmojiFlag ? 
1 : 2; } - - if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) { - const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId()); - bool hasEmojiFlag = false; - for (size_t i = 0; i < langs.size(); ++i) { - if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) { - hasEmojiFlag = true; - break; - } - } - - if (vs == EMOJI_STYLE_VS) { - return hasEmojiFlag ? 2 : 1; - } else { // vs == TEXT_STYLE_VS - return hasEmojiFlag ? 1 : 2; - } - } - return 1; + } + return 1; } -// Calculate font scores based on the script matching, subtag matching and primary langauge matching. +// Calculate font scores based on the script matching, subtag matching and +// primary langauge matching. // -// 1. If only the font's language matches or there is no matches between requested font and +// 1. If only the font's language matches or there is no matches between +// requested font and // supported font, then the font obtains a score of 0. -// 2. Without a match in language, considering subtag may change font's EmojiStyle over script, -// a match in subtag gets a score of 2 and a match in scripts gains a score of 1. -// 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while +// 2. Without a match in language, considering subtag may change font's +// EmojiStyle over script, +// a match in subtag gets a score of 2 and a match in scripts gains a score +// of 1. +// 3. Regarding to two elements matchings, language-and-subtag matching has a +// score of 4, while // language-and-script obtains a socre of 3 with the same reason above. // -// If two languages in the requested list have the same language score, the font matching with -// higher priority language gets a higher score. For example, in the case the user requested -// language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score -// than the font of "en-Latn". 
+// If two languages in the requested list have the same language score, the font +// matching with higher priority language gets a higher score. For example, in +// the case the user requested language list is "ja-Jpan,en-Latn". The score of +// for the font of "ja-Jpan" gets a higher score than the font of "en-Latn". // -// To achieve score calculation with priorities, the language score is determined as follows: -// LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1) -// Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's -// matching score. The possible values of s(i) are 0, 1, 2, 3 and 4. +// To achieve score calculation with priorities, the language score is +// determined as follows: +// LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + +// s(m - 1) +// Here, m is the maximum number of languages to be compared, and s(i) is the +// i-th language's matching score. The possible values of s(i) are 0, 1, 2, 3 +// and 4. 
uint32_t FontCollection::calcLanguageMatchingScore( - uint32_t userLangListId, const FontFamily& fontFamily) { - const FontLanguages& langList = FontLanguageListCache::getById(userLangListId); - const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId()); - - const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT); - uint32_t score = 0; - for (size_t i = 0; i < maxCompareNum; ++i) { - score = score * 5u + langList[i].calcScoreFor(fontLanguages); - } - return score; + uint32_t userLangListId, + const FontFamily& fontFamily) { + const FontLanguages& langList = + FontLanguageListCache::getById(userLangListId); + const FontLanguages& fontLanguages = + FontLanguageListCache::getById(fontFamily.langId()); + + const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT); + uint32_t score = 0; + for (size_t i = 0; i < maxCompareNum; ++i) { + score = score * 5u + langList[i].calcScoreFor(fontLanguages); + } + return score; } // Calculates a font score based on variant ("compact" or "elegant") matching. -// - Returns 1 if the font doesn't have variant or the variant matches with the text style. -// - No score if the font has a variant but it doesn't match with the text style. -uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) { - return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0; +// - Returns 1 if the font doesn't have variant or the variant matches with the +// text style. +// - No score if the font has a variant but it doesn't match with the text +// style. +uint32_t FontCollection::calcVariantMatchingScore( + int variant, + const FontFamily& fontFamily) { + return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0; } // Implement heuristic for choosing best-match font. Here are the rules: // 1. If first font in the collection has the character, it wins. -// 2. Calculate a score for the font family. 
See comments in calcFamilyScore for the detail. +// 2. Calculate a score for the font family. See comments in calcFamilyScore for +// the detail. // 3. Highest score wins, with ties resolved to the first font. // This method never returns nullptr. -const std::shared_ptr& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs, - uint32_t langListId, int variant) const { - if (ch >= mMaxChar) { - return mFamilies[0]; - } +const std::shared_ptr& FontCollection::getFamilyForChar( + uint32_t ch, + uint32_t vs, + uint32_t langListId, + int variant) const { + if (ch >= mMaxChar) { + return mFamilies[0]; + } - Range range = mRanges[ch >> kLogCharsPerPage]; + Range range = mRanges[ch >> kLogCharsPerPage]; - if (vs != 0) { - range = { 0, static_cast(mFamilies.size()) }; - } + if (vs != 0) { + range = {0, static_cast(mFamilies.size())}; + } #ifdef VERBOSE_DEBUG - ALOGD("querying range %zd:%zd\n", range.start, range.end); + ALOGD("querying range %zd:%zd\n", range.start, range.end); #endif - int bestFamilyIndex = -1; - uint32_t bestScore = kUnsupportedFontScore; - for (size_t i = range.start; i < range.end; i++) { - const std::shared_ptr& family = - vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i]; - const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family); - if (score == kFirstFontScore) { - // If the first font family supports the given character or variation sequence, always - // use it. - return family; - } - if (score > bestScore) { - bestScore = score; - bestFamilyIndex = i; - } + int bestFamilyIndex = -1; + uint32_t bestScore = kUnsupportedFontScore; + for (size_t i = range.start; i < range.end; i++) { + const std::shared_ptr& family = + vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i]; + const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family); + if (score == kFirstFontScore) { + // If the first font family supports the given character or variation + // sequence, always use it. 
+ return family; } - if (bestFamilyIndex == -1) { - UErrorCode errorCode = U_ZERO_ERROR; - const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); - if (U_SUCCESS(errorCode)) { - UChar decomposed[4]; - int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode); - if (U_SUCCESS(errorCode) && len > 0) { - int off = 0; - U16_NEXT_UNSAFE(decomposed, off, ch); - return getFamilyForChar(ch, vs, langListId, variant); - } - } - return mFamilies[0]; + if (score > bestScore) { + bestScore = score; + bestFamilyIndex = i; + } + } + if (bestFamilyIndex == -1) { + UErrorCode errorCode = U_ZERO_ERROR; + const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode); + if (U_SUCCESS(errorCode)) { + UChar decomposed[4]; + int len = + unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode); + if (U_SUCCESS(errorCode) && len > 0) { + int off = 0; + U16_NEXT_UNSAFE(decomposed, off, ch); + return getFamilyForChar(ch, vs, langListId, variant); + } } - return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex]; + return mFamilies[0]; + } + return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] + : mFamilies[bestFamilyIndex]; } const uint32_t NBSP = 0x00A0; @@ -310,162 +353,176 @@ const uint32_t STAFF_OF_AESCULAPIUS = 0x2695; // Characters where we want to continue using existing font run instead of // recomputing the best match in the fallback list. 
static const uint32_t stickyWhitelist[] = { - '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ, - HYPHEN, NB_HYPHEN, NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS }; + '!', ',', '-', '.', + ':', ';', '?', NBSP, + ZWJ, ZWNJ, HYPHEN, NB_HYPHEN, + NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS}; static bool isStickyWhitelisted(uint32_t c) { - for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); i++) { - if (stickyWhitelist[i] == c) return true; - } - return false; + for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]); + i++) { + if (stickyWhitelist[i] == c) + return true; + } + return false; } static bool isVariationSelector(uint32_t c) { - return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF); + return (0xFE00 <= c && c <= 0xFE0F) || (0xE0100 <= c && c <= 0xE01EF); } bool FontCollection::hasVariationSelector(uint32_t baseCodepoint, - uint32_t variationSelector) const { - if (!isVariationSelector(variationSelector)) { - return false; - } - if (baseCodepoint >= mMaxChar) { - return false; - } + uint32_t variationSelector) const { + if (!isVariationSelector(variationSelector)) { + return false; + } + if (baseCodepoint >= mMaxChar) { + return false; + } - std::lock_guard _l(gMinikinLock); + std::lock_guard _l(gMinikinLock); - // Currently mRanges can not be used here since it isn't aware of the variation sequence. - for (size_t i = 0; i < mVSFamilyVec.size(); i++) { - if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) { - return true; - } + // Currently mRanges can not be used here since it isn't aware of the + // variation sequence. + for (size_t i = 0; i < mVSFamilyVec.size(); i++) { + if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) { + return true; } - - // Even if there is no cmap format 14 subtable entry for the given sequence, should return true - // for case since we have special fallback rule for the - // sequence. 
Note that we don't need to restrict this to already standardized variation - // sequences, since Unicode is adding variation sequences more frequently now and may even move - // towards allowing text and emoji variation selectors on any character. - if (variationSelector == TEXT_STYLE_VS) { - for (size_t i = 0; i < mFamilies.size(); ++i) { - if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) { - return true; - } - } + } + + // Even if there is no cmap format 14 subtable entry for the given sequence, + // should return true for case since we + // have special fallback rule for the sequence. Note that we don't need to + // restrict this to already standardized variation sequences, since Unicode is + // adding variation sequences more frequently now and may even move towards + // allowing text and emoji variation selectors on any character. + if (variationSelector == TEXT_STYLE_VS) { + for (size_t i = 0; i < mFamilies.size(); ++i) { + if (!mFamilies[i]->isColorEmojiFamily() && + mFamilies[i]->hasGlyph(baseCodepoint, 0)) { + return true; + } } + } - return false; + return false; } -void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style, - vector* result) const { - const uint32_t langListId = style.getLanguageListId(); - int variant = style.getVariant(); - const FontFamily* lastFamily = nullptr; - Run* run = NULL; - - if (string_size == 0) { - return; +void FontCollection::itemize(const uint16_t* string, + size_t string_size, + FontStyle style, + vector* result) const { + const uint32_t langListId = style.getLanguageListId(); + int variant = style.getVariant(); + const FontFamily* lastFamily = nullptr; + Run* run = NULL; + + if (string_size == 0) { + return; + } + + const uint32_t kEndOfString = 0xFFFFFFFF; + + uint32_t nextCh = 0; + uint32_t prevCh = 0; + size_t nextUtf16Pos = 0; + size_t readLength = 0; + U16_NEXT(string, readLength, string_size, nextCh); + + do { + const uint32_t ch = nextCh; + const 
size_t utf16Pos = nextUtf16Pos; + nextUtf16Pos = readLength; + if (readLength < string_size) { + U16_NEXT(string, readLength, string_size, nextCh); + } else { + nextCh = kEndOfString; } - const uint32_t kEndOfString = 0xFFFFFFFF; - - uint32_t nextCh = 0; - uint32_t prevCh = 0; - size_t nextUtf16Pos = 0; - size_t readLength = 0; - U16_NEXT(string, readLength, string_size, nextCh); - - do { - const uint32_t ch = nextCh; - const size_t utf16Pos = nextUtf16Pos; - nextUtf16Pos = readLength; - if (readLength < string_size) { - U16_NEXT(string, readLength, string_size, nextCh); - } else { - nextCh = kEndOfString; - } - - bool shouldContinueRun = false; - if (lastFamily != nullptr) { - if (isStickyWhitelisted(ch)) { - // Continue using existing font as long as it has coverage and is whitelisted - shouldContinueRun = lastFamily->getCoverage().get(ch); - } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) { - // Always continue if the character is the soft hyphen or a variation selector. - shouldContinueRun = true; - } - } + bool shouldContinueRun = false; + if (lastFamily != nullptr) { + if (isStickyWhitelisted(ch)) { + // Continue using existing font as long as it has coverage and is + // whitelisted + shouldContinueRun = lastFamily->getCoverage().get(ch); + } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) { + // Always continue if the character is the soft hyphen or a variation + // selector. + shouldContinueRun = true; + } + } - if (!shouldContinueRun) { - const std::shared_ptr& family = getFamilyForChar( - ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant); - if (utf16Pos == 0 || family.get() != lastFamily) { - size_t start = utf16Pos; - // Workaround for combining marks and emoji modifiers until we implement - // per-cluster font selection: if a combining mark or an emoji modifier is found in - // a different font that also supports the previous character, attach previous - // character to the new run. 
U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is - // handled properly by this since it's a combining mark too. - if (utf16Pos != 0 && - ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 || - (isEmojiModifier(ch) && isEmojiBase(prevCh))) && - family != nullptr && family->getCoverage().get(prevCh)) { - const size_t prevChLength = U16_LENGTH(prevCh); - run->end -= prevChLength; - if (run->start == run->end) { - result->pop_back(); - } - start -= prevChLength; - } - result->push_back({family->getClosestMatch(style), static_cast(start), 0}); - run = &result->back(); - lastFamily = family.get(); - } + if (!shouldContinueRun) { + const std::shared_ptr& family = getFamilyForChar( + ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant); + if (utf16Pos == 0 || family.get() != lastFamily) { + size_t start = utf16Pos; + // Workaround for combining marks and emoji modifiers until we implement + // per-cluster font selection: if a combining mark or an emoji modifier + // is found in a different font that also supports the previous + // character, attach previous character to the new run. U+20E3 COMBINING + // ENCLOSING KEYCAP, used in emoji, is handled properly by this since + // it's a combining mark too. 
+ if (utf16Pos != 0 && + ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 || + (isEmojiModifier(ch) && isEmojiBase(prevCh))) && + family != nullptr && family->getCoverage().get(prevCh)) { + const size_t prevChLength = U16_LENGTH(prevCh); + run->end -= prevChLength; + if (run->start == run->end) { + result->pop_back(); + } + start -= prevChLength; } - prevCh = ch; - run->end = nextUtf16Pos; // exclusive - } while (nextCh != kEndOfString); + result->push_back( + {family->getClosestMatch(style), static_cast(start), 0}); + run = &result->back(); + lastFamily = family.get(); + } + } + prevCh = ch; + run->end = nextUtf16Pos; // exclusive + } while (nextCh != kEndOfString); } FakedFont FontCollection::baseFontFaked(FontStyle style) { - return mFamilies[0]->getClosestMatch(style); + return mFamilies[0]->getClosestMatch(style); } std::shared_ptr FontCollection::createCollectionWithVariation( - const std::vector& variations) { - if (variations.empty() || mSupportedAxes.empty()) { - return nullptr; + const std::vector& variations) { + if (variations.empty() || mSupportedAxes.empty()) { + return nullptr; + } + + bool hasSupportedAxis = false; + for (const FontVariation& variation : variations) { + if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { + hasSupportedAxis = true; + break; } - - bool hasSupportedAxis = false; - for (const FontVariation& variation : variations) { - if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { - hasSupportedAxis = true; - break; - } - } - if (!hasSupportedAxis) { - // None of variation axes are supported by this font collection. - return nullptr; - } - - std::vector > families; - for (const std::shared_ptr& family : mFamilies) { - std::shared_ptr newFamily = family->createFamilyWithVariation(variations); - if (newFamily) { - families.push_back(newFamily); - } else { - families.push_back(family); - } + } + if (!hasSupportedAxis) { + // None of variation axes are supported by this font collection. 
+ return nullptr; + } + + std::vector> families; + for (const std::shared_ptr& family : mFamilies) { + std::shared_ptr newFamily = + family->createFamilyWithVariation(variations); + if (newFamily) { + families.push_back(newFamily); + } else { + families.push_back(family); } + } - return std::shared_ptr(new FontCollection(families)); + return std::shared_ptr(new FontCollection(families)); } uint32_t FontCollection::getId() const { - return mId; + return mId; } } // namespace minikin diff --git a/third_party/txt/src/minikin/FontCollection.h b/third_party/txt/src/minikin/FontCollection.h index 138ba45c491ce578698705288f54a90c3808380c..0b292f918fdb0029b98bf06fc0b3c58490cda743 100644 --- a/third_party/txt/src/minikin/FontCollection.h +++ b/third_party/txt/src/minikin/FontCollection.h @@ -21,102 +21,116 @@ #include #include -#include #include +#include namespace minikin { class FontCollection { -public: - explicit FontCollection(const std::vector>& typefaces); - explicit FontCollection(std::shared_ptr&& typeface); - - struct Run { - FakedFont fakedFont; - int start; - int end; - }; - - void itemize(const uint16_t *string, size_t string_length, FontStyle style, - std::vector* result) const; - - // Returns true if there is a glyph for the code point and variation selector pair. - // Returns false if no fonts have a glyph for the code point and variation - // selector pair, or invalid variation selector is passed. - bool hasVariationSelector(uint32_t baseCodepoint, uint32_t variationSelector) const; - - // Get base font with fakery information (fake bold could affect metrics) - FakedFont baseFontFaked(FontStyle style); - - // Creates new FontCollection based on this collection while applying font variations. Returns - // nullptr if none of variations apply to this collection. 
- std::shared_ptr - createCollectionWithVariation(const std::vector& variations); - - const std::unordered_set& getSupportedTags() const { - return mSupportedAxes; - } - - uint32_t getId() const; - -private: - static const int kLogCharsPerPage = 8; - static const int kPageMask = (1 << kLogCharsPerPage) - 1; - - // mFamilyVec holds the indices of the mFamilies and mRanges holds the range of indices of - // mFamilyVec. The maximum number of pages is 0x10FF (U+10FFFF >> 8). The maximum number of - // the fonts is 0xFF. Thus, technically the maximum length of mFamilyVec is 0x10EE01 - // (0x10FF * 0xFF). However, in practice, 16-bit integers are enough since most fonts supports - // only limited range of code points. - struct Range { - uint16_t start; - uint16_t end; - }; - - // Initialize the FontCollection. - void init(const std::vector>& typefaces); - - const std::shared_ptr& getFamilyForChar(uint32_t ch, uint32_t vs, - uint32_t langListId, int variant) const; - - uint32_t calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId, - const std::shared_ptr& fontFamily) const; - - uint32_t calcCoverageScore(uint32_t ch, uint32_t vs, - const std::shared_ptr& fontFamily) const; - - static uint32_t calcLanguageMatchingScore(uint32_t userLangListId, - const FontFamily& fontFamily); - - static uint32_t calcVariantMatchingScore(int variant, const FontFamily& fontFamily); - - // static for allocating unique id's - static uint32_t sNextId; - - // unique id for this font collection (suitable for cache key) - uint32_t mId; - - // Highest UTF-32 code point that can be mapped - uint32_t mMaxChar; - - // This vector has pointers to the all font family instances in this collection. - // This vector can't be empty. - std::vector> mFamilies; - - // Following two vectors are pre-calculated tables for resolving coverage faster. 
- // For example, to iterate over all fonts which support Unicode code point U+XXYYZZ, - // iterate font families index from mFamilyVec[mRanges[0xXXYY].start] to - // mFamilyVec[mRange[0xXXYY].end] instead of whole mFamilies. - // This vector contains indices into mFamilies. - // This vector can't be empty. - std::vector mRanges; - std::vector mFamilyVec; - - // This vector has pointers to the font family instances which have cmap 14 subtables. - std::vector> mVSFamilyVec; - - // Set of supported axes in this collection. - std::unordered_set mSupportedAxes; + public: + explicit FontCollection( + const std::vector>& typefaces); + explicit FontCollection(std::shared_ptr&& typeface); + + struct Run { + FakedFont fakedFont; + int start; + int end; + }; + + void itemize(const uint16_t* string, + size_t string_length, + FontStyle style, + std::vector* result) const; + + // Returns true if there is a glyph for the code point and variation selector + // pair. Returns false if no fonts have a glyph for the code point and + // variation selector pair, or invalid variation selector is passed. + bool hasVariationSelector(uint32_t baseCodepoint, + uint32_t variationSelector) const; + + // Get base font with fakery information (fake bold could affect metrics) + FakedFont baseFontFaked(FontStyle style); + + // Creates new FontCollection based on this collection while applying font + // variations. Returns nullptr if none of variations apply to this collection. + std::shared_ptr createCollectionWithVariation( + const std::vector& variations); + + const std::unordered_set& getSupportedTags() const { + return mSupportedAxes; + } + + uint32_t getId() const; + + private: + static const int kLogCharsPerPage = 8; + static const int kPageMask = (1 << kLogCharsPerPage) - 1; + + // mFamilyVec holds the indices of the mFamilies and mRanges holds the range + // of indices of mFamilyVec. The maximum number of pages is 0x10FF (U+10FFFF + // >> 8). The maximum number of the fonts is 0xFF. 
Thus, technically the + // maximum length of mFamilyVec is 0x10EE01 (0x10FF * 0xFF). However, in + // practice, 16-bit integers are enough since most fonts supports only limited + // range of code points. + struct Range { + uint16_t start; + uint16_t end; + }; + + // Initialize the FontCollection. + void init(const std::vector>& typefaces); + + const std::shared_ptr& getFamilyForChar(uint32_t ch, + uint32_t vs, + uint32_t langListId, + int variant) const; + + uint32_t calcFamilyScore(uint32_t ch, + uint32_t vs, + int variant, + uint32_t langListId, + const std::shared_ptr& fontFamily) const; + + uint32_t calcCoverageScore( + uint32_t ch, + uint32_t vs, + const std::shared_ptr& fontFamily) const; + + static uint32_t calcLanguageMatchingScore(uint32_t userLangListId, + const FontFamily& fontFamily); + + static uint32_t calcVariantMatchingScore(int variant, + const FontFamily& fontFamily); + + // static for allocating unique id's + static uint32_t sNextId; + + // unique id for this font collection (suitable for cache key) + uint32_t mId; + + // Highest UTF-32 code point that can be mapped + uint32_t mMaxChar; + + // This vector has pointers to the all font family instances in this + // collection. This vector can't be empty. + std::vector> mFamilies; + + // Following two vectors are pre-calculated tables for resolving coverage + // faster. For example, to iterate over all fonts which support Unicode code + // point U+XXYYZZ, iterate font families index from + // mFamilyVec[mRanges[0xXXYY].start] to mFamilyVec[mRange[0xXXYY].end] instead + // of whole mFamilies. This vector contains indices into mFamilies. This + // vector can't be empty. + std::vector mRanges; + std::vector mFamilyVec; + + // This vector has pointers to the font family instances which have cmap 14 + // subtables. + std::vector> mVSFamilyVec; + + // Set of supported axes in this collection. 
+ std::unordered_set mSupportedAxes; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/FontFamily.cpp b/third_party/txt/src/minikin/FontFamily.cpp index 1ea47fafcbb9b80252154396c312ad3fa64310ba..39cbab37874210b733e78c26b2daf39f6bfc8daa 100644 --- a/third_party/txt/src/minikin/FontFamily.cpp +++ b/third_party/txt/src/minikin/FontFamily.cpp @@ -23,224 +23,238 @@ #include #include -#include #include +#include +#include +#include +#include #include "FontLanguage.h" #include "FontLanguageListCache.h" #include "FontUtils.h" #include "HbFontCache.h" #include "MinikinInternal.h" -#include -#include -#include -#include using std::vector; namespace minikin { FontStyle::FontStyle(int variant, int weight, bool italic) - : FontStyle(FontLanguageListCache::kEmptyListId, variant, weight, italic) { -} + : FontStyle(FontLanguageListCache::kEmptyListId, variant, weight, italic) {} -FontStyle::FontStyle(uint32_t languageListId, int variant, int weight, bool italic) - : bits(pack(variant, weight, italic)), mLanguageListId(languageListId) { -} +FontStyle::FontStyle(uint32_t languageListId, + int variant, + int weight, + bool italic) + : bits(pack(variant, weight, italic)), mLanguageListId(languageListId) {} android::hash_t FontStyle::hash() const { - uint32_t hash = android::JenkinsHashMix(0, bits); - hash = android::JenkinsHashMix(hash, mLanguageListId); - return android::JenkinsHashWhiten(hash); + uint32_t hash = android::JenkinsHashMix(0, bits); + hash = android::JenkinsHashMix(hash, mLanguageListId); + return android::JenkinsHashWhiten(hash); } // static uint32_t FontStyle::registerLanguageList(const std::string& languages) { - std::lock_guard _l(gMinikinLock); - return FontLanguageListCache::getId(languages); + std::lock_guard _l(gMinikinLock); + return FontLanguageListCache::getId(languages); } // static uint32_t FontStyle::pack(int variant, int weight, bool italic) { - return (weight & kWeightMask) | (italic ? 
kItalicMask : 0) | (variant << kVariantShift); + return (weight & kWeightMask) | (italic ? kItalicMask : 0) | + (variant << kVariantShift); } Font::Font(const std::shared_ptr& typeface, FontStyle style) - : typeface(typeface), style(style) { -} + : typeface(typeface), style(style) {} Font::Font(std::shared_ptr&& typeface, FontStyle style) - : typeface(typeface), style(style) { -} + : typeface(typeface), style(style) {} std::unordered_set Font::getSupportedAxesLocked() const { - const uint32_t fvarTag = MinikinFont::MakeTag('f', 'v', 'a', 'r'); - HbBlob fvarTable(getFontTable(typeface.get(), fvarTag)); - if (fvarTable.size() == 0) { - return std::unordered_set(); - } - - std::unordered_set supportedAxes; - analyzeAxes(fvarTable.get(), fvarTable.size(), &supportedAxes); - return supportedAxes; + const uint32_t fvarTag = MinikinFont::MakeTag('f', 'v', 'a', 'r'); + HbBlob fvarTable(getFontTable(typeface.get(), fvarTag)); + if (fvarTable.size() == 0) { + return std::unordered_set(); + } + + std::unordered_set supportedAxes; + analyzeAxes(fvarTable.get(), fvarTable.size(), &supportedAxes); + return supportedAxes; } Font::Font(Font&& o) { - typeface = std::move(o.typeface); - style = o.style; - o.typeface = nullptr; + typeface = std::move(o.typeface); + style = o.style; + o.typeface = nullptr; } Font::Font(const Font& o) { - typeface = o.typeface; - style = o.style; + typeface = o.typeface; + style = o.style; } // static -FontFamily::FontFamily(std::vector&& fonts) : FontFamily(0 /* variant */, std::move(fonts)) { -} +FontFamily::FontFamily(std::vector&& fonts) + : FontFamily(0 /* variant */, std::move(fonts)) {} FontFamily::FontFamily(int variant, std::vector&& fonts) - : FontFamily(FontLanguageListCache::kEmptyListId, variant, std::move(fonts)) { -} + : FontFamily(FontLanguageListCache::kEmptyListId, + variant, + std::move(fonts)) {} FontFamily::FontFamily(uint32_t langId, int variant, std::vector&& fonts) - : mLangId(langId), mVariant(variant), 
mFonts(std::move(fonts)), mHasVSTable(false) { - computeCoverage(); + : mLangId(langId), + mVariant(variant), + mFonts(std::move(fonts)), + mHasVSTable(false) { + computeCoverage(); } -bool FontFamily::analyzeStyle(const std::shared_ptr& typeface, int* weight, - bool* italic) { - std::lock_guard _l(gMinikinLock); - const uint32_t os2Tag = MinikinFont::MakeTag('O', 'S', '/', '2'); - HbBlob os2Table(getFontTable(typeface.get(), os2Tag)); - if (os2Table.get() == nullptr) return false; - return ::minikin::analyzeStyle(os2Table.get(), os2Table.size(), weight, italic); +bool FontFamily::analyzeStyle(const std::shared_ptr& typeface, + int* weight, + bool* italic) { + std::lock_guard _l(gMinikinLock); + const uint32_t os2Tag = MinikinFont::MakeTag('O', 'S', '/', '2'); + HbBlob os2Table(getFontTable(typeface.get(), os2Tag)); + if (os2Table.get() == nullptr) + return false; + return ::minikin::analyzeStyle(os2Table.get(), os2Table.size(), weight, + italic); } // Compute a matching metric between two styles - 0 is an exact match static int computeMatch(FontStyle style1, FontStyle style2) { - if (style1 == style2) return 0; - int score = abs(style1.getWeight() - style2.getWeight()); - if (style1.getItalic() != style2.getItalic()) { - score += 2; - } - return score; + if (style1 == style2) + return 0; + int score = abs(style1.getWeight() - style2.getWeight()); + if (style1.getItalic() != style2.getItalic()) { + score += 2; + } + return score; } static FontFakery computeFakery(FontStyle wanted, FontStyle actual) { - // If desired weight is semibold or darker, and 2 or more grades - // higher than actual (for example, medium 500 -> bold 700), then - // select fake bold. 
- int wantedWeight = wanted.getWeight(); - bool isFakeBold = wantedWeight >= 6 && (wantedWeight - actual.getWeight()) >= 2; - bool isFakeItalic = wanted.getItalic() && !actual.getItalic(); - return FontFakery(isFakeBold, isFakeItalic); + // If desired weight is semibold or darker, and 2 or more grades + // higher than actual (for example, medium 500 -> bold 700), then + // select fake bold. + int wantedWeight = wanted.getWeight(); + bool isFakeBold = + wantedWeight >= 6 && (wantedWeight - actual.getWeight()) >= 2; + bool isFakeItalic = wanted.getItalic() && !actual.getItalic(); + return FontFakery(isFakeBold, isFakeItalic); } FakedFont FontFamily::getClosestMatch(FontStyle style) const { - const Font* bestFont = nullptr; - int bestMatch = 0; - for (size_t i = 0; i < mFonts.size(); i++) { - const Font& font = mFonts[i]; - int match = computeMatch(font.style, style); - if (i == 0 || match < bestMatch) { - bestFont = &font; - bestMatch = match; - } + const Font* bestFont = nullptr; + int bestMatch = 0; + for (size_t i = 0; i < mFonts.size(); i++) { + const Font& font = mFonts[i]; + int match = computeMatch(font.style, style); + if (i == 0 || match < bestMatch) { + bestFont = &font; + bestMatch = match; } - if (bestFont != nullptr) { - return FakedFont{ bestFont->typeface.get(), computeFakery(style, bestFont->style) }; - } - return FakedFont{ nullptr, FontFakery() }; + } + if (bestFont != nullptr) { + return FakedFont{bestFont->typeface.get(), + computeFakery(style, bestFont->style)}; + } + return FakedFont{nullptr, FontFakery()}; } bool FontFamily::isColorEmojiFamily() const { - const FontLanguages& languageList = FontLanguageListCache::getById(mLangId); - for (size_t i = 0; i < languageList.size(); ++i) { - if (languageList[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) { - return true; - } + const FontLanguages& languageList = FontLanguageListCache::getById(mLangId); + for (size_t i = 0; i < languageList.size(); ++i) { + if (languageList[i].getEmojiStyle() == 
FontLanguage::EMSTYLE_EMOJI) { + return true; } - return false; + } + return false; } void FontFamily::computeCoverage() { - std::lock_guard _l(gMinikinLock); - const FontStyle defaultStyle; - const MinikinFont* typeface = getClosestMatch(defaultStyle).font; - const uint32_t cmapTag = MinikinFont::MakeTag('c', 'm', 'a', 'p'); - HbBlob cmapTable(getFontTable(typeface, cmapTag)); - if (cmapTable.get() == nullptr) { - ALOGE("Could not get cmap table size!\n"); - return; - } - mCoverage = CmapCoverage::getCoverage(cmapTable.get(), cmapTable.size(), &mHasVSTable); - - for (size_t i = 0; i < mFonts.size(); ++i) { - std::unordered_set supportedAxes = mFonts[i].getSupportedAxesLocked(); - mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); - } + std::lock_guard _l(gMinikinLock); + const FontStyle defaultStyle; + const MinikinFont* typeface = getClosestMatch(defaultStyle).font; + const uint32_t cmapTag = MinikinFont::MakeTag('c', 'm', 'a', 'p'); + HbBlob cmapTable(getFontTable(typeface, cmapTag)); + if (cmapTable.get() == nullptr) { + ALOGE("Could not get cmap table size!\n"); + return; + } + mCoverage = CmapCoverage::getCoverage(cmapTable.get(), cmapTable.size(), + &mHasVSTable); + + for (size_t i = 0; i < mFonts.size(); ++i) { + std::unordered_set supportedAxes = + mFonts[i].getSupportedAxesLocked(); + mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end()); + } } -bool FontFamily::hasGlyph(uint32_t codepoint, uint32_t variationSelector) const { - assertMinikinLocked(); - if (variationSelector != 0 && !mHasVSTable) { - // Early exit if the variation selector is specified but the font doesn't have a cmap format - // 14 subtable. 
- return false; - } - - const FontStyle defaultStyle; - hb_font_t* font = getHbFontLocked(getClosestMatch(defaultStyle).font); - uint32_t unusedGlyph; - bool result = hb_font_get_glyph(font, codepoint, variationSelector, &unusedGlyph); - hb_font_destroy(font); - return result; +bool FontFamily::hasGlyph(uint32_t codepoint, + uint32_t variationSelector) const { + assertMinikinLocked(); + if (variationSelector != 0 && !mHasVSTable) { + // Early exit if the variation selector is specified but the font doesn't + // have a cmap format 14 subtable. + return false; + } + + const FontStyle defaultStyle; + hb_font_t* font = getHbFontLocked(getClosestMatch(defaultStyle).font); + uint32_t unusedGlyph; + bool result = + hb_font_get_glyph(font, codepoint, variationSelector, &unusedGlyph); + hb_font_destroy(font); + return result; } std::shared_ptr FontFamily::createFamilyWithVariation( - const std::vector& variations) const { - if (variations.empty() || mSupportedAxes.empty()) { - return nullptr; + const std::vector& variations) const { + if (variations.empty() || mSupportedAxes.empty()) { + return nullptr; + } + + bool hasSupportedAxis = false; + for (const FontVariation& variation : variations) { + if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { + hasSupportedAxis = true; + break; } - - bool hasSupportedAxis = false; - for (const FontVariation& variation : variations) { - if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) { - hasSupportedAxis = true; - break; + } + if (!hasSupportedAxis) { + // None of variation axes are suppored by this family. 
+ return nullptr; + } + + std::vector fonts; + for (const Font& font : mFonts) { + bool supportedVariations = false; + std::lock_guard _l(gMinikinLock); + std::unordered_set supportedAxes = font.getSupportedAxesLocked(); + if (!supportedAxes.empty()) { + for (const FontVariation& variation : variations) { + if (supportedAxes.find(variation.axisTag) != supportedAxes.end()) { + supportedVariations = true; + break; } + } } - if (!hasSupportedAxis) { - // None of variation axes are suppored by this family. - return nullptr; + std::shared_ptr minikinFont; + if (supportedVariations) { + minikinFont = font.typeface->createFontWithVariation(variations); } - - std::vector fonts; - for (const Font& font : mFonts) { - bool supportedVariations = false; - std::lock_guard _l(gMinikinLock); - std::unordered_set supportedAxes = font.getSupportedAxesLocked(); - if (!supportedAxes.empty()) { - for (const FontVariation& variation : variations) { - if (supportedAxes.find(variation.axisTag) != supportedAxes.end()) { - supportedVariations = true; - break; - } - } - } - std::shared_ptr minikinFont; - if (supportedVariations) { - minikinFont = font.typeface->createFontWithVariation(variations); - } - if (minikinFont == nullptr) { - minikinFont = font.typeface; - } - fonts.push_back(Font(std::move(minikinFont), font.style)); + if (minikinFont == nullptr) { + minikinFont = font.typeface; } + fonts.push_back(Font(std::move(minikinFont), font.style)); + } - return std::shared_ptr(new FontFamily(mLangId, mVariant, std::move(fonts))); + return std::shared_ptr( + new FontFamily(mLangId, mVariant, std::move(fonts))); } } // namespace minikin diff --git a/third_party/txt/src/minikin/FontFamily.h b/third_party/txt/src/minikin/FontFamily.h index babed732f51564615bd4b69b3f967050c6fc7c12..aac7a0d62bd12ae5a344a553bd11e0d08fbfaf1c 100644 --- a/third_party/txt/src/minikin/FontFamily.h +++ b/third_party/txt/src/minikin/FontFamily.h @@ -36,142 +36,157 @@ class MinikinFont; // from a collection. 
The implementation is packed into two 32-bit words // so it can be efficiently copied, embedded in other objects, etc. class FontStyle { -public: - FontStyle() : FontStyle(0 /* variant */, 4 /* weight */, false /* italic */) {} - FontStyle(int weight, bool italic) : FontStyle(0 /* variant */, weight, italic) {} - FontStyle(uint32_t langListId) // NOLINT(implicit) - : FontStyle(langListId, 0 /* variant */, 4 /* weight */, false /* italic */) {} - - FontStyle(int variant, int weight, bool italic); - FontStyle(uint32_t langListId, int variant, int weight, bool italic); - - int getWeight() const { return bits & kWeightMask; } - bool getItalic() const { return (bits & kItalicMask) != 0; } - int getVariant() const { return (bits >> kVariantShift) & kVariantMask; } - uint32_t getLanguageListId() const { return mLanguageListId; } - - bool operator==(const FontStyle other) const { - return bits == other.bits && mLanguageListId == other.mLanguageListId; - } - - android::hash_t hash() const; - - // Looks up a language list from an internal cache and returns its ID. - // If the passed language list is not in the cache, registers it and returns newly assigned ID. 
- static uint32_t registerLanguageList(const std::string& languages); -private: - static const uint32_t kWeightMask = (1 << 4) - 1; - static const uint32_t kItalicMask = 1 << 4; - static const int kVariantShift = 5; - static const uint32_t kVariantMask = (1 << 2) - 1; - - static uint32_t pack(int variant, int weight, bool italic); - - uint32_t bits; - uint32_t mLanguageListId; + public: + FontStyle() + : FontStyle(0 /* variant */, 4 /* weight */, false /* italic */) {} + FontStyle(int weight, bool italic) + : FontStyle(0 /* variant */, weight, italic) {} + FontStyle(uint32_t langListId) // NOLINT(implicit) + : FontStyle(langListId, + 0 /* variant */, + 4 /* weight */, + false /* italic */) {} + + FontStyle(int variant, int weight, bool italic); + FontStyle(uint32_t langListId, int variant, int weight, bool italic); + + int getWeight() const { return bits & kWeightMask; } + bool getItalic() const { return (bits & kItalicMask) != 0; } + int getVariant() const { return (bits >> kVariantShift) & kVariantMask; } + uint32_t getLanguageListId() const { return mLanguageListId; } + + bool operator==(const FontStyle other) const { + return bits == other.bits && mLanguageListId == other.mLanguageListId; + } + + android::hash_t hash() const; + + // Looks up a language list from an internal cache and returns its ID. + // If the passed language list is not in the cache, registers it and returns + // newly assigned ID. 
+ static uint32_t registerLanguageList(const std::string& languages); + + private: + static const uint32_t kWeightMask = (1 << 4) - 1; + static const uint32_t kItalicMask = 1 << 4; + static const int kVariantShift = 5; + static const uint32_t kVariantMask = (1 << 2) - 1; + + static uint32_t pack(int variant, int weight, bool italic); + + uint32_t bits; + uint32_t mLanguageListId; }; enum FontVariant { - VARIANT_DEFAULT = 0, - VARIANT_COMPACT = 1, - VARIANT_ELEGANT = 2, + VARIANT_DEFAULT = 0, + VARIANT_COMPACT = 1, + VARIANT_ELEGANT = 2, }; -inline android::hash_t hash_type(const FontStyle &style) { - return style.hash(); +inline android::hash_t hash_type(const FontStyle& style) { + return style.hash(); } // attributes representing transforms (fake bold, fake italic) to match styles class FontFakery { -public: - FontFakery() : mFakeBold(false), mFakeItalic(false) { } - FontFakery(bool fakeBold, bool fakeItalic) : mFakeBold(fakeBold), mFakeItalic(fakeItalic) { } - // TODO: want to support graded fake bolding - bool isFakeBold() { return mFakeBold; } - bool isFakeItalic() { return mFakeItalic; } -private: - bool mFakeBold; - bool mFakeItalic; + public: + FontFakery() : mFakeBold(false), mFakeItalic(false) {} + FontFakery(bool fakeBold, bool fakeItalic) + : mFakeBold(fakeBold), mFakeItalic(fakeItalic) {} + // TODO: want to support graded fake bolding + bool isFakeBold() { return mFakeBold; } + bool isFakeItalic() { return mFakeItalic; } + + private: + bool mFakeBold; + bool mFakeItalic; }; struct FakedFont { - // ownership is the enclosing FontCollection - MinikinFont* font; - FontFakery fakery; + // ownership is the enclosing FontCollection + MinikinFont* font; + FontFakery fakery; }; typedef uint32_t AxisTag; struct Font { - Font(const std::shared_ptr& typeface, FontStyle style); - Font(std::shared_ptr&& typeface, FontStyle style); - Font(Font&& o); - Font(const Font& o); + Font(const std::shared_ptr& typeface, FontStyle style); + Font(std::shared_ptr&& typeface, 
FontStyle style); + Font(Font&& o); + Font(const Font& o); - std::shared_ptr typeface; - FontStyle style; + std::shared_ptr typeface; + FontStyle style; - std::unordered_set getSupportedAxesLocked() const; + std::unordered_set getSupportedAxesLocked() const; }; struct FontVariation { - FontVariation(AxisTag axisTag, float value) : axisTag(axisTag), value(value) {} - AxisTag axisTag; - float value; + FontVariation(AxisTag axisTag, float value) + : axisTag(axisTag), value(value) {} + AxisTag axisTag; + float value; }; class FontFamily { -public: - explicit FontFamily(std::vector&& fonts); - FontFamily(int variant, std::vector&& fonts); - FontFamily(uint32_t langId, int variant, std::vector&& fonts); - - // TODO: Good to expose FontUtil.h. - static bool analyzeStyle(const std::shared_ptr& typeface, int* weight, - bool* italic); - FakedFont getClosestMatch(FontStyle style) const; - - uint32_t langId() const { return mLangId; } - int variant() const { return mVariant; } - - // API's for enumerating the fonts in a family. These don't guarantee any particular order - size_t getNumFonts() const { return mFonts.size(); } - const std::shared_ptr& getFont(size_t index) const { - return mFonts[index].typeface; - } - FontStyle getStyle(size_t index) const { return mFonts[index].style; } - bool isColorEmojiFamily() const; - const std::unordered_set& supportedAxes() const { return mSupportedAxes; } - - // Get Unicode coverage. - const SparseBitSet& getCoverage() const { return mCoverage; } - - // Returns true if the font has a glyph for the code point and variation selector pair. - // Caller should acquire a lock before calling the method. - bool hasGlyph(uint32_t codepoint, uint32_t variationSelector) const; - - // Returns true if this font family has a variaion sequence table (cmap format 14 subtable). - bool hasVSTable() const { return mHasVSTable; } - - // Creates new FontFamily based on this family while applying font variations. 
Returns nullptr - // if none of variations apply to this family. - std::shared_ptr createFamilyWithVariation( - const std::vector& variations) const; - -private: - void computeCoverage(); - - uint32_t mLangId; - int mVariant; - std::vector mFonts; - std::unordered_set mSupportedAxes; - - SparseBitSet mCoverage; - bool mHasVSTable; - - // Forbid copying and assignment. - FontFamily(const FontFamily&) = delete; - void operator=(const FontFamily&) = delete; + public: + explicit FontFamily(std::vector&& fonts); + FontFamily(int variant, std::vector&& fonts); + FontFamily(uint32_t langId, int variant, std::vector&& fonts); + + // TODO: Good to expose FontUtil.h. + static bool analyzeStyle(const std::shared_ptr& typeface, + int* weight, + bool* italic); + FakedFont getClosestMatch(FontStyle style) const; + + uint32_t langId() const { return mLangId; } + int variant() const { return mVariant; } + + // API's for enumerating the fonts in a family. These don't guarantee any + // particular order + size_t getNumFonts() const { return mFonts.size(); } + const std::shared_ptr& getFont(size_t index) const { + return mFonts[index].typeface; + } + FontStyle getStyle(size_t index) const { return mFonts[index].style; } + bool isColorEmojiFamily() const; + const std::unordered_set& supportedAxes() const { + return mSupportedAxes; + } + + // Get Unicode coverage. + const SparseBitSet& getCoverage() const { return mCoverage; } + + // Returns true if the font has a glyph for the code point and variation + // selector pair. Caller should acquire a lock before calling the method. + bool hasGlyph(uint32_t codepoint, uint32_t variationSelector) const; + + // Returns true if this font family has a variaion sequence table (cmap format + // 14 subtable). + bool hasVSTable() const { return mHasVSTable; } + + // Creates new FontFamily based on this family while applying font variations. + // Returns nullptr if none of variations apply to this family. 
+ std::shared_ptr createFamilyWithVariation( + const std::vector& variations) const; + + private: + void computeCoverage(); + + uint32_t mLangId; + int mVariant; + std::vector mFonts; + std::unordered_set mSupportedAxes; + + SparseBitSet mCoverage; + bool mHasVSTable; + + // Forbid copying and assignment. + FontFamily(const FontFamily&) = delete; + void operator=(const FontFamily&) = delete; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/FontLanguage.cpp b/third_party/txt/src/minikin/FontLanguage.cpp index 0897c06ea5d667c7ae4ed0027ebd53bee2fdbe67..0576a8340d5ebe32bdc43b3aea397669fe564e30 100644 --- a/third_party/txt/src/minikin/FontLanguage.cpp +++ b/third_party/txt/src/minikin/FontLanguage.cpp @@ -18,327 +18,350 @@ #include "FontLanguage.h" -#include #include #include #include +#include namespace minikin { -#define SCRIPT_TAG(c1, c2, c3, c4) \ - (((uint32_t)(c1)) << 24 | ((uint32_t)(c2)) << 16 | ((uint32_t)(c3)) << 8 | \ - ((uint32_t)(c4))) +#define SCRIPT_TAG(c1, c2, c3, c4) \ + (((uint32_t)(c1)) << 24 | ((uint32_t)(c2)) << 16 | ((uint32_t)(c3)) << 8 | \ + ((uint32_t)(c4))) // Check if a language code supports emoji according to its subtag -static bool isEmojiSubtag(const char* buf, size_t bufLen, const char* subtag, size_t subtagLen) { - if (bufLen < subtagLen) { - return false; - } - if (strncmp(buf, subtag, subtagLen) != 0) { - return false; // no match between two strings - } - return (bufLen == subtagLen || buf[subtagLen] == '\0' || - buf[subtagLen] == '-' || buf[subtagLen] == '_'); +static bool isEmojiSubtag(const char* buf, + size_t bufLen, + const char* subtag, + size_t subtagLen) { + if (bufLen < subtagLen) { + return false; + } + if (strncmp(buf, subtag, subtagLen) != 0) { + return false; // no match between two strings + } + return (bufLen == subtagLen || buf[subtagLen] == '\0' || + buf[subtagLen] == '-' || buf[subtagLen] == '_'); } -// Pack the three letter code into 15 bits and stored to 16 bit integer. The highest bit is 0. 
-// For the region code, the letters must be all digits in three letter case, so the number of -// possible values are 10. For the language code, the letters must be all small alphabets, so the -// number of possible values are 26. Thus, 5 bits are sufficient for each case and we can pack the +// Pack the three letter code into 15 bits and stored to 16 bit integer. The +// highest bit is 0. For the region code, the letters must be all digits in +// three letter case, so the number of possible values are 10. For the language +// code, the letters must be all small alphabets, so the number of possible +// values are 26. Thus, 5 bits are sufficient for each case and we can pack the // three letter language code or region code to 15 bits. // // In case of two letter code, use fullbit(0x1f) for the first letter instead. -static uint16_t packLanguageOrRegion(const char* c, size_t length, uint8_t twoLetterBase, - uint8_t threeLetterBase) { - if (length == 2) { - return 0x7c00u | // 0x1fu << 10 - (uint16_t)(c[0] - twoLetterBase) << 5 | - (uint16_t)(c[1] - twoLetterBase); - } else { - return ((uint16_t)(c[0] - threeLetterBase) << 10) | - (uint16_t)(c[1] - threeLetterBase) << 5 | - (uint16_t)(c[2] - threeLetterBase); - } +static uint16_t packLanguageOrRegion(const char* c, + size_t length, + uint8_t twoLetterBase, + uint8_t threeLetterBase) { + if (length == 2) { + return 0x7c00u | // 0x1fu << 10 + (uint16_t)(c[0] - twoLetterBase) << 5 | + (uint16_t)(c[1] - twoLetterBase); + } else { + return ((uint16_t)(c[0] - threeLetterBase) << 10) | + (uint16_t)(c[1] - threeLetterBase) << 5 | + (uint16_t)(c[2] - threeLetterBase); + } } -static size_t unpackLanguageOrRegion(uint16_t in, char* out, uint8_t twoLetterBase, - uint8_t threeLetterBase) { - uint8_t first = (in >> 10) & 0x1f; - uint8_t second = (in >> 5) & 0x1f; - uint8_t third = in & 0x1f; - - if (first == 0x1f) { - out[0] = second + twoLetterBase; - out[1] = third + twoLetterBase; - return 2; - } else { - out[0] = first + 
threeLetterBase; - out[1] = second + threeLetterBase; - out[2] = third + threeLetterBase; - return 3; - } +static size_t unpackLanguageOrRegion(uint16_t in, + char* out, + uint8_t twoLetterBase, + uint8_t threeLetterBase) { + uint8_t first = (in >> 10) & 0x1f; + uint8_t second = (in >> 5) & 0x1f; + uint8_t third = in & 0x1f; + + if (first == 0x1f) { + out[0] = second + twoLetterBase; + out[1] = third + twoLetterBase; + return 2; + } else { + out[0] = first + threeLetterBase; + out[1] = second + threeLetterBase; + out[2] = third + threeLetterBase; + return 3; + } } -// Find the next '-' or '_' index from startOffset position. If not found, returns bufferLength. -static size_t nextDelimiterIndex(const char* buffer, size_t bufferLength, size_t startOffset) { - for (size_t i = startOffset; i < bufferLength; ++i) { - if (buffer[i] == '-' || buffer[i] == '_') { - return i; - } +// Find the next '-' or '_' index from startOffset position. If not found, +// returns bufferLength. +static size_t nextDelimiterIndex(const char* buffer, + size_t bufferLength, + size_t startOffset) { + for (size_t i = startOffset; i < bufferLength; ++i) { + if (buffer[i] == '-' || buffer[i] == '_') { + return i; } - return bufferLength; + } + return bufferLength; } static inline bool isLowercase(char c) { - return 'a' <= c && c <= 'z'; + return 'a' <= c && c <= 'z'; } static inline bool isUppercase(char c) { - return 'A' <= c && c <= 'Z'; + return 'A' <= c && c <= 'Z'; } static inline bool isDigit(char c) { - return '0' <= c && c <= '9'; + return '0' <= c && c <= '9'; } // Returns true if the buffer is valid for language code. 
static inline bool isValidLanguageCode(const char* buffer, size_t length) { - if (length != 2 && length != 3) return false; - if (!isLowercase(buffer[0])) return false; - if (!isLowercase(buffer[1])) return false; - if (length == 3 && !isLowercase(buffer[2])) return false; - return true; + if (length != 2 && length != 3) + return false; + if (!isLowercase(buffer[0])) + return false; + if (!isLowercase(buffer[1])) + return false; + if (length == 3 && !isLowercase(buffer[2])) + return false; + return true; } -// Returns true if buffer is valid for script code. The length of buffer must be 4. +// Returns true if buffer is valid for script code. The length of buffer must +// be 4. static inline bool isValidScriptCode(const char* buffer) { - return isUppercase(buffer[0]) && isLowercase(buffer[1]) && isLowercase(buffer[2]) && - isLowercase(buffer[3]); + return isUppercase(buffer[0]) && isLowercase(buffer[1]) && + isLowercase(buffer[2]) && isLowercase(buffer[3]); } // Returns true if the buffer is valid for region code. static inline bool isValidRegionCode(const char* buffer, size_t length) { - return (length == 2 && isUppercase(buffer[0]) && isUppercase(buffer[1])) || - (length == 3 && isDigit(buffer[0]) && isDigit(buffer[1]) && isDigit(buffer[2])); + return (length == 2 && isUppercase(buffer[0]) && isUppercase(buffer[1])) || + (length == 3 && isDigit(buffer[0]) && isDigit(buffer[1]) && + isDigit(buffer[2])); } // Parse BCP 47 language identifier into internal structure FontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() { - size_t firstDelimiterPos = nextDelimiterIndex(buf, length, 0); - if (isValidLanguageCode(buf, firstDelimiterPos)) { - mLanguage = packLanguageOrRegion(buf, firstDelimiterPos, 'a', 'a'); - } else { - // We don't understand anything other than two-letter or three-letter - // language codes, so we skip parsing the rest of the string. 
- return; + size_t firstDelimiterPos = nextDelimiterIndex(buf, length, 0); + if (isValidLanguageCode(buf, firstDelimiterPos)) { + mLanguage = packLanguageOrRegion(buf, firstDelimiterPos, 'a', 'a'); + } else { + // We don't understand anything other than two-letter or three-letter + // language codes, so we skip parsing the rest of the string. + return; + } + + if (firstDelimiterPos == length) { + mHbLanguage = hb_language_from_string(getString().c_str(), -1); + return; // Language code only. + } + + size_t nextComponentStartPos = firstDelimiterPos + 1; + size_t nextDelimiterPos = + nextDelimiterIndex(buf, length, nextComponentStartPos); + size_t componentLength = nextDelimiterPos - nextComponentStartPos; + + if (componentLength == 4) { + // Possibly script code. + const char* p = buf + nextComponentStartPos; + if (isValidScriptCode(p)) { + mScript = SCRIPT_TAG(p[0], p[1], p[2], p[3]); + mSubScriptBits = scriptToSubScriptBits(mScript); } - if (firstDelimiterPos == length) { - mHbLanguage = hb_language_from_string(getString().c_str(), -1); - return; // Language code only. + if (nextDelimiterPos == length) { + mHbLanguage = hb_language_from_string(getString().c_str(), -1); + mEmojiStyle = resolveEmojiStyle(buf, length, mScript); + return; // No region code. } - size_t nextComponentStartPos = firstDelimiterPos + 1; - size_t nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos); - size_t componentLength = nextDelimiterPos - nextComponentStartPos; - - if (componentLength == 4) { - // Possibly script code. - const char* p = buf + nextComponentStartPos; - if (isValidScriptCode(p)) { - mScript = SCRIPT_TAG(p[0], p[1], p[2], p[3]); - mSubScriptBits = scriptToSubScriptBits(mScript); - } - - if (nextDelimiterPos == length) { - mHbLanguage = hb_language_from_string(getString().c_str(), -1); - mEmojiStyle = resolveEmojiStyle(buf, length, mScript); - return; // No region code. 
- } - - nextComponentStartPos = nextDelimiterPos + 1; - nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos); - componentLength = nextDelimiterPos - nextComponentStartPos; - } + nextComponentStartPos = nextDelimiterPos + 1; + nextDelimiterPos = nextDelimiterIndex(buf, length, nextComponentStartPos); + componentLength = nextDelimiterPos - nextComponentStartPos; + } - if (componentLength == 2 || componentLength == 3) { - // Possibly region code. - const char* p = buf + nextComponentStartPos; - if (isValidRegionCode(p, componentLength)) { - mRegion = packLanguageOrRegion(p, componentLength, 'A', '0'); - } + if (componentLength == 2 || componentLength == 3) { + // Possibly region code. + const char* p = buf + nextComponentStartPos; + if (isValidRegionCode(p, componentLength)) { + mRegion = packLanguageOrRegion(p, componentLength, 'A', '0'); } + } - mHbLanguage = hb_language_from_string(getString().c_str(), -1); - mEmojiStyle = resolveEmojiStyle(buf, length, mScript); + mHbLanguage = hb_language_from_string(getString().c_str(), -1); + mEmojiStyle = resolveEmojiStyle(buf, length, mScript); } // static -FontLanguage::EmojiStyle FontLanguage::resolveEmojiStyle(const char* buf, size_t length, - uint32_t script) { - // First, lookup emoji subtag. - // 10 is the length of "-u-em-text", which is the shortest emoji subtag, - // unnecessary comparison can be avoided if total length is smaller than 10. 
- const size_t kMinSubtagLength = 10; - if (length >= kMinSubtagLength) { - static const char kPrefix[] = "-u-em-"; - const char *pos = std::search(buf, buf + length, kPrefix, kPrefix + strlen(kPrefix)); - if (pos != buf + length) { // found - pos += strlen(kPrefix); - const size_t remainingLength = length - (pos - buf); - if (isEmojiSubtag(pos, remainingLength, "emoji", 5)){ - return EMSTYLE_EMOJI; - } else if (isEmojiSubtag(pos, remainingLength, "text", 4)){ - return EMSTYLE_TEXT; - } else if (isEmojiSubtag(pos, remainingLength, "default", 7)){ - return EMSTYLE_DEFAULT; - } - } - } - - // If no emoji subtag was provided, resolve the emoji style from script code. - if (script == SCRIPT_TAG('Z', 's', 'y', 'e')) { +FontLanguage::EmojiStyle FontLanguage::resolveEmojiStyle(const char* buf, + size_t length, + uint32_t script) { + // First, lookup emoji subtag. + // 10 is the length of "-u-em-text", which is the shortest emoji subtag, + // unnecessary comparison can be avoided if total length is smaller than 10. + const size_t kMinSubtagLength = 10; + if (length >= kMinSubtagLength) { + static const char kPrefix[] = "-u-em-"; + const char* pos = + std::search(buf, buf + length, kPrefix, kPrefix + strlen(kPrefix)); + if (pos != buf + length) { // found + pos += strlen(kPrefix); + const size_t remainingLength = length - (pos - buf); + if (isEmojiSubtag(pos, remainingLength, "emoji", 5)) { return EMSTYLE_EMOJI; - } else if (script == SCRIPT_TAG('Z', 's', 'y', 'm')) { + } else if (isEmojiSubtag(pos, remainingLength, "text", 4)) { return EMSTYLE_TEXT; + } else if (isEmojiSubtag(pos, remainingLength, "default", 7)) { + return EMSTYLE_DEFAULT; + } } + } - return EMSTYLE_EMPTY; + // If no emoji subtag was provided, resolve the emoji style from script code. 
+ if (script == SCRIPT_TAG('Z', 's', 'y', 'e')) { + return EMSTYLE_EMOJI; + } else if (script == SCRIPT_TAG('Z', 's', 'y', 'm')) { + return EMSTYLE_TEXT; + } + + return EMSTYLE_EMPTY; } -//static +// static uint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) { - uint8_t subScriptBits = 0u; - switch (script) { - case SCRIPT_TAG('B', 'o', 'p', 'o'): - subScriptBits = kBopomofoFlag; - break; - case SCRIPT_TAG('H', 'a', 'n', 'g'): - subScriptBits = kHangulFlag; - break; - case SCRIPT_TAG('H', 'a', 'n', 'b'): - // Bopomofo is almost exclusively used in Taiwan. - subScriptBits = kHanFlag | kBopomofoFlag; - break; - case SCRIPT_TAG('H', 'a', 'n', 'i'): - subScriptBits = kHanFlag; - break; - case SCRIPT_TAG('H', 'a', 'n', 's'): - subScriptBits = kHanFlag | kSimplifiedChineseFlag; - break; - case SCRIPT_TAG('H', 'a', 'n', 't'): - subScriptBits = kHanFlag | kTraditionalChineseFlag; - break; - case SCRIPT_TAG('H', 'i', 'r', 'a'): - subScriptBits = kHiraganaFlag; - break; - case SCRIPT_TAG('H', 'r', 'k', 't'): - subScriptBits = kKatakanaFlag | kHiraganaFlag; - break; - case SCRIPT_TAG('J', 'p', 'a', 'n'): - subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag; - break; - case SCRIPT_TAG('K', 'a', 'n', 'a'): - subScriptBits = kKatakanaFlag; - break; - case SCRIPT_TAG('K', 'o', 'r', 'e'): - subScriptBits = kHanFlag | kHangulFlag; - break; - } - return subScriptBits; + uint8_t subScriptBits = 0u; + switch (script) { + case SCRIPT_TAG('B', 'o', 'p', 'o'): + subScriptBits = kBopomofoFlag; + break; + case SCRIPT_TAG('H', 'a', 'n', 'g'): + subScriptBits = kHangulFlag; + break; + case SCRIPT_TAG('H', 'a', 'n', 'b'): + // Bopomofo is almost exclusively used in Taiwan. 
+ subScriptBits = kHanFlag | kBopomofoFlag; + break; + case SCRIPT_TAG('H', 'a', 'n', 'i'): + subScriptBits = kHanFlag; + break; + case SCRIPT_TAG('H', 'a', 'n', 's'): + subScriptBits = kHanFlag | kSimplifiedChineseFlag; + break; + case SCRIPT_TAG('H', 'a', 'n', 't'): + subScriptBits = kHanFlag | kTraditionalChineseFlag; + break; + case SCRIPT_TAG('H', 'i', 'r', 'a'): + subScriptBits = kHiraganaFlag; + break; + case SCRIPT_TAG('H', 'r', 'k', 't'): + subScriptBits = kKatakanaFlag | kHiraganaFlag; + break; + case SCRIPT_TAG('J', 'p', 'a', 'n'): + subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag; + break; + case SCRIPT_TAG('K', 'a', 'n', 'a'): + subScriptBits = kKatakanaFlag; + break; + case SCRIPT_TAG('K', 'o', 'r', 'e'): + subScriptBits = kHanFlag | kHangulFlag; + break; + } + return subScriptBits; } std::string FontLanguage::getString() const { - if (isUnsupported()) { - return "und"; - } - char buf[16]; - size_t i = unpackLanguageOrRegion(mLanguage, buf, 'a', 'a'); - if (mScript != 0) { - buf[i++] = '-'; - buf[i++] = (mScript >> 24) & 0xFFu; - buf[i++] = (mScript >> 16) & 0xFFu; - buf[i++] = (mScript >> 8) & 0xFFu; - buf[i++] = mScript & 0xFFu; - } - if (mRegion != INVALID_CODE) { - buf[i++] = '-'; - i += unpackLanguageOrRegion(mRegion, buf + i, 'A', '0'); - } - return std::string(buf, i); + if (isUnsupported()) { + return "und"; + } + char buf[16]; + size_t i = unpackLanguageOrRegion(mLanguage, buf, 'a', 'a'); + if (mScript != 0) { + buf[i++] = '-'; + buf[i++] = (mScript >> 24) & 0xFFu; + buf[i++] = (mScript >> 16) & 0xFFu; + buf[i++] = (mScript >> 8) & 0xFFu; + buf[i++] = mScript & 0xFFu; + } + if (mRegion != INVALID_CODE) { + buf[i++] = '-'; + i += unpackLanguageOrRegion(mRegion, buf + i, 'A', '0'); + } + return std::string(buf, i); } bool FontLanguage::isEqualScript(const FontLanguage& other) const { - return other.mScript == mScript; + return other.mScript == mScript; } // static bool FontLanguage::supportsScript(uint8_t providedBits, uint8_t 
requestedBits) { - return requestedBits != 0 && (providedBits & requestedBits) == requestedBits; + return requestedBits != 0 && (providedBits & requestedBits) == requestedBits; } bool FontLanguage::supportsHbScript(hb_script_t script) const { - static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'), - "The Minikin script and HarfBuzz hb_script_t have different encodings."); - if (script == mScript) return true; - return supportsScript(mSubScriptBits, scriptToSubScriptBits(script)); + static_assert( + SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'), + "The Minikin script and HarfBuzz hb_script_t have different encodings."); + if (script == mScript) + return true; + return supportsScript(mSubScriptBits, scriptToSubScriptBits(script)); } int FontLanguage::calcScoreFor(const FontLanguages& supported) const { - bool languageScriptMatch = false; - bool subtagMatch = false; - bool scriptMatch = false; - - for (size_t i = 0; i < supported.size(); ++i) { - if (mEmojiStyle != EMSTYLE_EMPTY && - mEmojiStyle == supported[i].mEmojiStyle) { - subtagMatch = true; - if (mLanguage == supported[i].mLanguage) { - return 4; - } - } - if (isEqualScript(supported[i]) || - supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) { - scriptMatch = true; - if (mLanguage == supported[i].mLanguage) { - languageScriptMatch = true; - } - } + bool languageScriptMatch = false; + bool subtagMatch = false; + bool scriptMatch = false; + + for (size_t i = 0; i < supported.size(); ++i) { + if (mEmojiStyle != EMSTYLE_EMPTY && + mEmojiStyle == supported[i].mEmojiStyle) { + subtagMatch = true; + if (mLanguage == supported[i].mLanguage) { + return 4; + } } - - if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) { - scriptMatch = true; - if (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) { - return 3; - } + if (isEqualScript(supported[i]) || + supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) { + scriptMatch = 
true; + if (mLanguage == supported[i].mLanguage) { + languageScriptMatch = true; + } } + } - if (languageScriptMatch) { - return 3; - } else if (subtagMatch) { - return 2; - } else if (scriptMatch) { - return 1; + if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) { + scriptMatch = true; + if (mLanguage == supported[0].mLanguage && + supported.isAllTheSameLanguage()) { + return 3; } - return 0; + } + + if (languageScriptMatch) { + return 3; + } else if (subtagMatch) { + return 2; + } else if (scriptMatch) { + return 1; + } + return 0; } FontLanguages::FontLanguages(std::vector&& languages) : mLanguages(std::move(languages)) { - if (mLanguages.empty()) { - return; - } - - const FontLanguage& lang = mLanguages[0]; - - mIsAllTheSameLanguage = true; - mUnionOfSubScriptBits = lang.mSubScriptBits; - for (size_t i = 1; i < mLanguages.size(); ++i) { - mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits; - if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) { - mIsAllTheSameLanguage = false; - } + if (mLanguages.empty()) { + return; + } + + const FontLanguage& lang = mLanguages[0]; + + mIsAllTheSameLanguage = true; + mUnionOfSubScriptBits = lang.mSubScriptBits; + for (size_t i = 1; i < mLanguages.size(); ++i) { + mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits; + if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) { + mIsAllTheSameLanguage = false; } + } } #undef SCRIPT_TAG diff --git a/third_party/txt/src/minikin/FontLanguage.h b/third_party/txt/src/minikin/FontLanguage.h index 6a50b1d4cc8406f0ca55ec720fccc0a51f42331e..1a46abb5873424c52dfd2b1193362765284cc158 100644 --- a/third_party/txt/src/minikin/FontLanguage.h +++ b/third_party/txt/src/minikin/FontLanguage.h @@ -24,8 +24,8 @@ namespace minikin { -// Due to the limits in font fallback score calculation, we can't use anything more than 12 -// languages. 
+// Due to the limits in font fallback score calculation, we can't use anything +// more than 12 languages. const size_t FONT_LANGUAGES_LIMIT = 12; // The language or region code is encoded to 15 bits. @@ -37,120 +37,123 @@ class FontLanguages; // does not capture all possible information, only what directly affects // font rendering. struct FontLanguage { -public: - enum EmojiStyle : uint8_t { - EMSTYLE_EMPTY = 0, - EMSTYLE_DEFAULT = 1, - EMSTYLE_EMOJI = 2, - EMSTYLE_TEXT = 3, - }; - // Default constructor creates the unsupported language. - FontLanguage() - : mScript(0ul), - mLanguage(INVALID_CODE), - mRegion(INVALID_CODE), - mHbLanguage(HB_LANGUAGE_INVALID), - mSubScriptBits(0ul), - mEmojiStyle(EMSTYLE_EMPTY) {} - - // Parse from string - FontLanguage(const char* buf, size_t length); - - bool operator==(const FontLanguage other) const { - return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage && - mRegion == other.mRegion && mEmojiStyle == other.mEmojiStyle; - } - - bool operator!=(const FontLanguage other) const { - return !(*this == other); - } - - bool isUnsupported() const { return mLanguage == INVALID_CODE; } - EmojiStyle getEmojiStyle() const { return mEmojiStyle; } - hb_language_t getHbLanguage() const { return mHbLanguage; } - - - bool isEqualScript(const FontLanguage& other) const; - - // Returns true if this script supports the given script. For example, ja-Jpan supports Hira, - // ja-Hira doesn't support Jpan. - bool supportsHbScript(hb_script_t script) const; - - std::string getString() const; - - // Calculates a matching score. This score represents how well the input languages cover this - // language. The maximum score in the language list is returned. - // 0 = no match, 1 = script match, 2 = script and primary language match. 
- int calcScoreFor(const FontLanguages& supported) const; - - uint64_t getIdentifier() const { - return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | ((uint64_t)mRegion << 2) | - mEmojiStyle; - } - -private: - friend class FontLanguages; // for FontLanguages constructor - - // ISO 15924 compliant script code. The 4 chars script code are packed into a 32 bit integer. - uint32_t mScript; - - // ISO 639-1 or ISO 639-2 compliant language code. - // The two- or three-letter language code is packed into a 15 bit integer. - // mLanguage = 0 means the FontLanguage is unsupported. - uint16_t mLanguage; - - // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is - // packed into a 15 bit integer. - uint16_t mRegion; - - // The language to be passed HarfBuzz shaper. - hb_language_t mHbLanguage; - - // For faster comparing, use 7 bits for specific scripts. - static const uint8_t kBopomofoFlag = 1u; - static const uint8_t kHanFlag = 1u << 1; - static const uint8_t kHangulFlag = 1u << 2; - static const uint8_t kHiraganaFlag = 1u << 3; - static const uint8_t kKatakanaFlag = 1u << 4; - static const uint8_t kSimplifiedChineseFlag = 1u << 5; - static const uint8_t kTraditionalChineseFlag = 1u << 6; - uint8_t mSubScriptBits; - - EmojiStyle mEmojiStyle; - - static uint8_t scriptToSubScriptBits(uint32_t script); - - static EmojiStyle resolveEmojiStyle(const char* buf, size_t length, uint32_t script); - - // Returns true if the provide subscript bits has the requested subscript bits. - // Note that this function returns false if the requested subscript bits are empty. - static bool supportsScript(uint8_t providedBits, uint8_t requestedBits); + public: + enum EmojiStyle : uint8_t { + EMSTYLE_EMPTY = 0, + EMSTYLE_DEFAULT = 1, + EMSTYLE_EMOJI = 2, + EMSTYLE_TEXT = 3, + }; + // Default constructor creates the unsupported language. 
+ FontLanguage() + : mScript(0ul), + mLanguage(INVALID_CODE), + mRegion(INVALID_CODE), + mHbLanguage(HB_LANGUAGE_INVALID), + mSubScriptBits(0ul), + mEmojiStyle(EMSTYLE_EMPTY) {} + + // Parse from string + FontLanguage(const char* buf, size_t length); + + bool operator==(const FontLanguage other) const { + return !isUnsupported() && isEqualScript(other) && + mLanguage == other.mLanguage && mRegion == other.mRegion && + mEmojiStyle == other.mEmojiStyle; + } + + bool operator!=(const FontLanguage other) const { return !(*this == other); } + + bool isUnsupported() const { return mLanguage == INVALID_CODE; } + EmojiStyle getEmojiStyle() const { return mEmojiStyle; } + hb_language_t getHbLanguage() const { return mHbLanguage; } + + bool isEqualScript(const FontLanguage& other) const; + + // Returns true if this script supports the given script. For example, ja-Jpan + // supports Hira, ja-Hira doesn't support Jpan. + bool supportsHbScript(hb_script_t script) const; + + std::string getString() const; + + // Calculates a matching score. This score represents how well the input + // languages cover this language. The maximum score in the language list is + // returned. 0 = no match, 1 = script match, 2 = script and primary language + // match. + int calcScoreFor(const FontLanguages& supported) const; + + uint64_t getIdentifier() const { + return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | + ((uint64_t)mRegion << 2) | mEmojiStyle; + } + + private: + friend class FontLanguages; // for FontLanguages constructor + + // ISO 15924 compliant script code. The 4 chars script code are packed into a + // 32 bit integer. + uint32_t mScript; + + // ISO 639-1 or ISO 639-2 compliant language code. + // The two- or three-letter language code is packed into a 15 bit integer. + // mLanguage = 0 means the FontLanguage is unsupported. + uint16_t mLanguage; + + // ISO 3166-1 or UN M.49 compliant region code. 
The two-letter or three-digit + // region code is packed into a 15 bit integer. + uint16_t mRegion; + + // The language to be passed HarfBuzz shaper. + hb_language_t mHbLanguage; + + // For faster comparing, use 7 bits for specific scripts. + static const uint8_t kBopomofoFlag = 1u; + static const uint8_t kHanFlag = 1u << 1; + static const uint8_t kHangulFlag = 1u << 2; + static const uint8_t kHiraganaFlag = 1u << 3; + static const uint8_t kKatakanaFlag = 1u << 4; + static const uint8_t kSimplifiedChineseFlag = 1u << 5; + static const uint8_t kTraditionalChineseFlag = 1u << 6; + uint8_t mSubScriptBits; + + EmojiStyle mEmojiStyle; + + static uint8_t scriptToSubScriptBits(uint32_t script); + + static EmojiStyle resolveEmojiStyle(const char* buf, + size_t length, + uint32_t script); + + // Returns true if the provide subscript bits has the requested subscript + // bits. Note that this function returns false if the requested subscript bits + // are empty. + static bool supportsScript(uint8_t providedBits, uint8_t requestedBits); }; // An immutable list of languages. 
class FontLanguages { -public: - explicit FontLanguages(std::vector&& languages); - FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {} - FontLanguages(FontLanguages&&) = default; + public: + explicit FontLanguages(std::vector&& languages); + FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {} + FontLanguages(FontLanguages&&) = default; - size_t size() const { return mLanguages.size(); } - bool empty() const { return mLanguages.empty(); } - const FontLanguage& operator[] (size_t n) const { return mLanguages[n]; } + size_t size() const { return mLanguages.size(); } + bool empty() const { return mLanguages.empty(); } + const FontLanguage& operator[](size_t n) const { return mLanguages[n]; } -private: - friend struct FontLanguage; // for calcScoreFor + private: + friend struct FontLanguage; // for calcScoreFor - std::vector mLanguages; - uint8_t mUnionOfSubScriptBits; - bool mIsAllTheSameLanguage; + std::vector mLanguages; + uint8_t mUnionOfSubScriptBits; + bool mIsAllTheSameLanguage; - uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } - bool isAllTheSameLanguage() const { return mIsAllTheSameLanguage; } + uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } + bool isAllTheSameLanguage() const { return mIsAllTheSameLanguage; } - // Do not copy and assign. - FontLanguages(const FontLanguages&) = delete; - void operator=(const FontLanguages&) = delete; + // Do not copy and assign. 
+ FontLanguages(const FontLanguages&) = delete; + void operator=(const FontLanguages&) = delete; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/FontLanguageListCache.cpp b/third_party/txt/src/minikin/FontLanguageListCache.cpp index f1e14f0a66d56d8fc21a26fef089bf54081e0935..093ee269db530d93476bddb9a852a524d10a4985 100644 --- a/third_party/txt/src/minikin/FontLanguageListCache.cpp +++ b/third_party/txt/src/minikin/FontLanguageListCache.cpp @@ -31,126 +31,136 @@ namespace minikin { const uint32_t FontLanguageListCache::kEmptyListId; // Returns the text length of output. -static size_t toLanguageTag(char* output, size_t outSize, const std::string& locale) { +static size_t toLanguageTag(char* output, + size_t outSize, + const std::string& locale) { + output[0] = '\0'; + if (locale.empty()) { + return 0; + } + + size_t outLength = 0; + UErrorCode uErr = U_ZERO_ERROR; + outLength = uloc_canonicalize(locale.c_str(), output, outSize, &uErr); + if (U_FAILURE(uErr)) { + // unable to build a proper language identifier + ALOGD("uloc_canonicalize(\"%s\") failed: %s", locale.c_str(), + u_errorName(uErr)); output[0] = '\0'; - if (locale.empty()) { - return 0; - } - - size_t outLength = 0; - UErrorCode uErr = U_ZERO_ERROR; - outLength = uloc_canonicalize(locale.c_str(), output, outSize, &uErr); - if (U_FAILURE(uErr)) { - // unable to build a proper language identifier - ALOGD("uloc_canonicalize(\"%s\") failed: %s", locale.c_str(), u_errorName(uErr)); - output[0] = '\0'; - return 0; - } - - // Preserve "und" and "und-****" since uloc_addLikelySubtags changes "und" to "en-Latn-US". 
- if (strncmp(output, "und", 3) == 0 && - (outLength == 3 || (outLength == 8 && output[3] == '_'))) { - return outLength; - } + return 0; + } - char likelyChars[ULOC_FULLNAME_CAPACITY]; - uErr = U_ZERO_ERROR; - uloc_addLikelySubtags(output, likelyChars, ULOC_FULLNAME_CAPACITY, &uErr); - if (U_FAILURE(uErr)) { - // unable to build a proper language identifier - ALOGD("uloc_addLikelySubtags(\"%s\") failed: %s", output, u_errorName(uErr)); - output[0] = '\0'; - return 0; - } - - uErr = U_ZERO_ERROR; - outLength = uloc_toLanguageTag(likelyChars, output, outSize, FALSE, &uErr); - if (U_FAILURE(uErr)) { - // unable to build a proper language identifier - ALOGD("uloc_toLanguageTag(\"%s\") failed: %s", likelyChars, u_errorName(uErr)); - output[0] = '\0'; - return 0; - } + // Preserve "und" and "und-****" since uloc_addLikelySubtags changes "und" to + // "en-Latn-US". + if (strncmp(output, "und", 3) == 0 && + (outLength == 3 || (outLength == 8 && output[3] == '_'))) { + return outLength; + } + + char likelyChars[ULOC_FULLNAME_CAPACITY]; + uErr = U_ZERO_ERROR; + uloc_addLikelySubtags(output, likelyChars, ULOC_FULLNAME_CAPACITY, &uErr); + if (U_FAILURE(uErr)) { + // unable to build a proper language identifier + ALOGD("uloc_addLikelySubtags(\"%s\") failed: %s", output, + u_errorName(uErr)); + output[0] = '\0'; + return 0; + } + + uErr = U_ZERO_ERROR; + outLength = uloc_toLanguageTag(likelyChars, output, outSize, FALSE, &uErr); + if (U_FAILURE(uErr)) { + // unable to build a proper language identifier + ALOGD("uloc_toLanguageTag(\"%s\") failed: %s", likelyChars, + u_errorName(uErr)); + output[0] = '\0'; + return 0; + } #ifdef VERBOSE_DEBUG - ALOGD("ICU normalized '%s' to '%s'", locale.c_str(), output); + ALOGD("ICU normalized '%s' to '%s'", locale.c_str(), output); #endif - return outLength; + return outLength; } static std::vector parseLanguageList(const std::string& input) { - std::vector result; - size_t currentIdx = 0; - size_t commaLoc = 0; - char 
langTag[ULOC_FULLNAME_CAPACITY]; - std::unordered_set seen; - std::string locale(input.size(), 0); - - while ((commaLoc = input.find_first_of(',', currentIdx)) != std::string::npos) { - locale.assign(input, currentIdx, commaLoc - currentIdx); - currentIdx = commaLoc + 1; - size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale); - FontLanguage lang(langTag, length); - uint64_t identifier = lang.getIdentifier(); - if (!lang.isUnsupported() && seen.count(identifier) == 0) { - result.push_back(lang); - if (result.size() == FONT_LANGUAGES_LIMIT) { - break; - } - seen.insert(identifier); - } - } - if (result.size() < FONT_LANGUAGES_LIMIT) { - locale.assign(input, currentIdx, input.size() - currentIdx); - size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale); - FontLanguage lang(langTag, length); - uint64_t identifier = lang.getIdentifier(); - if (!lang.isUnsupported() && seen.count(identifier) == 0) { - result.push_back(lang); + std::vector result; + size_t currentIdx = 0; + size_t commaLoc = 0; + char langTag[ULOC_FULLNAME_CAPACITY]; + std::unordered_set seen; + std::string locale(input.size(), 0); + + while ((commaLoc = input.find_first_of(',', currentIdx)) != + std::string::npos) { + locale.assign(input, currentIdx, commaLoc - currentIdx); + currentIdx = commaLoc + 1; + size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale); + FontLanguage lang(langTag, length); + uint64_t identifier = lang.getIdentifier(); + if (!lang.isUnsupported() && seen.count(identifier) == 0) { + result.push_back(lang); + if (result.size() == FONT_LANGUAGES_LIMIT) { + break; } + seen.insert(identifier); + } + } + if (result.size() < FONT_LANGUAGES_LIMIT) { + locale.assign(input, currentIdx, input.size() - currentIdx); + size_t length = toLanguageTag(langTag, ULOC_FULLNAME_CAPACITY, locale); + FontLanguage lang(langTag, length); + uint64_t identifier = lang.getIdentifier(); + if (!lang.isUnsupported() && seen.count(identifier) == 0) { + 
result.push_back(lang); } - return result; + } + return result; } // static uint32_t FontLanguageListCache::getId(const std::string& languages) { - FontLanguageListCache* inst = FontLanguageListCache::getInstance(); - std::unordered_map::const_iterator it = - inst->mLanguageListLookupTable.find(languages); - if (it != inst->mLanguageListLookupTable.end()) { - return it->second; - } - - // Given language list is not in cache. Insert it and return newly assigned ID. - const uint32_t nextId = inst->mLanguageLists.size(); - FontLanguages fontLanguages(parseLanguageList(languages)); - if (fontLanguages.empty()) { - return kEmptyListId; - } - inst->mLanguageLists.push_back(std::move(fontLanguages)); - inst->mLanguageListLookupTable.insert(std::make_pair(languages, nextId)); - return nextId; + FontLanguageListCache* inst = FontLanguageListCache::getInstance(); + std::unordered_map::const_iterator it = + inst->mLanguageListLookupTable.find(languages); + if (it != inst->mLanguageListLookupTable.end()) { + return it->second; + } + + // Given language list is not in cache. Insert it and return newly assigned + // ID. 
+ const uint32_t nextId = inst->mLanguageLists.size(); + FontLanguages fontLanguages(parseLanguageList(languages)); + if (fontLanguages.empty()) { + return kEmptyListId; + } + inst->mLanguageLists.push_back(std::move(fontLanguages)); + inst->mLanguageListLookupTable.insert(std::make_pair(languages, nextId)); + return nextId; } // static const FontLanguages& FontLanguageListCache::getById(uint32_t id) { - FontLanguageListCache* inst = FontLanguageListCache::getInstance(); - LOG_ALWAYS_FATAL_IF(id >= inst->mLanguageLists.size(), "Lookup by unknown language list ID."); - return inst->mLanguageLists[id]; + FontLanguageListCache* inst = FontLanguageListCache::getInstance(); + LOG_ALWAYS_FATAL_IF(id >= inst->mLanguageLists.size(), + "Lookup by unknown language list ID."); + return inst->mLanguageLists[id]; } // static FontLanguageListCache* FontLanguageListCache::getInstance() { - assertMinikinLocked(); - static FontLanguageListCache* instance = nullptr; - if (instance == nullptr) { - instance = new FontLanguageListCache(); - - // Insert an empty language list for mapping default language list to kEmptyListId. - // The default language list has only one FontLanguage and it is the unsupported language. - instance->mLanguageLists.push_back(FontLanguages()); - instance->mLanguageListLookupTable.insert(std::make_pair("", kEmptyListId)); - } - return instance; + assertMinikinLocked(); + static FontLanguageListCache* instance = nullptr; + if (instance == nullptr) { + instance = new FontLanguageListCache(); + + // Insert an empty language list for mapping default language list to + // kEmptyListId. The default language list has only one FontLanguage and it + // is the unsupported language. 
+ instance->mLanguageLists.push_back(FontLanguages()); + instance->mLanguageListLookupTable.insert(std::make_pair("", kEmptyListId)); + } + return instance; } } // namespace minikin diff --git a/third_party/txt/src/minikin/FontLanguageListCache.h b/third_party/txt/src/minikin/FontLanguageListCache.h index 9bf156f9de7e969bf8232a72ae896658b0ccfa5d..c84dd8847798f967b80fe6c98bb0334ef7ec0eaf 100644 --- a/third_party/txt/src/minikin/FontLanguageListCache.h +++ b/third_party/txt/src/minikin/FontLanguageListCache.h @@ -25,30 +25,30 @@ namespace minikin { class FontLanguageListCache { -public: - // A special ID for the empty language list. - // This value must be 0 since the empty language list is inserted into mLanguageLists by - // default. - const static uint32_t kEmptyListId = 0; + public: + // A special ID for the empty language list. + // This value must be 0 since the empty language list is inserted into + // mLanguageLists by default. + const static uint32_t kEmptyListId = 0; - // Returns language list ID for the given string representation of FontLanguages. - // Caller should acquire a lock before calling the method. - static uint32_t getId(const std::string& languages); + // Returns language list ID for the given string representation of + // FontLanguages. Caller should acquire a lock before calling the method. + static uint32_t getId(const std::string& languages); - // Caller should acquire a lock before calling the method. - static const FontLanguages& getById(uint32_t id); + // Caller should acquire a lock before calling the method. + static const FontLanguages& getById(uint32_t id); -private: - FontLanguageListCache() {} // Singleton - ~FontLanguageListCache() {} + private: + FontLanguageListCache() {} // Singleton + ~FontLanguageListCache() {} - // Caller should acquire a lock before calling the method. - static FontLanguageListCache* getInstance(); + // Caller should acquire a lock before calling the method. 
+ static FontLanguageListCache* getInstance(); - std::vector mLanguageLists; + std::vector mLanguageLists; - // A map from string representation of the font language list to the ID. - std::unordered_map mLanguageListLookupTable; + // A map from string representation of the font language list to the ID. + std::unordered_map mLanguageListLookupTable; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/FontUtils.cpp b/third_party/txt/src/minikin/FontUtils.cpp index c5a32f82e85861a26aa03098c1b2bd22891a4c7f..b6a0f120484dca206a77d4f71fae653bb48e291d 100644 --- a/third_party/txt/src/minikin/FontUtils.cpp +++ b/third_party/txt/src/minikin/FontUtils.cpp @@ -14,64 +14,70 @@ * limitations under the License. */ -#include #include +#include #include "FontUtils.h" namespace minikin { static uint16_t readU16(const uint8_t* data, size_t offset) { - return data[offset] << 8 | data[offset + 1]; + return data[offset] << 8 | data[offset + 1]; } static uint32_t readU32(const uint8_t* data, size_t offset) { - return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | - ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); + return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 | + ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]); } -bool analyzeStyle(const uint8_t* os2_data, size_t os2_size, int* weight, bool* italic) { - const size_t kUsWeightClassOffset = 4; - const size_t kFsSelectionOffset = 62; - const uint16_t kItalicFlag = (1 << 0); - if (os2_size < kFsSelectionOffset + 2) { - return false; - } - uint16_t weightClass = readU16(os2_data, kUsWeightClassOffset); - *weight = weightClass / 100; - uint16_t fsSelection = readU16(os2_data, kFsSelectionOffset); - *italic = (fsSelection & kItalicFlag) != 0; - return true; +bool analyzeStyle(const uint8_t* os2_data, + size_t os2_size, + int* weight, + bool* italic) { + const size_t kUsWeightClassOffset = 4; + const size_t kFsSelectionOffset = 62; + const 
uint16_t kItalicFlag = (1 << 0); + if (os2_size < kFsSelectionOffset + 2) { + return false; + } + uint16_t weightClass = readU16(os2_data, kUsWeightClassOffset); + *weight = weightClass / 100; + uint16_t fsSelection = readU16(os2_data, kFsSelectionOffset); + *italic = (fsSelection & kItalicFlag) != 0; + return true; } -void analyzeAxes(const uint8_t* fvar_data, size_t fvar_size, std::unordered_set* axes) { - const size_t kMajorVersionOffset = 0; - const size_t kMinorVersionOffset = 2; - const size_t kOffsetToAxesArrayOffset = 4; - const size_t kAxisCountOffset = 8; - const size_t kAxisSizeOffset = 10; +void analyzeAxes(const uint8_t* fvar_data, + size_t fvar_size, + std::unordered_set* axes) { + const size_t kMajorVersionOffset = 0; + const size_t kMinorVersionOffset = 2; + const size_t kOffsetToAxesArrayOffset = 4; + const size_t kAxisCountOffset = 8; + const size_t kAxisSizeOffset = 10; - axes->clear(); + axes->clear(); - if (fvar_size < kAxisSizeOffset + 2) { - return; - } - const uint16_t majorVersion = readU16(fvar_data, kMajorVersionOffset); - const uint16_t minorVersion = readU16(fvar_data, kMinorVersionOffset); - const uint32_t axisOffset = readU16(fvar_data, kOffsetToAxesArrayOffset); - const uint32_t axisCount = readU16(fvar_data, kAxisCountOffset); - const uint32_t axisSize = readU16(fvar_data, kAxisSizeOffset); + if (fvar_size < kAxisSizeOffset + 2) { + return; + } + const uint16_t majorVersion = readU16(fvar_data, kMajorVersionOffset); + const uint16_t minorVersion = readU16(fvar_data, kMinorVersionOffset); + const uint32_t axisOffset = readU16(fvar_data, kOffsetToAxesArrayOffset); + const uint32_t axisCount = readU16(fvar_data, kAxisCountOffset); + const uint32_t axisSize = readU16(fvar_data, kAxisSizeOffset); - if (majorVersion != 1 || minorVersion != 0 || axisOffset != 0x10 || axisSize != 0x14) { - return; // Unsupported version. - } - if (fvar_size < axisOffset + axisOffset * axisCount) { - return; // Invalid table size. 
- } - for (uint32_t i = 0; i < axisCount; ++i) { - size_t axisRecordOffset = axisOffset + i * axisSize; - uint32_t tag = readU32(fvar_data, axisRecordOffset); - axes->insert(tag); - } + if (majorVersion != 1 || minorVersion != 0 || axisOffset != 0x10 || + axisSize != 0x14) { + return; // Unsupported version. + } + if (fvar_size < axisOffset + axisOffset * axisCount) { + return; // Invalid table size. + } + for (uint32_t i = 0; i < axisCount; ++i) { + size_t axisRecordOffset = axisOffset + i * axisSize; + uint32_t tag = readU32(fvar_data, axisRecordOffset); + axes->insert(tag); + } } } // namespace minikin diff --git a/third_party/txt/src/minikin/FontUtils.h b/third_party/txt/src/minikin/FontUtils.h index d26d5e42a0854ce64b9571f6524c028359c6f813..b01e54bd26aa534aee5335b73d0c4119d1180e66 100644 --- a/third_party/txt/src/minikin/FontUtils.h +++ b/third_party/txt/src/minikin/FontUtils.h @@ -21,8 +21,13 @@ namespace minikin { -bool analyzeStyle(const uint8_t* os2_data, size_t os2_size, int* weight, bool* italic); -void analyzeAxes(const uint8_t* fvar_data, size_t fvar_size, std::unordered_set* axes); +bool analyzeStyle(const uint8_t* os2_data, + size_t os2_size, + int* weight, + bool* italic); +void analyzeAxes(const uint8_t* fvar_data, + size_t fvar_size, + std::unordered_set* axes); } // namespace minikin diff --git a/third_party/txt/src/minikin/GraphemeBreak.cpp b/third_party/txt/src/minikin/GraphemeBreak.cpp index 482621ce698450282a2d3214b2b8a979415bdea5..30262391d56101960d5632a4c9e001e529447763 100644 --- a/third_party/txt/src/minikin/GraphemeBreak.cpp +++ b/third_party/txt/src/minikin/GraphemeBreak.cpp @@ -15,227 +15,245 @@ */ #include -#include #include #include +#include -#include #include +#include #include "MinikinInternal.h" namespace minikin { int32_t tailoredGraphemeClusterBreak(uint32_t c) { - // Characters defined as Control that we want to treat them as Extend. - // These are curated manually. 
- if (c == 0x00AD // SHY - || c == 0x061C // ALM - || c == 0x180E // MONGOLIAN VOWEL SEPARATOR - || c == 0x200B // ZWSP - || c == 0x200E // LRM - || c == 0x200F // RLM - || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO - || ((c | 0xF) == 0x206F) // WJ, invisible math operators, LRI, RLI, FSI, PDI, - // and the deprecated invisible format controls - || c == 0xFEFF // BOM - || ((c | 0x7F) == 0xE007F)) // recently undeprecated tag characters in Plane 14 - return U_GCB_EXTEND; - // THAI CHARACTER SARA AM is treated as a normal letter by most other implementations: they - // allow a grapheme break before it. - else if (c == 0x0E33) - return U_GCB_OTHER; - else - return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); + // Characters defined as Control that we want to treat them as Extend. + // These are curated manually. + if (c == 0x00AD // SHY + || c == 0x061C // ALM + || c == 0x180E // MONGOLIAN VOWEL SEPARATOR + || c == 0x200B // ZWSP + || c == 0x200E // LRM + || c == 0x200F // RLM + || (0x202A <= c && c <= 0x202E) // LRE, RLE, PDF, LRO, RLO + || ((c | 0xF) == + 0x206F) // WJ, invisible math operators, LRI, RLI, FSI, PDI, + // and the deprecated invisible format controls + || c == 0xFEFF // BOM + || ((c | 0x7F) == + 0xE007F)) // recently undeprecated tag characters in Plane 14 + return U_GCB_EXTEND; + // THAI CHARACTER SARA AM is treated as a normal letter by most other + // implementations: they allow a grapheme break before it. + else if (c == 0x0E33) + return U_GCB_OTHER; + else + return u_getIntPropertyValue(c, UCHAR_GRAPHEME_CLUSTER_BREAK); } // Returns true for all characters whose IndicSyllabicCategory is Pure_Killer. 
// From http://www.unicode.org/Public/9.0.0/ucd/IndicSyllabicCategory.txt bool isPureKiller(uint32_t c) { - return (c == 0x0E3A || c == 0x0E4E || c == 0x0F84 || c == 0x103A || c == 0x1714 || c == 0x1734 - || c == 0x17D1 || c == 0x1BAA || c == 0x1BF2 || c == 0x1BF3 || c == 0xA806 - || c == 0xA953 || c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B); + return (c == 0x0E3A || c == 0x0E4E || c == 0x0F84 || c == 0x103A || + c == 0x1714 || c == 0x1734 || c == 0x17D1 || c == 0x1BAA || + c == 0x1BF2 || c == 0x1BF3 || c == 0xA806 || c == 0xA953 || + c == 0xABED || c == 0x11134 || c == 0x112EA || c == 0x1172B); } -bool GraphemeBreak::isGraphemeBreak(const float* advances, const uint16_t* buf, size_t start, - size_t count, const size_t offset) { - // This implementation closely follows Unicode Standard Annex #29 on - // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), - // implementing a tailored version of extended grapheme clusters. - // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. +bool GraphemeBreak::isGraphemeBreak(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + const size_t offset) { + // This implementation closely follows Unicode Standard Annex #29 on + // Unicode Text Segmentation (http://www.unicode.org/reports/tr29/), + // implementing a tailored version of extended grapheme clusters. + // The GB rules refer to section 3.1.1, Grapheme Cluster Boundary Rules. 
- // Rule GB1, sot ÷; Rule GB2, ÷ eot - if (offset <= start || offset >= start + count) { - return true; - } - if (U16_IS_TRAIL(buf[offset])) { - // Don't break a surrogate pair, but a lonely trailing surrogate pair is a break - return !U16_IS_LEAD(buf[offset - 1]); - } - uint32_t c1 = 0; - uint32_t c2 = 0; - size_t offset_back = offset; - size_t offset_forward = offset; - U16_PREV(buf, start, offset_back, c1); - U16_NEXT(buf, offset_forward, start + count, c2); - int32_t p1 = tailoredGraphemeClusterBreak(c1); - int32_t p2 = tailoredGraphemeClusterBreak(c2); - // Rule GB3, CR x LF - if (p1 == U_GCB_CR && p2 == U_GCB_LF) { - return false; - } - // Rule GB4, (Control | CR | LF) ÷ - if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { - return true; - } - // Rule GB5, ÷ (Control | CR | LF) - if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { - return true; - } - // Rule GB6, L x ( L | V | LV | LVT ) - if (p1 == U_GCB_L && (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { - return false; - } - // Rule GB7, ( LV | V ) x ( V | T ) - if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { - return false; - } - // Rule GB8, ( LVT | T ) x T - if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) { - return false; - } - // Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x - // TODO(abarth): Add U_GCB_ZWJ once we update ICU. 
- if (p2 == U_GCB_EXTEND || /* p2 == U_GCB_ZWJ || */ p2 == U_GCB_SPACING_MARK || p1 == U_GCB_PREPEND) { - return false; - } + // Rule GB1, sot ÷; Rule GB2, ÷ eot + if (offset <= start || offset >= start + count) { + return true; + } + if (U16_IS_TRAIL(buf[offset])) { + // Don't break a surrogate pair, but a lonely trailing surrogate pair is a + // break + return !U16_IS_LEAD(buf[offset - 1]); + } + uint32_t c1 = 0; + uint32_t c2 = 0; + size_t offset_back = offset; + size_t offset_forward = offset; + U16_PREV(buf, start, offset_back, c1); + U16_NEXT(buf, offset_forward, start + count, c2); + int32_t p1 = tailoredGraphemeClusterBreak(c1); + int32_t p2 = tailoredGraphemeClusterBreak(c2); + // Rule GB3, CR x LF + if (p1 == U_GCB_CR && p2 == U_GCB_LF) { + return false; + } + // Rule GB4, (Control | CR | LF) ÷ + if (p1 == U_GCB_CONTROL || p1 == U_GCB_CR || p1 == U_GCB_LF) { + return true; + } + // Rule GB5, ÷ (Control | CR | LF) + if (p2 == U_GCB_CONTROL || p2 == U_GCB_CR || p2 == U_GCB_LF) { + return true; + } + // Rule GB6, L x ( L | V | LV | LVT ) + if (p1 == U_GCB_L && + (p2 == U_GCB_L || p2 == U_GCB_V || p2 == U_GCB_LV || p2 == U_GCB_LVT)) { + return false; + } + // Rule GB7, ( LV | V ) x ( V | T ) + if ((p1 == U_GCB_LV || p1 == U_GCB_V) && (p2 == U_GCB_V || p2 == U_GCB_T)) { + return false; + } + // Rule GB8, ( LVT | T ) x T + if ((p1 == U_GCB_LVT || p1 == U_GCB_T) && p2 == U_GCB_T) { + return false; + } + // Rule GB9, x (Extend | ZWJ); Rule GB9a, x SpacingMark; Rule GB9b, Prepend x + // TODO(abarth): Add U_GCB_ZWJ once we update ICU. + if (p2 == U_GCB_EXTEND || /* p2 == U_GCB_ZWJ || */ p2 == U_GCB_SPACING_MARK || + p1 == U_GCB_PREPEND) { + return false; + } - // This is used to decide font-dependent grapheme clusters. If we don't have the advance - // information, we become conservative in grapheme breaking and assume that it has no advance. 
- const bool c2_has_advance = (advances != nullptr && advances[offset - start] != 0.0); + // This is used to decide font-dependent grapheme clusters. If we don't have + // the advance information, we become conservative in grapheme breaking and + // assume that it has no advance. + const bool c2_has_advance = + (advances != nullptr && advances[offset - start] != 0.0); - // All the following rules are font-dependent, in the way that if we know c2 has an advance, - // we definitely know that it cannot form a grapheme with the character(s) before it. So we - // make the decision in favor a grapheme break early. - if (c2_has_advance) { - return true; - } + // All the following rules are font-dependent, in the way that if we know c2 + // has an advance, we definitely know that it cannot form a grapheme with the + // character(s) before it. So we make the decision in favor a grapheme break + // early. + if (c2_has_advance) { + return true; + } - // Note: For Rule GB10 and GB11 below, we do not use the Unicode line breaking properties for - // determining emoji-ness and carry our own data, because our data could be more fresh than what - // ICU provides. - // - // Tailored version of Rule GB10, (E_Base | EBG) Extend* × E_Modifier. - // The rule itself says do not break between emoji base and emoji modifiers, skipping all Extend - // characters. Variation selectors are considered Extend, so they are handled fine. - // - // We tailor this by requiring that an actual ligature is formed. If the font doesn't form a - // ligature, we allow a break before the modifier. 
- if (isEmojiModifier(c2)) { - uint32_t c0 = c1; - size_t offset_backback = offset_back; - int32_t p0 = p1; - if (p0 == U_GCB_EXTEND && offset_backback > start) { - // skip over emoji variation selector - U16_PREV(buf, start, offset_backback, c0); - p0 = tailoredGraphemeClusterBreak(c0); - } - if (isEmojiBase(c0)) { - return false; - } + // Note: For Rule GB10 and GB11 below, we do not use the Unicode line breaking + // properties for determining emoji-ness and carry our own data, because our + // data could be more fresh than what ICU provides. + // + // Tailored version of Rule GB10, (E_Base | EBG) Extend* × E_Modifier. + // The rule itself says do not break between emoji base and emoji modifiers, + // skipping all Extend characters. Variation selectors are considered Extend, + // so they are handled fine. + // + // We tailor this by requiring that an actual ligature is formed. If the font + // doesn't form a ligature, we allow a break before the modifier. + if (isEmojiModifier(c2)) { + uint32_t c0 = c1; + size_t offset_backback = offset_back; + int32_t p0 = p1; + if (p0 == U_GCB_EXTEND && offset_backback > start) { + // skip over emoji variation selector + U16_PREV(buf, start, offset_backback, c0); + p0 = tailoredGraphemeClusterBreak(c0); + } + if (isEmojiBase(c0)) { + return false; } + } - // TODO(abarth): Enablet his code once we update ICU. - // Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG) - // We try to make emoji sequences with ZWJ a single grapheme cluster, but only if they actually - // merge to one cluster. So we are more relaxed than the UAX #29 rules in accepting any emoji - // character after the ZWJ, but are tighter in that we only treat it as one cluster if a - // ligature is actually formed and we also require the character before the ZWJ to also be an - // emoji. 
- // if (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) { - // // look at character before ZWJ to see that both can participate in an emoji zwj sequence - // uint32_t c0 = 0; - // size_t offset_backback = offset_back; - // U16_PREV(buf, start, offset_backback, c0); - // if (c0 == 0xFE0F && offset_backback > start) { - // // skip over emoji variation selector - // U16_PREV(buf, start, offset_backback, c0); - // } - // if (isEmoji(c0)) { - // return false; - // } - // } + // TODO(abarth): Enablet his code once we update ICU. + // Tailored version of Rule GB11, ZWJ × (Glue_After_Zwj | EBG) + // We try to make emoji sequences with ZWJ a single grapheme cluster, but only + // if they actually merge to one cluster. So we are more relaxed than the UAX + // #29 rules in accepting any emoji character after the ZWJ, but are tighter + // in that we only treat it as one cluster if a ligature is actually formed + // and we also require the character before the ZWJ to also be an emoji. if + // (p1 == U_GCB_ZWJ && isEmoji(c2) && offset_back > start) { + // // look at character before ZWJ to see that both can participate in an + // emoji zwj sequence uint32_t c0 = 0; size_t offset_backback = + // offset_back; U16_PREV(buf, start, offset_backback, c0); if (c0 == + // 0xFE0F && offset_backback > start) { + // // skip over emoji variation selector + // U16_PREV(buf, start, offset_backback, c0); + // } + // if (isEmoji(c0)) { + // return false; + // } + // } - // Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases. - // sot (RI RI)* RI x RI - // [^RI] (RI RI)* RI x RI - // - // If we have font information, we have already broken the cluster if and only if the second - // character had no advance, which means a ligature was formed. If we don't, we look back like - // UAX #29 recommends, but only up to 1000 code units. - if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { - if (advances != nullptr) { - // We have advances information. 
But if we are here, we already know c2 has no advance. - // So we should definitely disallow a break. - return false; - } else { - // Look at up to 1000 code units. - const size_t lookback_barrier = std::max((ssize_t)start, (ssize_t)offset_back - 1000); - size_t offset_backback = offset_back; - while (offset_backback > lookback_barrier) { - uint32_t c0 = 0; - U16_PREV(buf, lookback_barrier, offset_backback, c0); - if (tailoredGraphemeClusterBreak(c0) != U_GCB_REGIONAL_INDICATOR) { - offset_backback += U16_LENGTH(c0); - break; - } - } - // The number 4 comes from the number of code units in a whole flag. - return (offset - offset_backback) % 4 == 0; + // Tailored version of Rule GB12 and Rule GB13 that look at even-odd cases. + // sot (RI RI)* RI x RI + // [^RI] (RI RI)* RI x RI + // + // If we have font information, we have already broken the cluster if and only + // if the second character had no advance, which means a ligature was formed. + // If we don't, we look back like UAX #29 recommends, but only up to 1000 code + // units. + if (p1 == U_GCB_REGIONAL_INDICATOR && p2 == U_GCB_REGIONAL_INDICATOR) { + if (advances != nullptr) { + // We have advances information. But if we are here, we already know c2 + // has no advance. So we should definitely disallow a break. + return false; + } else { + // Look at up to 1000 code units. + const size_t lookback_barrier = + std::max((ssize_t)start, (ssize_t)offset_back - 1000); + size_t offset_backback = offset_back; + while (offset_backback > lookback_barrier) { + uint32_t c0 = 0; + U16_PREV(buf, lookback_barrier, offset_backback, c0); + if (tailoredGraphemeClusterBreak(c0) != U_GCB_REGIONAL_INDICATOR) { + offset_backback += U16_LENGTH(c0); + break; } + } + // The number 4 comes from the number of code units in a whole flag. + return (offset - offset_backback) % 4 == 0; } - // Cluster Indic syllables together (tailoring of UAX #29). 
- // Immediately after each virama (that is not just a pure killer) followed by a letter, we - // disallow grapheme breaks (if we are here, we don't know about advances, or we already know - // that c2 has no advance). - if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama - && !isPureKiller(c1) - && u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { - return false; - } - // Rule GB999, Any ÷ Any - return true; + } + // Cluster Indic syllables together (tailoring of UAX #29). + // Immediately after each virama (that is not just a pure killer) followed by + // a letter, we disallow grapheme breaks (if we are here, we don't know about + // advances, or we already know that c2 has no advance). + if (u_getIntPropertyValue(c1, UCHAR_CANONICAL_COMBINING_CLASS) == 9 // virama + && !isPureKiller(c1) && + u_getIntPropertyValue(c2, UCHAR_GENERAL_CATEGORY) == U_OTHER_LETTER) { + return false; + } + // Rule GB999, Any ÷ Any + return true; } -size_t GraphemeBreak::getTextRunCursor(const float* advances, const uint16_t* buf, size_t start, - size_t count, size_t offset, MoveOpt opt) { - switch (opt) { +size_t GraphemeBreak::getTextRunCursor(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + size_t offset, + MoveOpt opt) { + switch (opt) { case AFTER: - if (offset < start + count) { - offset++; - } - // fall through + if (offset < start + count) { + offset++; + } + // fall through case AT_OR_AFTER: - while (!isGraphemeBreak(advances, buf, start, count, offset)) { - offset++; - } - break; + while (!isGraphemeBreak(advances, buf, start, count, offset)) { + offset++; + } + break; case BEFORE: - if (offset > start) { - offset--; - } - // fall through + if (offset > start) { + offset--; + } + // fall through case AT_OR_BEFORE: - while (!isGraphemeBreak(advances, buf, start, count, offset)) { - offset--; - } - break; + while (!isGraphemeBreak(advances, buf, start, count, offset)) { + offset--; + } + break; case 
AT: - if (!isGraphemeBreak(advances, buf, start, count, offset)) { - offset = (size_t)-1; - } - break; - } - return offset; + if (!isGraphemeBreak(advances, buf, start, count, offset)) { + offset = (size_t)-1; + } + break; + } + return offset; } } // namespace minikin diff --git a/third_party/txt/src/minikin/GraphemeBreak.h b/third_party/txt/src/minikin/GraphemeBreak.h index f1b5102a0c7b6f8f2f21b8371fd9cd1cde1d6e74..233ec15c76766949adbf963d474b7fc6295b05c3 100644 --- a/third_party/txt/src/minikin/GraphemeBreak.h +++ b/third_party/txt/src/minikin/GraphemeBreak.h @@ -20,26 +20,33 @@ namespace minikin { class GraphemeBreak { -public: - // These values must be kept in sync with CURSOR_AFTER etc in Paint.java - enum MoveOpt { - AFTER = 0, - AT_OR_AFTER = 1, - BEFORE = 2, - AT_OR_BEFORE = 3, - AT = 4 - }; + public: + // These values must be kept in sync with CURSOR_AFTER etc in Paint.java + enum MoveOpt { + AFTER = 0, + AT_OR_AFTER = 1, + BEFORE = 2, + AT_OR_BEFORE = 3, + AT = 4 + }; - // Determine whether the given offset is a grapheme break. - // This implementation generally follows Unicode's UTR #29 extended - // grapheme break, with various tweaks. - static bool isGraphemeBreak(const float* advances, const uint16_t* buf, size_t start, - size_t count, size_t offset); + // Determine whether the given offset is a grapheme break. + // This implementation generally follows Unicode's UTR #29 extended + // grapheme break, with various tweaks. + static bool isGraphemeBreak(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + size_t offset); - // Matches Android's Java API. Note, return (size_t)-1 for AT to - // signal non-break because unsigned return type. - static size_t getTextRunCursor(const float* advances, const uint16_t* buf, size_t start, - size_t count, size_t offset, MoveOpt opt); + // Matches Android's Java API. Note, return (size_t)-1 for AT to + // signal non-break because unsigned return type. 
+ static size_t getTextRunCursor(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + size_t offset, + MoveOpt opt); }; } // namespace minikin diff --git a/third_party/txt/src/minikin/HbFontCache.cpp b/third_party/txt/src/minikin/HbFontCache.cpp index 0de40113a0fcbcccf0889d9d9a9ae9b94e7af3c3..a78ee7a0d346074ff2c115d6c7c63b98d4832802 100644 --- a/third_party/txt/src/minikin/HbFontCache.cpp +++ b/third_party/txt/src/minikin/HbFontCache.cpp @@ -21,8 +21,8 @@ #include #include -#include #include +#include #include #include "MinikinInternal.h" @@ -30,97 +30,89 @@ namespace minikin { class HbFontCache : private android::OnEntryRemoved { -public: - HbFontCache() : mCache(kMaxEntries) { - mCache.setOnEntryRemovedListener(this); - } + public: + HbFontCache() : mCache(kMaxEntries) { + mCache.setOnEntryRemovedListener(this); + } - // callback for OnEntryRemoved - void operator()(int32_t& /* key */, hb_font_t*& value) { - hb_font_destroy(value); - } + // callback for OnEntryRemoved + void operator()(int32_t& /* key */, hb_font_t*& value) { + hb_font_destroy(value); + } - hb_font_t* get(int32_t fontId) { - return mCache.get(fontId); - } + hb_font_t* get(int32_t fontId) { return mCache.get(fontId); } - void put(int32_t fontId, hb_font_t* font) { - mCache.put(fontId, font); - } + void put(int32_t fontId, hb_font_t* font) { mCache.put(fontId, font); } - void clear() { - mCache.clear(); - } + void clear() { mCache.clear(); } - void remove(int32_t fontId) { - mCache.remove(fontId); - } + void remove(int32_t fontId) { mCache.remove(fontId); } -private: - static const size_t kMaxEntries = 100; + private: + static const size_t kMaxEntries = 100; - android::LruCache mCache; + android::LruCache mCache; }; HbFontCache* getFontCacheLocked() { - assertMinikinLocked(); - static HbFontCache* cache = nullptr; - if (cache == nullptr) { - cache = new HbFontCache(); - } - return cache; + assertMinikinLocked(); + static HbFontCache* cache = nullptr; + if (cache == 
nullptr) { + cache = new HbFontCache(); + } + return cache; } void purgeHbFontCacheLocked() { - assertMinikinLocked(); - getFontCacheLocked()->clear(); + assertMinikinLocked(); + getFontCacheLocked()->clear(); } void purgeHbFontLocked(const MinikinFont* minikinFont) { - assertMinikinLocked(); - const int32_t fontId = minikinFont->GetUniqueId(); - getFontCacheLocked()->remove(fontId); + assertMinikinLocked(); + const int32_t fontId = minikinFont->GetUniqueId(); + getFontCacheLocked()->remove(fontId); } // Returns a new reference to a hb_font_t object, caller is // responsible for calling hb_font_destroy() on it. hb_font_t* getHbFontLocked(const MinikinFont* minikinFont) { - assertMinikinLocked(); - // TODO: get rid of nullFaceFont - static hb_font_t* nullFaceFont = nullptr; - if (minikinFont == nullptr) { - if (nullFaceFont == nullptr) { - nullFaceFont = hb_font_create(nullptr); - } - return hb_font_reference(nullFaceFont); - } - - HbFontCache* fontCache = getFontCacheLocked(); - const int32_t fontId = minikinFont->GetUniqueId(); - hb_font_t* font = fontCache->get(fontId); - if (font != nullptr) { - return hb_font_reference(font); + assertMinikinLocked(); + // TODO: get rid of nullFaceFont + static hb_font_t* nullFaceFont = nullptr; + if (minikinFont == nullptr) { + if (nullFaceFont == nullptr) { + nullFaceFont = hb_font_create(nullptr); } + return hb_font_reference(nullFaceFont); + } - hb_face_t* face = minikinFont->CreateHarfBuzzFace(); - - hb_font_t* parent_font = hb_font_create(face); - hb_ot_font_set_funcs(parent_font); - - unsigned int upem = hb_face_get_upem(face); - hb_font_set_scale(parent_font, upem, upem); - - font = hb_font_create_sub_font(parent_font); - // TODO(abarth): Enable this code once we update harfbuzz. 
- // std::vector variations; - // for (const FontVariation& variation : minikinFont->GetAxes()) { - // variations.push_back({variation.axisTag, variation.value}); - // } - // hb_font_set_variations(font, variations.data(), variations.size()); - hb_font_destroy(parent_font); - hb_face_destroy(face); - fontCache->put(fontId, font); + HbFontCache* fontCache = getFontCacheLocked(); + const int32_t fontId = minikinFont->GetUniqueId(); + hb_font_t* font = fontCache->get(fontId); + if (font != nullptr) { return hb_font_reference(font); + } + + hb_face_t* face = minikinFont->CreateHarfBuzzFace(); + + hb_font_t* parent_font = hb_font_create(face); + hb_ot_font_set_funcs(parent_font); + + unsigned int upem = hb_face_get_upem(face); + hb_font_set_scale(parent_font, upem, upem); + + font = hb_font_create_sub_font(parent_font); + // TODO(abarth): Enable this code once we update harfbuzz. + // std::vector variations; + // for (const FontVariation& variation : minikinFont->GetAxes()) { + // variations.push_back({variation.axisTag, variation.value}); + // } + // hb_font_set_variations(font, variations.data(), variations.size()); + hb_font_destroy(parent_font); + hb_face_destroy(face); + fontCache->put(fontId, font); + return hb_font_reference(font); } } // namespace minikin diff --git a/third_party/txt/src/minikin/Hyphenator.cpp b/third_party/txt/src/minikin/Hyphenator.cpp index 0d1c0d7715efe318b246bad9cb3882873bd490bb..33ac1e7e133e81203f30f5b2baa89ab0b9f5ff16 100644 --- a/third_party/txt/src/minikin/Hyphenator.cpp +++ b/third_party/txt/src/minikin/Hyphenator.cpp @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include -#include -#include -#include #include #include +#include +#include +#include +#include // HACK: for reading pattern file #include @@ -37,104 +37,112 @@ static const uint16_t CHAR_SOFT_HYPHEN = 0x00AD; static const uint16_t CHAR_MIDDLE_DOT = 0x00B7; static const uint16_t CHAR_HYPHEN = 0x2010; -// The following are structs that correspond to tables inside the hyb file format +// The following are structs that correspond to tables inside the hyb file +// format struct AlphabetTable0 { - uint32_t version; - uint32_t min_codepoint; - uint32_t max_codepoint; - uint8_t data[1]; // actually flexible array, size is known at runtime + uint32_t version; + uint32_t min_codepoint; + uint32_t max_codepoint; + uint8_t data[1]; // actually flexible array, size is known at runtime }; struct AlphabetTable1 { - uint32_t version; - uint32_t n_entries; - uint32_t data[1]; // actually flexible array, size is known at runtime + uint32_t version; + uint32_t n_entries; + uint32_t data[1]; // actually flexible array, size is known at runtime - static uint32_t codepoint(uint32_t entry) { return entry >> 11; } - static uint32_t value(uint32_t entry) { return entry & 0x7ff; } + static uint32_t codepoint(uint32_t entry) { return entry >> 11; } + static uint32_t value(uint32_t entry) { return entry & 0x7ff; } }; struct Trie { - uint32_t version; - uint32_t char_mask; - uint32_t link_shift; - uint32_t link_mask; - uint32_t pattern_shift; - uint32_t n_entries; - uint32_t data[1]; // actually flexible array, size is known at runtime + uint32_t version; + uint32_t char_mask; + uint32_t link_shift; + uint32_t link_mask; + uint32_t pattern_shift; + uint32_t n_entries; + uint32_t data[1]; // actually flexible array, size is known at runtime }; struct Pattern { - uint32_t version; - uint32_t n_entries; - uint32_t pattern_offset; - uint32_t pattern_size; - uint32_t data[1]; // actually flexible array, size is known at runtime - - // accessors - static uint32_t len(uint32_t entry) { return 
entry >> 26; } - static uint32_t shift(uint32_t entry) { return (entry >> 20) & 0x3f; } - const uint8_t* buf(uint32_t entry) const { - return reinterpret_cast(this) + pattern_offset + (entry & 0xfffff); - } + uint32_t version; + uint32_t n_entries; + uint32_t pattern_offset; + uint32_t pattern_size; + uint32_t data[1]; // actually flexible array, size is known at runtime + + // accessors + static uint32_t len(uint32_t entry) { return entry >> 26; } + static uint32_t shift(uint32_t entry) { return (entry >> 20) & 0x3f; } + const uint8_t* buf(uint32_t entry) const { + return reinterpret_cast(this) + pattern_offset + + (entry & 0xfffff); + } }; struct Header { - uint32_t magic; - uint32_t version; - uint32_t alphabet_offset; - uint32_t trie_offset; - uint32_t pattern_offset; - uint32_t file_size; - - // accessors - const uint8_t* bytes() const { return reinterpret_cast(this); } - uint32_t alphabetVersion() const { - return *reinterpret_cast(bytes() + alphabet_offset); - } - const AlphabetTable0* alphabetTable0() const { - return reinterpret_cast(bytes() + alphabet_offset); - } - const AlphabetTable1* alphabetTable1() const { - return reinterpret_cast(bytes() + alphabet_offset); - } - const Trie* trieTable() const { - return reinterpret_cast(bytes() + trie_offset); - } - const Pattern* patternTable() const { - return reinterpret_cast(bytes() + pattern_offset); - } + uint32_t magic; + uint32_t version; + uint32_t alphabet_offset; + uint32_t trie_offset; + uint32_t pattern_offset; + uint32_t file_size; + + // accessors + const uint8_t* bytes() const { + return reinterpret_cast(this); + } + uint32_t alphabetVersion() const { + return *reinterpret_cast(bytes() + alphabet_offset); + } + const AlphabetTable0* alphabetTable0() const { + return reinterpret_cast(bytes() + alphabet_offset); + } + const AlphabetTable1* alphabetTable1() const { + return reinterpret_cast(bytes() + alphabet_offset); + } + const Trie* trieTable() const { + return reinterpret_cast(bytes() + 
trie_offset); + } + const Pattern* patternTable() const { + return reinterpret_cast(bytes() + pattern_offset); + } }; -Hyphenator* Hyphenator::loadBinary(const uint8_t* patternData, size_t minPrefix, size_t minSuffix) { - Hyphenator* result = new Hyphenator; - result->patternData = patternData; - result->minPrefix = minPrefix; - result->minSuffix = minSuffix; - return result; +Hyphenator* Hyphenator::loadBinary(const uint8_t* patternData, + size_t minPrefix, + size_t minSuffix) { + Hyphenator* result = new Hyphenator; + result->patternData = patternData; + result->minPrefix = minPrefix; + result->minSuffix = minSuffix; + return result; } -void Hyphenator::hyphenate(vector* result, const uint16_t* word, size_t len, - const icu::Locale& locale) { - result->clear(); - result->resize(len); - const size_t paddedLen = len + 2; // start and stop code each count for 1 - if (patternData != nullptr && - len >= minPrefix + minSuffix && paddedLen <= MAX_HYPHENATED_SIZE) { - uint16_t alpha_codes[MAX_HYPHENATED_SIZE]; - const HyphenationType hyphenValue = alphabetLookup(alpha_codes, word, len); - if (hyphenValue != HyphenationType::DONT_BREAK) { - hyphenateFromCodes(result->data(), alpha_codes, paddedLen, hyphenValue); - return; - } - // TODO: try NFC normalization - // TODO: handle non-BMP Unicode (requires remapping of offsets) +void Hyphenator::hyphenate(vector* result, + const uint16_t* word, + size_t len, + const icu::Locale& locale) { + result->clear(); + result->resize(len); + const size_t paddedLen = len + 2; // start and stop code each count for 1 + if (patternData != nullptr && len >= minPrefix + minSuffix && + paddedLen <= MAX_HYPHENATED_SIZE) { + uint16_t alpha_codes[MAX_HYPHENATED_SIZE]; + const HyphenationType hyphenValue = alphabetLookup(alpha_codes, word, len); + if (hyphenValue != HyphenationType::DONT_BREAK) { + hyphenateFromCodes(result->data(), alpha_codes, paddedLen, hyphenValue); + return; } - // Note that we will always get here if the word contains a 
hyphen or a soft hyphen, because the - // alphabet is not expected to contain a hyphen or a soft hyphen character, so alphabetLookup - // would return DONT_BREAK. - hyphenateWithNoPatterns(result->data(), word, len, locale); + // TODO: try NFC normalization + // TODO: handle non-BMP Unicode (requires remapping of offsets) + } + // Note that we will always get here if the word contains a hyphen or a soft + // hyphen, because the alphabet is not expected to contain a hyphen or a soft + // hyphen character, so alphabetLookup would return DONT_BREAK. + hyphenateWithNoPatterns(result->data(), word, len, locale); } // This function determines whether a character is like U+2010 HYPHEN in @@ -144,15 +152,15 @@ void Hyphenator::hyphenate(vector* result, const uint16_t* word // inspecting all the characters that have the Unicode line breaking // property of BA or HY and seeing which ones are hyphens. bool Hyphenator::isLineBreakingHyphen(uint32_t c) { - return (c == 0x002D || // HYPHEN-MINUS - c == 0x058A || // ARMENIAN HYPHEN - c == 0x05BE || // HEBREW PUNCTUATION MAQAF - c == 0x1400 || // CANADIAN SYLLABICS HYPHEN - c == 0x2010 || // HYPHEN - c == 0x2013 || // EN DASH - c == 0x2027 || // HYPHENATION POINT - c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN - c == 0x2E40); // DOUBLE HYPHEN + return (c == 0x002D || // HYPHEN-MINUS + c == 0x058A || // ARMENIAN HYPHEN + c == 0x05BE || // HEBREW PUNCTUATION MAQAF + c == 0x1400 || // CANADIAN SYLLABICS HYPHEN + c == 0x2010 || // HYPHEN + c == 0x2013 || // EN DASH + c == 0x2027 || // HYPHENATION POINT + c == 0x2E17 || // DOUBLE OBLIQUE HYPHEN + c == 0x2E40); // DOUBLE HYPHEN } const static uint32_t HYPHEN_STR[] = {0x2010, 0}; @@ -163,272 +171,290 @@ const static uint32_t ZWJ_STR[] = {0x200D, 0}; const static uint32_t ZWJ_AND_HYPHEN_STR[] = {0x200D, 0x2010, 0}; const uint32_t* HyphenEdit::getHyphenString(uint32_t hyph) { - switch (hyph) { - case INSERT_HYPHEN_AT_END: - case REPLACE_WITH_HYPHEN_AT_END: - case INSERT_HYPHEN_AT_START: - return 
HYPHEN_STR; - case INSERT_ARMENIAN_HYPHEN_AT_END: - return ARMENIAN_HYPHEN_STR; - case INSERT_MAQAF_AT_END: - return MAQAF_STR; - case INSERT_UCAS_HYPHEN_AT_END: - return UCAS_HYPHEN_STR; - case INSERT_ZWJ_AND_HYPHEN_AT_END: - return ZWJ_AND_HYPHEN_STR; - case INSERT_ZWJ_AT_START: - return ZWJ_STR; - default: - return nullptr; - } + switch (hyph) { + case INSERT_HYPHEN_AT_END: + case REPLACE_WITH_HYPHEN_AT_END: + case INSERT_HYPHEN_AT_START: + return HYPHEN_STR; + case INSERT_ARMENIAN_HYPHEN_AT_END: + return ARMENIAN_HYPHEN_STR; + case INSERT_MAQAF_AT_END: + return MAQAF_STR; + case INSERT_UCAS_HYPHEN_AT_END: + return UCAS_HYPHEN_STR; + case INSERT_ZWJ_AND_HYPHEN_AT_END: + return ZWJ_AND_HYPHEN_STR; + case INSERT_ZWJ_AT_START: + return ZWJ_STR; + default: + return nullptr; + } } uint32_t HyphenEdit::editForThisLine(HyphenationType type) { - switch (type) { - case HyphenationType::DONT_BREAK: - return NO_EDIT; - case HyphenationType::BREAK_AND_INSERT_HYPHEN: - return INSERT_HYPHEN_AT_END; - case HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN: - return INSERT_ARMENIAN_HYPHEN_AT_END; - case HyphenationType::BREAK_AND_INSERT_MAQAF: - return INSERT_MAQAF_AT_END; - case HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN: - return INSERT_UCAS_HYPHEN_AT_END; - case HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN: - return REPLACE_WITH_HYPHEN_AT_END; - case HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ: - return INSERT_ZWJ_AND_HYPHEN_AT_END; - default: - return BREAK_AT_END; - } + switch (type) { + case HyphenationType::DONT_BREAK: + return NO_EDIT; + case HyphenationType::BREAK_AND_INSERT_HYPHEN: + return INSERT_HYPHEN_AT_END; + case HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN: + return INSERT_ARMENIAN_HYPHEN_AT_END; + case HyphenationType::BREAK_AND_INSERT_MAQAF: + return INSERT_MAQAF_AT_END; + case HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN: + return INSERT_UCAS_HYPHEN_AT_END; + case HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN: + return 
REPLACE_WITH_HYPHEN_AT_END; + case HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ: + return INSERT_ZWJ_AND_HYPHEN_AT_END; + default: + return BREAK_AT_END; + } } uint32_t HyphenEdit::editForNextLine(HyphenationType type) { - switch (type) { - case HyphenationType::DONT_BREAK: - return NO_EDIT; - case HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE: - return INSERT_HYPHEN_AT_START; - case HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ: - return INSERT_ZWJ_AT_START; - default: - return BREAK_AT_START; - } + switch (type) { + case HyphenationType::DONT_BREAK: + return NO_EDIT; + case HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE: + return INSERT_HYPHEN_AT_START; + case HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ: + return INSERT_ZWJ_AT_START; + default: + return BREAK_AT_START; + } } static UScriptCode getScript(uint32_t codePoint) { - UErrorCode errorCode = U_ZERO_ERROR; - const UScriptCode script = uscript_getScript(static_cast(codePoint), &errorCode); - if (U_SUCCESS(errorCode)) { - return script; - } else { - return USCRIPT_INVALID_CODE; - } + UErrorCode errorCode = U_ZERO_ERROR; + const UScriptCode script = + uscript_getScript(static_cast(codePoint), &errorCode); + if (U_SUCCESS(errorCode)) { + return script; + } else { + return USCRIPT_INVALID_CODE; + } } static HyphenationType hyphenationTypeBasedOnScript(uint32_t codePoint) { - // Note: It's not clear what the best hyphen for Hebrew is. While maqaf is the "correct" hyphen - // for Hebrew, modern practice may have shifted towards Western hyphens. We use normal hyphens - // for now to be safe. BREAK_AND_INSERT_MAQAF is already implemented, so if we want to switch - // to maqaf for Hebrew, we can simply add a condition here. - const UScriptCode script = getScript(codePoint); - if (script == USCRIPT_KANNADA - || script == USCRIPT_MALAYALAM - || script == USCRIPT_TAMIL - || script == USCRIPT_TELUGU) { - // Grantha is not included, since we don't support non-BMP hyphenation yet. 
- return HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; - } else if (script == USCRIPT_ARMENIAN) { - return HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN; - } else if (script == USCRIPT_CANADIAN_ABORIGINAL) { - return HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN; - } else { - return HyphenationType::BREAK_AND_INSERT_HYPHEN; - } + // Note: It's not clear what the best hyphen for Hebrew is. While maqaf is the + // "correct" hyphen for Hebrew, modern practice may have shifted towards + // Western hyphens. We use normal hyphens for now to be safe. + // BREAK_AND_INSERT_MAQAF is already implemented, so if we want to switch to + // maqaf for Hebrew, we can simply add a condition here. + const UScriptCode script = getScript(codePoint); + if (script == USCRIPT_KANNADA || script == USCRIPT_MALAYALAM || + script == USCRIPT_TAMIL || script == USCRIPT_TELUGU) { + // Grantha is not included, since we don't support non-BMP hyphenation yet. + return HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; + } else if (script == USCRIPT_ARMENIAN) { + return HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN; + } else if (script == USCRIPT_CANADIAN_ABORIGINAL) { + return HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN; + } else { + return HyphenationType::BREAK_AND_INSERT_HYPHEN; + } } static inline int32_t getJoiningType(UChar32 codepoint) { - return u_getIntPropertyValue(codepoint, UCHAR_JOINING_TYPE); + return u_getIntPropertyValue(codepoint, UCHAR_JOINING_TYPE); } -// Assumption for caller: location must be >= 2 and word[location] == CHAR_SOFT_HYPHEN. -// This function decides if the letters before and after the hyphen should appear as joining. 
-static inline HyphenationType getHyphTypeForArabic(const uint16_t* word, size_t len, - size_t location) { - ssize_t i = location; - int32_t type = U_JT_NON_JOINING; - while (static_cast(i) < len && (type = getJoiningType(word[i])) == U_JT_TRANSPARENT) { - i++; +// Assumption for caller: location must be >= 2 and word[location] == +// CHAR_SOFT_HYPHEN. This function decides if the letters before and after the +// hyphen should appear as joining. +static inline HyphenationType getHyphTypeForArabic(const uint16_t* word, + size_t len, + size_t location) { + ssize_t i = location; + int32_t type = U_JT_NON_JOINING; + while (static_cast(i) < len && + (type = getJoiningType(word[i])) == U_JT_TRANSPARENT) { + i++; + } + if (type == U_JT_DUAL_JOINING || type == U_JT_RIGHT_JOINING || + type == U_JT_JOIN_CAUSING) { + // The next character is of the type that may join the last character. See + // if the last character is also of the right type. + i = location - 2; // Skip the soft hyphen + type = U_JT_NON_JOINING; + while (i >= 0 && (type = getJoiningType(word[i])) == U_JT_TRANSPARENT) { + i--; } - if (type == U_JT_DUAL_JOINING || type == U_JT_RIGHT_JOINING || type == U_JT_JOIN_CAUSING) { - // The next character is of the type that may join the last character. See if the last - // character is also of the right type. 
- i = location - 2; // Skip the soft hyphen - type = U_JT_NON_JOINING; - while (i >= 0 && (type = getJoiningType(word[i])) == U_JT_TRANSPARENT) { - i--; - } - if (type == U_JT_DUAL_JOINING || type == U_JT_LEFT_JOINING || type == U_JT_JOIN_CAUSING) { - return HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ; - } + if (type == U_JT_DUAL_JOINING || type == U_JT_LEFT_JOINING || + type == U_JT_JOIN_CAUSING) { + return HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ; } - return HyphenationType::BREAK_AND_INSERT_HYPHEN; + } + return HyphenationType::BREAK_AND_INSERT_HYPHEN; } -// Use various recommendations of UAX #14 Unicode Line Breaking Algorithm for hyphenating words -// that didn't match patterns, especially words that contain hyphens or soft hyphens (See sections -// 5.3, Use of Hyphen, and 5.4, Use of Soft Hyphen). -void Hyphenator::hyphenateWithNoPatterns(HyphenationType* result, const uint16_t* word, size_t len, - const icu::Locale& locale) { - result[0] = HyphenationType::DONT_BREAK; - for (size_t i = 1; i < len; i++) { - const uint16_t prevChar = word[i - 1]; - if (i > 1 && isLineBreakingHyphen(prevChar)) { - // Break after hyphens, but only if they don't start the word. - - if ((prevChar == CHAR_HYPHEN_MINUS || prevChar == CHAR_HYPHEN) - && strcmp(locale.getLanguage(), "pl") == 0 - && getScript(word[i]) == USCRIPT_LATIN ) { - // In Polish, hyphens get repeated at the next line. To be safe, - // we will do this only if the next character is Latin. - result[i] = HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE; - } else { - result[i] = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; - } - } else if (i > 1 && prevChar == CHAR_SOFT_HYPHEN) { - // Break after soft hyphens, but only if they don't start the word (a soft hyphen - // starting the word doesn't give any useful break opportunities). The type of the break - // is based on the script of the character we break on. 
- if (getScript(word[i]) == USCRIPT_ARABIC) { - // For Arabic, we need to look and see if the characters around the soft hyphen - // actually join. If they don't, we'll just insert a normal hyphen. - result[i] = getHyphTypeForArabic(word, len, i); - } else { - result[i] = hyphenationTypeBasedOnScript(word[i]); - } - } else if (prevChar == CHAR_MIDDLE_DOT - && minPrefix < i && i <= len - minSuffix - && ((word[i - 2] == 'l' && word[i] == 'l') - || (word[i - 2] == 'L' && word[i] == 'L')) - && strcmp(locale.getLanguage(), "ca") == 0) { - // In Catalan, "l·l" should break as "l-" on the first line - // and "l" on the next line. - result[i] = HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN; - } else { - result[i] = HyphenationType::DONT_BREAK; - } - } +// Use various recommendations of UAX #14 Unicode Line Breaking Algorithm for +// hyphenating words that didn't match patterns, especially words that contain +// hyphens or soft hyphens (See sections 5.3, Use of Hyphen, and 5.4, Use of +// Soft Hyphen). +void Hyphenator::hyphenateWithNoPatterns(HyphenationType* result, + const uint16_t* word, + size_t len, + const icu::Locale& locale) { + result[0] = HyphenationType::DONT_BREAK; + for (size_t i = 1; i < len; i++) { + const uint16_t prevChar = word[i - 1]; + if (i > 1 && isLineBreakingHyphen(prevChar)) { + // Break after hyphens, but only if they don't start the word. + + if ((prevChar == CHAR_HYPHEN_MINUS || prevChar == CHAR_HYPHEN) && + strcmp(locale.getLanguage(), "pl") == 0 && + getScript(word[i]) == USCRIPT_LATIN) { + // In Polish, hyphens get repeated at the next line. To be safe, + // we will do this only if the next character is Latin. 
+ result[i] = HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE; + } else { + result[i] = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; + } + } else if (i > 1 && prevChar == CHAR_SOFT_HYPHEN) { + // Break after soft hyphens, but only if they don't start the word (a soft + // hyphen starting the word doesn't give any useful break opportunities). + // The type of the break is based on the script of the character we break + // on. + if (getScript(word[i]) == USCRIPT_ARABIC) { + // For Arabic, we need to look and see if the characters around the soft + // hyphen actually join. If they don't, we'll just insert a normal + // hyphen. + result[i] = getHyphTypeForArabic(word, len, i); + } else { + result[i] = hyphenationTypeBasedOnScript(word[i]); + } + } else if (prevChar == CHAR_MIDDLE_DOT && minPrefix < i && + i <= len - minSuffix && + ((word[i - 2] == 'l' && word[i] == 'l') || + (word[i - 2] == 'L' && word[i] == 'L')) && + strcmp(locale.getLanguage(), "ca") == 0) { + // In Catalan, "l·l" should break as "l-" on the first line + // and "l" on the next line. 
+ result[i] = HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN; + } else { + result[i] = HyphenationType::DONT_BREAK; + } + } } -HyphenationType Hyphenator::alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, - size_t len) { - const Header* header = getHeader(); - HyphenationType result = HyphenationType::BREAK_AND_INSERT_HYPHEN; - // TODO: check header magic - uint32_t alphabetVersion = header->alphabetVersion(); - if (alphabetVersion == 0) { - const AlphabetTable0* alphabet = header->alphabetTable0(); - uint32_t min_codepoint = alphabet->min_codepoint; - uint32_t max_codepoint = alphabet->max_codepoint; - alpha_codes[0] = 0; // word start - for (size_t i = 0; i < len; i++) { - uint16_t c = word[i]; - if (c < min_codepoint || c >= max_codepoint) { - return HyphenationType::DONT_BREAK; - } - uint8_t code = alphabet->data[c - min_codepoint]; - if (code == 0) { - return HyphenationType::DONT_BREAK; - } - if (result == HyphenationType::BREAK_AND_INSERT_HYPHEN) { - result = hyphenationTypeBasedOnScript(c); - } - alpha_codes[i + 1] = code; - } - alpha_codes[len + 1] = 0; // word termination - return result; - } else if (alphabetVersion == 1) { - const AlphabetTable1* alphabet = header->alphabetTable1(); - size_t n_entries = alphabet->n_entries; - const uint32_t* begin = alphabet->data; - const uint32_t* end = begin + n_entries; - alpha_codes[0] = 0; - for (size_t i = 0; i < len; i++) { - uint16_t c = word[i]; - auto p = std::lower_bound(begin, end, c << 11); - if (p == end) { - return HyphenationType::DONT_BREAK; - } - uint32_t entry = *p; - if (AlphabetTable1::codepoint(entry) != c) { - return HyphenationType::DONT_BREAK; - } - if (result == HyphenationType::BREAK_AND_INSERT_HYPHEN) { - result = hyphenationTypeBasedOnScript(c); - } - alpha_codes[i + 1] = AlphabetTable1::value(entry); - } - alpha_codes[len + 1] = 0; - return result; +HyphenationType Hyphenator::alphabetLookup(uint16_t* alpha_codes, + const uint16_t* word, + size_t len) { + const Header* header = 
getHeader(); + HyphenationType result = HyphenationType::BREAK_AND_INSERT_HYPHEN; + // TODO: check header magic + uint32_t alphabetVersion = header->alphabetVersion(); + if (alphabetVersion == 0) { + const AlphabetTable0* alphabet = header->alphabetTable0(); + uint32_t min_codepoint = alphabet->min_codepoint; + uint32_t max_codepoint = alphabet->max_codepoint; + alpha_codes[0] = 0; // word start + for (size_t i = 0; i < len; i++) { + uint16_t c = word[i]; + if (c < min_codepoint || c >= max_codepoint) { + return HyphenationType::DONT_BREAK; + } + uint8_t code = alphabet->data[c - min_codepoint]; + if (code == 0) { + return HyphenationType::DONT_BREAK; + } + if (result == HyphenationType::BREAK_AND_INSERT_HYPHEN) { + result = hyphenationTypeBasedOnScript(c); + } + alpha_codes[i + 1] = code; } - return HyphenationType::DONT_BREAK; + alpha_codes[len + 1] = 0; // word termination + return result; + } else if (alphabetVersion == 1) { + const AlphabetTable1* alphabet = header->alphabetTable1(); + size_t n_entries = alphabet->n_entries; + const uint32_t* begin = alphabet->data; + const uint32_t* end = begin + n_entries; + alpha_codes[0] = 0; + for (size_t i = 0; i < len; i++) { + uint16_t c = word[i]; + auto p = std::lower_bound(begin, end, c << 11); + if (p == end) { + return HyphenationType::DONT_BREAK; + } + uint32_t entry = *p; + if (AlphabetTable1::codepoint(entry) != c) { + return HyphenationType::DONT_BREAK; + } + if (result == HyphenationType::BREAK_AND_INSERT_HYPHEN) { + result = hyphenationTypeBasedOnScript(c); + } + alpha_codes[i + 1] = AlphabetTable1::value(entry); + } + alpha_codes[len + 1] = 0; + return result; + } + return HyphenationType::DONT_BREAK; } /** - * Internal implementation, after conversion to codes. All case folding and normalization - * has been done by now, and all characters have been found in the alphabet. - * Note: len here is the padded length including 0 codes at start and end. + * Internal implementation, after conversion to codes. 
All case folding and + *normalization has been done by now, and all characters have been found in the + *alphabet. Note: len here is the padded length including 0 codes at start and + *end. **/ -void Hyphenator::hyphenateFromCodes(HyphenationType* result, const uint16_t* codes, size_t len, - HyphenationType hyphenValue) { - static_assert(sizeof(HyphenationType) == sizeof(uint8_t), "HyphnationType must be uint8_t."); - // Reuse the result array as a buffer for calculating intermediate hyphenation numbers. - uint8_t* buffer = reinterpret_cast(result); - - const Header* header = getHeader(); - const Trie* trie = header->trieTable(); - const Pattern* pattern = header->patternTable(); - uint32_t char_mask = trie->char_mask; - uint32_t link_shift = trie->link_shift; - uint32_t link_mask = trie->link_mask; - uint32_t pattern_shift = trie->pattern_shift; - size_t maxOffset = len - minSuffix - 1; - for (size_t i = 0; i < len - 1; i++) { - uint32_t node = 0; // index into Trie table - for (size_t j = i; j < len; j++) { - uint16_t c = codes[j]; - uint32_t entry = trie->data[node + c]; - if ((entry & char_mask) == c) { - node = (entry & link_mask) >> link_shift; - } else { - break; - } - uint32_t pat_ix = trie->data[node] >> pattern_shift; - // pat_ix contains a 3-tuple of length, shift (number of trailing zeros), and an offset - // into the buf pool. This is the pattern for the substring (i..j) we just matched, - // which we combine (via point-wise max) into the buffer vector. 
- if (pat_ix != 0) { - uint32_t pat_entry = pattern->data[pat_ix]; - int pat_len = Pattern::len(pat_entry); - int pat_shift = Pattern::shift(pat_entry); - const uint8_t* pat_buf = pattern->buf(pat_entry); - int offset = j + 1 - (pat_len + pat_shift); - // offset is the index within buffer that lines up with the start of pat_buf - int start = std::max((int)minPrefix - offset, 0); - int end = std::min(pat_len, (int)maxOffset - offset); - for (int k = start; k < end; k++) { - buffer[offset + k] = std::max(buffer[offset + k], pat_buf[k]); - } - } +void Hyphenator::hyphenateFromCodes(HyphenationType* result, + const uint16_t* codes, + size_t len, + HyphenationType hyphenValue) { + static_assert(sizeof(HyphenationType) == sizeof(uint8_t), + "HyphnationType must be uint8_t."); + // Reuse the result array as a buffer for calculating intermediate hyphenation + // numbers. + uint8_t* buffer = reinterpret_cast(result); + + const Header* header = getHeader(); + const Trie* trie = header->trieTable(); + const Pattern* pattern = header->patternTable(); + uint32_t char_mask = trie->char_mask; + uint32_t link_shift = trie->link_shift; + uint32_t link_mask = trie->link_mask; + uint32_t pattern_shift = trie->pattern_shift; + size_t maxOffset = len - minSuffix - 1; + for (size_t i = 0; i < len - 1; i++) { + uint32_t node = 0; // index into Trie table + for (size_t j = i; j < len; j++) { + uint16_t c = codes[j]; + uint32_t entry = trie->data[node + c]; + if ((entry & char_mask) == c) { + node = (entry & link_mask) >> link_shift; + } else { + break; + } + uint32_t pat_ix = trie->data[node] >> pattern_shift; + // pat_ix contains a 3-tuple of length, shift (number of trailing zeros), + // and an offset into the buf pool. This is the pattern for the substring + // (i..j) we just matched, which we combine (via point-wise max) into the + // buffer vector. 
+ if (pat_ix != 0) { + uint32_t pat_entry = pattern->data[pat_ix]; + int pat_len = Pattern::len(pat_entry); + int pat_shift = Pattern::shift(pat_entry); + const uint8_t* pat_buf = pattern->buf(pat_entry); + int offset = j + 1 - (pat_len + pat_shift); + // offset is the index within buffer that lines up with the start of + // pat_buf + int start = std::max((int)minPrefix - offset, 0); + int end = std::min(pat_len, (int)maxOffset - offset); + for (int k = start; k < end; k++) { + buffer[offset + k] = std::max(buffer[offset + k], pat_buf[k]); } + } } - // Since the above calculation does not modify values outside - // [minPrefix, len - minSuffix], they are left as 0 = DONT_BREAK. - for (size_t i = minPrefix; i < maxOffset; i++) { - // Hyphenation opportunities happen when the hyphenation numbers are odd. - result[i] = (buffer[i] & 1u) ? hyphenValue : HyphenationType::DONT_BREAK; - } + } + // Since the above calculation does not modify values outside + // [minPrefix, len - minSuffix], they are left as 0 = DONT_BREAK. + for (size_t i = minPrefix; i < maxOffset; i++) { + // Hyphenation opportunities happen when the hyphenation numbers are odd. + result[i] = (buffer[i] & 1u) ? 
hyphenValue : HyphenationType::DONT_BREAK; + } } } // namespace minikin diff --git a/third_party/txt/src/minikin/Hyphenator.h b/third_party/txt/src/minikin/Hyphenator.h index 937fc646f55752d11f38a948938ece9e60f3b50d..a627a6078e8872078b14aa65d781350ca79cf0dc 100644 --- a/third_party/txt/src/minikin/Hyphenator.h +++ b/third_party/txt/src/minikin/Hyphenator.h @@ -20,12 +20,12 @@ #ifndef U_USING_ICU_NAMESPACE #define U_USING_ICU_NAMESPACE 0 -#endif // U_USING_ICU_NAMESPACE +#endif // U_USING_ICU_NAMESPACE -#include "unicode/locid.h" #include #include #include +#include "unicode/locid.h" #ifndef MINIKIN_HYPHENATOR_H #define MINIKIN_HYPHENATOR_H @@ -33,32 +33,38 @@ namespace minikin { enum class HyphenationType : uint8_t { - // Note: There are implicit assumptions scattered in the code that DONT_BREAK is 0. - - // Do not break. - DONT_BREAK = 0, - // Break the line and insert a normal hyphen. - BREAK_AND_INSERT_HYPHEN = 1, - // Break the line and insert an Armenian hyphen (U+058A). - BREAK_AND_INSERT_ARMENIAN_HYPHEN = 2, - // Break the line and insert a maqaf (Hebrew hyphen, U+05BE). - BREAK_AND_INSERT_MAQAF = 3, - // Break the line and insert a Canadian Syllabics hyphen (U+1400). - BREAK_AND_INSERT_UCAS_HYPHEN = 4, - // Break the line, but don't insert a hyphen. Used for cases when there is already a hyphen - // present or the script does not use a hyphen (e.g. in Malayalam). - BREAK_AND_DONT_INSERT_HYPHEN = 5, - // Break and replace the last code unit with hyphen. Used for Catalan "l·l" which hyphenates - // as "l-/l". - BREAK_AND_REPLACE_WITH_HYPHEN = 6, - // Break the line, and repeat the hyphen (which is the last character) at the beginning of the - // next line. Used in Polish, where "czerwono-niebieska" should hyphenate as - // "czerwono-/-niebieska". - BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE = 7, - // Break the line, insert a ZWJ and hyphen at the first line, and a ZWJ at the second line. 
- // This is used in Arabic script, mostly for writing systems of Central Asia. It's our default - // behavior when a soft hyphen is used in Arabic script. - BREAK_AND_INSERT_HYPHEN_AND_ZWJ = 8 + // Note: There are implicit assumptions scattered in the code that DONT_BREAK + // is 0. + + // Do not break. + DONT_BREAK = 0, + // Break the line and insert a normal hyphen. + BREAK_AND_INSERT_HYPHEN = 1, + // Break the line and insert an Armenian hyphen (U+058A). + BREAK_AND_INSERT_ARMENIAN_HYPHEN = 2, + // Break the line and insert a maqaf (Hebrew hyphen, U+05BE). + BREAK_AND_INSERT_MAQAF = 3, + // Break the line and insert a Canadian Syllabics hyphen (U+1400). + BREAK_AND_INSERT_UCAS_HYPHEN = 4, + // Break the line, but don't insert a hyphen. Used for cases when there is + // already a hyphen + // present or the script does not use a hyphen (e.g. in Malayalam). + BREAK_AND_DONT_INSERT_HYPHEN = 5, + // Break and replace the last code unit with hyphen. Used for Catalan "l·l" + // which hyphenates + // as "l-/l". + BREAK_AND_REPLACE_WITH_HYPHEN = 6, + // Break the line, and repeat the hyphen (which is the last character) at the + // beginning of the + // next line. Used in Polish, where "czerwono-niebieska" should hyphenate as + // "czerwono-/-niebieska". + BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE = 7, + // Break the line, insert a ZWJ and hyphen at the first line, and a ZWJ at the + // second line. + // This is used in Arabic script, mostly for writing systems of Central Asia. + // It's our default + // behavior when a soft hyphen is used in Arabic script. + BREAK_AND_INSERT_HYPHEN_AND_ZWJ = 8 }; // The hyphen edit represents an edit to the string when a word is @@ -67,112 +73,127 @@ enum class HyphenationType : uint8_t { // Note that a HyphenEdit can hold two types of edits at the same time, // One at the beginning of the string/line and one at the end. 
class HyphenEdit { -public: - static const uint32_t NO_EDIT = 0x00; - - static const uint32_t INSERT_HYPHEN_AT_END = 0x01; - static const uint32_t INSERT_ARMENIAN_HYPHEN_AT_END = 0x02; - static const uint32_t INSERT_MAQAF_AT_END = 0x03; - static const uint32_t INSERT_UCAS_HYPHEN_AT_END = 0x04; - static const uint32_t INSERT_ZWJ_AND_HYPHEN_AT_END = 0x05; - static const uint32_t REPLACE_WITH_HYPHEN_AT_END = 0x06; - static const uint32_t BREAK_AT_END = 0x07; - - static const uint32_t INSERT_HYPHEN_AT_START = 0x01 << 3; - static const uint32_t INSERT_ZWJ_AT_START = 0x02 << 3; - static const uint32_t BREAK_AT_START = 0x03 << 3; - - // Keep in sync with the definitions in the Java code at: - // frameworks/base/graphics/java/android/graphics/Paint.java - static const uint32_t MASK_END_OF_LINE = 0x07; - static const uint32_t MASK_START_OF_LINE = 0x03 << 3; - - inline static bool isReplacement(uint32_t hyph) { - return hyph == REPLACE_WITH_HYPHEN_AT_END; - } - - inline static bool isInsertion(uint32_t hyph) { - return (hyph == INSERT_HYPHEN_AT_END - || hyph == INSERT_ARMENIAN_HYPHEN_AT_END - || hyph == INSERT_MAQAF_AT_END - || hyph == INSERT_UCAS_HYPHEN_AT_END - || hyph == INSERT_ZWJ_AND_HYPHEN_AT_END - || hyph == INSERT_HYPHEN_AT_START - || hyph == INSERT_ZWJ_AT_START); - } - - const static uint32_t* getHyphenString(uint32_t hyph); - static uint32_t editForThisLine(HyphenationType type); - static uint32_t editForNextLine(HyphenationType type); - - HyphenEdit() : hyphen(NO_EDIT) { } - HyphenEdit(uint32_t hyphenInt) : hyphen(hyphenInt) { } // NOLINT(implicit) - uint32_t getHyphen() const { return hyphen; } - bool operator==(const HyphenEdit &other) const { return hyphen == other.hyphen; } - - uint32_t getEnd() const { return hyphen & MASK_END_OF_LINE; } - uint32_t getStart() const { return hyphen & MASK_START_OF_LINE; } - -private: - uint32_t hyphen; + public: + static const uint32_t NO_EDIT = 0x00; + + static const uint32_t INSERT_HYPHEN_AT_END = 0x01; + static const 
uint32_t INSERT_ARMENIAN_HYPHEN_AT_END = 0x02; + static const uint32_t INSERT_MAQAF_AT_END = 0x03; + static const uint32_t INSERT_UCAS_HYPHEN_AT_END = 0x04; + static const uint32_t INSERT_ZWJ_AND_HYPHEN_AT_END = 0x05; + static const uint32_t REPLACE_WITH_HYPHEN_AT_END = 0x06; + static const uint32_t BREAK_AT_END = 0x07; + + static const uint32_t INSERT_HYPHEN_AT_START = 0x01 << 3; + static const uint32_t INSERT_ZWJ_AT_START = 0x02 << 3; + static const uint32_t BREAK_AT_START = 0x03 << 3; + + // Keep in sync with the definitions in the Java code at: + // frameworks/base/graphics/java/android/graphics/Paint.java + static const uint32_t MASK_END_OF_LINE = 0x07; + static const uint32_t MASK_START_OF_LINE = 0x03 << 3; + + inline static bool isReplacement(uint32_t hyph) { + return hyph == REPLACE_WITH_HYPHEN_AT_END; + } + + inline static bool isInsertion(uint32_t hyph) { + return (hyph == INSERT_HYPHEN_AT_END || + hyph == INSERT_ARMENIAN_HYPHEN_AT_END || + hyph == INSERT_MAQAF_AT_END || hyph == INSERT_UCAS_HYPHEN_AT_END || + hyph == INSERT_ZWJ_AND_HYPHEN_AT_END || + hyph == INSERT_HYPHEN_AT_START || hyph == INSERT_ZWJ_AT_START); + } + + const static uint32_t* getHyphenString(uint32_t hyph); + static uint32_t editForThisLine(HyphenationType type); + static uint32_t editForNextLine(HyphenationType type); + + HyphenEdit() : hyphen(NO_EDIT) {} + HyphenEdit(uint32_t hyphenInt) : hyphen(hyphenInt) {} // NOLINT(implicit) + uint32_t getHyphen() const { return hyphen; } + bool operator==(const HyphenEdit& other) const { + return hyphen == other.hyphen; + } + + uint32_t getEnd() const { return hyphen & MASK_END_OF_LINE; } + uint32_t getStart() const { return hyphen & MASK_START_OF_LINE; } + + private: + uint32_t hyphen; }; // hyb file header; implementation details are in the .cpp file struct Header; class Hyphenator { -public: - // Compute the hyphenation of a word, storing the hyphenation in result vector. 
Each entry in - // the vector is a "hyphenation type" for a potential hyphenation that can be applied at the - // corresponding code unit offset in the word. - // - // Example: word is "hyphen", result is the following, corresponding to "hy-phen": - // [DONT_BREAK, DONT_BREAK, BREAK_AND_INSERT_HYPHEN, DONT_BREAK, DONT_BREAK, DONT_BREAK] - void hyphenate(std::vector* result, const uint16_t* word, size_t len, - const icu::Locale& locale); - - // Returns true if the codepoint is like U+2010 HYPHEN in line breaking and usage: a character - // immediately after which line breaks are allowed, but words containing it should not be - // automatically hyphenated. - static bool isLineBreakingHyphen(uint32_t cp); - - // pattern data is in binary format, as described in doc/hyb_file_format.md. Note: - // the caller is responsible for ensuring that the lifetime of the pattern data is - // at least as long as the Hyphenator object. - - // Note: nullptr is valid input, in which case the hyphenator only processes soft hyphens. - static Hyphenator* loadBinary(const uint8_t* patternData, size_t minPrefix, size_t minSuffix); - -private: - // apply various hyphenation rules including hard and soft hyphens, ignoring patterns - void hyphenateWithNoPatterns(HyphenationType* result, const uint16_t* word, size_t len, - const icu::Locale& locale); - - // Try looking up word in alphabet table, return DONT_BREAK if any code units fail to map. - // Otherwise, returns BREAK_AND_INSERT_HYPHEN, BREAK_AND_INSERT_ARMENIAN_HYPHEN, or - // BREAK_AND_DONT_INSERT_HYPHEN based on the the script of the characters seen. 
- // Note that this method writes len+2 entries into alpha_codes (including start and stop) - HyphenationType alphabetLookup(uint16_t* alpha_codes, const uint16_t* word, size_t len); - - // calculate hyphenation from patterns, assuming alphabet lookup has already been done - void hyphenateFromCodes(HyphenationType* result, const uint16_t* codes, size_t len, - HyphenationType hyphenValue); - - // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is used so - // that temporary buffers can be stack-allocated without waste, which is a slightly - // different use case. It measures UTF-16 code units. - static const size_t MAX_HYPHENATED_SIZE = 64; - - const uint8_t* patternData; - size_t minPrefix, minSuffix; - - // accessors for binary data - const Header* getHeader() const { - return reinterpret_cast(patternData); - } - + public: + // Compute the hyphenation of a word, storing the hyphenation in result + // vector. Each entry in the vector is a "hyphenation type" for a potential + // hyphenation that can be applied at the corresponding code unit offset in + // the word. + // + // Example: word is "hyphen", result is the following, corresponding to + // "hy-phen": [DONT_BREAK, DONT_BREAK, BREAK_AND_INSERT_HYPHEN, DONT_BREAK, + // DONT_BREAK, DONT_BREAK] + void hyphenate(std::vector* result, + const uint16_t* word, + size_t len, + const icu::Locale& locale); + + // Returns true if the codepoint is like U+2010 HYPHEN in line breaking and + // usage: a character immediately after which line breaks are allowed, but + // words containing it should not be automatically hyphenated. + static bool isLineBreakingHyphen(uint32_t cp); + + // pattern data is in binary format, as described in doc/hyb_file_format.md. + // Note: the caller is responsible for ensuring that the lifetime of the + // pattern data is at least as long as the Hyphenator object. + + // Note: nullptr is valid input, in which case the hyphenator only processes + // soft hyphens. 
+ static Hyphenator* loadBinary(const uint8_t* patternData, + size_t minPrefix, + size_t minSuffix); + + private: + // apply various hyphenation rules including hard and soft hyphens, ignoring + // patterns + void hyphenateWithNoPatterns(HyphenationType* result, + const uint16_t* word, + size_t len, + const icu::Locale& locale); + + // Try looking up word in alphabet table, return DONT_BREAK if any code units + // fail to map. Otherwise, returns BREAK_AND_INSERT_HYPHEN, + // BREAK_AND_INSERT_ARMENIAN_HYPHEN, or BREAK_AND_DONT_INSERT_HYPHEN based on + // the the script of the characters seen. Note that this method writes len+2 + // entries into alpha_codes (including start and stop) + HyphenationType alphabetLookup(uint16_t* alpha_codes, + const uint16_t* word, + size_t len); + + // calculate hyphenation from patterns, assuming alphabet lookup has already + // been done + void hyphenateFromCodes(HyphenationType* result, + const uint16_t* codes, + size_t len, + HyphenationType hyphenValue); + + // See also LONGEST_HYPHENATED_WORD in LineBreaker.cpp. Here the constant is + // used so that temporary buffers can be stack-allocated without waste, which + // is a slightly different use case. It measures UTF-16 code units. 
+ static const size_t MAX_HYPHENATED_SIZE = 64; + + const uint8_t* patternData; + size_t minPrefix, minSuffix; + + // accessors for binary data + const Header* getHeader() const { + return reinterpret_cast(patternData); + } }; } // namespace minikin -#endif // MINIKIN_HYPHENATOR_H +#endif // MINIKIN_HYPHENATOR_H diff --git a/third_party/txt/src/minikin/Layout.cpp b/third_party/txt/src/minikin/Layout.cpp index 59a05615c3763d202a2d6cd37eb092cf7a238de4..2079795f1e3d1b21d8f4641da5791e2abd7ee52c 100644 --- a/third_party/txt/src/minikin/Layout.cpp +++ b/third_party/txt/src/minikin/Layout.cpp @@ -16,13 +16,13 @@ #define LOG_TAG "Minikin" +#include +#include +#include #include #include #include // for debugging -#include #include -#include -#include #include #include @@ -32,13 +32,13 @@ #include #include +#include +#include #include "FontLanguage.h" #include "FontLanguageListCache.h" #include "HbFontCache.h" #include "LayoutUtils.h" #include "MinikinInternal.h" -#include -#include using std::string; using std::vector; @@ -48,1074 +48,1171 @@ namespace minikin { const int kDirection_Mask = 0x1; struct LayoutContext { - MinikinPaint paint; - FontStyle style; - std::vector hbFonts; // parallel to mFaces - - void clearHbFonts() { - for (size_t i = 0; i < hbFonts.size(); i++) { - hb_font_set_funcs(hbFonts[i], nullptr, nullptr, nullptr); - hb_font_destroy(hbFonts[i]); - } - hbFonts.clear(); + MinikinPaint paint; + FontStyle style; + std::vector hbFonts; // parallel to mFaces + + void clearHbFonts() { + for (size_t i = 0; i < hbFonts.size(); i++) { + hb_font_set_funcs(hbFonts[i], nullptr, nullptr, nullptr); + hb_font_destroy(hbFonts[i]); } + hbFonts.clear(); + } }; // Layout cache datatypes class LayoutCacheKey { -public: - LayoutCacheKey(const std::shared_ptr& collection, const MinikinPaint& paint, - FontStyle style, const uint16_t* chars, size_t start, size_t count, size_t nchars, - bool dir) - : mChars(chars), mNchars(nchars), - mStart(start), mCount(count), 
mId(collection->getId()), mStyle(style), - mSize(paint.size), mScaleX(paint.scaleX), mSkewX(paint.skewX), - mLetterSpacing(paint.letterSpacing), - mPaintFlags(paint.paintFlags), mHyphenEdit(paint.hyphenEdit), mIsRtl(dir), - mHash(computeHash()) { - } - bool operator==(const LayoutCacheKey &other) const; - - android::hash_t hash() const { - return mHash; - } - - void copyText() { - uint16_t* charsCopy = new uint16_t[mNchars]; - memcpy(charsCopy, mChars, mNchars * sizeof(uint16_t)); - mChars = charsCopy; - } - void freeText() { - delete[] mChars; - mChars = NULL; - } - - void doLayout(Layout* layout, LayoutContext* ctx, - const std::shared_ptr& collection) const { - layout->mAdvances.resize(mCount, 0); - ctx->clearHbFonts(); - layout->doLayoutRun(mChars, mStart, mCount, mNchars, mIsRtl, ctx, collection); - } - -private: - const uint16_t* mChars; - size_t mNchars; - size_t mStart; - size_t mCount; - uint32_t mId; // for the font collection - FontStyle mStyle; - float mSize; - float mScaleX; - float mSkewX; - float mLetterSpacing; - int32_t mPaintFlags; - HyphenEdit mHyphenEdit; - bool mIsRtl; - // Note: any fields added to MinikinPaint must also be reflected here. 
- // TODO: language matching (possibly integrate into style) - android::hash_t mHash; - - android::hash_t computeHash() const; + public: + LayoutCacheKey(const std::shared_ptr& collection, + const MinikinPaint& paint, + FontStyle style, + const uint16_t* chars, + size_t start, + size_t count, + size_t nchars, + bool dir) + : mChars(chars), + mNchars(nchars), + mStart(start), + mCount(count), + mId(collection->getId()), + mStyle(style), + mSize(paint.size), + mScaleX(paint.scaleX), + mSkewX(paint.skewX), + mLetterSpacing(paint.letterSpacing), + mPaintFlags(paint.paintFlags), + mHyphenEdit(paint.hyphenEdit), + mIsRtl(dir), + mHash(computeHash()) {} + bool operator==(const LayoutCacheKey& other) const; + + android::hash_t hash() const { return mHash; } + + void copyText() { + uint16_t* charsCopy = new uint16_t[mNchars]; + memcpy(charsCopy, mChars, mNchars * sizeof(uint16_t)); + mChars = charsCopy; + } + void freeText() { + delete[] mChars; + mChars = NULL; + } + + void doLayout(Layout* layout, + LayoutContext* ctx, + const std::shared_ptr& collection) const { + layout->mAdvances.resize(mCount, 0); + ctx->clearHbFonts(); + layout->doLayoutRun(mChars, mStart, mCount, mNchars, mIsRtl, ctx, + collection); + } + + private: + const uint16_t* mChars; + size_t mNchars; + size_t mStart; + size_t mCount; + uint32_t mId; // for the font collection + FontStyle mStyle; + float mSize; + float mScaleX; + float mSkewX; + float mLetterSpacing; + int32_t mPaintFlags; + HyphenEdit mHyphenEdit; + bool mIsRtl; + // Note: any fields added to MinikinPaint must also be reflected here. 
+ // TODO: language matching (possibly integrate into style) + android::hash_t mHash; + + android::hash_t computeHash() const; }; class LayoutCache : private android::OnEntryRemoved { -public: - LayoutCache() : mCache(kMaxEntries) { - mCache.setOnEntryRemovedListener(this); - } - - void clear() { - mCache.clear(); + public: + LayoutCache() : mCache(kMaxEntries) { + mCache.setOnEntryRemovedListener(this); + } + + void clear() { mCache.clear(); } + + Layout* get(LayoutCacheKey& key, + LayoutContext* ctx, + const std::shared_ptr& collection) { + Layout* layout = mCache.get(key); + if (layout == NULL) { + key.copyText(); + layout = new Layout(); + key.doLayout(layout, ctx, collection); + mCache.put(key, layout); } + return layout; + } - Layout* get(LayoutCacheKey& key, LayoutContext* ctx, - const std::shared_ptr& collection) { - Layout* layout = mCache.get(key); - if (layout == NULL) { - key.copyText(); - layout = new Layout(); - key.doLayout(layout, ctx, collection); - mCache.put(key, layout); - } - return layout; - } - -private: - // callback for OnEntryRemoved - void operator()(LayoutCacheKey& key, Layout*& value) { - key.freeText(); - delete value; - } + private: + // callback for OnEntryRemoved + void operator()(LayoutCacheKey& key, Layout*& value) { + key.freeText(); + delete value; + } - android::LruCache mCache; + android::LruCache mCache; - //static const size_t kMaxEntries = LruCache::kUnlimitedCapacity; + // static const size_t kMaxEntries = LruCache::kUnlimitedCapacity; - // TODO: eviction based on memory footprint; for now, we just use a constant - // number of strings - static const size_t kMaxEntries = 5000; + // TODO: eviction based on memory footprint; for now, we just use a constant + // number of strings + static const size_t kMaxEntries = 5000; }; -static unsigned int disabledDecomposeCompatibility(hb_unicode_funcs_t*, hb_codepoint_t, - hb_codepoint_t*, void*) { - return 0; +static unsigned int disabledDecomposeCompatibility(hb_unicode_funcs_t*, + 
hb_codepoint_t, + hb_codepoint_t*, + void*) { + return 0; } class LayoutEngine { -public: - LayoutEngine() { - unicodeFunctions = hb_unicode_funcs_create(hb_icu_get_unicode_funcs()); - /* Disable the function used for compatibility decomposition */ - hb_unicode_funcs_set_decompose_compatibility_func( - unicodeFunctions, disabledDecomposeCompatibility, NULL, NULL); - hbBuffer = hb_buffer_create(); - hb_buffer_set_unicode_funcs(hbBuffer, unicodeFunctions); - } - - hb_buffer_t* hbBuffer; - hb_unicode_funcs_t* unicodeFunctions; - LayoutCache layoutCache; - - static LayoutEngine& getInstance() { - static LayoutEngine* instance = new LayoutEngine(); - return *instance; - } + public: + LayoutEngine() { + unicodeFunctions = hb_unicode_funcs_create(hb_icu_get_unicode_funcs()); + /* Disable the function used for compatibility decomposition */ + hb_unicode_funcs_set_decompose_compatibility_func( + unicodeFunctions, disabledDecomposeCompatibility, NULL, NULL); + hbBuffer = hb_buffer_create(); + hb_buffer_set_unicode_funcs(hbBuffer, unicodeFunctions); + } + + hb_buffer_t* hbBuffer; + hb_unicode_funcs_t* unicodeFunctions; + LayoutCache layoutCache; + + static LayoutEngine& getInstance() { + static LayoutEngine* instance = new LayoutEngine(); + return *instance; + } }; bool LayoutCacheKey::operator==(const LayoutCacheKey& other) const { - return mId == other.mId - && mStart == other.mStart - && mCount == other.mCount - && mStyle == other.mStyle - && mSize == other.mSize - && mScaleX == other.mScaleX - && mSkewX == other.mSkewX - && mLetterSpacing == other.mLetterSpacing - && mPaintFlags == other.mPaintFlags - && mHyphenEdit == other.mHyphenEdit - && mIsRtl == other.mIsRtl - && mNchars == other.mNchars - && !memcmp(mChars, other.mChars, mNchars * sizeof(uint16_t)); + return mId == other.mId && mStart == other.mStart && mCount == other.mCount && + mStyle == other.mStyle && mSize == other.mSize && + mScaleX == other.mScaleX && mSkewX == other.mSkewX && + mLetterSpacing == 
other.mLetterSpacing && + mPaintFlags == other.mPaintFlags && mHyphenEdit == other.mHyphenEdit && + mIsRtl == other.mIsRtl && mNchars == other.mNchars && + !memcmp(mChars, other.mChars, mNchars * sizeof(uint16_t)); } android::hash_t LayoutCacheKey::computeHash() const { - uint32_t hash = android::JenkinsHashMix(0, mId); - hash = android::JenkinsHashMix(hash, mStart); - hash = android::JenkinsHashMix(hash, mCount); - hash = android::JenkinsHashMix(hash, hash_type(mStyle)); - hash = android::JenkinsHashMix(hash, hash_type(mSize)); - hash = android::JenkinsHashMix(hash, hash_type(mScaleX)); - hash = android::JenkinsHashMix(hash, hash_type(mSkewX)); - hash = android::JenkinsHashMix(hash, hash_type(mLetterSpacing)); - hash = android::JenkinsHashMix(hash, hash_type(mPaintFlags)); - hash = android::JenkinsHashMix(hash, hash_type(mHyphenEdit.getHyphen())); - hash = android::JenkinsHashMix(hash, hash_type(mIsRtl)); - hash = android::JenkinsHashMixShorts(hash, mChars, mNchars); - return android::JenkinsHashWhiten(hash); + uint32_t hash = android::JenkinsHashMix(0, mId); + hash = android::JenkinsHashMix(hash, mStart); + hash = android::JenkinsHashMix(hash, mCount); + hash = android::JenkinsHashMix(hash, hash_type(mStyle)); + hash = android::JenkinsHashMix(hash, hash_type(mSize)); + hash = android::JenkinsHashMix(hash, hash_type(mScaleX)); + hash = android::JenkinsHashMix(hash, hash_type(mSkewX)); + hash = android::JenkinsHashMix(hash, hash_type(mLetterSpacing)); + hash = android::JenkinsHashMix(hash, hash_type(mPaintFlags)); + hash = android::JenkinsHashMix(hash, hash_type(mHyphenEdit.getHyphen())); + hash = android::JenkinsHashMix(hash, hash_type(mIsRtl)); + hash = android::JenkinsHashMixShorts(hash, mChars, mNchars); + return android::JenkinsHashWhiten(hash); } android::hash_t hash_type(const LayoutCacheKey& key) { - return key.hash(); + return key.hash(); } void MinikinRect::join(const MinikinRect& r) { - if (isEmpty()) { - set(r); - } else if (!r.isEmpty()) { - mLeft = 
std::min(mLeft, r.mLeft); - mTop = std::min(mTop, r.mTop); - mRight = std::max(mRight, r.mRight); - mBottom = std::max(mBottom, r.mBottom); - } + if (isEmpty()) { + set(r); + } else if (!r.isEmpty()) { + mLeft = std::min(mLeft, r.mLeft); + mTop = std::min(mTop, r.mTop); + mRight = std::max(mRight, r.mRight); + mBottom = std::max(mBottom, r.mBottom); + } } void Layout::reset() { - mGlyphs.clear(); - mFaces.clear(); - mBounds.setEmpty(); - mAdvances.clear(); - mAdvance = 0; + mGlyphs.clear(); + mFaces.clear(); + mBounds.setEmpty(); + mAdvances.clear(); + mAdvance = 0; } -static hb_position_t harfbuzzGetGlyphHorizontalAdvance(hb_font_t* /* hbFont */, void* fontData, - hb_codepoint_t glyph, void* /* userData */) { - MinikinPaint* paint = reinterpret_cast(fontData); - float advance = paint->font->GetHorizontalAdvance(glyph, *paint); - return 256 * advance + 0.5; +static hb_position_t harfbuzzGetGlyphHorizontalAdvance(hb_font_t* /* hbFont */, + void* fontData, + hb_codepoint_t glyph, + void* /* userData */) { + MinikinPaint* paint = reinterpret_cast(fontData); + float advance = paint->font->GetHorizontalAdvance(glyph, *paint); + return 256 * advance + 0.5; } -static hb_bool_t harfbuzzGetGlyphHorizontalOrigin(hb_font_t* /* hbFont */, void* /* fontData */, - hb_codepoint_t /* glyph */, hb_position_t* /* x */, hb_position_t* /* y */, - void* /* userData */) { - // Just return true, following the way that Harfbuzz-FreeType - // implementation does. - return true; +static hb_bool_t harfbuzzGetGlyphHorizontalOrigin(hb_font_t* /* hbFont */, + void* /* fontData */, + hb_codepoint_t /* glyph */, + hb_position_t* /* x */, + hb_position_t* /* y */, + void* /* userData */) { + // Just return true, following the way that Harfbuzz-FreeType + // implementation does. 
+ return true; } hb_font_funcs_t* getHbFontFuncs(bool forColorBitmapFont) { - assertMinikinLocked(); - - static hb_font_funcs_t* hbFuncs = nullptr; - static hb_font_funcs_t* hbFuncsForColorBitmap = nullptr; - - hb_font_funcs_t** funcs = forColorBitmapFont ? &hbFuncs : &hbFuncsForColorBitmap; - if (*funcs == nullptr) { - *funcs = hb_font_funcs_create(); - if (forColorBitmapFont) { - // Don't override the h_advance function since we use HarfBuzz's implementation for - // emoji for performance reasons. - // Note that it is technically possible for a TrueType font to have outline and embedded - // bitmap at the same time. We ignore modified advances of hinted outline glyphs in that - // case. - } else { - // Override the h_advance function since we can't use HarfBuzz's implemenation. It may - // return the wrong value if the font uses hinting aggressively. - hb_font_funcs_set_glyph_h_advance_func(*funcs, harfbuzzGetGlyphHorizontalAdvance, 0, 0); - } - hb_font_funcs_set_glyph_h_origin_func(*funcs, harfbuzzGetGlyphHorizontalOrigin, 0, 0); - hb_font_funcs_make_immutable(*funcs); + assertMinikinLocked(); + + static hb_font_funcs_t* hbFuncs = nullptr; + static hb_font_funcs_t* hbFuncsForColorBitmap = nullptr; + + hb_font_funcs_t** funcs = + forColorBitmapFont ? &hbFuncs : &hbFuncsForColorBitmap; + if (*funcs == nullptr) { + *funcs = hb_font_funcs_create(); + if (forColorBitmapFont) { + // Don't override the h_advance function since we use HarfBuzz's + // implementation for emoji for performance reasons. Note that it is + // technically possible for a TrueType font to have outline and embedded + // bitmap at the same time. We ignore modified advances of hinted outline + // glyphs in that case. + } else { + // Override the h_advance function since we can't use HarfBuzz's + // implemenation. It may return the wrong value if the font uses hinting + // aggressively. 
+ hb_font_funcs_set_glyph_h_advance_func( + *funcs, harfbuzzGetGlyphHorizontalAdvance, 0, 0); } - return *funcs; + hb_font_funcs_set_glyph_h_origin_func( + *funcs, harfbuzzGetGlyphHorizontalOrigin, 0, 0); + hb_font_funcs_make_immutable(*funcs); + } + return *funcs; } static bool isColorBitmapFont(hb_font_t* font) { - hb_face_t* face = hb_font_get_face(font); - HbBlob cbdt(hb_face_reference_table(face, HB_TAG('C', 'B', 'D', 'T'))); - return cbdt.size() > 0; + hb_face_t* face = hb_font_get_face(font); + HbBlob cbdt(hb_face_reference_table(face, HB_TAG('C', 'B', 'D', 'T'))); + return cbdt.size() > 0; } -static float HBFixedToFloat(hb_position_t v) -{ - return scalbnf (v, -8); +static float HBFixedToFloat(hb_position_t v) { + return scalbnf(v, -8); } -static hb_position_t HBFloatToFixed(float v) -{ - return scalbnf (v, +8); +static hb_position_t HBFloatToFixed(float v) { + return scalbnf(v, +8); } void Layout::dump() const { - for (size_t i = 0; i < mGlyphs.size(); i++) { - const LayoutGlyph& glyph = mGlyphs[i]; - std::cout << glyph.glyph_id << ": " << glyph.x << ", " << glyph.y << std::endl; - } + for (size_t i = 0; i < mGlyphs.size(); i++) { + const LayoutGlyph& glyph = mGlyphs[i]; + std::cout << glyph.glyph_id << ": " << glyph.x << ", " << glyph.y + << std::endl; + } } int Layout::findFace(const FakedFont& face, LayoutContext* ctx) { - unsigned int ix; - for (ix = 0; ix < mFaces.size(); ix++) { - if (mFaces[ix].font == face.font) { - return ix; - } + unsigned int ix; + for (ix = 0; ix < mFaces.size(); ix++) { + if (mFaces[ix].font == face.font) { + return ix; } - mFaces.push_back(face); - // Note: ctx == NULL means we're copying from the cache, no need to create - // corresponding hb_font object. - if (ctx != NULL) { - hb_font_t* font = getHbFontLocked(face.font); - // Temporarily removed to fix advance integer rounding. - // This is likely due to very old versions of harfbuzz and ICU. 
- // hb_font_set_funcs(font, getHbFontFuncs(isColorBitmapFont(font)), &ctx->paint, 0); - ctx->hbFonts.push_back(font); - } - return ix; + } + mFaces.push_back(face); + // Note: ctx == NULL means we're copying from the cache, no need to create + // corresponding hb_font object. + if (ctx != NULL) { + hb_font_t* font = getHbFontLocked(face.font); + // Temporarily removed to fix advance integer rounding. + // This is likely due to very old versions of harfbuzz and ICU. + // hb_font_set_funcs(font, getHbFontFuncs(isColorBitmapFont(font)), + // &ctx->paint, 0); + ctx->hbFonts.push_back(font); + } + return ix; } static hb_script_t codePointToScript(hb_codepoint_t codepoint) { - static hb_unicode_funcs_t* u = 0; - if (!u) { - u = LayoutEngine::getInstance().unicodeFunctions; - } - return hb_unicode_script(u, codepoint); + static hb_unicode_funcs_t* u = 0; + if (!u) { + u = LayoutEngine::getInstance().unicodeFunctions; + } + return hb_unicode_script(u, codepoint); } -static hb_codepoint_t decodeUtf16(const uint16_t* chars, size_t len, ssize_t* iter) { - const uint16_t v = chars[(*iter)++]; - // test whether v in (0xd800..0xdfff), lead or trail surrogate - if ((v & 0xf800) == 0xd800) { - // test whether v in (0xd800..0xdbff), lead surrogate - if (size_t(*iter) < len && (v & 0xfc00) == 0xd800) { - const uint16_t v2 = chars[(*iter)++]; - // test whether v2 in (0xdc00..0xdfff), trail surrogate - if ((v2 & 0xfc00) == 0xdc00) { - // (0xd800 0xdc00) in utf-16 maps to 0x10000 in ucs-32 - const hb_codepoint_t delta = (0xd800 << 10) + 0xdc00 - 0x10000; - return (((hb_codepoint_t)v) << 10) + v2 - delta; - } - (*iter) -= 1; - return 0xFFFDu; - } else { - return 0xFFFDu; - } +static hb_codepoint_t decodeUtf16(const uint16_t* chars, + size_t len, + ssize_t* iter) { + const uint16_t v = chars[(*iter)++]; + // test whether v in (0xd800..0xdfff), lead or trail surrogate + if ((v & 0xf800) == 0xd800) { + // test whether v in (0xd800..0xdbff), lead surrogate + if (size_t(*iter) < len && (v & 
0xfc00) == 0xd800) { + const uint16_t v2 = chars[(*iter)++]; + // test whether v2 in (0xdc00..0xdfff), trail surrogate + if ((v2 & 0xfc00) == 0xdc00) { + // (0xd800 0xdc00) in utf-16 maps to 0x10000 in ucs-32 + const hb_codepoint_t delta = (0xd800 << 10) + 0xdc00 - 0x10000; + return (((hb_codepoint_t)v) << 10) + v2 - delta; + } + (*iter) -= 1; + return 0xFFFDu; } else { - return v; + return 0xFFFDu; } + } else { + return v; + } } -static hb_script_t getScriptRun(const uint16_t* chars, size_t len, ssize_t* iter) { - if (size_t(*iter) == len) { - return HB_SCRIPT_UNKNOWN; - } - uint32_t cp = decodeUtf16(chars, len, iter); - hb_script_t current_script = codePointToScript(cp); - for (;;) { - if (size_t(*iter) == len) - break; - const ssize_t prev_iter = *iter; - cp = decodeUtf16(chars, len, iter); - const hb_script_t script = codePointToScript(cp); - if (script != current_script) { - if (current_script == HB_SCRIPT_INHERITED || - current_script == HB_SCRIPT_COMMON) { - current_script = script; - } else if (script == HB_SCRIPT_INHERITED || - script == HB_SCRIPT_COMMON) { - continue; - } else { - *iter = prev_iter; - break; - } - } - } - if (current_script == HB_SCRIPT_INHERITED) { - current_script = HB_SCRIPT_COMMON; +static hb_script_t getScriptRun(const uint16_t* chars, + size_t len, + ssize_t* iter) { + if (size_t(*iter) == len) { + return HB_SCRIPT_UNKNOWN; + } + uint32_t cp = decodeUtf16(chars, len, iter); + hb_script_t current_script = codePointToScript(cp); + for (;;) { + if (size_t(*iter) == len) + break; + const ssize_t prev_iter = *iter; + cp = decodeUtf16(chars, len, iter); + const hb_script_t script = codePointToScript(cp); + if (script != current_script) { + if (current_script == HB_SCRIPT_INHERITED || + current_script == HB_SCRIPT_COMMON) { + current_script = script; + } else if (script == HB_SCRIPT_INHERITED || script == HB_SCRIPT_COMMON) { + continue; + } else { + *iter = prev_iter; + break; + } } + } + if (current_script == HB_SCRIPT_INHERITED) { + 
current_script = HB_SCRIPT_COMMON; + } - return current_script; + return current_script; } /** - * Disable certain scripts (mostly those with cursive connection) from having letterspacing - * applied. See https://github.com/behdad/harfbuzz/issues/64 for more details. + * Disable certain scripts (mostly those with cursive connection) from having + * letterspacing applied. See https://github.com/behdad/harfbuzz/issues/64 for + * more details. */ static bool isScriptOkForLetterspacing(hb_script_t script) { - return !( - script == HB_SCRIPT_ARABIC || - script == HB_SCRIPT_NKO || - script == HB_SCRIPT_PSALTER_PAHLAVI || - script == HB_SCRIPT_MANDAIC || - script == HB_SCRIPT_MONGOLIAN || - script == HB_SCRIPT_PHAGS_PA || - script == HB_SCRIPT_DEVANAGARI || - script == HB_SCRIPT_BENGALI || - script == HB_SCRIPT_GURMUKHI || - script == HB_SCRIPT_MODI || - script == HB_SCRIPT_SHARADA || - script == HB_SCRIPT_SYLOTI_NAGRI || - script == HB_SCRIPT_TIRHUTA || - script == HB_SCRIPT_OGHAM - ); + return !(script == HB_SCRIPT_ARABIC || script == HB_SCRIPT_NKO || + script == HB_SCRIPT_PSALTER_PAHLAVI || script == HB_SCRIPT_MANDAIC || + script == HB_SCRIPT_MONGOLIAN || script == HB_SCRIPT_PHAGS_PA || + script == HB_SCRIPT_DEVANAGARI || script == HB_SCRIPT_BENGALI || + script == HB_SCRIPT_GURMUKHI || script == HB_SCRIPT_MODI || + script == HB_SCRIPT_SHARADA || script == HB_SCRIPT_SYLOTI_NAGRI || + script == HB_SCRIPT_TIRHUTA || script == HB_SCRIPT_OGHAM); } class BidiText { -public: - class Iter { - public: - struct RunInfo { - int32_t mRunStart; - int32_t mRunLength; - bool mIsRtl; - }; - - Iter(UBiDi* bidi, size_t start, size_t end, size_t runIndex, size_t runCount, bool isRtl); - - bool operator!= (const Iter& other) const { - return mIsEnd != other.mIsEnd || mNextRunIndex != other.mNextRunIndex - || mBidi != other.mBidi; - } - - const RunInfo& operator* () const { - return mRunInfo; - } - - const Iter& operator++ () { - updateRunInfo(); - return *this; - } - - private: - UBiDi* 
const mBidi; - bool mIsEnd; - size_t mNextRunIndex; - const size_t mRunCount; - const int32_t mStart; - const int32_t mEnd; - RunInfo mRunInfo; - - void updateRunInfo(); + public: + class Iter { + public: + struct RunInfo { + int32_t mRunStart; + int32_t mRunLength; + bool mIsRtl; }; - BidiText(const uint16_t* buf, size_t start, size_t count, size_t bufSize, int bidiFlags); + Iter(UBiDi* bidi, + size_t start, + size_t end, + size_t runIndex, + size_t runCount, + bool isRtl); - ~BidiText() { - if (mBidi) { - ubidi_close(mBidi); - } + bool operator!=(const Iter& other) const { + return mIsEnd != other.mIsEnd || mNextRunIndex != other.mNextRunIndex || + mBidi != other.mBidi; } - Iter begin () const { - return Iter(mBidi, mStart, mEnd, 0, mRunCount, mIsRtl); + const RunInfo& operator*() const { return mRunInfo; } + + const Iter& operator++() { + updateRunInfo(); + return *this; } - Iter end() const { - return Iter(mBidi, mStart, mEnd, mRunCount, mRunCount, mIsRtl); + private: + UBiDi* const mBidi; + bool mIsEnd; + size_t mNextRunIndex; + const size_t mRunCount; + const int32_t mStart; + const int32_t mEnd; + RunInfo mRunInfo; + + void updateRunInfo(); + }; + + BidiText(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + int bidiFlags); + + ~BidiText() { + if (mBidi) { + ubidi_close(mBidi); } + } + + Iter begin() const { return Iter(mBidi, mStart, mEnd, 0, mRunCount, mIsRtl); } + + Iter end() const { + return Iter(mBidi, mStart, mEnd, mRunCount, mRunCount, mIsRtl); + } -private: - const size_t mStart; - const size_t mEnd; - const size_t mBufSize; - UBiDi* mBidi; - size_t mRunCount; - bool mIsRtl; + private: + const size_t mStart; + const size_t mEnd; + const size_t mBufSize; + UBiDi* mBidi; + size_t mRunCount; + bool mIsRtl; - BidiText(const BidiText&) = delete; - void operator=(const BidiText&) = delete; + BidiText(const BidiText&) = delete; + void operator=(const BidiText&) = delete; }; -BidiText::Iter::Iter(UBiDi* bidi, size_t start, size_t end, 
size_t runIndex, size_t runCount, - bool isRtl) - : mBidi(bidi), mIsEnd(runIndex == runCount), mNextRunIndex(runIndex), mRunCount(runCount), - mStart(start), mEnd(end), mRunInfo() { - if (mRunCount == 1) { - mRunInfo.mRunStart = start; - mRunInfo.mRunLength = end - start; - mRunInfo.mIsRtl = isRtl; - mNextRunIndex = mRunCount; - return; - } - updateRunInfo(); +BidiText::Iter::Iter(UBiDi* bidi, + size_t start, + size_t end, + size_t runIndex, + size_t runCount, + bool isRtl) + : mBidi(bidi), + mIsEnd(runIndex == runCount), + mNextRunIndex(runIndex), + mRunCount(runCount), + mStart(start), + mEnd(end), + mRunInfo() { + if (mRunCount == 1) { + mRunInfo.mRunStart = start; + mRunInfo.mRunLength = end - start; + mRunInfo.mIsRtl = isRtl; + mNextRunIndex = mRunCount; + return; + } + updateRunInfo(); } void BidiText::Iter::updateRunInfo() { - if (mNextRunIndex == mRunCount) { - // All runs have been iterated. - mIsEnd = true; - return; - } - int32_t startRun = -1; - int32_t lengthRun = -1; - const UBiDiDirection runDir = ubidi_getVisualRun(mBidi, mNextRunIndex, &startRun, &lengthRun); - mNextRunIndex++; - if (startRun == -1 || lengthRun == -1) { - ALOGE("invalid visual run"); - // skip the invalid run. - updateRunInfo(); - return; - } - const int32_t runEnd = std::min(startRun + lengthRun, mEnd); - mRunInfo.mRunStart = std::max(startRun, mStart); - mRunInfo.mRunLength = runEnd - mRunInfo.mRunStart; - if (mRunInfo.mRunLength <= 0) { - // skip the empty run. - updateRunInfo(); - return; - } - mRunInfo.mIsRtl = (runDir == UBIDI_RTL); + if (mNextRunIndex == mRunCount) { + // All runs have been iterated. + mIsEnd = true; + return; + } + int32_t startRun = -1; + int32_t lengthRun = -1; + const UBiDiDirection runDir = + ubidi_getVisualRun(mBidi, mNextRunIndex, &startRun, &lengthRun); + mNextRunIndex++; + if (startRun == -1 || lengthRun == -1) { + ALOGE("invalid visual run"); + // skip the invalid run. 
+ updateRunInfo(); + return; + } + const int32_t runEnd = std::min(startRun + lengthRun, mEnd); + mRunInfo.mRunStart = std::max(startRun, mStart); + mRunInfo.mRunLength = runEnd - mRunInfo.mRunStart; + if (mRunInfo.mRunLength <= 0) { + // skip the empty run. + updateRunInfo(); + return; + } + mRunInfo.mIsRtl = (runDir == UBIDI_RTL); } -BidiText::BidiText(const uint16_t* buf, size_t start, size_t count, size_t bufSize, int bidiFlags) - : mStart(start), mEnd(start + count), mBufSize(bufSize), mBidi(NULL), mRunCount(1), +BidiText::BidiText(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + int bidiFlags) + : mStart(start), + mEnd(start + count), + mBufSize(bufSize), + mBidi(NULL), + mRunCount(1), mIsRtl((bidiFlags & kDirection_Mask) != 0) { - if (bidiFlags == kBidi_Force_LTR || bidiFlags == kBidi_Force_RTL) { - // force single run. - return; - } - mBidi = ubidi_open(); - if (!mBidi) { - ALOGE("error creating bidi object"); - return; - } - UErrorCode status = U_ZERO_ERROR; - // Set callbacks to override bidi classes of new emoji - ubidi_setClassCallback(mBidi, emojiBidiOverride, nullptr, nullptr, nullptr, &status); - if (!U_SUCCESS(status)) { - ALOGE("error setting bidi callback function, status = %d", status); - return; - } - - UBiDiLevel bidiReq = bidiFlags; - if (bidiFlags == kBidi_Default_LTR) { - bidiReq = UBIDI_DEFAULT_LTR; - } else if (bidiFlags == kBidi_Default_RTL) { - bidiReq = UBIDI_DEFAULT_RTL; - } - ubidi_setPara(mBidi, buf, mBufSize, bidiReq, NULL, &status); - if (!U_SUCCESS(status)) { - ALOGE("error calling ubidi_setPara, status = %d", status); - return; - } - const int paraDir = ubidi_getParaLevel(mBidi) & kDirection_Mask; - const ssize_t rc = ubidi_countRuns(mBidi, &status); - if (!U_SUCCESS(status) || rc < 0) { - ALOGW("error counting bidi runs, status = %d", status); - } - if (!U_SUCCESS(status) || rc <= 1) { - mIsRtl = (paraDir == kBidi_RTL); - return; - } - mRunCount = rc; + if (bidiFlags == kBidi_Force_LTR || bidiFlags == 
kBidi_Force_RTL) { + // force single run. + return; + } + mBidi = ubidi_open(); + if (!mBidi) { + ALOGE("error creating bidi object"); + return; + } + UErrorCode status = U_ZERO_ERROR; + // Set callbacks to override bidi classes of new emoji + ubidi_setClassCallback(mBidi, emojiBidiOverride, nullptr, nullptr, nullptr, + &status); + if (!U_SUCCESS(status)) { + ALOGE("error setting bidi callback function, status = %d", status); + return; + } + + UBiDiLevel bidiReq = bidiFlags; + if (bidiFlags == kBidi_Default_LTR) { + bidiReq = UBIDI_DEFAULT_LTR; + } else if (bidiFlags == kBidi_Default_RTL) { + bidiReq = UBIDI_DEFAULT_RTL; + } + ubidi_setPara(mBidi, buf, mBufSize, bidiReq, NULL, &status); + if (!U_SUCCESS(status)) { + ALOGE("error calling ubidi_setPara, status = %d", status); + return; + } + const int paraDir = ubidi_getParaLevel(mBidi) & kDirection_Mask; + const ssize_t rc = ubidi_countRuns(mBidi, &status); + if (!U_SUCCESS(status) || rc < 0) { + ALOGW("error counting bidi runs, status = %d", status); + } + if (!U_SUCCESS(status) || rc <= 1) { + mIsRtl = (paraDir == kBidi_RTL); + return; + } + mRunCount = rc; } -void Layout::doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - int bidiFlags, const FontStyle &style, const MinikinPaint &paint, - const std::shared_ptr& collection) { - std::lock_guard _l(gMinikinLock); - - LayoutContext ctx; - ctx.style = style; - ctx.paint = paint; - - reset(); - mAdvances.resize(count, 0); - - for (const BidiText::Iter::RunInfo& runInfo : BidiText(buf, start, count, bufSize, bidiFlags)) { - doLayoutRunCached(buf, runInfo.mRunStart, runInfo.mRunLength, bufSize, runInfo.mIsRtl, &ctx, - start, collection, this, NULL); - } - ctx.clearHbFonts(); +void Layout::doLayout(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + int bidiFlags, + const FontStyle& style, + const MinikinPaint& paint, + const std::shared_ptr& collection) { + std::lock_guard _l(gMinikinLock); + + LayoutContext ctx; + ctx.style 
= style; + ctx.paint = paint; + + reset(); + mAdvances.resize(count, 0); + + for (const BidiText::Iter::RunInfo& runInfo : + BidiText(buf, start, count, bufSize, bidiFlags)) { + doLayoutRunCached(buf, runInfo.mRunStart, runInfo.mRunLength, bufSize, + runInfo.mIsRtl, &ctx, start, collection, this, NULL); + } + ctx.clearHbFonts(); } -float Layout::measureText(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - int bidiFlags, const FontStyle &style, const MinikinPaint &paint, - const std::shared_ptr& collection, float* advances) { - std::lock_guard _l(gMinikinLock); - - LayoutContext ctx; - ctx.style = style; - ctx.paint = paint; - - float advance = 0; - for (const BidiText::Iter::RunInfo& runInfo : BidiText(buf, start, count, bufSize, bidiFlags)) { - float* advancesForRun = advances ? advances + (runInfo.mRunStart - start) : advances; - advance += doLayoutRunCached(buf, runInfo.mRunStart, runInfo.mRunLength, bufSize, - runInfo.mIsRtl, &ctx, 0, collection, NULL, advancesForRun); - } - - ctx.clearHbFonts(); - return advance; +float Layout::measureText(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + int bidiFlags, + const FontStyle& style, + const MinikinPaint& paint, + const std::shared_ptr& collection, + float* advances) { + std::lock_guard _l(gMinikinLock); + + LayoutContext ctx; + ctx.style = style; + ctx.paint = paint; + + float advance = 0; + for (const BidiText::Iter::RunInfo& runInfo : + BidiText(buf, start, count, bufSize, bidiFlags)) { + float* advancesForRun = + advances ? 
advances + (runInfo.mRunStart - start) : advances; + advance += doLayoutRunCached(buf, runInfo.mRunStart, runInfo.mRunLength, + bufSize, runInfo.mIsRtl, &ctx, 0, collection, + NULL, advancesForRun); + } + + ctx.clearHbFonts(); + return advance; } -float Layout::doLayoutRunCached(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - bool isRtl, LayoutContext* ctx, size_t dstStart, - const std::shared_ptr& collection, Layout* layout, float* advances) { - const uint32_t originalHyphen = ctx->paint.hyphenEdit.getHyphen(); - float advance = 0; - if (!isRtl) { - // left to right - size_t wordstart = - start == bufSize ? start : getPrevWordBreakForCache(buf, start + 1, bufSize); - size_t wordend; - for (size_t iter = start; iter < start + count; iter = wordend) { - wordend = getNextWordBreakForCache(buf, iter, bufSize); - // Only apply hyphen to the first or last word in the string. - uint32_t hyphen = originalHyphen; - if (iter != start) { // Not the first word - hyphen &= ~HyphenEdit::MASK_START_OF_LINE; - } - if (wordend < start + count) { // Not the last word - hyphen &= ~HyphenEdit::MASK_END_OF_LINE; - } - ctx->paint.hyphenEdit = hyphen; - size_t wordcount = std::min(start + count, wordend) - iter; - advance += doLayoutWord(buf + wordstart, iter - wordstart, wordcount, - wordend - wordstart, isRtl, ctx, iter - dstStart, collection, layout, - advances ? advances + (iter - start) : advances); - wordstart = wordend; - } - } else { - // right to left - size_t wordstart; - size_t end = start + count; - size_t wordend = end == 0 ? 0 : getNextWordBreakForCache(buf, end - 1, bufSize); - for (size_t iter = end; iter > start; iter = wordstart) { - wordstart = getPrevWordBreakForCache(buf, iter, bufSize); - // Only apply hyphen to the first (rightmost) or last (leftmost) word in the string. 
- uint32_t hyphen = originalHyphen; - if (wordstart > start) { // Not the first word - hyphen &= ~HyphenEdit::MASK_START_OF_LINE; - } - if (iter != end) { // Not the last word - hyphen &= ~HyphenEdit::MASK_END_OF_LINE; - } - ctx->paint.hyphenEdit = hyphen; - size_t bufStart = std::max(start, wordstart); - advance += doLayoutWord(buf + wordstart, bufStart - wordstart, iter - bufStart, - wordend - wordstart, isRtl, ctx, bufStart - dstStart, collection, layout, - advances ? advances + (bufStart - start) : advances); - wordend = wordstart; - } +float Layout::doLayoutRunCached( + const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + bool isRtl, + LayoutContext* ctx, + size_t dstStart, + const std::shared_ptr& collection, + Layout* layout, + float* advances) { + const uint32_t originalHyphen = ctx->paint.hyphenEdit.getHyphen(); + float advance = 0; + if (!isRtl) { + // left to right + size_t wordstart = start == bufSize + ? start + : getPrevWordBreakForCache(buf, start + 1, bufSize); + size_t wordend; + for (size_t iter = start; iter < start + count; iter = wordend) { + wordend = getNextWordBreakForCache(buf, iter, bufSize); + // Only apply hyphen to the first or last word in the string. + uint32_t hyphen = originalHyphen; + if (iter != start) { // Not the first word + hyphen &= ~HyphenEdit::MASK_START_OF_LINE; + } + if (wordend < start + count) { // Not the last word + hyphen &= ~HyphenEdit::MASK_END_OF_LINE; + } + ctx->paint.hyphenEdit = hyphen; + size_t wordcount = std::min(start + count, wordend) - iter; + advance += doLayoutWord(buf + wordstart, iter - wordstart, wordcount, + wordend - wordstart, isRtl, ctx, iter - dstStart, + collection, layout, + advances ? advances + (iter - start) : advances); + wordstart = wordend; + } + } else { + // right to left + size_t wordstart; + size_t end = start + count; + size_t wordend = + end == 0 ? 
0 : getNextWordBreakForCache(buf, end - 1, bufSize); + for (size_t iter = end; iter > start; iter = wordstart) { + wordstart = getPrevWordBreakForCache(buf, iter, bufSize); + // Only apply hyphen to the first (rightmost) or last (leftmost) word in + // the string. + uint32_t hyphen = originalHyphen; + if (wordstart > start) { // Not the first word + hyphen &= ~HyphenEdit::MASK_START_OF_LINE; + } + if (iter != end) { // Not the last word + hyphen &= ~HyphenEdit::MASK_END_OF_LINE; + } + ctx->paint.hyphenEdit = hyphen; + size_t bufStart = std::max(start, wordstart); + advance += doLayoutWord( + buf + wordstart, bufStart - wordstart, iter - bufStart, + wordend - wordstart, isRtl, ctx, bufStart - dstStart, collection, + layout, advances ? advances + (bufStart - start) : advances); + wordend = wordstart; } - return advance; + } + return advance; } -float Layout::doLayoutWord(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - bool isRtl, LayoutContext* ctx, size_t bufStart, - const std::shared_ptr& collection, Layout* layout, float* advances) { - LayoutCache& cache = LayoutEngine::getInstance().layoutCache; - LayoutCacheKey key(collection, ctx->paint, ctx->style, buf, start, count, bufSize, isRtl); - - float wordSpacing = count == 1 && isWordSpace(buf[start]) ? 
ctx->paint.wordSpacing : 0; - - float advance; - if (ctx->paint.skipCache()) { - Layout layoutForWord; - key.doLayout(&layoutForWord, ctx, collection); - if (layout) { - layout->appendLayout(&layoutForWord, bufStart, wordSpacing); - } - if (advances) { - layoutForWord.getAdvances(advances); - } - advance = layoutForWord.getAdvance(); - } else { - Layout* layoutForWord = cache.get(key, ctx, collection); - if (layout) { - layout->appendLayout(layoutForWord, bufStart, wordSpacing); - } - if (advances) { - layoutForWord->getAdvances(advances); - } - advance = layoutForWord->getAdvance(); +float Layout::doLayoutWord(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + bool isRtl, + LayoutContext* ctx, + size_t bufStart, + const std::shared_ptr& collection, + Layout* layout, + float* advances) { + LayoutCache& cache = LayoutEngine::getInstance().layoutCache; + LayoutCacheKey key(collection, ctx->paint, ctx->style, buf, start, count, + bufSize, isRtl); + + float wordSpacing = + count == 1 && isWordSpace(buf[start]) ? 
ctx->paint.wordSpacing : 0; + + float advance; + if (ctx->paint.skipCache()) { + Layout layoutForWord; + key.doLayout(&layoutForWord, ctx, collection); + if (layout) { + layout->appendLayout(&layoutForWord, bufStart, wordSpacing); } + if (advances) { + layoutForWord.getAdvances(advances); + } + advance = layoutForWord.getAdvance(); + } else { + Layout* layoutForWord = cache.get(key, ctx, collection); + if (layout) { + layout->appendLayout(layoutForWord, bufStart, wordSpacing); + } + if (advances) { + layoutForWord->getAdvances(advances); + } + advance = layoutForWord->getAdvance(); + } - if (wordSpacing != 0) { - advance += wordSpacing; - if (advances) { - advances[0] += wordSpacing; - } + if (wordSpacing != 0) { + advance += wordSpacing; + if (advances) { + advances[0] += wordSpacing; } - return advance; + } + return advance; } -static void addFeatures(const string &str, vector* features) { - if (!str.size()) - return; - - const char* start = str.c_str(); - const char* end = start + str.size(); - - while (start < end) { - static hb_feature_t feature; - const char* p = strchr(start, ','); - if (!p) - p = end; - /* We do not allow setting features on ranges. As such, reject any - * setting that has non-universal range. */ - if (hb_feature_from_string (start, p - start, &feature) - && feature.start == 0 && feature.end == (unsigned int) -1) - features->push_back(feature); - start = p + 1; - } +static void addFeatures(const string& str, vector* features) { + if (!str.size()) + return; + + const char* start = str.c_str(); + const char* end = start + str.size(); + + while (start < end) { + static hb_feature_t feature; + const char* p = strchr(start, ','); + if (!p) + p = end; + /* We do not allow setting features on ranges. As such, reject any + * setting that has non-universal range. 
*/ + if (hb_feature_from_string(start, p - start, &feature) && + feature.start == 0 && feature.end == (unsigned int)-1) + features->push_back(feature); + start = p + 1; + } } static const hb_codepoint_t CHAR_HYPHEN = 0x2010; /* HYPHEN */ -static inline hb_codepoint_t determineHyphenChar(hb_codepoint_t preferredHyphen, hb_font_t* font) { +static inline hb_codepoint_t determineHyphenChar(hb_codepoint_t preferredHyphen, + hb_font_t* font) { #if WIP_NEEDS_ICU_UPDATE - (void)CHAR_HYPHEN; - return 0x002D; // HYPHEN-MINUS -#else // WIP_NEEDS_ICU_UPDATE - hb_codepoint_t glyph; - if (preferredHyphen == 0x058A /* ARMENIAN_HYPHEN */ - || preferredHyphen == 0x05BE /* HEBREW PUNCTUATION MAQAF */ - || preferredHyphen == 0x1400 /* CANADIAN SYLLABIC HYPHEN */) { - if (hb_font_get_nominal_glyph(font, preferredHyphen, &glyph)) { - return preferredHyphen; - } else { - // The original hyphen requested was not supported. Let's try and see if the - // Unicode hyphen is supported. - preferredHyphen = CHAR_HYPHEN; - } + (void)CHAR_HYPHEN; + return 0x002D; // HYPHEN-MINUS +#else // WIP_NEEDS_ICU_UPDATE + hb_codepoint_t glyph; + if (preferredHyphen == 0x058A /* ARMENIAN_HYPHEN */ + || preferredHyphen == 0x05BE /* HEBREW PUNCTUATION MAQAF */ + || preferredHyphen == 0x1400 /* CANADIAN SYLLABIC HYPHEN */) { + if (hb_font_get_nominal_glyph(font, preferredHyphen, &glyph)) { + return preferredHyphen; + } else { + // The original hyphen requested was not supported. Let's try and see if + // the Unicode hyphen is supported. + preferredHyphen = CHAR_HYPHEN; } - if (preferredHyphen == CHAR_HYPHEN) { /* HYPHEN */ - // Fallback to ASCII HYPHEN-MINUS if the font didn't have a glyph for the preferred hyphen. - // Note that we intentionally don't do anything special if the font doesn't have a - // HYPHEN-MINUS either, so a tofu could be shown, hinting towards something missing. 
- if (!hb_font_get_nominal_glyph(font, preferredHyphen, &glyph)) { - return 0x002D; // HYPHEN-MINUS - } + } + if (preferredHyphen == CHAR_HYPHEN) { /* HYPHEN */ + // Fallback to ASCII HYPHEN-MINUS if the font didn't have a glyph for the + // preferred hyphen. Note that we intentionally don't do anything special if + // the font doesn't have a HYPHEN-MINUS either, so a tofu could be shown, + // hinting towards something missing. + if (!hb_font_get_nominal_glyph(font, preferredHyphen, &glyph)) { + return 0x002D; // HYPHEN-MINUS } - return preferredHyphen; -#endif // WIP_NEEDS_ICU_UPDATE + } + return preferredHyphen; +#endif // WIP_NEEDS_ICU_UPDATE } -static inline void addHyphenToHbBuffer(hb_buffer_t* buffer, hb_font_t* font, uint32_t hyphen, - uint32_t cluster) { - const uint32_t* hyphenStr = HyphenEdit::getHyphenString(hyphen); - while (*hyphenStr != 0) { - hb_codepoint_t hyphenChar = determineHyphenChar(*hyphenStr, font); - hb_buffer_add(buffer, hyphenChar, cluster); - hyphenStr++; - } +static inline void addHyphenToHbBuffer(hb_buffer_t* buffer, + hb_font_t* font, + uint32_t hyphen, + uint32_t cluster) { + const uint32_t* hyphenStr = HyphenEdit::getHyphenString(hyphen); + while (*hyphenStr != 0) { + hb_codepoint_t hyphenChar = determineHyphenChar(*hyphenStr, font); + hb_buffer_add(buffer, hyphenChar, cluster); + hyphenStr++; + } } -// Returns the cluster value assigned to the first codepoint added to the buffer, which can be used -// to translate cluster values returned by HarfBuzz to input indices. +// Returns the cluster value assigned to the first codepoint added to the +// buffer, which can be used to translate cluster values returned by HarfBuzz to +// input indices. static inline uint32_t addToHbBuffer(hb_buffer_t* buffer, - const uint16_t* buf, size_t start, size_t count, size_t bufSize, - ssize_t scriptRunStart, ssize_t scriptRunEnd, - HyphenEdit hyphenEdit, hb_font_t* hbFont) { - - // Only hyphenate the very first script run for starting hyphens. 
- const uint32_t startHyphen = (scriptRunStart == 0) - ? hyphenEdit.getStart() - : HyphenEdit::NO_EDIT; - // Only hyphenate the very last script run for ending hyphens. - const uint32_t endHyphen = (static_cast(scriptRunEnd) == count) - ? hyphenEdit.getEnd() - : HyphenEdit::NO_EDIT; - - // In the following code, we drop the pre-context and/or post-context if there is a - // hyphen edit at that end. This is not absolutely necessary, since HarfBuzz uses - // contexts only for joining scripts at the moment, e.g. to determine if the first or - // last letter of a text range to shape should take a joining form based on an - // adjacent letter or joiner (that comes from the context). + const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + ssize_t scriptRunStart, + ssize_t scriptRunEnd, + HyphenEdit hyphenEdit, + hb_font_t* hbFont) { + // Only hyphenate the very first script run for starting hyphens. + const uint32_t startHyphen = + (scriptRunStart == 0) ? hyphenEdit.getStart() : HyphenEdit::NO_EDIT; + // Only hyphenate the very last script run for ending hyphens. + const uint32_t endHyphen = (static_cast(scriptRunEnd) == count) + ? hyphenEdit.getEnd() + : HyphenEdit::NO_EDIT; + + // In the following code, we drop the pre-context and/or post-context if there + // is a hyphen edit at that end. This is not absolutely necessary, since + // HarfBuzz uses contexts only for joining scripts at the moment, e.g. to + // determine if the first or last letter of a text range to shape should take + // a joining form based on an adjacent letter or joiner (that comes from the + // context). + // + // TODO: Revisit this for: + // 1. Desperate breaks for joining scripts like Arabic (where it may be better + // to keep + // the context); + // 2. Special features like start-of-word font features (not implemented in + // HarfBuzz + // yet). + + // We don't have any start-of-line replacement edit yet, so we don't need to + // check for those. 
+ if (HyphenEdit::isInsertion(startHyphen)) { + // A cluster value of zero guarantees that the inserted hyphen will be in + // the same cluster with the next codepoint, since there is no pre-context. + addHyphenToHbBuffer(buffer, hbFont, startHyphen, 0 /* cluster value */); + } + + const uint16_t* hbText; + int hbTextLength; + unsigned int hbItemOffset; + unsigned int hbItemLength = scriptRunEnd - scriptRunStart; // This is >= 1. + + const bool hasEndInsertion = HyphenEdit::isInsertion(endHyphen); + const bool hasEndReplacement = HyphenEdit::isReplacement(endHyphen); + if (hasEndReplacement) { + // Skip the last code unit while copying the buffer for HarfBuzz if it's a + // replacement. We don't need to worry about non-BMP characters yet since + // replacements are only done for code units at the moment. + hbItemLength -= 1; + } + + if (startHyphen == HyphenEdit::NO_EDIT) { + // No edit at the beginning. Use the whole pre-context. + hbText = buf; + hbItemOffset = start + scriptRunStart; + } else { + // There's an edit at the beginning. Drop the pre-context and start the + // buffer at where we want to start shaping. + hbText = buf + start + scriptRunStart; + hbItemOffset = 0; + } + + if (endHyphen == HyphenEdit::NO_EDIT) { + // No edit at the end, use the whole post-context. + hbTextLength = (buf + bufSize) - hbText; + } else { + // There is an edit at the end. Drop the post-context. + hbTextLength = hbItemOffset + hbItemLength; + } + + hb_buffer_add_utf16(buffer, hbText, hbTextLength, hbItemOffset, hbItemLength); + + unsigned int numCodepoints; + hb_glyph_info_t* cpInfo = hb_buffer_get_glyph_infos(buffer, &numCodepoints); + + // Add the hyphen at the end, if there's any. 
+ if (hasEndInsertion || hasEndReplacement) { + // When a hyphen is inserted, by assigning the added hyphen and the last + // codepoint added to the HarfBuzz buffer to the same cluster, we can make + // sure that they always remain in the same cluster, even if the last + // codepoint gets merged into another cluster (for example when it's a + // combining mark). // - // TODO: Revisit this for: - // 1. Desperate breaks for joining scripts like Arabic (where it may be better to keep - // the context); - // 2. Special features like start-of-word font features (not implemented in HarfBuzz - // yet). - - // We don't have any start-of-line replacement edit yet, so we don't need to check for - // those. - if (HyphenEdit::isInsertion(startHyphen)) { - // A cluster value of zero guarantees that the inserted hyphen will be in the same - // cluster with the next codepoint, since there is no pre-context. - addHyphenToHbBuffer(buffer, hbFont, startHyphen, 0 /* cluster value */); - } - - const uint16_t* hbText; - int hbTextLength; - unsigned int hbItemOffset; - unsigned int hbItemLength = scriptRunEnd - scriptRunStart; // This is >= 1. - - const bool hasEndInsertion = HyphenEdit::isInsertion(endHyphen); - const bool hasEndReplacement = HyphenEdit::isReplacement(endHyphen); - if (hasEndReplacement) { - // Skip the last code unit while copying the buffer for HarfBuzz if it's a replacement. We - // don't need to worry about non-BMP characters yet since replacements are only done for - // code units at the moment. - hbItemLength -= 1; - } - - if (startHyphen == HyphenEdit::NO_EDIT) { - // No edit at the beginning. Use the whole pre-context. - hbText = buf; - hbItemOffset = start + scriptRunStart; + // When a replacement happens instead, we want it to get the cluster value + // of the character it's replacing, which is one "codepoint length" larger + // than the last cluster. But since the character replaced is always just + // one code unit, we can just add 1. 
+ uint32_t hyphenCluster; + if (numCodepoints == 0) { + // Nothing was added to the HarfBuzz buffer. This can only happen if + // we have a replacement that is replacing a one-code unit script run. + hyphenCluster = 0; } else { - // There's an edit at the beginning. Drop the pre-context and start the buffer at where we - // want to start shaping. - hbText = buf + start + scriptRunStart; - hbItemOffset = 0; + hyphenCluster = + cpInfo[numCodepoints - 1].cluster + (uint32_t)hasEndReplacement; } + addHyphenToHbBuffer(buffer, hbFont, endHyphen, hyphenCluster); + // Since we have just added to the buffer, cpInfo no longer necessarily + // points to the right place. Refresh it. + cpInfo = + hb_buffer_get_glyph_infos(buffer, nullptr /* we don't need the size */); + } + return cpInfo[0].cluster; +} - if (endHyphen == HyphenEdit::NO_EDIT) { - // No edit at the end, use the whole post-context. - hbTextLength = (buf + bufSize) - hbText; - } else { - // There is an edit at the end. Drop the post-context. - hbTextLength = hbItemOffset + hbItemLength; +void Layout::doLayoutRun(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + bool isRtl, + LayoutContext* ctx, + const std::shared_ptr& collection) { + hb_buffer_t* buffer = LayoutEngine::getInstance().hbBuffer; + vector items; + collection->itemize(buf + start, count, ctx->style, &items); + + vector features; + // Disable default-on non-required ligature features if letter-spacing + // See http://dev.w3.org/csswg/css-text-3/#letter-spacing-property + // "When the effective spacing between two characters is not zero (due to + // either justification or a non-zero value of letter-spacing), user agents + // should not apply optional ligatures." 
+ if (fabs(ctx->paint.letterSpacing) > 0.03) { + static const hb_feature_t no_liga = {HB_TAG('l', 'i', 'g', 'a'), 0, 0, ~0u}; + static const hb_feature_t no_clig = {HB_TAG('c', 'l', 'i', 'g'), 0, 0, ~0u}; + features.push_back(no_liga); + features.push_back(no_clig); + } + addFeatures(ctx->paint.fontFeatureSettings, &features); + + double size = ctx->paint.size; + double scaleX = ctx->paint.scaleX; + + float x = mAdvance; + float y = 0; + for (int run_ix = isRtl ? items.size() - 1 : 0; + isRtl ? run_ix >= 0 : run_ix < static_cast(items.size()); + isRtl ? --run_ix : ++run_ix) { + FontCollection::Run& run = items[run_ix]; + if (run.fakedFont.font == NULL) { + ALOGE("no font for run starting u+%04x length %d", buf[run.start], + run.end - run.start); + continue; } + int font_ix = findFace(run.fakedFont, ctx); + ctx->paint.font = mFaces[font_ix].font; + ctx->paint.fakery = mFaces[font_ix].fakery; + hb_font_t* hbFont = ctx->hbFonts[font_ix]; +#ifdef VERBOSE_DEBUG + ALOGD("Run %zu, font %d [%d:%d]", run_ix, font_ix, run.start, run.end); +#endif - hb_buffer_add_utf16(buffer, hbText, hbTextLength, hbItemOffset, hbItemLength); - - unsigned int numCodepoints; - hb_glyph_info_t* cpInfo = hb_buffer_get_glyph_infos(buffer, &numCodepoints); - - // Add the hyphen at the end, if there's any. - if (hasEndInsertion || hasEndReplacement) { - // When a hyphen is inserted, by assigning the added hyphen and the last - // codepoint added to the HarfBuzz buffer to the same cluster, we can make sure - // that they always remain in the same cluster, even if the last codepoint gets - // merged into another cluster (for example when it's a combining mark). - // - // When a replacement happens instead, we want it to get the cluster value of - // the character it's replacing, which is one "codepoint length" larger than - // the last cluster. But since the character replaced is always just one - // code unit, we can just add 1. 
- uint32_t hyphenCluster; - if (numCodepoints == 0) { - // Nothing was added to the HarfBuzz buffer. This can only happen if - // we have a replacement that is replacing a one-code unit script run. - hyphenCluster = 0; + hb_font_set_ppem(hbFont, size * scaleX, size); + hb_font_set_scale(hbFont, HBFloatToFixed(size * scaleX), + HBFloatToFixed(size)); + + const bool is_color_bitmap_font = isColorBitmapFont(hbFont); + + // TODO: if there are multiple scripts within a font in an RTL run, + // we need to reorder those runs. This is unlikely with our current + // font stack, but should be done for correctness. + + // Note: scriptRunStart and scriptRunEnd, as well as run.start and run.end, + // run between 0 and count. + ssize_t scriptRunEnd; + for (ssize_t scriptRunStart = run.start; scriptRunStart < run.end; + scriptRunStart = scriptRunEnd) { + scriptRunEnd = scriptRunStart; + hb_script_t script = + getScriptRun(buf + start, run.end, &scriptRunEnd /* iterator */); + // After the last line, scriptRunEnd is guaranteed to have increased, + // since the only time getScriptRun does not increase its iterator is when + // it has already reached the end of the buffer. But that can't happen, + // since if we have already reached the end of the buffer, we should have + // had (scriptRunEnd == run.end), which means (scriptRunStart == run.end) + // which is impossible due to the exit condition of the for loop. So we + // can be sure that scriptRunEnd > scriptRunStart. 
+ + double letterSpace = 0.0; + double letterSpaceHalfLeft = 0.0; + double letterSpaceHalfRight = 0.0; + + if (ctx->paint.letterSpacing != 0.0 && + isScriptOkForLetterspacing(script)) { + letterSpace = ctx->paint.letterSpacing * size * scaleX; + if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { + letterSpace = round(letterSpace); + letterSpaceHalfLeft = floor(letterSpace * 0.5); } else { - hyphenCluster = cpInfo[numCodepoints - 1].cluster + (uint32_t) hasEndReplacement; + letterSpaceHalfLeft = letterSpace * 0.5; } - addHyphenToHbBuffer(buffer, hbFont, endHyphen, hyphenCluster); - // Since we have just added to the buffer, cpInfo no longer necessarily points to - // the right place. Refresh it. - cpInfo = hb_buffer_get_glyph_infos(buffer, nullptr /* we don't need the size */); - } - return cpInfo[0].cluster; -} - - -void Layout::doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - bool isRtl, LayoutContext* ctx, const std::shared_ptr& collection) { - hb_buffer_t* buffer = LayoutEngine::getInstance().hbBuffer; - vector items; - collection->itemize(buf + start, count, ctx->style, &items); - - vector features; - // Disable default-on non-required ligature features if letter-spacing - // See http://dev.w3.org/csswg/css-text-3/#letter-spacing-property - // "When the effective spacing between two characters is not zero (due to - // either justification or a non-zero value of letter-spacing), user agents - // should not apply optional ligatures." - if (fabs(ctx->paint.letterSpacing) > 0.03) - { - static const hb_feature_t no_liga = { HB_TAG('l', 'i', 'g', 'a'), 0, 0, ~0u }; - static const hb_feature_t no_clig = { HB_TAG('c', 'l', 'i', 'g'), 0, 0, ~0u }; - features.push_back(no_liga); - features.push_back(no_clig); - } - addFeatures(ctx->paint.fontFeatureSettings, &features); - - double size = ctx->paint.size; - double scaleX = ctx->paint.scaleX; - - float x = mAdvance; - float y = 0; - for (int run_ix = isRtl ? items.size() - 1 : 0; - isRtl ? 
run_ix >= 0 : run_ix < static_cast(items.size()); - isRtl ? --run_ix : ++run_ix) { - FontCollection::Run &run = items[run_ix]; - if (run.fakedFont.font == NULL) { - ALOGE("no font for run starting u+%04x length %d", buf[run.start], run.end - run.start); - continue; + letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft; + } + + hb_buffer_clear_contents(buffer); + hb_buffer_set_script(buffer, script); + hb_buffer_set_direction(buffer, + isRtl ? HB_DIRECTION_RTL : HB_DIRECTION_LTR); + const FontLanguages& langList = + FontLanguageListCache::getById(ctx->style.getLanguageListId()); + if (langList.size() != 0) { + const FontLanguage* hbLanguage = &langList[0]; + for (size_t i = 0; i < langList.size(); ++i) { + if (langList[i].supportsHbScript(script)) { + hbLanguage = &langList[i]; + break; + } } - int font_ix = findFace(run.fakedFont, ctx); - ctx->paint.font = mFaces[font_ix].font; - ctx->paint.fakery = mFaces[font_ix].fakery; - hb_font_t* hbFont = ctx->hbFonts[font_ix]; + hb_buffer_set_language(buffer, hbLanguage->getHbLanguage()); + } + + const uint32_t clusterStart = + addToHbBuffer(buffer, buf, start, count, bufSize, scriptRunStart, + scriptRunEnd, ctx->paint.hyphenEdit, hbFont); + + hb_shape(hbFont, buffer, features.empty() ? NULL : &features[0], + features.size()); + unsigned int numGlyphs; + hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer, &numGlyphs); + hb_glyph_position_t* positions = + hb_buffer_get_glyph_positions(buffer, NULL); + + // At this point in the code, the cluster values in the info buffer + // correspond to the input characters with some shift. The cluster value + // clusterStart corresponds to the first character passed to HarfBuzz, + // which is at buf[start + scriptRunStart] whose advance needs to be saved + // into mAdvances[scriptRunStart]. So cluster values need to be reduced by + // (clusterStart - scriptRunStart) to get converted to indices of + // mAdvances. 
+ const ssize_t clusterOffset = clusterStart - scriptRunStart; + + if (numGlyphs) { + mAdvances[info[0].cluster - clusterOffset] += letterSpaceHalfLeft; + x += letterSpaceHalfLeft; + } + for (unsigned int i = 0; i < numGlyphs; i++) { #ifdef VERBOSE_DEBUG - ALOGD("Run %zu, font %d [%d:%d]", run_ix, font_ix, run.start, run.end); + ALOGD("%d %d %d %d", positions[i].x_advance, positions[i].y_advance, + positions[i].x_offset, positions[i].y_offset); + ALOGD("DoLayout %u: %f; %d, %d", info[i].codepoint, + HBFixedToFloat(positions[i].x_advance), positions[i].x_offset, + positions[i].y_offset); #endif + if (i > 0 && info[i - 1].cluster != info[i].cluster) { + mAdvances[info[i - 1].cluster - clusterOffset] += + letterSpaceHalfRight; + mAdvances[info[i].cluster - clusterOffset] += letterSpaceHalfLeft; + x += letterSpace; + } - hb_font_set_ppem(hbFont, size * scaleX, size); - hb_font_set_scale(hbFont, HBFloatToFixed(size * scaleX), HBFloatToFixed(size)); - - const bool is_color_bitmap_font = isColorBitmapFont(hbFont); - - // TODO: if there are multiple scripts within a font in an RTL run, - // we need to reorder those runs. This is unlikely with our current - // font stack, but should be done for correctness. - - // Note: scriptRunStart and scriptRunEnd, as well as run.start and run.end, run between 0 - // and count. - ssize_t scriptRunEnd; - for (ssize_t scriptRunStart = run.start; - scriptRunStart < run.end; - scriptRunStart = scriptRunEnd) { - scriptRunEnd = scriptRunStart; - hb_script_t script = getScriptRun(buf + start, run.end, &scriptRunEnd /* iterator */); - // After the last line, scriptRunEnd is guaranteed to have increased, since the only - // time getScriptRun does not increase its iterator is when it has already reached the - // end of the buffer. 
But that can't happen, since if we have already reached the end - // of the buffer, we should have had (scriptRunEnd == run.end), which means - // (scriptRunStart == run.end) which is impossible due to the exit condition of the for - // loop. So we can be sure that scriptRunEnd > scriptRunStart. - - double letterSpace = 0.0; - double letterSpaceHalfLeft = 0.0; - double letterSpaceHalfRight = 0.0; - - if (ctx->paint.letterSpacing != 0.0 && isScriptOkForLetterspacing(script)) { - letterSpace = ctx->paint.letterSpacing * size * scaleX; - if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { - letterSpace = round(letterSpace); - letterSpaceHalfLeft = floor(letterSpace * 0.5); - } else { - letterSpaceHalfLeft = letterSpace * 0.5; - } - letterSpaceHalfRight = letterSpace - letterSpaceHalfLeft; - } - - hb_buffer_clear_contents(buffer); - hb_buffer_set_script(buffer, script); - hb_buffer_set_direction(buffer, isRtl? HB_DIRECTION_RTL : HB_DIRECTION_LTR); - const FontLanguages& langList = - FontLanguageListCache::getById(ctx->style.getLanguageListId()); - if (langList.size() != 0) { - const FontLanguage* hbLanguage = &langList[0]; - for (size_t i = 0; i < langList.size(); ++i) { - if (langList[i].supportsHbScript(script)) { - hbLanguage = &langList[i]; - break; - } - } - hb_buffer_set_language(buffer, hbLanguage->getHbLanguage()); - } - - const uint32_t clusterStart = addToHbBuffer( - buffer, - buf, start, count, bufSize, - scriptRunStart, scriptRunEnd, - ctx->paint.hyphenEdit, hbFont); - - hb_shape(hbFont, buffer, features.empty() ? NULL : &features[0], features.size()); - unsigned int numGlyphs; - hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer, &numGlyphs); - hb_glyph_position_t* positions = hb_buffer_get_glyph_positions(buffer, NULL); - - // At this point in the code, the cluster values in the info buffer correspond to the - // input characters with some shift. 
The cluster value clusterStart corresponds to the - // first character passed to HarfBuzz, which is at buf[start + scriptRunStart] whose - // advance needs to be saved into mAdvances[scriptRunStart]. So cluster values need to - // be reduced by (clusterStart - scriptRunStart) to get converted to indices of - // mAdvances. - const ssize_t clusterOffset = clusterStart - scriptRunStart; - - if (numGlyphs) - { - mAdvances[info[0].cluster - clusterOffset] += letterSpaceHalfLeft; - x += letterSpaceHalfLeft; - } - for (unsigned int i = 0; i < numGlyphs; i++) { -#ifdef VERBOSE_DEBUG - ALOGD("%d %d %d %d", - positions[i].x_advance, positions[i].y_advance, - positions[i].x_offset, positions[i].y_offset); - ALOGD("DoLayout %u: %f; %d, %d", - info[i].codepoint, HBFixedToFloat(positions[i].x_advance), - positions[i].x_offset, positions[i].y_offset); -#endif - if (i > 0 && info[i - 1].cluster != info[i].cluster) { - mAdvances[info[i - 1].cluster - clusterOffset] += letterSpaceHalfRight; - mAdvances[info[i].cluster - clusterOffset] += letterSpaceHalfLeft; - x += letterSpace; - } - - hb_codepoint_t glyph_ix = info[i].codepoint; - float xoff = HBFixedToFloat(positions[i].x_offset); - float yoff = -HBFixedToFloat(positions[i].y_offset); - xoff += yoff * ctx->paint.skewX; - LayoutGlyph glyph = {font_ix, glyph_ix, x + xoff, y + yoff}; - mGlyphs.push_back(glyph); - float xAdvance = HBFixedToFloat(positions[i].x_advance); - if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { - xAdvance = roundf(xAdvance); - } - MinikinRect glyphBounds; - hb_glyph_extents_t extents = {}; - if (is_color_bitmap_font && hb_font_get_glyph_extents(hbFont, glyph_ix, &extents)) { - // Note that it is technically possible for a TrueType font to have outline and - // embedded bitmap at the same time. We ignore modified bbox of hinted outline - // glyphs in that case. 
- glyphBounds.mLeft = roundf(HBFixedToFloat(extents.x_bearing)); - glyphBounds.mTop = roundf(HBFixedToFloat(-extents.y_bearing)); - glyphBounds.mRight = roundf(HBFixedToFloat(extents.x_bearing + extents.width)); - glyphBounds.mBottom = - roundf(HBFixedToFloat(-extents.y_bearing - extents.height)); - } else { - ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint); - } - glyphBounds.offset(x + xoff, y + yoff); - mBounds.join(glyphBounds); - if (static_cast(info[i].cluster - clusterOffset) < count) { - mAdvances[info[i].cluster - clusterOffset] += xAdvance; - } else { - ALOGE("cluster %zu (start %zu) out of bounds of count %zu", - info[i].cluster - clusterOffset, start, count); - } - x += xAdvance; - } - if (numGlyphs) - { - mAdvances[info[numGlyphs - 1].cluster - clusterOffset] += letterSpaceHalfRight; - x += letterSpaceHalfRight; - } + hb_codepoint_t glyph_ix = info[i].codepoint; + float xoff = HBFixedToFloat(positions[i].x_offset); + float yoff = -HBFixedToFloat(positions[i].y_offset); + xoff += yoff * ctx->paint.skewX; + LayoutGlyph glyph = {font_ix, glyph_ix, x + xoff, y + yoff}; + mGlyphs.push_back(glyph); + float xAdvance = HBFixedToFloat(positions[i].x_advance); + if ((ctx->paint.paintFlags & LinearTextFlag) == 0) { + xAdvance = roundf(xAdvance); } + MinikinRect glyphBounds; + hb_glyph_extents_t extents = {}; + if (is_color_bitmap_font && + hb_font_get_glyph_extents(hbFont, glyph_ix, &extents)) { + // Note that it is technically possible for a TrueType font to have + // outline and embedded bitmap at the same time. We ignore modified + // bbox of hinted outline glyphs in that case. 
+ glyphBounds.mLeft = roundf(HBFixedToFloat(extents.x_bearing)); + glyphBounds.mTop = roundf(HBFixedToFloat(-extents.y_bearing)); + glyphBounds.mRight = + roundf(HBFixedToFloat(extents.x_bearing + extents.width)); + glyphBounds.mBottom = + roundf(HBFixedToFloat(-extents.y_bearing - extents.height)); + } else { + ctx->paint.font->GetBounds(&glyphBounds, glyph_ix, ctx->paint); + } + glyphBounds.offset(x + xoff, y + yoff); + mBounds.join(glyphBounds); + if (static_cast(info[i].cluster - clusterOffset) < count) { + mAdvances[info[i].cluster - clusterOffset] += xAdvance; + } else { + ALOGE("cluster %zu (start %zu) out of bounds of count %zu", + info[i].cluster - clusterOffset, start, count); + } + x += xAdvance; + } + if (numGlyphs) { + mAdvances[info[numGlyphs - 1].cluster - clusterOffset] += + letterSpaceHalfRight; + x += letterSpaceHalfRight; + } } - mAdvance = x; + } + mAdvance = x; } void Layout::appendLayout(Layout* src, size_t start, float extraAdvance) { - int fontMapStack[16]; - int* fontMap; - if (src->mFaces.size() < sizeof(fontMapStack) / sizeof(fontMapStack[0])) { - fontMap = fontMapStack; - } else { - fontMap = new int[src->mFaces.size()]; - } - for (size_t i = 0; i < src->mFaces.size(); i++) { - int font_ix = findFace(src->mFaces[i], NULL); - fontMap[i] = font_ix; - } - int x0 = mAdvance; - for (size_t i = 0; i < src->mGlyphs.size(); i++) { - LayoutGlyph& srcGlyph = src->mGlyphs[i]; - int font_ix = fontMap[srcGlyph.font_ix]; - unsigned int glyph_id = srcGlyph.glyph_id; - float x = x0 + srcGlyph.x; - float y = srcGlyph.y; - LayoutGlyph glyph = {font_ix, glyph_id, x, y}; - mGlyphs.push_back(glyph); - } - for (size_t i = 0; i < src->mAdvances.size(); i++) { - mAdvances[i + start] = src->mAdvances[i]; - if (i == 0) - mAdvances[i + start] += extraAdvance; - } - MinikinRect srcBounds(src->mBounds); - srcBounds.offset(x0, 0); - mBounds.join(srcBounds); - mAdvance += src->mAdvance + extraAdvance; - - if (fontMap != fontMapStack) { - delete[] fontMap; - } + int 
fontMapStack[16]; + int* fontMap; + if (src->mFaces.size() < sizeof(fontMapStack) / sizeof(fontMapStack[0])) { + fontMap = fontMapStack; + } else { + fontMap = new int[src->mFaces.size()]; + } + for (size_t i = 0; i < src->mFaces.size(); i++) { + int font_ix = findFace(src->mFaces[i], NULL); + fontMap[i] = font_ix; + } + int x0 = mAdvance; + for (size_t i = 0; i < src->mGlyphs.size(); i++) { + LayoutGlyph& srcGlyph = src->mGlyphs[i]; + int font_ix = fontMap[srcGlyph.font_ix]; + unsigned int glyph_id = srcGlyph.glyph_id; + float x = x0 + srcGlyph.x; + float y = srcGlyph.y; + LayoutGlyph glyph = {font_ix, glyph_id, x, y}; + mGlyphs.push_back(glyph); + } + for (size_t i = 0; i < src->mAdvances.size(); i++) { + mAdvances[i + start] = src->mAdvances[i]; + if (i == 0) + mAdvances[i + start] += extraAdvance; + } + MinikinRect srcBounds(src->mBounds); + srcBounds.offset(x0, 0); + mBounds.join(srcBounds); + mAdvance += src->mAdvance + extraAdvance; + + if (fontMap != fontMapStack) { + delete[] fontMap; + } } size_t Layout::nGlyphs() const { - return mGlyphs.size(); + return mGlyphs.size(); } const MinikinFont* Layout::getFont(int i) const { - const LayoutGlyph& glyph = mGlyphs[i]; - return mFaces[glyph.font_ix].font; + const LayoutGlyph& glyph = mGlyphs[i]; + return mFaces[glyph.font_ix].font; } FontFakery Layout::getFakery(int i) const { - const LayoutGlyph& glyph = mGlyphs[i]; - return mFaces[glyph.font_ix].fakery; + const LayoutGlyph& glyph = mGlyphs[i]; + return mFaces[glyph.font_ix].fakery; } unsigned int Layout::getGlyphId(int i) const { - const LayoutGlyph& glyph = mGlyphs[i]; - return glyph.glyph_id; + const LayoutGlyph& glyph = mGlyphs[i]; + return glyph.glyph_id; } float Layout::getX(int i) const { - const LayoutGlyph& glyph = mGlyphs[i]; - return glyph.x; + const LayoutGlyph& glyph = mGlyphs[i]; + return glyph.x; } float Layout::getY(int i) const { - const LayoutGlyph& glyph = mGlyphs[i]; - return glyph.y; + const LayoutGlyph& glyph = mGlyphs[i]; + return 
glyph.y; } float Layout::getAdvance() const { - return mAdvance; + return mAdvance; } void Layout::getAdvances(float* advances) { - memcpy(advances, &mAdvances[0], mAdvances.size() * sizeof(float)); + memcpy(advances, &mAdvances[0], mAdvances.size() * sizeof(float)); } void Layout::getBounds(MinikinRect* bounds) const { - bounds->set(mBounds); + bounds->set(mBounds); } void Layout::purgeCaches() { - std::lock_guard _l(gMinikinLock); - LayoutCache& layoutCache = LayoutEngine::getInstance().layoutCache; - layoutCache.clear(); - purgeHbFontCacheLocked(); + std::lock_guard _l(gMinikinLock); + LayoutCache& layoutCache = LayoutEngine::getInstance().layoutCache; + layoutCache.clear(); + purgeHbFontCacheLocked(); } } // namespace minikin diff --git a/third_party/txt/src/minikin/Layout.h b/third_party/txt/src/minikin/Layout.h index 6d1de2fbd2c1bdef32ec08d7943c2755aace5482..90476ee03d8f36992ff6ec14b66ad68e5eb4e8b9 100644 --- a/third_party/txt/src/minikin/Layout.h +++ b/third_party/txt/src/minikin/Layout.h @@ -27,115 +27,145 @@ namespace minikin { struct LayoutGlyph { - // index into mFaces and mHbFonts vectors. We could imagine - // moving this into a run length representation, because it's - // more efficient for long strings, and we'll probably need - // something like that for paint attributes (color, underline, - // fake b/i, etc), as having those per-glyph is bloated. - int font_ix; - - unsigned int glyph_id; - float x; - float y; + // index into mFaces and mHbFonts vectors. We could imagine + // moving this into a run length representation, because it's + // more efficient for long strings, and we'll probably need + // something like that for paint attributes (color, underline, + // fake b/i, etc), as having those per-glyph is bloated. 
+ int font_ix; + + unsigned int glyph_id; + float x; + float y; }; // Internal state used during layout operation struct LayoutContext; enum { - kBidi_LTR = 0, - kBidi_RTL = 1, - kBidi_Default_LTR = 2, - kBidi_Default_RTL = 3, - kBidi_Force_LTR = 4, - kBidi_Force_RTL = 5, - - kBidi_Mask = 0x7 + kBidi_LTR = 0, + kBidi_RTL = 1, + kBidi_Default_LTR = 2, + kBidi_Default_RTL = 3, + kBidi_Force_LTR = 4, + kBidi_Force_RTL = 5, + + kBidi_Mask = 0x7 }; // Lifecycle and threading assumptions for Layout: // The object is assumed to be owned by a single thread; multiple threads // may not mutate it at the same time. class Layout { -public: - - Layout() : mGlyphs(), mAdvances(), mFaces(), mAdvance(0), mBounds() { - mBounds.setEmpty(); - } - - Layout(Layout&& layout) = default; - - // Forbid copying and assignment. - Layout(const Layout&) = delete; - void operator=(const Layout&) = delete; - - void dump() const; - - void doLayout(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - int bidiFlags, const FontStyle &style, const MinikinPaint &paint, - const std::shared_ptr& collection); - - static float measureText(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - int bidiFlags, const FontStyle &style, const MinikinPaint &paint, - const std::shared_ptr& collection, float* advances); - - // public accessors - size_t nGlyphs() const; - const MinikinFont* getFont(int i) const; - FontFakery getFakery(int i) const; - unsigned int getGlyphId(int i) const; - float getX(int i) const; - float getY(int i) const; - - float getAdvance() const; - - // Get advances, copying into caller-provided buffer. The size of this - // buffer must match the length of the string (count arg to doLayout). 
- void getAdvances(float* advances); - - // The i parameter is an offset within the buf relative to start, it is < count, where - // start and count are the parameters to doLayout - float getCharAdvance(size_t i) const { return mAdvances[i]; } - - void getBounds(MinikinRect* rect) const; - - // Purge all caches, useful in low memory conditions - static void purgeCaches(); - -private: - friend class LayoutCacheKey; - - // Find a face in the mFaces vector, or create a new entry - int findFace(const FakedFont& face, LayoutContext* ctx); - - // Clears layout, ready to be used again - void reset(); - - // Lay out a single bidi run - // When layout is not null, layout info will be stored in the object. - // When advances is not null, measurement results will be stored in the array. - static float doLayoutRunCached(const uint16_t* buf, size_t runStart, size_t runLength, - size_t bufSize, bool isRtl, LayoutContext* ctx, size_t dstStart, - const std::shared_ptr& collection, Layout* layout, float* advances); - - // Lay out a single word - static float doLayoutWord(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - bool isRtl, LayoutContext* ctx, size_t bufStart, - const std::shared_ptr& collection, Layout* layout, float* advances); - - // Lay out a single bidi run - void doLayoutRun(const uint16_t* buf, size_t start, size_t count, size_t bufSize, - bool isRtl, LayoutContext* ctx, const std::shared_ptr& collection); - - // Append another layout (for example, cached value) into this one - void appendLayout(Layout* src, size_t start, float extraAdvance); - - std::vector mGlyphs; - std::vector mAdvances; - - std::vector mFaces; - float mAdvance; - MinikinRect mBounds; + public: + Layout() : mGlyphs(), mAdvances(), mFaces(), mAdvance(0), mBounds() { + mBounds.setEmpty(); + } + + Layout(Layout&& layout) = default; + + // Forbid copying and assignment. 
+ Layout(const Layout&) = delete; + void operator=(const Layout&) = delete; + + void dump() const; + + void doLayout(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + int bidiFlags, + const FontStyle& style, + const MinikinPaint& paint, + const std::shared_ptr& collection); + + static float measureText(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + int bidiFlags, + const FontStyle& style, + const MinikinPaint& paint, + const std::shared_ptr& collection, + float* advances); + + // public accessors + size_t nGlyphs() const; + const MinikinFont* getFont(int i) const; + FontFakery getFakery(int i) const; + unsigned int getGlyphId(int i) const; + float getX(int i) const; + float getY(int i) const; + + float getAdvance() const; + + // Get advances, copying into caller-provided buffer. The size of this + // buffer must match the length of the string (count arg to doLayout). + void getAdvances(float* advances); + + // The i parameter is an offset within the buf relative to start, it is < + // count, where start and count are the parameters to doLayout + float getCharAdvance(size_t i) const { return mAdvances[i]; } + + void getBounds(MinikinRect* rect) const; + + // Purge all caches, useful in low memory conditions + static void purgeCaches(); + + private: + friend class LayoutCacheKey; + + // Find a face in the mFaces vector, or create a new entry + int findFace(const FakedFont& face, LayoutContext* ctx); + + // Clears layout, ready to be used again + void reset(); + + // Lay out a single bidi run + // When layout is not null, layout info will be stored in the object. + // When advances is not null, measurement results will be stored in the array. 
+ static float doLayoutRunCached( + const uint16_t* buf, + size_t runStart, + size_t runLength, + size_t bufSize, + bool isRtl, + LayoutContext* ctx, + size_t dstStart, + const std::shared_ptr& collection, + Layout* layout, + float* advances); + + // Lay out a single word + static float doLayoutWord(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + bool isRtl, + LayoutContext* ctx, + size_t bufStart, + const std::shared_ptr& collection, + Layout* layout, + float* advances); + + // Lay out a single bidi run + void doLayoutRun(const uint16_t* buf, + size_t start, + size_t count, + size_t bufSize, + bool isRtl, + LayoutContext* ctx, + const std::shared_ptr& collection); + + // Append another layout (for example, cached value) into this one + void appendLayout(Layout* src, size_t start, float extraAdvance); + + std::vector mGlyphs; + std::vector mAdvances; + + std::vector mFaces; + float mAdvance; + MinikinRect mBounds; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/LayoutUtils.cpp b/third_party/txt/src/minikin/LayoutUtils.cpp index a3238d448d3bfc63e8de44ccf4ce9ef2b3661570..e461892d26bbde961093ce10f444363d45f4359e 100644 --- a/third_party/txt/src/minikin/LayoutUtils.cpp +++ b/third_party/txt/src/minikin/LayoutUtils.cpp @@ -23,10 +23,11 @@ namespace minikin { const uint16_t CHAR_NBSP = 0x00A0; /* - * Determine whether the code unit is a word space for the purposes of justification. + * Determine whether the code unit is a word space for the purposes of + * justification. */ bool isWordSpace(uint16_t code_unit) { - return code_unit == ' ' || code_unit == CHAR_NBSP; + return code_unit == ' ' || code_unit == CHAR_NBSP; } /** @@ -35,55 +36,60 @@ bool isWordSpace(uint16_t code_unit) { * heuristic, but should be accurate most of the time. 
*/ static bool isWordBreakAfter(uint16_t c) { - if (isWordSpace(c) || (c >= 0x2000 && c <= 0x200a) || c == 0x3000) { - // spaces - return true; - } - // Note: kana is not included, as sophisticated fonts may kern kana - return false; + if (isWordSpace(c) || (c >= 0x2000 && c <= 0x200a) || c == 0x3000) { + // spaces + return true; + } + // Note: kana is not included, as sophisticated fonts may kern kana + return false; } static bool isWordBreakBefore(uint16_t c) { - // CJK ideographs (and yijing hexagram symbols) - return isWordBreakAfter(c) || (c >= 0x3400 && c <= 0x9fff); + // CJK ideographs (and yijing hexagram symbols) + return isWordBreakAfter(c) || (c >= 0x3400 && c <= 0x9fff); } /** * Return offset of previous word break. It is either < offset or == 0. */ -size_t getPrevWordBreakForCache( - const uint16_t* chars, size_t offset, size_t len) { - if (offset == 0) return 0; - if (offset > len) offset = len; - if (isWordBreakBefore(chars[offset - 1])) { - return offset - 1; - } - for (size_t i = offset - 1; i > 0; i--) { - if (isWordBreakBefore(chars[i]) || isWordBreakAfter(chars[i - 1])) { - return i; - } - } +size_t getPrevWordBreakForCache(const uint16_t* chars, + size_t offset, + size_t len) { + if (offset == 0) return 0; + if (offset > len) + offset = len; + if (isWordBreakBefore(chars[offset - 1])) { + return offset - 1; + } + for (size_t i = offset - 1; i > 0; i--) { + if (isWordBreakBefore(chars[i]) || isWordBreakAfter(chars[i - 1])) { + return i; + } + } + return 0; } /** * Return offset of next word break. It is either > offset or == len. */ -size_t getNextWordBreakForCache( - const uint16_t* chars, size_t offset, size_t len) { - if (offset >= len) return len; - if (isWordBreakAfter(chars[offset])) { - return offset + 1; - } - for (size_t i = offset + 1; i < len; i++) { - // No need to check isWordBreakAfter(chars[i - 1]) since it is checked - // in previous iteration. Note that isWordBreakBefore returns true - // whenever isWordBreakAfter returns true. 
- if (isWordBreakBefore(chars[i])) { - return i; - } - } +size_t getNextWordBreakForCache(const uint16_t* chars, + size_t offset, + size_t len) { + if (offset >= len) return len; + if (isWordBreakAfter(chars[offset])) { + return offset + 1; + } + for (size_t i = offset + 1; i < len; i++) { + // No need to check isWordBreakAfter(chars[i - 1]) since it is checked + // in previous iteration. Note that isWordBreakBefore returns true + // whenever isWordBreakAfter returns true. + if (isWordBreakBefore(chars[i])) { + return i; + } + } + return len; } } // namespace minikin diff --git a/third_party/txt/src/minikin/LayoutUtils.h b/third_party/txt/src/minikin/LayoutUtils.h index f13f634eaa1d03080a17e01110a36afd20853813..2dcc6c510f472d8e2449ef049dac2bae9ddf8e95 100644 --- a/third_party/txt/src/minikin/LayoutUtils.h +++ b/third_party/txt/src/minikin/LayoutUtils.h @@ -23,7 +23,8 @@ namespace minikin { /* - * Determine whether the code unit is a word space for the purposes of justification. + * Determine whether the code unit is a word space for the purposes of + * justification. */ bool isWordSpace(uint16_t code_unit); @@ -34,8 +35,9 @@ bool isWordSpace(uint16_t code_unit); * kerning or complex script processing. This is necessarily a * heuristic, but should be accurate most of the time. */ -size_t getPrevWordBreakForCache( - const uint16_t* chars, size_t offset, size_t len); +size_t getPrevWordBreakForCache(const uint16_t* chars, + size_t offset, + size_t len); /** * Return offset of next word break. It is either > offset or == len. @@ -44,8 +46,9 @@ size_t getPrevWordBreakForCache( * kerning or complex script processing. This is necessarily a * heuristic, but should be accurate most of the time. 
*/ -size_t getNextWordBreakForCache( - const uint16_t* chars, size_t offset, size_t len); +size_t getNextWordBreakForCache(const uint16_t* chars, + size_t offset, + size_t len); } // namespace minikin #endif // MINIKIN_LAYOUT_UTILS_H diff --git a/third_party/txt/src/minikin/LineBreaker.cpp b/third_party/txt/src/minikin/LineBreaker.cpp index 2a29989ff92be50c5423c4ebb5063b317759a16e..c445e8542ab17b7819658ba6b8c1b24293c1b2b5 100644 --- a/third_party/txt/src/minikin/LineBreaker.cpp +++ b/third_party/txt/src/minikin/LineBreaker.cpp @@ -22,9 +22,9 @@ #include -#include "LayoutUtils.h" #include #include +#include "LayoutUtils.h" using std::vector; @@ -32,8 +32,8 @@ namespace minikin { const int CHAR_TAB = 0x0009; -// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. All these -// constants are larger than any reasonable actual width score. +// Large scores in a hierarchy; we prefer desperate breaks to an overfull line. +// All these constants are larger than any reasonable actual width score. const float SCORE_INFTY = std::numeric_limits::max(); const float SCORE_OVERFULL = 1e12f; const float SCORE_DESPERATE = 1e10f; @@ -41,483 +41,511 @@ const float SCORE_DESPERATE = 1e10f; // Multiplier for hyphen penalty on last line. const float LAST_LINE_PENALTY_MULTIPLIER = 4.0f; // Penalty assigned to each line break (to try to minimize number of lines) -// TODO: when we implement full justification (so spaces can shrink and stretch), this is -// probably not the most appropriate method. +// TODO: when we implement full justification (so spaces can shrink and +// stretch), this is probably not the most appropriate method. const float LINE_PENALTY_MULTIPLIER = 2.0f; // Penalty assigned to shrinking the whitepsace. const float SHRINK_PENALTY_MULTIPLIER = 4.0f; -// Very long words trigger O(n^2) behavior in hyphenation, so we disable hyphenation for -// unreasonably long words. 
This is somewhat of a heuristic because extremely long words -// are possible in some languages. This does mean that very long real words can get -// broken by desperate breaks, with no hyphens. +// Very long words trigger O(n^2) behavior in hyphenation, so we disable +// hyphenation for unreasonably long words. This is somewhat of a heuristic +// because extremely long words are possible in some languages. This does mean +// that very long real words can get broken by desperate breaks, with no +// hyphens. const size_t LONGEST_HYPHENATED_WORD = 45; -// When the text buffer is within this limit, capacity of vectors is retained at finish(), -// to avoid allocation. +// When the text buffer is within this limit, capacity of vectors is retained at +// finish(), to avoid allocation. const size_t MAX_TEXT_BUF_RETAIN = 32678; // Maximum amount that spaces can shrink, in justified text. const float SHRINKABILITY = 1.0 / 3.0; void LineBreaker::setLocale(const icu::Locale& locale, Hyphenator* hyphenator) { - mWordBreaker.setLocale(locale); - mLocale = locale; - mHyphenator = hyphenator; + mWordBreaker.setLocale(locale); + mLocale = locale; + mHyphenator = hyphenator; } void LineBreaker::setText() { - mWordBreaker.setText(mTextBuf.data(), mTextBuf.size()); - - // handle initial break here because addStyleRun may never be called - mWordBreaker.next(); - mCandidates.clear(); - Candidate cand = {0, 0, 0.0, 0.0, 0.0, 0.0, 0, 0, 0, HyphenationType::DONT_BREAK}; - mCandidates.push_back(cand); - - // reset greedy breaker state - mBreaks.clear(); - mWidths.clear(); - mFlags.clear(); - mLastBreak = 0; - mBestBreak = 0; - mBestScore = SCORE_INFTY; - mPreBreak = 0; - mLastHyphenation = HyphenEdit::NO_EDIT; - mFirstTabIndex = INT_MAX; - mSpaceCount = 0; + mWordBreaker.setText(mTextBuf.data(), mTextBuf.size()); + + // handle initial break here because addStyleRun may never be called + mWordBreaker.next(); + mCandidates.clear(); + Candidate cand = {0, 0, 0.0, 0.0, 0.0, + 0.0, 0, 0, 0, 
HyphenationType::DONT_BREAK}; + mCandidates.push_back(cand); + + // reset greedy breaker state + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + mLastBreak = 0; + mBestBreak = 0; + mBestScore = SCORE_INFTY; + mPreBreak = 0; + mLastHyphenation = HyphenEdit::NO_EDIT; + mFirstTabIndex = INT_MAX; + mSpaceCount = 0; } -void LineBreaker::setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth) { - mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); +void LineBreaker::setLineWidths(float firstWidth, + int firstWidthLineCount, + float restWidth) { + mLineWidths.setWidths(firstWidth, firstWidthLineCount, restWidth); } - void LineBreaker::setIndents(const std::vector& indents) { - mLineWidths.setIndents(indents); + mLineWidths.setIndents(indents); } -// This function determines whether a character is a space that disappears at end of line. -// It is the Unicode set: [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], -// plus '\n'. -// Note: all such characters are in the BMP, so it's ok to use code units for this. +// This function determines whether a character is a space that disappears at +// end of line. It is the Unicode set: +// [[:General_Category=Space_Separator:]-[:Line_Break=Glue:]], plus '\n'. Note: +// all such characters are in the BMP, so it's ok to use code units for this. static bool isLineEndSpace(uint16_t c) { - return c == '\n' || c == ' ' || c == 0x1680 || (0x2000 <= c && c <= 0x200A && c != 0x2007) || - c == 0x205F || c == 0x3000; + return c == '\n' || c == ' ' || c == 0x1680 || + (0x2000 <= c && c <= 0x200A && c != 0x2007) || c == 0x205F || + c == 0x3000; } -// Ordinarily, this method measures the text in the range given. However, when paint -// is nullptr, it assumes the widths have already been calculated and stored in the -// width buffer. -// This method finds the candidate word breaks (using the ICU break iterator) and sends them -// to addCandidate. 
-float LineBreaker::addStyleRun(MinikinPaint* paint, const std::shared_ptr& typeface, - FontStyle style, size_t start, size_t end, bool isRtl, double letterSpacing) { - float width = 0.0f; - int bidiFlags = isRtl ? kBidi_Force_RTL : kBidi_Force_LTR; - - float hyphenPenalty = 0.0; - if (paint != nullptr) { - width = Layout::measureText(mTextBuf.data(), start, end - start, mTextBuf.size(), bidiFlags, - style, *paint, typeface, mCharWidths.data() + start); - - // a heuristic that seems to perform well - hyphenPenalty = 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); - if (mHyphenationFrequency == kHyphenationFrequency_Normal) { - hyphenPenalty *= 4.0; // TODO: Replace with a better value after some testing - } - - if (mJustified) { - // Make hyphenation more aggressive for fully justified text (so that "normal" in - // justified mode is the same as "full" in ragged-right). - hyphenPenalty *= 0.25; - } else { - // Line penalty is zero for justified text. - mLinePenalty = std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); - } +// Ordinarily, this method measures the text in the range given. However, when +// paint is nullptr, it assumes the widths have already been calculated and +// stored in the width buffer. This method finds the candidate word breaks +// (using the ICU break iterator) and sends them to addCandidate. +float LineBreaker::addStyleRun(MinikinPaint* paint, + const std::shared_ptr& typeface, + FontStyle style, + size_t start, + size_t end, + bool isRtl, + double letterSpacing) { + float width = 0.0f; + int bidiFlags = isRtl ? 
kBidi_Force_RTL : kBidi_Force_LTR; + + float hyphenPenalty = 0.0; + if (paint != nullptr) { + width = Layout::measureText(mTextBuf.data(), start, end - start, + mTextBuf.size(), bidiFlags, style, *paint, + typeface, mCharWidths.data() + start); + + // a heuristic that seems to perform well + hyphenPenalty = + 0.5 * paint->size * paint->scaleX * mLineWidths.getLineWidth(0); + if (mHyphenationFrequency == kHyphenationFrequency_Normal) { + hyphenPenalty *= + 4.0; // TODO: Replace with a better value after some testing } - size_t current = (size_t)mWordBreaker.current(); - size_t afterWord = start; - size_t lastBreak = start; - ParaWidth lastBreakWidth = mWidth; - ParaWidth postBreak = mWidth; - size_t postSpaceCount = mSpaceCount; - for (size_t i = start; i < end; i++) { - uint16_t c = mTextBuf[i]; - if (c == CHAR_TAB) { - mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); - if (mFirstTabIndex == INT_MAX) { - mFirstTabIndex = (int)i; - } - // fall back to greedy; other modes don't know how to deal with tabs - mStrategy = kBreakStrategy_Greedy; - } else { - if (isWordSpace(c)) mSpaceCount += 1; - mWidth += mCharWidths[i]; - if (c == '\n') mWidth += INT_MAX; - if (!isLineEndSpace(c)) { - postBreak = mWidth; - postSpaceCount = mSpaceCount; - afterWord = i + 1; - } - } - if (i + 1 == current) { - size_t wordStart = mWordBreaker.wordStart(); - size_t wordEnd = mWordBreaker.wordEnd(); - if (paint != nullptr && mHyphenator != nullptr && - mHyphenationFrequency != kHyphenationFrequency_None && - wordStart >= start && wordEnd > wordStart && - wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) { - mHyphenator->hyphenate(&mHyphBuf, - &mTextBuf[wordStart], - wordEnd - wordStart, - mLocale); + if (mJustified) { + // Make hyphenation more aggressive for fully justified text (so that + // "normal" in justified mode is the same as "full" in ragged-right). + hyphenPenalty *= 0.25; + } else { + // Line penalty is zero for justified text. 
+ mLinePenalty = + std::max(mLinePenalty, hyphenPenalty * LINE_PENALTY_MULTIPLIER); + } + } + + size_t current = (size_t)mWordBreaker.current(); + size_t afterWord = start; + size_t lastBreak = start; + ParaWidth lastBreakWidth = mWidth; + ParaWidth postBreak = mWidth; + size_t postSpaceCount = mSpaceCount; + for (size_t i = start; i < end; i++) { + uint16_t c = mTextBuf[i]; + if (c == CHAR_TAB) { + mWidth = mPreBreak + mTabStops.nextTab(mWidth - mPreBreak); + if (mFirstTabIndex == INT_MAX) { + mFirstTabIndex = (int)i; + } + // fall back to greedy; other modes don't know how to deal with tabs + mStrategy = kBreakStrategy_Greedy; + } else { + if (isWordSpace(c)) + mSpaceCount += 1; + mWidth += mCharWidths[i]; + if (c == '\n') + mWidth += INT_MAX; + if (!isLineEndSpace(c)) { + postBreak = mWidth; + postSpaceCount = mSpaceCount; + afterWord = i + 1; + } + } + if (i + 1 == current) { + size_t wordStart = mWordBreaker.wordStart(); + size_t wordEnd = mWordBreaker.wordEnd(); + if (paint != nullptr && mHyphenator != nullptr && + mHyphenationFrequency != kHyphenationFrequency_None && + wordStart >= start && wordEnd > wordStart && + wordEnd - wordStart <= LONGEST_HYPHENATED_WORD) { + mHyphenator->hyphenate(&mHyphBuf, &mTextBuf[wordStart], + wordEnd - wordStart, mLocale); #if VERBOSE_DEBUG - std::string hyphenatedString; - for (size_t j = wordStart; j < wordEnd; j++) { - if (mHyphBuf[j - wordStart] == HyphenationType::BREAK_AND_INSERT_HYPHEN) { - hyphenatedString.push_back('-'); - } - // Note: only works with ASCII, should do UTF-8 conversion here - hyphenatedString.push_back(buffer()[j]); - } - ALOGD("hyphenated string: %s", hyphenatedString.c_str()); + std::string hyphenatedString; + for (size_t j = wordStart; j < wordEnd; j++) { + if (mHyphBuf[j - wordStart] == + HyphenationType::BREAK_AND_INSERT_HYPHEN) { + hyphenatedString.push_back('-'); + } + // Note: only works with ASCII, should do UTF-8 conversion here + hyphenatedString.push_back(buffer()[j]); + } + 
ALOGD("hyphenated string: %s", hyphenatedString.c_str()); #endif - // measure hyphenated substrings - for (size_t j = wordStart; j < wordEnd; j++) { - HyphenationType hyph = mHyphBuf[j - wordStart]; - if (hyph != HyphenationType::DONT_BREAK) { - paint->hyphenEdit = HyphenEdit::editForThisLine(hyph); - const float firstPartWidth = Layout::measureText(mTextBuf.data(), - lastBreak, j - lastBreak, mTextBuf.size(), bidiFlags, style, - *paint, typeface, nullptr); - ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth; - - paint->hyphenEdit = HyphenEdit::editForNextLine(hyph); - const float secondPartWidth = Layout::measureText(mTextBuf.data(), j, - afterWord - j, mTextBuf.size(), bidiFlags, style, *paint, - typeface, nullptr); - ParaWidth hyphPreBreak = postBreak - secondPartWidth; - - addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, postSpaceCount, - hyphenPenalty, hyph); - - paint->hyphenEdit = HyphenEdit::NO_EDIT; - } - } - } - - // Skip break for zero-width characters inside replacement span - if (paint != nullptr || current == end || mCharWidths[current] > 0) { - float penalty = hyphenPenalty * mWordBreaker.breakBadness(); - addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, penalty, - HyphenationType::DONT_BREAK); - } - lastBreak = current; - lastBreakWidth = mWidth; - current = (size_t)mWordBreaker.next(); + // measure hyphenated substrings + for (size_t j = wordStart; j < wordEnd; j++) { + HyphenationType hyph = mHyphBuf[j - wordStart]; + if (hyph != HyphenationType::DONT_BREAK) { + paint->hyphenEdit = HyphenEdit::editForThisLine(hyph); + const float firstPartWidth = Layout::measureText( + mTextBuf.data(), lastBreak, j - lastBreak, mTextBuf.size(), + bidiFlags, style, *paint, typeface, nullptr); + ParaWidth hyphPostBreak = lastBreakWidth + firstPartWidth; + + paint->hyphenEdit = HyphenEdit::editForNextLine(hyph); + const float secondPartWidth = Layout::measureText( + mTextBuf.data(), j, afterWord - j, mTextBuf.size(), 
bidiFlags, + style, *paint, typeface, nullptr); + ParaWidth hyphPreBreak = postBreak - secondPartWidth; + + addWordBreak(j, hyphPreBreak, hyphPostBreak, postSpaceCount, + postSpaceCount, hyphenPenalty, hyph); + + paint->hyphenEdit = HyphenEdit::NO_EDIT; + } } + } + + // Skip break for zero-width characters inside replacement span + if (paint != nullptr || current == end || mCharWidths[current] > 0) { + float penalty = hyphenPenalty * mWordBreaker.breakBadness(); + addWordBreak(current, mWidth, postBreak, mSpaceCount, postSpaceCount, + penalty, HyphenationType::DONT_BREAK); + } + lastBreak = current; + lastBreakWidth = mWidth; + current = (size_t)mWordBreaker.next(); } + } - return width; + return width; } -// add a word break (possibly for a hyphenated fragment), and add desperate breaks if -// needed (ie when word exceeds current line width) -void LineBreaker::addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, - size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph) { - Candidate cand; - ParaWidth width = mCandidates.back().preBreak; - if (postBreak - width > currentLineWidth()) { - // Add desperate breaks. - // Note: these breaks are based on the shaping of the (non-broken) original text; they - // are imprecise especially in the presence of kerning, ligatures, and Arabic shaping. 
- size_t i = mCandidates.back().offset; - width += mCharWidths[i++]; - for (; i < offset; i++) { - float w = mCharWidths[i]; - if (w > 0) { - cand.offset = i; - cand.preBreak = width; - cand.postBreak = width; - // postSpaceCount doesn't include trailing spaces - cand.preSpaceCount = postSpaceCount; - cand.postSpaceCount = postSpaceCount; - cand.penalty = SCORE_DESPERATE; - cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; +// add a word break (possibly for a hyphenated fragment), and add desperate +// breaks if needed (ie when word exceeds current line width) +void LineBreaker::addWordBreak(size_t offset, + ParaWidth preBreak, + ParaWidth postBreak, + size_t preSpaceCount, + size_t postSpaceCount, + float penalty, + HyphenationType hyph) { + Candidate cand; + ParaWidth width = mCandidates.back().preBreak; + if (postBreak - width > currentLineWidth()) { + // Add desperate breaks. + // Note: these breaks are based on the shaping of the (non-broken) original + // text; they are imprecise especially in the presence of kerning, + // ligatures, and Arabic shaping. 
+ size_t i = mCandidates.back().offset; + width += mCharWidths[i++]; + for (; i < offset; i++) { + float w = mCharWidths[i]; + if (w > 0) { + cand.offset = i; + cand.preBreak = width; + cand.postBreak = width; + // postSpaceCount doesn't include trailing spaces + cand.preSpaceCount = postSpaceCount; + cand.postSpaceCount = postSpaceCount; + cand.penalty = SCORE_DESPERATE; + cand.hyphenType = HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN; #if VERBOSE_DEBUG - ALOGD("desperate cand: %zd %g:%g", - mCandidates.size(), cand.postBreak, cand.preBreak); + ALOGD("desperate cand: %zd %g:%g", mCandidates.size(), cand.postBreak, + cand.preBreak); #endif - addCandidate(cand); - width += w; - } - } + addCandidate(cand); + width += w; + } } - - cand.offset = offset; - cand.preBreak = preBreak; - cand.postBreak = postBreak; - cand.penalty = penalty; - cand.preSpaceCount = preSpaceCount; - cand.postSpaceCount = postSpaceCount; - cand.hyphenType = hyph; + } + + cand.offset = offset; + cand.preBreak = preBreak; + cand.postBreak = postBreak; + cand.penalty = penalty; + cand.preSpaceCount = preSpaceCount; + cand.postSpaceCount = postSpaceCount; + cand.hyphenType = hyph; #if VERBOSE_DEBUG - ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); + ALOGD("cand: %zd %g:%g", mCandidates.size(), cand.postBreak, cand.preBreak); #endif - addCandidate(cand); + addCandidate(cand); } // Helper method for addCandidate() void LineBreaker::pushGreedyBreak() { - const Candidate& bestCandidate = mCandidates[mBestBreak]; - pushBreak(bestCandidate.offset, bestCandidate.postBreak - mPreBreak, - mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType)); - mBestScore = SCORE_INFTY; + const Candidate& bestCandidate = mCandidates[mBestBreak]; + pushBreak( + bestCandidate.offset, bestCandidate.postBreak - mPreBreak, + mLastHyphenation | HyphenEdit::editForThisLine(bestCandidate.hyphenType)); + mBestScore = SCORE_INFTY; #if VERBOSE_DEBUG - ALOGD("break: %d %g", 
mBreaks.back(), mWidths.back()); + ALOGD("break: %d %g", mBreaks.back(), mWidths.back()); #endif - mLastBreak = mBestBreak; - mPreBreak = bestCandidate.preBreak; - mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType); + mLastBreak = mBestBreak; + mPreBreak = bestCandidate.preBreak; + mLastHyphenation = HyphenEdit::editForNextLine(bestCandidate.hyphenType); } // TODO performance: could avoid populating mCandidates if greedy only void LineBreaker::addCandidate(Candidate cand) { - const size_t candIndex = mCandidates.size(); - mCandidates.push_back(cand); - - // mLastBreak is the index of the last line break we decided to do in mCandidates, - // and mPreBreak is its preBreak value. mBestBreak is the index of the best line breaking candidate - // we have found since then, and mBestScore is its penalty. - if (cand.postBreak - mPreBreak > currentLineWidth()) { - // This break would create an overfull line, pick the best break and break there (greedy) - if (mBestBreak == mLastBreak) { - // No good break has been found since last break. Break here. - mBestBreak = candIndex; - } - pushGreedyBreak(); + const size_t candIndex = mCandidates.size(); + mCandidates.push_back(cand); + + // mLastBreak is the index of the last line break we decided to do in + // mCandidates, and mPreBreak is its preBreak value. mBestBreak is the index + // of the best line breaking candidate we have found since then, and + // mBestScore is its penalty. + if (cand.postBreak - mPreBreak > currentLineWidth()) { + // This break would create an overfull line, pick the best break and break + // there (greedy) + if (mBestBreak == mLastBreak) { + // No good break has been found since last break. Break here. + mBestBreak = candIndex; } - - while (mLastBreak != candIndex && cand.postBreak - mPreBreak > currentLineWidth()) { - // We should rarely come here. But if we are here, we have broken the line, but the - // remaining part still doesn't fit. 
We now need to break at the second best place after the - // last break, but we have not kept that information, so we need to go back and find it. - // - // In some really rare cases, postBreak - preBreak of a candidate itself may be over the - // current line width. We protect ourselves against an infinite loop in that case by - // checking that we have not broken the line at this candidate already. - for (size_t i = mLastBreak + 1; i < candIndex; i++) { - const float penalty = mCandidates[i].penalty; - if (penalty <= mBestScore) { - mBestBreak = i; - mBestScore = penalty; - } - } - if (mBestBreak == mLastBreak) { - // We didn't find anything good. Break here. - mBestBreak = candIndex; - } - pushGreedyBreak(); + pushGreedyBreak(); + } + + while (mLastBreak != candIndex && + cand.postBreak - mPreBreak > currentLineWidth()) { + // We should rarely come here. But if we are here, we have broken the line, + // but the remaining part still doesn't fit. We now need to break at the + // second best place after the last break, but we have not kept that + // information, so we need to go back and find it. + // + // In some really rare cases, postBreak - preBreak of a candidate itself may + // be over the current line width. We protect ourselves against an infinite + // loop in that case by checking that we have not broken the line at this + // candidate already. + for (size_t i = mLastBreak + 1; i < candIndex; i++) { + const float penalty = mCandidates[i].penalty; + if (penalty <= mBestScore) { + mBestBreak = i; + mBestScore = penalty; + } } - - if (cand.penalty <= mBestScore) { - mBestBreak = candIndex; - mBestScore = cand.penalty; + if (mBestBreak == mLastBreak) { + // We didn't find anything good. Break here. 
+ mBestBreak = candIndex; } + pushGreedyBreak(); + } + + if (cand.penalty <= mBestScore) { + mBestBreak = candIndex; + mBestScore = cand.penalty; + } } void LineBreaker::pushBreak(int offset, float width, uint8_t hyphenEdit) { - mBreaks.push_back(offset); - mWidths.push_back(width); - int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; - flags |= hyphenEdit; - mFlags.push_back(flags); - mFirstTabIndex = INT_MAX; + mBreaks.push_back(offset); + mWidths.push_back(width); + int flags = (mFirstTabIndex < mBreaks.back()) << kTab_Shift; + flags |= hyphenEdit; + mFlags.push_back(flags); + mFirstTabIndex = INT_MAX; } void LineBreaker::addReplacement(size_t start, size_t end, float width) { - mCharWidths[start] = width; - std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); - addStyleRun(nullptr, nullptr, FontStyle(), start, end, false, 0); + mCharWidths[start] = width; + std::fill(&mCharWidths[start + 1], &mCharWidths[end], 0.0f); + addStyleRun(nullptr, nullptr, FontStyle(), start, end, false, 0); } // Get the width of a space. May return 0 if there are no spaces. -// Note: if there are multiple different widths for spaces (for example, because of mixing of -// fonts), it's only guaranteed to pick one. +// Note: if there are multiple different widths for spaces (for example, because +// of mixing of fonts), it's only guaranteed to pick one. float LineBreaker::getSpaceWidth() const { - for (size_t i = 0; i < mTextBuf.size(); i++) { - if (isWordSpace(mTextBuf[i])) { - return mCharWidths[i]; - } + for (size_t i = 0; i < mTextBuf.size(); i++) { + if (isWordSpace(mTextBuf[i])) { + return mCharWidths[i]; } - return 0.0f; + } + return 0.0f; } float LineBreaker::currentLineWidth() const { - return mLineWidths.getLineWidth(mBreaks.size()); + return mLineWidths.getLineWidth(mBreaks.size()); } void LineBreaker::computeBreaksGreedy() { - // All breaks but the last have been added in addCandidate already. 
- size_t nCand = mCandidates.size(); - if (nCand > 0 && (nCand == 1 || mLastBreak != nCand - 1)) { - pushBreak(mCandidates[nCand - 1].offset, mCandidates[nCand - 1].postBreak - mPreBreak, - mLastHyphenation); - // don't need to update mBestScore, because we're done + // All breaks but the last have been added in addCandidate already. + size_t nCand = mCandidates.size(); + if (nCand > 0 && (nCand == 1 || mLastBreak != nCand - 1)) { + pushBreak(mCandidates[nCand - 1].offset, + mCandidates[nCand - 1].postBreak - mPreBreak, mLastHyphenation); + // don't need to update mBestScore, because we're done #if VERBOSE_DEBUG - ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); + ALOGD("final break: %d %g", mBreaks.back(), mWidths.back()); #endif - } + } } // Follow "prev" links in mCandidates array, and copy to result arrays. void LineBreaker::finishBreaksOptimal() { - // clear existing greedy break result - mBreaks.clear(); - mWidths.clear(); - mFlags.clear(); - size_t nCand = mCandidates.size(); - size_t prev; - for (size_t i = nCand - 1; i > 0; i = prev) { - prev = mCandidates[i].prev; - mBreaks.push_back(mCandidates[i].offset); - mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); - int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType); - if (prev > 0) { - flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType); - } - mFlags.push_back(flags); + // clear existing greedy break result + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + size_t nCand = mCandidates.size(); + size_t prev; + for (size_t i = nCand - 1; i > 0; i = prev) { + prev = mCandidates[i].prev; + mBreaks.push_back(mCandidates[i].offset); + mWidths.push_back(mCandidates[i].postBreak - mCandidates[prev].preBreak); + int flags = HyphenEdit::editForThisLine(mCandidates[i].hyphenType); + if (prev > 0) { + flags |= HyphenEdit::editForNextLine(mCandidates[prev].hyphenType); } - std::reverse(mBreaks.begin(), mBreaks.end()); - std::reverse(mWidths.begin(), 
mWidths.end()); - std::reverse(mFlags.begin(), mFlags.end()); + mFlags.push_back(flags); + } + std::reverse(mBreaks.begin(), mBreaks.end()); + std::reverse(mWidths.begin(), mWidths.end()); + std::reverse(mFlags.begin(), mFlags.end()); } void LineBreaker::computeBreaksOptimal(bool isRectangle) { - size_t active = 0; - size_t nCand = mCandidates.size(); - float width = mLineWidths.getLineWidth(0); - float shortLineFactor = mJustified ? 0.75f : 0.5f; - float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f; - - // "i" iterates through candidates for the end of the line. - for (size_t i = 1; i < nCand; i++) { - bool atEnd = i == nCand - 1; - float best = SCORE_INFTY; - size_t bestPrev = 0; - size_t lineNumberLast = 0; - - if (!isRectangle) { - size_t lineNumberLast = mCandidates[active].lineNumber; - width = mLineWidths.getLineWidth(lineNumberLast); + size_t active = 0; + size_t nCand = mCandidates.size(); + float width = mLineWidths.getLineWidth(0); + float shortLineFactor = mJustified ? 0.75f : 0.5f; + float maxShrink = mJustified ? SHRINKABILITY * getSpaceWidth() : 0.0f; + + // "i" iterates through candidates for the end of the line. + for (size_t i = 1; i < nCand; i++) { + bool atEnd = i == nCand - 1; + float best = SCORE_INFTY; + size_t bestPrev = 0; + size_t lineNumberLast = 0; + + if (!isRectangle) { + size_t lineNumberLast = mCandidates[active].lineNumber; + width = mLineWidths.getLineWidth(lineNumberLast); + } + ParaWidth leftEdge = mCandidates[i].postBreak - width; + float bestHope = 0; + + // "j" iterates through candidates for the beginning of the line. 
+ for (size_t j = active; j < i; j++) { + if (!isRectangle) { + size_t lineNumber = mCandidates[j].lineNumber; + if (lineNumber != lineNumberLast) { + float widthNew = mLineWidths.getLineWidth(lineNumber); + if (widthNew != width) { + leftEdge = mCandidates[i].postBreak - width; + bestHope = 0; + width = widthNew; + } + lineNumberLast = lineNumber; } - ParaWidth leftEdge = mCandidates[i].postBreak - width; - float bestHope = 0; - - // "j" iterates through candidates for the beginning of the line. - for (size_t j = active; j < i; j++) { - if (!isRectangle) { - size_t lineNumber = mCandidates[j].lineNumber; - if (lineNumber != lineNumberLast) { - float widthNew = mLineWidths.getLineWidth(lineNumber); - if (widthNew != width) { - leftEdge = mCandidates[i].postBreak - width; - bestHope = 0; - width = widthNew; - } - lineNumberLast = lineNumber; - } - } - float jScore = mCandidates[j].score; - if (jScore + bestHope >= best) continue; - float delta = mCandidates[j].preBreak - leftEdge; - - // compute width score for line - - // Note: the "bestHope" optimization makes the assumption that, when delta is - // non-negative, widthScore will increase monotonically as successive candidate - // breaks are considered. - float widthScore = 0.0f; - float additionalPenalty = 0.0f; - if ((atEnd || !mJustified) && delta < 0) { - widthScore = SCORE_OVERFULL; - } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { - // increase penalty for hyphen on last line - additionalPenalty = LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; - // Penalize very short (< 1 - shortLineFactor of total width) lines. - float underfill = delta - shortLineFactor * width; - widthScore = underfill > 0 ? 
underfill * underfill : 0; - } else { - widthScore = delta * delta; - if (delta < 0) { - if (-delta < maxShrink * - (mCandidates[i].postSpaceCount - mCandidates[j].preSpaceCount)) { - widthScore *= SHRINK_PENALTY_MULTIPLIER; - } else { - widthScore = SCORE_OVERFULL; - } - } - } - - if (delta < 0) { - active = j + 1; - } else { - bestHope = widthScore; - } - - float score = jScore + widthScore + additionalPenalty; - if (score <= best) { - best = score; - bestPrev = j; - } + } + float jScore = mCandidates[j].score; + if (jScore + bestHope >= best) + continue; + float delta = mCandidates[j].preBreak - leftEdge; + + // compute width score for line + + // Note: the "bestHope" optimization makes the assumption that, when delta + // is non-negative, widthScore will increase monotonically as successive + // candidate breaks are considered. + float widthScore = 0.0f; + float additionalPenalty = 0.0f; + if ((atEnd || !mJustified) && delta < 0) { + widthScore = SCORE_OVERFULL; + } else if (atEnd && mStrategy != kBreakStrategy_Balanced) { + // increase penalty for hyphen on last line + additionalPenalty = + LAST_LINE_PENALTY_MULTIPLIER * mCandidates[j].penalty; + // Penalize very short (< 1 - shortLineFactor of total width) lines. + float underfill = delta - shortLineFactor * width; + widthScore = underfill > 0 ? 
underfill * underfill : 0; + } else { + widthScore = delta * delta; + if (delta < 0) { + if (-delta < maxShrink * (mCandidates[i].postSpaceCount - + mCandidates[j].preSpaceCount)) { + widthScore *= SHRINK_PENALTY_MULTIPLIER; + } else { + widthScore = SCORE_OVERFULL; + } } - mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; - mCandidates[i].prev = bestPrev; - mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; + } + + if (delta < 0) { + active = j + 1; + } else { + bestHope = widthScore; + } + + float score = jScore + widthScore + additionalPenalty; + if (score <= best) { + best = score; + bestPrev = j; + } + } + mCandidates[i].score = best + mCandidates[i].penalty + mLinePenalty; + mCandidates[i].prev = bestPrev; + mCandidates[i].lineNumber = mCandidates[bestPrev].lineNumber + 1; #if VERBOSE_DEBUG - ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, mCandidates[i].prev); + ALOGD("break %zd: score=%g, prev=%zd", i, mCandidates[i].score, + mCandidates[i].prev); #endif - } - finishBreaksOptimal(); + } + finishBreaksOptimal(); } size_t LineBreaker::computeBreaks() { - if (mStrategy == kBreakStrategy_Greedy) { - computeBreaksGreedy(); - } else { - computeBreaksOptimal(mLineWidths.isConstant()); - } - return mBreaks.size(); + if (mStrategy == kBreakStrategy_Greedy) { + computeBreaksGreedy(); + } else { + computeBreaksOptimal(mLineWidths.isConstant()); + } + return mBreaks.size(); } void LineBreaker::finish() { - mWordBreaker.finish(); - mWidth = 0; - mLineWidths.clear(); - mCandidates.clear(); - mBreaks.clear(); - mWidths.clear(); - mFlags.clear(); - if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { - mTextBuf.clear(); - mTextBuf.shrink_to_fit(); - mCharWidths.clear(); - mCharWidths.shrink_to_fit(); - mHyphBuf.clear(); - mHyphBuf.shrink_to_fit(); - mCandidates.shrink_to_fit(); - mBreaks.shrink_to_fit(); - mWidths.shrink_to_fit(); - mFlags.shrink_to_fit(); - } - mStrategy = kBreakStrategy_Greedy; - mHyphenationFrequency = 
kHyphenationFrequency_Normal; - mLinePenalty = 0.0f; - mJustified = false; + mWordBreaker.finish(); + mWidth = 0; + mLineWidths.clear(); + mCandidates.clear(); + mBreaks.clear(); + mWidths.clear(); + mFlags.clear(); + if (mTextBuf.size() > MAX_TEXT_BUF_RETAIN) { + mTextBuf.clear(); + mTextBuf.shrink_to_fit(); + mCharWidths.clear(); + mCharWidths.shrink_to_fit(); + mHyphBuf.clear(); + mHyphBuf.shrink_to_fit(); + mCandidates.shrink_to_fit(); + mBreaks.shrink_to_fit(); + mWidths.shrink_to_fit(); + mFlags.shrink_to_fit(); + } + mStrategy = kBreakStrategy_Greedy; + mHyphenationFrequency = kHyphenationFrequency_Normal; + mLinePenalty = 0.0f; + mJustified = false; } } // namespace minikin diff --git a/third_party/txt/src/minikin/LineBreaker.h b/third_party/txt/src/minikin/LineBreaker.h index 32374e1b121b6445a459912efbedfaccf4eeb05b..0bfddbb5df35d41b69e1df1a11321e3234a322dd 100644 --- a/third_party/txt/src/minikin/LineBreaker.h +++ b/third_party/txt/src/minikin/LineBreaker.h @@ -24,240 +24,247 @@ #ifndef U_USING_ICU_NAMESPACE #define U_USING_ICU_NAMESPACE 0 -#endif // U_USING_ICU_NAMESPACE +#endif // U_USING_ICU_NAMESPACE -#include "unicode/brkiter.h" -#include "unicode/locid.h" #include #include #include "minikin/FontCollection.h" #include "minikin/Hyphenator.h" #include "minikin/MinikinFont.h" #include "minikin/WordBreaker.h" +#include "unicode/brkiter.h" +#include "unicode/locid.h" namespace minikin { enum BreakStrategy { - kBreakStrategy_Greedy = 0, - kBreakStrategy_HighQuality = 1, - kBreakStrategy_Balanced = 2 + kBreakStrategy_Greedy = 0, + kBreakStrategy_HighQuality = 1, + kBreakStrategy_Balanced = 2 }; enum HyphenationFrequency { - kHyphenationFrequency_None = 0, - kHyphenationFrequency_Normal = 1, - kHyphenationFrequency_Full = 2 + kHyphenationFrequency_None = 0, + kHyphenationFrequency_Normal = 1, + kHyphenationFrequency_Full = 2 }; // TODO: want to generalize to be able to handle array of line widths class LineWidths { - public: - void setWidths(float 
firstWidth, int firstWidthLineCount, float restWidth) { - mFirstWidth = firstWidth; - mFirstWidthLineCount = firstWidthLineCount; - mRestWidth = restWidth; - } - void setIndents(const std::vector& indents) { - mIndents = indents; - } - bool isConstant() const { - // technically mFirstWidthLineCount == 0 would count too, but doesn't actually happen - return mRestWidth == mFirstWidth && mIndents.empty(); - } - float getLineWidth(int line) const { - float width = (line < mFirstWidthLineCount) ? mFirstWidth : mRestWidth; - if (!mIndents.empty()) { - if ((size_t)line < mIndents.size()) { - width -= mIndents[line]; - } else { - width -= mIndents.back(); - } - } - return width; - } - void clear() { - mIndents.clear(); - } - private: - float mFirstWidth; - int mFirstWidthLineCount; - float mRestWidth; - std::vector mIndents; + public: + void setWidths(float firstWidth, int firstWidthLineCount, float restWidth) { + mFirstWidth = firstWidth; + mFirstWidthLineCount = firstWidthLineCount; + mRestWidth = restWidth; + } + void setIndents(const std::vector& indents) { mIndents = indents; } + bool isConstant() const { + // technically mFirstWidthLineCount == 0 would count too, but doesn't + // actually happen + return mRestWidth == mFirstWidth && mIndents.empty(); + } + float getLineWidth(int line) const { + float width = (line < mFirstWidthLineCount) ? 
mFirstWidth : mRestWidth; + if (!mIndents.empty()) { + if ((size_t)line < mIndents.size()) { + width -= mIndents[line]; + } else { + width -= mIndents.back(); + } + } + return width; + } + void clear() { mIndents.clear(); } + + private: + float mFirstWidth; + int mFirstWidthLineCount; + float mRestWidth; + std::vector mIndents; }; class TabStops { - public: - void set(const int* stops, size_t nStops, int tabWidth) { - if (stops != nullptr) { - mStops.assign(stops, stops + nStops); - } else { - mStops.clear(); - } - mTabWidth = tabWidth; - } - float nextTab(float widthSoFar) const { - for (size_t i = 0; i < mStops.size(); i++) { - if (mStops[i] > widthSoFar) { - return mStops[i]; - } - } - return floor(widthSoFar / mTabWidth + 1) * mTabWidth; - } - private: - std::vector mStops; - int mTabWidth; + public: + void set(const int* stops, size_t nStops, int tabWidth) { + if (stops != nullptr) { + mStops.assign(stops, stops + nStops); + } else { + mStops.clear(); + } + mTabWidth = tabWidth; + } + float nextTab(float widthSoFar) const { + for (size_t i = 0; i < mStops.size(); i++) { + if (mStops[i] > widthSoFar) { + return mStops[i]; + } + } + return floor(widthSoFar / mTabWidth + 1) * mTabWidth; + } + + private: + std::vector mStops; + int mTabWidth; }; class LineBreaker { - public: - const static int kTab_Shift = 29; // keep synchronized with TAB_MASK in StaticLayout.java - - // Note: Locale persists across multiple invocations (it is not cleaned up by finish()), - // explicitly to avoid the cost of creating ICU BreakIterator objects. It should always - // be set on the first invocation, but callers are encouraged not to call again unless - // locale has actually changed. - // That logic could be here but it's better for performance that it's upstream because of - // the cost of constructing and comparing the ICU Locale object. 
- // Note: caller is responsible for managing lifetime of hyphenator - void setLocale(const icu::Locale& locale, Hyphenator* hyphenator); - - void resize(size_t size) { - mTextBuf.resize(size); - mCharWidths.resize(size); - } - - size_t size() const { - return mTextBuf.size(); - } - - uint16_t* buffer() { - return mTextBuf.data(); - } - - float* charWidths() { - return mCharWidths.data(); - } - - // set text to current contents of buffer - void setText(); - - void setLineWidths(float firstWidth, int firstWidthLineCount, float restWidth); - - void setIndents(const std::vector& indents); - - void setTabStops(const int* stops, size_t nStops, int tabWidth) { - mTabStops.set(stops, nStops, tabWidth); - } - - BreakStrategy getStrategy() const { return mStrategy; } - - void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } - - void setJustified(bool justified) { mJustified = justified; } - - HyphenationFrequency getHyphenationFrequency() const { return mHyphenationFrequency; } - - void setHyphenationFrequency(HyphenationFrequency frequency) { - mHyphenationFrequency = frequency; - } - - // TODO: this class is actually fairly close to being general and not tied to using - // Minikin to do the shaping of the strings. The main thing that would need to be changed - // is having some kind of callback (or virtual class, or maybe even template), which could - // easily be instantiated with Minikin's Layout. Future work for when needed. 
- float addStyleRun(MinikinPaint* paint, const std::shared_ptr& typeface, - FontStyle style, size_t start, size_t end, bool isRtl, double letterSpacing = 0); - - void addReplacement(size_t start, size_t end, float width); - - size_t computeBreaks(); - - const int* getBreaks() const { - return mBreaks.data(); - } - - const float* getWidths() const { - return mWidths.data(); - } - - const int* getFlags() const { - return mFlags.data(); - } + public: + const static int kTab_Shift = + 29; // keep synchronized with TAB_MASK in StaticLayout.java + + // Note: Locale persists across multiple invocations (it is not cleaned up by + // finish()), explicitly to avoid the cost of creating ICU BreakIterator + // objects. It should always be set on the first invocation, but callers are + // encouraged not to call again unless locale has actually changed. That logic + // could be here but it's better for performance that it's upstream because of + // the cost of constructing and comparing the ICU Locale object. + // Note: caller is responsible for managing lifetime of hyphenator + void setLocale(const icu::Locale& locale, Hyphenator* hyphenator); + + void resize(size_t size) { + mTextBuf.resize(size); + mCharWidths.resize(size); + } - void finish(); + size_t size() const { return mTextBuf.size(); } - private: - // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for - // very large paragraphs, accuracy could degrade using only 32-bit float. Note however - // that float is used extensively on the Java side for this. This is a typedef so that - // we can easily change it based on performance/accuracy tradeoff. 
- typedef double ParaWidth; + uint16_t* buffer() { return mTextBuf.data(); } - // A single candidate break - struct Candidate { - size_t offset; // offset to text buffer, in code units - size_t prev; // index to previous break - ParaWidth preBreak; // width of text until this point, if we decide to not break here - ParaWidth postBreak; // width of text until this point, if we decide to break here - float penalty; // penalty of this break (for example, hyphen penalty) - float score; // best score found for this break - size_t lineNumber; // only updated for non-constant line widths - size_t preSpaceCount; // preceding space count before breaking - size_t postSpaceCount; // preceding space count after breaking - HyphenationType hyphenType; - }; + float* charWidths() { return mCharWidths.data(); } - float currentLineWidth() const; + // set text to current contents of buffer + void setText(); - void addWordBreak(size_t offset, ParaWidth preBreak, ParaWidth postBreak, - size_t preSpaceCount, size_t postSpaceCount, float penalty, HyphenationType hyph); + void setLineWidths(float firstWidth, + int firstWidthLineCount, + float restWidth); - void addCandidate(Candidate cand); - void pushGreedyBreak(); + void setIndents(const std::vector& indents); - // push an actual break to the output. 
Takes care of setting flags for tab - void pushBreak(int offset, float width, uint8_t hyphenEdit); + void setTabStops(const int* stops, size_t nStops, int tabWidth) { + mTabStops.set(stops, nStops, tabWidth); + } - float getSpaceWidth() const; + BreakStrategy getStrategy() const { return mStrategy; } - void computeBreaksGreedy(); + void setStrategy(BreakStrategy strategy) { mStrategy = strategy; } - void computeBreaksOptimal(bool isRectangular); + void setJustified(bool justified) { mJustified = justified; } - void finishBreaksOptimal(); + HyphenationFrequency getHyphenationFrequency() const { + return mHyphenationFrequency; + } - WordBreaker mWordBreaker; - icu::Locale mLocale; - std::vectormTextBuf; - std::vectormCharWidths; + void setHyphenationFrequency(HyphenationFrequency frequency) { + mHyphenationFrequency = frequency; + } - Hyphenator* mHyphenator; - std::vector mHyphBuf; + // TODO: this class is actually fairly close to being general and not tied to + // using Minikin to do the shaping of the strings. The main thing that would + // need to be changed is having some kind of callback (or virtual class, or + // maybe even template), which could easily be instantiated with Minikin's + // Layout. Future work for when needed. + float addStyleRun(MinikinPaint* paint, + const std::shared_ptr& typeface, + FontStyle style, + size_t start, + size_t end, + bool isRtl, + double letterSpacing = 0); + + void addReplacement(size_t start, size_t end, float width); + + size_t computeBreaks(); + + const int* getBreaks() const { return mBreaks.data(); } + + const float* getWidths() const { return mWidths.data(); } + + const int* getFlags() const { return mFlags.data(); } + + void finish(); + + private: + // ParaWidth is used to hold cumulative width from beginning of paragraph. + // Note that for very large paragraphs, accuracy could degrade using only + // 32-bit float. Note however that float is used extensively on the Java side + // for this. 
This is a typedef so that we can easily change it based on + // performance/accuracy tradeoff. + typedef double ParaWidth; + + // A single candidate break + struct Candidate { + size_t offset; // offset to text buffer, in code units + size_t prev; // index to previous break + ParaWidth preBreak; // width of text until this point, if we decide to not + // break here + ParaWidth postBreak; // width of text until this point, if we decide to + // break here + float penalty; // penalty of this break (for example, hyphen penalty) + float score; // best score found for this break + size_t lineNumber; // only updated for non-constant line widths + size_t preSpaceCount; // preceding space count before breaking + size_t postSpaceCount; // preceding space count after breaking + HyphenationType hyphenType; + }; + + float currentLineWidth() const; + + void addWordBreak(size_t offset, + ParaWidth preBreak, + ParaWidth postBreak, + size_t preSpaceCount, + size_t postSpaceCount, + float penalty, + HyphenationType hyph); + + void addCandidate(Candidate cand); + void pushGreedyBreak(); - // layout parameters - BreakStrategy mStrategy = kBreakStrategy_Greedy; - HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; - bool mJustified; - LineWidths mLineWidths; - TabStops mTabStops; + // push an actual break to the output. 
Takes care of setting flags for tab + void pushBreak(int offset, float width, uint8_t hyphenEdit); - // result of line breaking - std::vector mBreaks; - std::vector mWidths; - std::vector mFlags; + float getSpaceWidth() const; - ParaWidth mWidth = 0; - std::vector mCandidates; - float mLinePenalty = 0.0f; + void computeBreaksGreedy(); - // the following are state for greedy breaker (updated while adding style runs) - size_t mLastBreak; - size_t mBestBreak; - float mBestScore; - ParaWidth mPreBreak; // prebreak of last break - uint32_t mLastHyphenation; // hyphen edit of last break kept for next line - int mFirstTabIndex; - size_t mSpaceCount; + void computeBreaksOptimal(bool isRectangular); + + void finishBreaksOptimal(); + + WordBreaker mWordBreaker; + icu::Locale mLocale; + std::vector mTextBuf; + std::vector mCharWidths; + + Hyphenator* mHyphenator; + std::vector mHyphBuf; + + // layout parameters + BreakStrategy mStrategy = kBreakStrategy_Greedy; + HyphenationFrequency mHyphenationFrequency = kHyphenationFrequency_Normal; + bool mJustified; + LineWidths mLineWidths; + TabStops mTabStops; + + // result of line breaking + std::vector mBreaks; + std::vector mWidths; + std::vector mFlags; + + ParaWidth mWidth = 0; + std::vector mCandidates; + float mLinePenalty = 0.0f; + + // the following are state for greedy breaker (updated while adding style + // runs) + size_t mLastBreak; + size_t mBestBreak; + float mBestScore; + ParaWidth mPreBreak; // prebreak of last break + uint32_t mLastHyphenation; // hyphen edit of last break kept for next line + int mFirstTabIndex; + size_t mSpaceCount; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/Measurement.cpp b/third_party/txt/src/minikin/Measurement.cpp index 89797134c05059b6e0217efc5606703078b7a162..87d08465ddddbe1d9caf2d1eb8b2ccf06228ecc0 100644 --- a/third_party/txt/src/minikin/Measurement.cpp +++ b/third_party/txt/src/minikin/Measurement.cpp @@ -16,8 +16,8 @@ #define LOG_TAG "Minikin" -#include #include 
+#include #include @@ -26,99 +26,112 @@ namespace minikin { -// These could be considered helper methods of layout, but need only be loosely coupled, so -// are separate. +// These could be considered helper methods of layout, but need only be loosely +// coupled, so are separate. -static float getRunAdvance(const float* advances, const uint16_t* buf, size_t layoutStart, - size_t start, size_t count, size_t offset) { - float advance = 0.0f; - size_t lastCluster = start; - float clusterWidth = 0.0f; - for (size_t i = start; i < offset; i++) { - float charAdvance = advances[i - layoutStart]; - if (charAdvance != 0.0f) { - advance += charAdvance; - lastCluster = i; - clusterWidth = charAdvance; - } +static float getRunAdvance(const float* advances, + const uint16_t* buf, + size_t layoutStart, + size_t start, + size_t count, + size_t offset) { + float advance = 0.0f; + size_t lastCluster = start; + float clusterWidth = 0.0f; + for (size_t i = start; i < offset; i++) { + float charAdvance = advances[i - layoutStart]; + if (charAdvance != 0.0f) { + advance += charAdvance; + lastCluster = i; + clusterWidth = charAdvance; } - if (offset < start + count && advances[offset - layoutStart] == 0.0f) { - // In the middle of a cluster, distribute width of cluster so that each grapheme cluster - // gets an equal share. 
- // TODO: get caret information out of font when that's available - size_t nextCluster; - for (nextCluster = offset + 1; nextCluster < start + count; nextCluster++) { - if (advances[nextCluster - layoutStart] != 0.0f) break; - } - int numGraphemeClusters = 0; - int numGraphemeClustersAfter = 0; - for (size_t i = lastCluster; i < nextCluster; i++) { - bool isAfter = i >= offset; - if (GraphemeBreak::isGraphemeBreak( - advances + (start - layoutStart), buf, start, count, i)) { - numGraphemeClusters++; - if (isAfter) { - numGraphemeClustersAfter++; - } - } - } - if (numGraphemeClusters > 0) { - advance -= clusterWidth * numGraphemeClustersAfter / numGraphemeClusters; + } + if (offset < start + count && advances[offset - layoutStart] == 0.0f) { + // In the middle of a cluster, distribute width of cluster so that each + // grapheme cluster gets an equal share. + // TODO: get caret information out of font when that's available + size_t nextCluster; + for (nextCluster = offset + 1; nextCluster < start + count; nextCluster++) { + if (advances[nextCluster - layoutStart] != 0.0f) + break; + } + int numGraphemeClusters = 0; + int numGraphemeClustersAfter = 0; + for (size_t i = lastCluster; i < nextCluster; i++) { + bool isAfter = i >= offset; + if (GraphemeBreak::isGraphemeBreak(advances + (start - layoutStart), buf, + start, count, i)) { + numGraphemeClusters++; + if (isAfter) { + numGraphemeClustersAfter++; } + } } - return advance; + if (numGraphemeClusters > 0) { + advance -= clusterWidth * numGraphemeClustersAfter / numGraphemeClusters; + } + } + return advance; } -float getRunAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count, - size_t offset) { - return getRunAdvance(advances, buf, start, start, count, offset); +float getRunAdvance(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + size_t offset) { + return getRunAdvance(advances, buf, start, start, count, offset); } /** - * Essentially the inverse of 
getRunAdvance. Compute the value of offset for which the - * measured caret comes closest to the provided advance param, and which is on a grapheme - * cluster boundary. + * Essentially the inverse of getRunAdvance. Compute the value of offset for + * which the measured caret comes closest to the provided advance param, and + * which is on a grapheme cluster boundary. * - * The actual implementation fast-forwards through clusters to get "close", then does a finer-grain - * search within the cluster and grapheme breaks. + * The actual implementation fast-forwards through clusters to get "close", then + * does a finer-grain search within the cluster and grapheme breaks. */ -size_t getOffsetForAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count, - float advance) { - float x = 0.0f, xLastClusterStart = 0.0f, xSearchStart = 0.0f; - size_t lastClusterStart = start, searchStart = start; - for (size_t i = start; i < start + count; i++) { - if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) { - searchStart = lastClusterStart; - xSearchStart = xLastClusterStart; - } - float width = advances[i - start]; - if (width != 0.0f) { - lastClusterStart = i; - xLastClusterStart = x; - x += width; - if (x > advance) { - break; - } - } +size_t getOffsetForAdvance(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + float advance) { + float x = 0.0f, xLastClusterStart = 0.0f, xSearchStart = 0.0f; + size_t lastClusterStart = start, searchStart = start; + for (size_t i = start; i < start + count; i++) { + if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) { + searchStart = lastClusterStart; + xSearchStart = xLastClusterStart; } - size_t best = searchStart; - float bestDist = FLT_MAX; - for (size_t i = searchStart; i <= start + count; i++) { - if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) { - // "getRunAdvance(layout, buf, start, count, i) - advance" but more efficient - float delta 
= getRunAdvance(advances, buf, start, searchStart, count - searchStart, i) + float width = advances[i - start]; + if (width != 0.0f) { + lastClusterStart = i; + xLastClusterStart = x; + x += width; + if (x > advance) { + break; + } + } + } + size_t best = searchStart; + float bestDist = FLT_MAX; + for (size_t i = searchStart; i <= start + count; i++) { + if (GraphemeBreak::isGraphemeBreak(advances, buf, start, count, i)) { + // "getRunAdvance(layout, buf, start, count, i) - advance" but more + // efficient + float delta = getRunAdvance(advances, buf, start, searchStart, + count - searchStart, i) + xSearchStart - advance; - if (std::abs(delta) < bestDist) { - bestDist = std::abs(delta); - best = i; - } - if (delta >= 0.0f) { - break; - } - } + if (std::abs(delta) < bestDist) { + bestDist = std::abs(delta); + best = i; + } + if (delta >= 0.0f) { + break; + } } - return best; + } + return best; } } // namespace minikin diff --git a/third_party/txt/src/minikin/Measurement.h b/third_party/txt/src/minikin/Measurement.h index b00c2120a73c59127b30f80383b160855f77d9b0..fac2848e160610c6df5ebc5e1ab8686cfc7cb9b1 100644 --- a/third_party/txt/src/minikin/Measurement.h +++ b/third_party/txt/src/minikin/Measurement.h @@ -21,11 +21,17 @@ namespace minikin { -float getRunAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count, - size_t offset); +float getRunAdvance(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + size_t offset); -size_t getOffsetForAdvance(const float* advances, const uint16_t* buf, size_t start, size_t count, - float advance); +size_t getOffsetForAdvance(const float* advances, + const uint16_t* buf, + size_t start, + size_t count, + float advance); } // namespace minikin diff --git a/third_party/txt/src/minikin/MinikinFont.cpp b/third_party/txt/src/minikin/MinikinFont.cpp index d2f9aca171362f7d9a288c5446b902ab5fd61fce..a39699475daf961af0f707df3cf44aaab670453d 100644 --- 
a/third_party/txt/src/minikin/MinikinFont.cpp +++ b/third_party/txt/src/minikin/MinikinFont.cpp @@ -21,8 +21,8 @@ namespace minikin { MinikinFont::~MinikinFont() { - std::lock_guard _l(gMinikinLock); - purgeHbFontLocked(this); + std::lock_guard _l(gMinikinLock); + purgeHbFontLocked(this); } } // namespace minikin diff --git a/third_party/txt/src/minikin/MinikinFont.h b/third_party/txt/src/minikin/MinikinFont.h index 01bfe46c16d7947b425468a64c64c297f8211f14..b7d69562af4f6961af7060c18445036991c44c65 100644 --- a/third_party/txt/src/minikin/MinikinFont.h +++ b/third_party/txt/src/minikin/MinikinFont.h @@ -17,8 +17,8 @@ #ifndef MINIKIN_FONT_H #define MINIKIN_FONT_H -#include #include +#include #include #include @@ -31,90 +31,94 @@ namespace minikin { class MinikinFont; // Possibly move into own .h file? -// Note: if you add a field here, either add it to LayoutCacheKey or to skipCache() +// Note: if you add a field here, either add it to LayoutCacheKey or to +// skipCache() struct MinikinPaint { - MinikinPaint() : font(nullptr), size(0), scaleX(0), skewX(0), letterSpacing(0), wordSpacing(0), - paintFlags(0), fakery(), hyphenEdit(), fontFeatureSettings() { } - - bool skipCache() const { - return !fontFeatureSettings.empty(); - } - - MinikinFont *font; - float size; - float scaleX; - float skewX; - float letterSpacing; - float wordSpacing; - uint32_t paintFlags; - FontFakery fakery; - HyphenEdit hyphenEdit; - std::string fontFeatureSettings; + MinikinPaint() + : font(nullptr), + size(0), + scaleX(0), + skewX(0), + letterSpacing(0), + wordSpacing(0), + paintFlags(0), + fakery(), + hyphenEdit(), + fontFeatureSettings() {} + + bool skipCache() const { return !fontFeatureSettings.empty(); } + + MinikinFont* font; + float size; + float scaleX; + float skewX; + float letterSpacing; + float wordSpacing; + uint32_t paintFlags; + FontFakery fakery; + HyphenEdit hyphenEdit; + std::string fontFeatureSettings; }; // Only a few flags affect layout, but those that do should have 
values // consistent with Android's paint flags. enum MinikinPaintFlags { - LinearTextFlag = 0x40, + LinearTextFlag = 0x40, }; struct MinikinRect { - float mLeft, mTop, mRight, mBottom; - bool isEmpty() const { - return mLeft == mRight || mTop == mBottom; - } - void set(const MinikinRect& r) { - mLeft = r.mLeft; - mTop = r.mTop; - mRight = r.mRight; - mBottom = r.mBottom; - } - void offset(float dx, float dy) { - mLeft += dx; - mTop += dy; - mRight += dx; - mBottom += dy; - } - void setEmpty() { - mLeft = mTop = mRight = mBottom = 0; - } - void join(const MinikinRect& r); + float mLeft, mTop, mRight, mBottom; + bool isEmpty() const { return mLeft == mRight || mTop == mBottom; } + void set(const MinikinRect& r) { + mLeft = r.mLeft; + mTop = r.mTop; + mRight = r.mRight; + mBottom = r.mBottom; + } + void offset(float dx, float dy) { + mLeft += dx; + mTop += dy; + mRight += dx; + mBottom += dy; + } + void setEmpty() { mLeft = mTop = mRight = mBottom = 0; } + void join(const MinikinRect& r); }; // Callback for freeing data -typedef void (*MinikinDestroyFunc) (void* data); +typedef void (*MinikinDestroyFunc)(void* data); class MinikinFont { -public: - explicit MinikinFont(int32_t uniqueId) : mUniqueId(uniqueId) {} + public: + explicit MinikinFont(int32_t uniqueId) : mUniqueId(uniqueId) {} + + virtual ~MinikinFont(); - virtual ~MinikinFont(); + virtual float GetHorizontalAdvance(uint32_t glyph_id, + const MinikinPaint& paint) const = 0; - virtual float GetHorizontalAdvance(uint32_t glyph_id, - const MinikinPaint &paint) const = 0; + virtual void GetBounds(MinikinRect* bounds, + uint32_t glyph_id, + const MinikinPaint& paint) const = 0; - virtual void GetBounds(MinikinRect* bounds, uint32_t glyph_id, - const MinikinPaint &paint) const = 0; + virtual hb_face_t* CreateHarfBuzzFace() const { return nullptr; } - virtual hb_face_t* CreateHarfBuzzFace() const { - return nullptr; - } + virtual const std::vector& GetAxes() const = 0; - virtual const std::vector& GetAxes() const = 
0; + virtual std::shared_ptr createFontWithVariation( + const std::vector&) const { + return nullptr; + } - virtual std::shared_ptr createFontWithVariation( - const std::vector&) const { - return nullptr; - } + static uint32_t MakeTag(char c1, char c2, char c3, char c4) { + return ((uint32_t)c1 << 24) | ((uint32_t)c2 << 16) | ((uint32_t)c3 << 8) | + (uint32_t)c4; + } - static uint32_t MakeTag(char c1, char c2, char c3, char c4) { - return ((uint32_t)c1 << 24) | ((uint32_t)c2 << 16) | - ((uint32_t)c3 << 8) | (uint32_t)c4; - } + int32_t GetUniqueId() const { return mUniqueId; } - int32_t GetUniqueId() const { return mUniqueId; } -private: - const int32_t mUniqueId; + private: + const int32_t mUniqueId; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/MinikinInternal.cpp b/third_party/txt/src/minikin/MinikinInternal.cpp index 9cd7bddab5e5cd0bd3519d3e1120c1c1797a2937..69e87f0c26ba4143ff9d303de85b610dfaa47335 100644 --- a/third_party/txt/src/minikin/MinikinInternal.cpp +++ b/third_party/txt/src/minikin/MinikinInternal.cpp @@ -28,17 +28,17 @@ std::mutex gMinikinLock; void assertMinikinLocked() { #ifdef ENABLE_RACE_DETECTION - LOG_ALWAYS_FATAL_IF(gMinikinLock.tryLock() == 0); + LOG_ALWAYS_FATAL_IF(gMinikinLock.tryLock() == 0); #endif } hb_blob_t* getFontTable(const MinikinFont* minikinFont, uint32_t tag) { - assertMinikinLocked(); - hb_font_t* font = getHbFontLocked(minikinFont); - hb_face_t* face = hb_font_get_face(font); - hb_blob_t* blob = hb_face_reference_table(face, tag); - hb_font_destroy(font); - return blob; + assertMinikinLocked(); + hb_font_t* font = getHbFontLocked(minikinFont); + hb_face_t* face = hb_font_get_face(font); + hb_blob_t* blob = hb_face_reference_table(face, tag); + hb_font_destroy(font); + return blob; } } // namespace minikin diff --git a/third_party/txt/src/minikin/MinikinInternal.h b/third_party/txt/src/minikin/MinikinInternal.h index 54395221e7324204666423655d0f7646869f2627..fe887123e3f25b97be144f8127509e3a16b41178 100644 
--- a/third_party/txt/src/minikin/MinikinInternal.h +++ b/third_party/txt/src/minikin/MinikinInternal.h @@ -42,27 +42,22 @@ constexpr uint32_t MAX_UNICODE_CODE_POINT = 0x10FFFF; // An RAII wrapper for hb_blob_t class HbBlob { -public: - // Takes ownership of hb_blob_t object, caller is no longer - // responsible for calling hb_blob_destroy(). - explicit HbBlob(hb_blob_t* blob) : mBlob(blob) { - } - - ~HbBlob() { - hb_blob_destroy(mBlob); - } - - const uint8_t* get() const { - const char* data = hb_blob_get_data(mBlob, nullptr); - return reinterpret_cast(data); - } - - size_t size() const { - return (size_t)hb_blob_get_length(mBlob); - } - -private: - hb_blob_t* mBlob; + public: + // Takes ownership of hb_blob_t object, caller is no longer + // responsible for calling hb_blob_destroy(). + explicit HbBlob(hb_blob_t* blob) : mBlob(blob) {} + + ~HbBlob() { hb_blob_destroy(mBlob); } + + const uint8_t* get() const { + const char* data = hb_blob_get_data(mBlob, nullptr); + return reinterpret_cast(data); + } + + size_t size() const { return (size_t)hb_blob_get_length(mBlob); } + + private: + hb_blob_t* mBlob; }; } // namespace minikin diff --git a/third_party/txt/src/minikin/SparseBitSet.cpp b/third_party/txt/src/minikin/SparseBitSet.cpp index 9fad6a0cc77a3e7b4f69a225ddbb2af7bddba83c..002eacae3fac4364ef59f68d7a41070a7cb505ca 100644 --- a/third_party/txt/src/minikin/SparseBitSet.cpp +++ b/third_party/txt/src/minikin/SparseBitSet.cpp @@ -28,119 +28,124 @@ namespace minikin { const uint32_t SparseBitSet::kNotFound; uint32_t SparseBitSet::calcNumPages(const uint32_t* ranges, size_t nRanges) { - bool haveZeroPage = false; - uint32_t nonzeroPageEnd = 0; - uint32_t nPages = 0; - for (size_t i = 0; i < nRanges; i++) { - uint32_t start = ranges[i * 2]; - uint32_t end = ranges[i * 2 + 1]; - uint32_t startPage = start >> kLogValuesPerPage; - uint32_t endPage = (end - 1) >> kLogValuesPerPage; - if (startPage >= nonzeroPageEnd) { - if (startPage > nonzeroPageEnd) { - if (!haveZeroPage) 
{ - haveZeroPage = true; - nPages++; - } - } - nPages++; + bool haveZeroPage = false; + uint32_t nonzeroPageEnd = 0; + uint32_t nPages = 0; + for (size_t i = 0; i < nRanges; i++) { + uint32_t start = ranges[i * 2]; + uint32_t end = ranges[i * 2 + 1]; + uint32_t startPage = start >> kLogValuesPerPage; + uint32_t endPage = (end - 1) >> kLogValuesPerPage; + if (startPage >= nonzeroPageEnd) { + if (startPage > nonzeroPageEnd) { + if (!haveZeroPage) { + haveZeroPage = true; + nPages++; } - nPages += endPage - startPage; - nonzeroPageEnd = endPage + 1; + } + nPages++; } - return nPages; + nPages += endPage - startPage; + nonzeroPageEnd = endPage + 1; + } + return nPages; } void SparseBitSet::initFromRanges(const uint32_t* ranges, size_t nRanges) { - if (nRanges == 0) { - return; - } - const uint32_t maxVal = ranges[nRanges * 2 - 1]; - if (maxVal >= kMaximumCapacity) { - return; - } - mMaxVal = maxVal; - mIndices.reset(new uint16_t[(mMaxVal + kPageMask) >> kLogValuesPerPage]); - uint32_t nPages = calcNumPages(ranges, nRanges); - mBitmaps.reset(new element[nPages << (kLogValuesPerPage - kLogBitsPerEl)]()); - mZeroPageIndex = noZeroPage; - uint32_t nonzeroPageEnd = 0; - uint32_t currentPage = 0; - for (size_t i = 0; i < nRanges; i++) { - uint32_t start = ranges[i * 2]; - uint32_t end = ranges[i * 2 + 1]; - LOG_ALWAYS_FATAL_IF(end < start); // make sure range size is nonnegative - uint32_t startPage = start >> kLogValuesPerPage; - uint32_t endPage = (end - 1) >> kLogValuesPerPage; - if (startPage >= nonzeroPageEnd) { - if (startPage > nonzeroPageEnd) { - if (mZeroPageIndex == noZeroPage) { - mZeroPageIndex = (currentPage++) << (kLogValuesPerPage - kLogBitsPerEl); - } - for (uint32_t j = nonzeroPageEnd; j < startPage; j++) { - mIndices[j] = mZeroPageIndex; - } - } - mIndices[startPage] = (currentPage++) << (kLogValuesPerPage - kLogBitsPerEl); - } - - size_t index = ((currentPage - 1) << (kLogValuesPerPage - kLogBitsPerEl)) + - ((start & kPageMask) >> kLogBitsPerEl); - size_t 
nElements = (end - (start & ~kElMask) + kElMask) >> kLogBitsPerEl; - if (nElements == 1) { - mBitmaps[index] |= (kElAllOnes >> (start & kElMask)) & - (kElAllOnes << ((~end + 1) & kElMask)); - } else { - mBitmaps[index] |= kElAllOnes >> (start & kElMask); - for (size_t j = 1; j < nElements - 1; j++) { - mBitmaps[index + j] = kElAllOnes; - } - mBitmaps[index + nElements - 1] |= kElAllOnes << ((~end + 1) & kElMask); + if (nRanges == 0) { + return; + } + const uint32_t maxVal = ranges[nRanges * 2 - 1]; + if (maxVal >= kMaximumCapacity) { + return; + } + mMaxVal = maxVal; + mIndices.reset(new uint16_t[(mMaxVal + kPageMask) >> kLogValuesPerPage]); + uint32_t nPages = calcNumPages(ranges, nRanges); + mBitmaps.reset(new element[nPages << (kLogValuesPerPage - kLogBitsPerEl)]()); + mZeroPageIndex = noZeroPage; + uint32_t nonzeroPageEnd = 0; + uint32_t currentPage = 0; + for (size_t i = 0; i < nRanges; i++) { + uint32_t start = ranges[i * 2]; + uint32_t end = ranges[i * 2 + 1]; + LOG_ALWAYS_FATAL_IF(end < start); // make sure range size is nonnegative + uint32_t startPage = start >> kLogValuesPerPage; + uint32_t endPage = (end - 1) >> kLogValuesPerPage; + if (startPage >= nonzeroPageEnd) { + if (startPage > nonzeroPageEnd) { + if (mZeroPageIndex == noZeroPage) { + mZeroPageIndex = (currentPage++) + << (kLogValuesPerPage - kLogBitsPerEl); } - for (size_t j = startPage + 1; j < endPage + 1; j++) { - mIndices[j] = (currentPage++) << (kLogValuesPerPage - kLogBitsPerEl); + for (uint32_t j = nonzeroPageEnd; j < startPage; j++) { + mIndices[j] = mZeroPageIndex; } - nonzeroPageEnd = endPage + 1; + } + mIndices[startPage] = (currentPage++) + << (kLogValuesPerPage - kLogBitsPerEl); + } + + size_t index = ((currentPage - 1) << (kLogValuesPerPage - kLogBitsPerEl)) + + ((start & kPageMask) >> kLogBitsPerEl); + size_t nElements = (end - (start & ~kElMask) + kElMask) >> kLogBitsPerEl; + if (nElements == 1) { + mBitmaps[index] |= (kElAllOnes >> (start & kElMask)) & + (kElAllOnes << ((~end + 
1) & kElMask)); + } else { + mBitmaps[index] |= kElAllOnes >> (start & kElMask); + for (size_t j = 1; j < nElements - 1; j++) { + mBitmaps[index + j] = kElAllOnes; + } + mBitmaps[index + nElements - 1] |= kElAllOnes << ((~end + 1) & kElMask); + } + for (size_t j = startPage + 1; j < endPage + 1; j++) { + mIndices[j] = (currentPage++) << (kLogValuesPerPage - kLogBitsPerEl); } + nonzeroPageEnd = endPage + 1; + } } int SparseBitSet::CountLeadingZeros(element x) { - // Note: GCC / clang builtin - return sizeof(element) <= sizeof(int) ? __builtin_clz(x) : __builtin_clzl(x); + // Note: GCC / clang builtin + return sizeof(element) <= sizeof(int) ? __builtin_clz(x) : __builtin_clzl(x); } uint32_t SparseBitSet::nextSetBit(uint32_t fromIndex) const { - if (fromIndex >= mMaxVal) { - return kNotFound; - } - uint32_t fromPage = fromIndex >> kLogValuesPerPage; - const element* bitmap = &mBitmaps[mIndices[fromPage]]; - uint32_t offset = (fromIndex & kPageMask) >> kLogBitsPerEl; - element e = bitmap[offset] & (kElAllOnes >> (fromIndex & kElMask)); + if (fromIndex >= mMaxVal) { + return kNotFound; + } + uint32_t fromPage = fromIndex >> kLogValuesPerPage; + const element* bitmap = &mBitmaps[mIndices[fromPage]]; + uint32_t offset = (fromIndex & kPageMask) >> kLogBitsPerEl; + element e = bitmap[offset] & (kElAllOnes >> (fromIndex & kElMask)); + if (e != 0) { + return (fromIndex & ~kElMask) + CountLeadingZeros(e); + } + for (uint32_t j = offset + 1; j < (1 << (kLogValuesPerPage - kLogBitsPerEl)); + j++) { + e = bitmap[j]; if (e != 0) { - return (fromIndex & ~kElMask) + CountLeadingZeros(e); + return (fromIndex & ~kPageMask) + (j << kLogBitsPerEl) + + CountLeadingZeros(e); } - for (uint32_t j = offset + 1; j < (1 << (kLogValuesPerPage - kLogBitsPerEl)); j++) { - e = bitmap[j]; - if (e != 0) { - return (fromIndex & ~kPageMask) + (j << kLogBitsPerEl) + CountLeadingZeros(e); - } + } + uint32_t maxPage = (mMaxVal + kPageMask) >> kLogValuesPerPage; + for (uint32_t page = fromPage + 1; page < 
maxPage; page++) { + uint16_t index = mIndices[page]; + if (index == mZeroPageIndex) { + continue; } - uint32_t maxPage = (mMaxVal + kPageMask) >> kLogValuesPerPage; - for (uint32_t page = fromPage + 1; page < maxPage; page++) { - uint16_t index = mIndices[page]; - if (index == mZeroPageIndex) { - continue; - } - bitmap = &mBitmaps[index]; - for (uint32_t j = 0; j < (1 << (kLogValuesPerPage - kLogBitsPerEl)); j++) { - e = bitmap[j]; - if (e != 0) { - return (page << kLogValuesPerPage) + (j << kLogBitsPerEl) + CountLeadingZeros(e); - } - } + bitmap = &mBitmaps[index]; + for (uint32_t j = 0; j < (1 << (kLogValuesPerPage - kLogBitsPerEl)); j++) { + e = bitmap[j]; + if (e != 0) { + return (page << kLogValuesPerPage) + (j << kLogBitsPerEl) + + CountLeadingZeros(e); + } } - return kNotFound; + } + return kNotFound; } } // namespace minikin diff --git a/third_party/txt/src/minikin/SparseBitSet.h b/third_party/txt/src/minikin/SparseBitSet.h index 62aece209db571862621888bfb6f85f74d5c656b..c0bf576920f4ab14416bf5331242b73e1a64551f 100644 --- a/third_party/txt/src/minikin/SparseBitSet.h +++ b/third_party/txt/src/minikin/SparseBitSet.h @@ -32,68 +32,68 @@ namespace minikin { // large gaps. The motivating example is Unicode coverage of a font, but // the abstraction itself is fully general. class SparseBitSet { -public: - // Create an empty bit set. - SparseBitSet() : mMaxVal(0) {} - - // Initialize the set to a new value, represented by ranges. For - // simplicity, these ranges are arranged as pairs of values, - // inclusive of start, exclusive of end, laid out in a uint32 array. 
- SparseBitSet(const uint32_t* ranges, size_t nRanges) : SparseBitSet() { - initFromRanges(ranges, nRanges); - } - - SparseBitSet(SparseBitSet&&) = default; - SparseBitSet& operator=(SparseBitSet&&) = default; - - // Determine whether the value is included in the set - bool get(uint32_t ch) const { - if (ch >= mMaxVal) return false; - const uint32_t *bitmap = &mBitmaps[mIndices[ch >> kLogValuesPerPage]]; - uint32_t index = ch & kPageMask; - return (bitmap[index >> kLogBitsPerEl] & (kElFirst >> (index & kElMask))) != 0; - } - - // One more than the maximum value in the set, or zero if empty - uint32_t length() const { - return mMaxVal; - } - - // The next set bit starting at fromIndex, inclusive, or kNotFound - // if none exists. - uint32_t nextSetBit(uint32_t fromIndex) const; - - static const uint32_t kNotFound = ~0u; - -private: - void initFromRanges(const uint32_t* ranges, size_t nRanges); - - static const uint32_t kMaximumCapacity = 0xFFFFFF; - static const int kLogValuesPerPage = 8; - static const int kPageMask = (1 << kLogValuesPerPage) - 1; - static const int kLogBytesPerEl = 2; - static const int kLogBitsPerEl = kLogBytesPerEl + 3; - static const int kElMask = (1 << kLogBitsPerEl) - 1; - // invariant: sizeof(element) == (1 << kLogBytesPerEl) - typedef uint32_t element; - static const element kElAllOnes = ~((element)0); - static const element kElFirst = ((element)1) << kElMask; - static const uint16_t noZeroPage = 0xFFFF; - - static uint32_t calcNumPages(const uint32_t* ranges, size_t nRanges); - static int CountLeadingZeros(element x); - - uint32_t mMaxVal; - - std::unique_ptr mIndices; - std::unique_ptr mBitmaps; - uint16_t mZeroPageIndex; - - // Forbid copy and assign. - SparseBitSet(const SparseBitSet&) = delete; - void operator=(const SparseBitSet&) = delete; + public: + // Create an empty bit set. + SparseBitSet() : mMaxVal(0) {} + + // Initialize the set to a new value, represented by ranges. 
For + // simplicity, these ranges are arranged as pairs of values, + // inclusive of start, exclusive of end, laid out in a uint32 array. + SparseBitSet(const uint32_t* ranges, size_t nRanges) : SparseBitSet() { + initFromRanges(ranges, nRanges); + } + + SparseBitSet(SparseBitSet&&) = default; + SparseBitSet& operator=(SparseBitSet&&) = default; + + // Determine whether the value is included in the set + bool get(uint32_t ch) const { + if (ch >= mMaxVal) + return false; + const uint32_t* bitmap = &mBitmaps[mIndices[ch >> kLogValuesPerPage]]; + uint32_t index = ch & kPageMask; + return (bitmap[index >> kLogBitsPerEl] & (kElFirst >> (index & kElMask))) != + 0; + } + + // One more than the maximum value in the set, or zero if empty + uint32_t length() const { return mMaxVal; } + + // The next set bit starting at fromIndex, inclusive, or kNotFound + // if none exists. + uint32_t nextSetBit(uint32_t fromIndex) const; + + static const uint32_t kNotFound = ~0u; + + private: + void initFromRanges(const uint32_t* ranges, size_t nRanges); + + static const uint32_t kMaximumCapacity = 0xFFFFFF; + static const int kLogValuesPerPage = 8; + static const int kPageMask = (1 << kLogValuesPerPage) - 1; + static const int kLogBytesPerEl = 2; + static const int kLogBitsPerEl = kLogBytesPerEl + 3; + static const int kElMask = (1 << kLogBitsPerEl) - 1; + // invariant: sizeof(element) == (1 << kLogBytesPerEl) + typedef uint32_t element; + static const element kElAllOnes = ~((element)0); + static const element kElFirst = ((element)1) << kElMask; + static const uint16_t noZeroPage = 0xFFFF; + + static uint32_t calcNumPages(const uint32_t* ranges, size_t nRanges); + static int CountLeadingZeros(element x); + + uint32_t mMaxVal; + + std::unique_ptr mIndices; + std::unique_ptr mBitmaps; + uint16_t mZeroPageIndex; + + // Forbid copy and assign. 
+ SparseBitSet(const SparseBitSet&) = delete; + void operator=(const SparseBitSet&) = delete; }; } // namespace minikin -#endif // MINIKIN_SPARSE_BIT_SET_H +#endif // MINIKIN_SPARSE_BIT_SET_H diff --git a/third_party/txt/src/minikin/WordBreaker.cpp b/third_party/txt/src/minikin/WordBreaker.cpp index 3577ddd6aa033bffe6edad5ce2ac16c433c355be..5b69acd3430fe7cf1af1e3f98ebcae31cf07ffd2 100644 --- a/third_party/txt/src/minikin/WordBreaker.cpp +++ b/third_party/txt/src/minikin/WordBreaker.cpp @@ -32,240 +32,251 @@ const uint32_t CHAR_SOFT_HYPHEN = 0x00AD; const uint32_t CHAR_ZWJ = 0x200D; void WordBreaker::setLocale(const icu::Locale& locale) { - UErrorCode status = U_ZERO_ERROR; - mBreakIterator.reset(icu::BreakIterator::createLineInstance(locale, status)); - // TODO: handle failure status - if (mText != nullptr) { - mBreakIterator->setText(&mUText, status); - } - mIteratorWasReset = true; + UErrorCode status = U_ZERO_ERROR; + mBreakIterator.reset(icu::BreakIterator::createLineInstance(locale, status)); + // TODO: handle failure status + if (mText != nullptr) { + mBreakIterator->setText(&mUText, status); + } + mIteratorWasReset = true; } void WordBreaker::setText(const uint16_t* data, size_t size) { - mText = data; - mTextSize = size; - mIteratorWasReset = false; - mLast = 0; - mCurrent = 0; - mScanOffset = 0; - mInEmailOrUrl = false; - UErrorCode status = U_ZERO_ERROR; - utext_openUChars(&mUText, data, size, &status); - mBreakIterator->setText(&mUText, status); - mBreakIterator->first(); + mText = data; + mTextSize = size; + mIteratorWasReset = false; + mLast = 0; + mCurrent = 0; + mScanOffset = 0; + mInEmailOrUrl = false; + UErrorCode status = U_ZERO_ERROR; + utext_openUChars(&mUText, data, size, &status); + mBreakIterator->setText(&mUText, status); + mBreakIterator->first(); } ssize_t WordBreaker::current() const { - return mCurrent; + return mCurrent; } /** - * Determine whether a line break at position i within the buffer buf is valid. 
This - * represents customization beyond the ICU behavior, because plain ICU provides some - * line break opportunities that we don't want. + * Determine whether a line break at position i within the buffer buf is valid. + *This represents customization beyond the ICU behavior, because plain ICU + *provides some line break opportunities that we don't want. **/ static bool isBreakValid(const uint16_t* buf, size_t bufEnd, size_t i) { - uint32_t codePoint; - size_t prev_offset = i; - U16_PREV(buf, 0, prev_offset, codePoint); - // Do not break on hard or soft hyphens. These are handled by automatic hyphenation. - if (Hyphenator::isLineBreakingHyphen(codePoint) || codePoint == CHAR_SOFT_HYPHEN) { - // txt addition: Temporarily always break on hyphen. Changed from false to true. - return true; - } - // For Myanmar kinzi sequences, created by . This is to go - // around a bug in ICU line breaking: http://bugs.icu-project.org/trac/ticket/12561. To avoid - // too much looking around in the strings, we simply avoid breaking after any Myanmar virama, - // where no line break could be imagined, since the Myanmar virama is a pure stacker. - if (codePoint == 0x1039) { // MYANMAR SIGN VIRAMA - return false; - } + uint32_t codePoint; + size_t prev_offset = i; + U16_PREV(buf, 0, prev_offset, codePoint); + // Do not break on hard or soft hyphens. These are handled by automatic + // hyphenation. + if (Hyphenator::isLineBreakingHyphen(codePoint) || + codePoint == CHAR_SOFT_HYPHEN) { + // txt addition: Temporarily always break on hyphen. Changed from false to + // true. + return true; + } + // For Myanmar kinzi sequences, created by . This is to go around a bug in ICU line breaking: + // http://bugs.icu-project.org/trac/ticket/12561. To avoid too much looking + // around in the strings, we simply avoid breaking after any Myanmar virama, + // where no line break could be imagined, since the Myanmar virama is a pure + // stacker. 
+ if (codePoint == 0x1039) { // MYANMAR SIGN VIRAMA + return false; + } - uint32_t next_codepoint; - size_t next_offset = i; - U16_NEXT(buf, next_offset, bufEnd, next_codepoint); + uint32_t next_codepoint; + size_t next_offset = i; + U16_NEXT(buf, next_offset, bufEnd, next_codepoint); - // Rule LB8 for Emoji ZWJ sequences. We need to do this ourselves since we may have fresher - // emoji data than ICU does. - if (codePoint == CHAR_ZWJ && isEmoji(next_codepoint)) { - return false; - } + // Rule LB8 for Emoji ZWJ sequences. We need to do this ourselves since we may + // have fresher emoji data than ICU does. + if (codePoint == CHAR_ZWJ && isEmoji(next_codepoint)) { + return false; + } - // Rule LB30b. We need to this ourselves since we may have fresher emoji data than ICU does. - if (isEmojiModifier(next_codepoint)) { - if (codePoint == 0xFE0F && prev_offset > 0) { - // skip over emoji variation selector - U16_PREV(buf, 0, prev_offset, codePoint); - } - if (isEmojiBase(codePoint)) { - return false; - } + // Rule LB30b. We need to this ourselves since we may have fresher emoji data + // than ICU does. + if (isEmojiModifier(next_codepoint)) { + if (codePoint == 0xFE0F && prev_offset > 0) { + // skip over emoji variation selector + U16_PREV(buf, 0, prev_offset, codePoint); } - return true; + if (isEmojiBase(codePoint)) { + return false; + } + } + return true; } // Customized iteratorNext that takes care of both resets and our modifications // to ICU's behavior. 
int32_t WordBreaker::iteratorNext() { - int32_t result; - do { - if (mIteratorWasReset) { - result = mBreakIterator->following(mCurrent); - mIteratorWasReset = false; - } else { - result = mBreakIterator->next(); - } - } while (!(result == icu::BreakIterator::DONE || (size_t)result == mTextSize - || isBreakValid(mText, mTextSize, result))); - return result; + int32_t result; + do { + if (mIteratorWasReset) { + result = mBreakIterator->following(mCurrent); + mIteratorWasReset = false; + } else { + result = mBreakIterator->next(); + } + } while (!(result == icu::BreakIterator::DONE || + (size_t)result == mTextSize || + isBreakValid(mText, mTextSize, result))); + return result; } -// Chicago Manual of Style recommends breaking after these characters in URLs and email addresses +// Chicago Manual of Style recommends breaking after these characters in URLs +// and email addresses static bool breakAfter(uint16_t c) { - return c == ':' || c == '=' || c == '&'; + return c == ':' || c == '=' || c == '&'; } -// Chicago Manual of Style recommends breaking before these characters in URLs and email addresses +// Chicago Manual of Style recommends breaking before these characters in URLs +// and email addresses static bool breakBefore(uint16_t c) { - return c == '~' || c == '.' || c == ',' || c == '-' || c == '_' || c == '?' || c == '#' - || c == '%' || c == '=' || c == '&'; + return c == '~' || c == '.' || c == ',' || c == '-' || c == '_' || c == '?' 
|| + c == '#' || c == '%' || c == '=' || c == '&'; } enum ScanState { - START, - SAW_AT, - SAW_COLON, - SAW_COLON_SLASH, - SAW_COLON_SLASH_SLASH, + START, + SAW_AT, + SAW_COLON, + SAW_COLON_SLASH, + SAW_COLON_SLASH_SLASH, }; void WordBreaker::detectEmailOrUrl() { - // scan forward from current ICU position for email address or URL - if (mLast >= mScanOffset) { - ScanState state = START; - size_t i; - for (i = mLast; i < mTextSize; i++) { - uint16_t c = mText[i]; - // scan only ASCII characters, stop at space - if (!(' ' < c && c <= 0x007E)) { - break; - } - if (state == START && c == '@') { - state = SAW_AT; - } else if (state == START && c == ':') { - state = SAW_COLON; - } else if (state == SAW_COLON || state == SAW_COLON_SLASH) { - if (c == '/') { - state = static_cast((int)state + 1); // next state adds a slash - } else { - state = START; - } - } - } - if (state == SAW_AT || state == SAW_COLON_SLASH_SLASH) { - if (!mBreakIterator->isBoundary(i)) { - // If there are combining marks or such at the end of the URL or the email address, - // consider them a part of the URL or the email, and skip to the next actual - // boundary. 
- i = mBreakIterator->following(i); - } - mInEmailOrUrl = true; - mIteratorWasReset = true; + // scan forward from current ICU position for email address or URL + if (mLast >= mScanOffset) { + ScanState state = START; + size_t i; + for (i = mLast; i < mTextSize; i++) { + uint16_t c = mText[i]; + // scan only ASCII characters, stop at space + if (!(' ' < c && c <= 0x007E)) { + break; + } + if (state == START && c == '@') { + state = SAW_AT; + } else if (state == START && c == ':') { + state = SAW_COLON; + } else if (state == SAW_COLON || state == SAW_COLON_SLASH) { + if (c == '/') { + state = static_cast((int)state + + 1); // next state adds a slash } else { - mInEmailOrUrl = false; + state = START; } - mScanOffset = i; + } } + if (state == SAW_AT || state == SAW_COLON_SLASH_SLASH) { + if (!mBreakIterator->isBoundary(i)) { + // If there are combining marks or such at the end of the URL or the + // email address, consider them a part of the URL or the email, and skip + // to the next actual boundary. + i = mBreakIterator->following(i); + } + mInEmailOrUrl = true; + mIteratorWasReset = true; + } else { + mInEmailOrUrl = false; + } + mScanOffset = i; + } } ssize_t WordBreaker::findNextBreakInEmailOrUrl() { - // special rules for email addresses and URL's as per Chicago Manual of Style (16th ed.) - uint16_t lastChar = mText[mLast]; - ssize_t i; - for (i = mLast + 1; i < mScanOffset; i++) { - if (breakAfter(lastChar)) { - break; - } - // break after double slash - if (lastChar == '/' && i >= mLast + 2 && mText[i - 2] == '/') { - break; - } - const uint16_t thisChar = mText[i]; - // never break after hyphen - if (lastChar != '-') { - if (breakBefore(thisChar)) { - break; - } - // break before single slash - if (thisChar == '/' && lastChar != '/' && - !(i + 1 < mScanOffset && mText[i + 1] == '/')) { - break; - } - } - lastChar = thisChar; + // special rules for email addresses and URL's as per Chicago Manual of Style + // (16th ed.) 
+ uint16_t lastChar = mText[mLast]; + ssize_t i; + for (i = mLast + 1; i < mScanOffset; i++) { + if (breakAfter(lastChar)) { + break; + } + // break after double slash + if (lastChar == '/' && i >= mLast + 2 && mText[i - 2] == '/') { + break; } - return i; + const uint16_t thisChar = mText[i]; + // never break after hyphen + if (lastChar != '-') { + if (breakBefore(thisChar)) { + break; + } + // break before single slash + if (thisChar == '/' && lastChar != '/' && + !(i + 1 < mScanOffset && mText[i + 1] == '/')) { + break; + } + } + lastChar = thisChar; + } + return i; } ssize_t WordBreaker::next() { - mLast = mCurrent; + mLast = mCurrent; - detectEmailOrUrl(); - if (mInEmailOrUrl) { - mCurrent = findNextBreakInEmailOrUrl(); - } else { // Business as usual - mCurrent = (ssize_t) iteratorNext(); - } - return mCurrent; + detectEmailOrUrl(); + if (mInEmailOrUrl) { + mCurrent = findNextBreakInEmailOrUrl(); + } else { // Business as usual + mCurrent = (ssize_t)iteratorNext(); + } + return mCurrent; } ssize_t WordBreaker::wordStart() const { - if (mInEmailOrUrl) { - return mLast; - } - ssize_t result = mLast; - while (result < mCurrent) { - UChar32 c; - ssize_t ix = result; - U16_NEXT(mText, ix, mCurrent, c); - const int32_t lb = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); - // strip leading punctuation, defined as OP and QU line breaking classes, - // see UAX #14 - if (!(lb == U_LB_OPEN_PUNCTUATION || lb == U_LB_QUOTATION)) { - break; - } - result = ix; + if (mInEmailOrUrl) { + return mLast; + } + ssize_t result = mLast; + while (result < mCurrent) { + UChar32 c; + ssize_t ix = result; + U16_NEXT(mText, ix, mCurrent, c); + const int32_t lb = u_getIntPropertyValue(c, UCHAR_LINE_BREAK); + // strip leading punctuation, defined as OP and QU line breaking classes, + // see UAX #14 + if (!(lb == U_LB_OPEN_PUNCTUATION || lb == U_LB_QUOTATION)) { + break; } - return result; + result = ix; + } + return result; } ssize_t WordBreaker::wordEnd() const { - if (mInEmailOrUrl) { - 
return mLast; - } - ssize_t result = mCurrent; - while (result > mLast) { - UChar32 c; - ssize_t ix = result; - U16_PREV(mText, mLast, ix, c); - const int32_t gc_mask = U_GET_GC_MASK(c); - // strip trailing space and punctuation - if ((gc_mask & (U_GC_ZS_MASK | U_GC_P_MASK)) == 0) { - break; - } - result = ix; + if (mInEmailOrUrl) { + return mLast; + } + ssize_t result = mCurrent; + while (result > mLast) { + UChar32 c; + ssize_t ix = result; + U16_PREV(mText, mLast, ix, c); + const int32_t gc_mask = U_GET_GC_MASK(c); + // strip trailing space and punctuation + if ((gc_mask & (U_GC_ZS_MASK | U_GC_P_MASK)) == 0) { + break; } - return result; + result = ix; + } + return result; } int WordBreaker::breakBadness() const { - return (mInEmailOrUrl && mCurrent < mScanOffset) ? 1 : 0; + return (mInEmailOrUrl && mCurrent < mScanOffset) ? 1 : 0; } void WordBreaker::finish() { - mText = nullptr; - // Note: calling utext_close multiply is safe - utext_close(&mUText); + mText = nullptr; + // Note: calling utext_close multiply is safe + utext_close(&mUText); } } // namespace minikin diff --git a/third_party/txt/src/minikin/WordBreaker.h b/third_party/txt/src/minikin/WordBreaker.h index 6971ce2013c6273e5d1ea7811abcf1d904b05d72..60424783d4d01b98b6b6e93a6e4c25e99a2abfe8 100644 --- a/third_party/txt/src/minikin/WordBreaker.h +++ b/third_party/txt/src/minikin/WordBreaker.h @@ -23,53 +23,52 @@ #ifndef MINIKIN_WORD_BREAKER_H #define MINIKIN_WORD_BREAKER_H -#include "unicode/brkiter.h" #include +#include "unicode/brkiter.h" namespace minikin { class WordBreaker { -public: - ~WordBreaker() { - finish(); - } + public: + ~WordBreaker() { finish(); } - void setLocale(const icu::Locale& locale); + void setLocale(const icu::Locale& locale); - void setText(const uint16_t* data, size_t size); + void setText(const uint16_t* data, size_t size); - // Advance iterator to next word break. Return offset, or -1 if EOT - ssize_t next(); + // Advance iterator to next word break. 
Return offset, or -1 if EOT + ssize_t next(); - // Current offset of iterator, equal to 0 at BOT or last return from next() - ssize_t current() const; + // Current offset of iterator, equal to 0 at BOT or last return from next() + ssize_t current() const; - // After calling next(), wordStart() and wordEnd() are offsets defining the previous - // word. If wordEnd <= wordStart, it's not a word for the purpose of hyphenation. - ssize_t wordStart() const; + // After calling next(), wordStart() and wordEnd() are offsets defining the + // previous word. If wordEnd <= wordStart, it's not a word for the purpose of + // hyphenation. + ssize_t wordStart() const; - ssize_t wordEnd() const; + ssize_t wordEnd() const; - int breakBadness() const; + int breakBadness() const; - void finish(); + void finish(); -private: - int32_t iteratorNext(); - void detectEmailOrUrl(); - ssize_t findNextBreakInEmailOrUrl(); + private: + int32_t iteratorNext(); + void detectEmailOrUrl(); + ssize_t findNextBreakInEmailOrUrl(); - std::unique_ptr mBreakIterator; - UText mUText = UTEXT_INITIALIZER; - const uint16_t* mText = nullptr; - size_t mTextSize; - ssize_t mLast; - ssize_t mCurrent; - bool mIteratorWasReset; + std::unique_ptr mBreakIterator; + UText mUText = UTEXT_INITIALIZER; + const uint16_t* mText = nullptr; + size_t mTextSize; + ssize_t mLast; + ssize_t mCurrent; + bool mIteratorWasReset; - // state for the email address / url detector - ssize_t mScanOffset; - bool mInEmailOrUrl; + // state for the email address / url detector + ssize_t mScanOffset; + bool mInEmailOrUrl; }; } // namespace minikin diff --git a/third_party/txt/src/utils/JenkinsHash.cpp b/third_party/txt/src/utils/JenkinsHash.cpp index 76b04883b15678a3593f5ea68af08983f16c1031..4df6d2a2bb2fb42a1e58e37c68e81e56001edec5 100644 --- a/third_party/txt/src/utils/JenkinsHash.cpp +++ b/third_party/txt/src/utils/JenkinsHash.cpp @@ -27,47 +27,51 @@ namespace android { #ifdef __clang__ __attribute__((no_sanitize("integer"))) #endif 
-hash_t JenkinsHashWhiten(uint32_t hash) { - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); - return hash; +hash_t +JenkinsHashWhiten(uint32_t hash) { + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; } uint32_t JenkinsHashMixBytes(uint32_t hash, const uint8_t* bytes, size_t size) { - if (size > UINT32_MAX) { - abort(); - } - hash = JenkinsHashMix(hash, (uint32_t)size); - size_t i; - for (i = 0; i < (size & -4); i += 4) { - uint32_t data = bytes[i] | (bytes[i+1] << 8) | (bytes[i+2] << 16) | (bytes[i+3] << 24); - hash = JenkinsHashMix(hash, data); - } - if (size & 3) { - uint32_t data = bytes[i]; - data |= ((size & 3) > 1) ? (bytes[i+1] << 8) : 0; - data |= ((size & 3) > 2) ? (bytes[i+2] << 16) : 0; - hash = JenkinsHashMix(hash, data); - } - return hash; + if (size > UINT32_MAX) { + abort(); + } + hash = JenkinsHashMix(hash, (uint32_t)size); + size_t i; + for (i = 0; i < (size & -4); i += 4) { + uint32_t data = bytes[i] | (bytes[i + 1] << 8) | (bytes[i + 2] << 16) | + (bytes[i + 3] << 24); + hash = JenkinsHashMix(hash, data); + } + if (size & 3) { + uint32_t data = bytes[i]; + data |= ((size & 3) > 1) ? (bytes[i + 1] << 8) : 0; + data |= ((size & 3) > 2) ? 
(bytes[i + 2] << 16) : 0; + hash = JenkinsHashMix(hash, data); + } + return hash; } -uint32_t JenkinsHashMixShorts(uint32_t hash, const uint16_t* shorts, size_t size) { - if (size > UINT32_MAX) { - abort(); - } - hash = JenkinsHashMix(hash, (uint32_t)size); - size_t i; - for (i = 0; i < (size & -2); i += 2) { - uint32_t data = shorts[i] | (shorts[i+1] << 16); - hash = JenkinsHashMix(hash, data); - } - if (size & 1) { - uint32_t data = shorts[i]; - hash = JenkinsHashMix(hash, data); - } - return hash; +uint32_t JenkinsHashMixShorts(uint32_t hash, + const uint16_t* shorts, + size_t size) { + if (size > UINT32_MAX) { + abort(); + } + hash = JenkinsHashMix(hash, (uint32_t)size); + size_t i; + for (i = 0; i < (size & -2); i += 2) { + uint32_t data = shorts[i] | (shorts[i + 1] << 16); + hash = JenkinsHashMix(hash, data); + } + if (size & 1) { + uint32_t data = shorts[i]; + hash = JenkinsHashMix(hash, data); + } + return hash; } -} +} // namespace android diff --git a/third_party/txt/src/utils/JenkinsHash.h b/third_party/txt/src/utils/JenkinsHash.h index 027c10c7e0a25d4154dedcfb834d1dfb90d81b61..25a778b4bc0448f6b3e436b6fd8547141dc3fab3 100644 --- a/third_party/txt/src/utils/JenkinsHash.h +++ b/third_party/txt/src/utils/JenkinsHash.h @@ -32,11 +32,12 @@ namespace android { #ifdef __clang__ __attribute__((no_sanitize("integer"))) #endif -inline uint32_t JenkinsHashMix(uint32_t hash, uint32_t data) { - hash += data; - hash += (hash << 10); - hash ^= (hash >> 6); - return hash; +inline uint32_t +JenkinsHashMix(uint32_t hash, uint32_t data) { + hash += data; + hash += (hash << 10); + hash ^= (hash >> 6); + return hash; } hash_t JenkinsHashWhiten(uint32_t hash); @@ -44,8 +45,10 @@ hash_t JenkinsHashWhiten(uint32_t hash); /* Helpful utility functions for hashing data in 32 bit chunks */ uint32_t JenkinsHashMixBytes(uint32_t hash, const uint8_t* bytes, size_t size); -uint32_t JenkinsHashMixShorts(uint32_t hash, const uint16_t* shorts, size_t size); +uint32_t 
JenkinsHashMixShorts(uint32_t hash, + const uint16_t* shorts, + size_t size); -} +} // namespace android -#endif // ANDROID_JENKINS_HASH_H +#endif // ANDROID_JENKINS_HASH_H diff --git a/third_party/txt/src/utils/LruCache.h b/third_party/txt/src/utils/LruCache.h index 89dccd6138d446301646c764d15216b642622179..95ee8aa145031a8e86ee23b440d3879caeab5c0c 100644 --- a/third_party/txt/src/utils/LruCache.h +++ b/third_party/txt/src/utils/LruCache.h @@ -27,272 +27,276 @@ namespace android { /** * GenerationCache callback used when an item is removed */ -template +template class OnEntryRemoved { -public: - virtual ~OnEntryRemoved() { }; - virtual void operator()(EntryKey& key, EntryValue& value) = 0; -}; // class OnEntryRemoved + public: + virtual ~OnEntryRemoved(){}; + virtual void operator()(EntryKey& key, EntryValue& value) = 0; +}; // class OnEntryRemoved template class LruCache { -public: - explicit LruCache(uint32_t maxCapacity); - virtual ~LruCache(); - - enum Capacity { - kUnlimitedCapacity, - }; - - void setOnEntryRemovedListener(OnEntryRemoved* listener); - size_t size() const; - const TValue& get(const TKey& key); - bool put(const TKey& key, const TValue& value); - bool remove(const TKey& key); - bool removeOldest(); - void clear(); - const TValue& peekOldestValue(); - -private: - LruCache(const LruCache& that); // disallow copy constructor - - // Super class so that we can have entries having only a key reference, for searches. - class KeyedEntry { - public: - virtual const TKey& getKey() const = 0; - // Make sure the right destructor is executed so that keys and values are deleted. 
- virtual ~KeyedEntry() {} - }; - - class Entry final : public KeyedEntry { - public: - TKey key; - TValue value; - Entry* parent; - Entry* child; - - Entry(TKey _key, TValue _value) : key(_key), value(_value), parent(NULL), child(NULL) { - } - const TKey& getKey() const final { return key; } - }; - - class EntryForSearch : public KeyedEntry { - public: - const TKey& key; - EntryForSearch(const TKey& key_) : key(key_) { - } - const TKey& getKey() const final { return key; } - }; - - struct HashForEntry : public std::unary_function { - size_t operator() (const KeyedEntry* entry) const { - return hash_type(entry->getKey()); - }; + public: + explicit LruCache(uint32_t maxCapacity); + virtual ~LruCache(); + + enum Capacity { + kUnlimitedCapacity, + }; + + void setOnEntryRemovedListener(OnEntryRemoved* listener); + size_t size() const; + const TValue& get(const TKey& key); + bool put(const TKey& key, const TValue& value); + bool remove(const TKey& key); + bool removeOldest(); + void clear(); + const TValue& peekOldestValue(); + + private: + LruCache(const LruCache& that); // disallow copy constructor + + // Super class so that we can have entries having only a key reference, for + // searches. + class KeyedEntry { + public: + virtual const TKey& getKey() const = 0; + // Make sure the right destructor is executed so that keys and values are + // deleted. 
+ virtual ~KeyedEntry() {} + }; + + class Entry final : public KeyedEntry { + public: + TKey key; + TValue value; + Entry* parent; + Entry* child; + + Entry(TKey _key, TValue _value) + : key(_key), value(_value), parent(NULL), child(NULL) {} + const TKey& getKey() const final { return key; } + }; + + class EntryForSearch : public KeyedEntry { + public: + const TKey& key; + EntryForSearch(const TKey& key_) : key(key_) {} + const TKey& getKey() const final { return key; } + }; + + struct HashForEntry : public std::unary_function { + size_t operator()(const KeyedEntry* entry) const { + return hash_type(entry->getKey()); }; + }; - struct EqualityForHashedEntries : public std::unary_function { - bool operator() (const KeyedEntry* lhs, const KeyedEntry* rhs) const { - return lhs->getKey() == rhs->getKey(); - }; + struct EqualityForHashedEntries + : public std::unary_function { + bool operator()(const KeyedEntry* lhs, const KeyedEntry* rhs) const { + return lhs->getKey() == rhs->getKey(); }; + }; + + // All entries in the set will be Entry*. Using the weaker KeyedEntry as to + // allow entries that have only a key reference, for searching. 
+ typedef std:: + unordered_set + LruCacheSet; + + void attachToCache(Entry& entry); + void detachFromCache(Entry& entry); + + typename LruCacheSet::iterator findByKey(const TKey& key) { + EntryForSearch entryForSearch(key); + typename LruCacheSet::iterator result = mSet->find(&entryForSearch); + return result; + } + + std::unique_ptr mSet; + OnEntryRemoved* mListener; + Entry* mOldest; + Entry* mYoungest; + uint32_t mMaxCapacity; + TValue mNullValue; + + public: + // To be used like: + // while (it.next()) { + // it.value(); it.key(); + // } + class Iterator { + public: + Iterator(const LruCache& cache) + : mCache(cache), + mIterator(mCache.mSet->begin()), + mBeginReturned(false) {} + + bool next() { + if (mIterator == mCache.mSet->end()) { + return false; + } + if (!mBeginReturned) { + // mIterator has been initialized to the beginning and + // hasn't been returned. Do not advance: + mBeginReturned = true; + } else { + std::advance(mIterator, 1); + } + bool ret = (mIterator != mCache.mSet->end()); + return ret; + } - // All entries in the set will be Entry*. Using the weaker KeyedEntry as to allow entries - // that have only a key reference, for searching. - typedef std::unordered_set LruCacheSet; - - void attachToCache(Entry& entry); - void detachFromCache(Entry& entry); - - typename LruCacheSet::iterator findByKey(const TKey& key) { - EntryForSearch entryForSearch(key); - typename LruCacheSet::iterator result = mSet->find(&entryForSearch); - return result; + const TValue& value() const { + // All the elements in the set are of type Entry. See comment in the + // definition of LruCacheSet above. 
+ return reinterpret_cast(*mIterator)->value; } - std::unique_ptr mSet; - OnEntryRemoved* mListener; - Entry* mOldest; - Entry* mYoungest; - uint32_t mMaxCapacity; - TValue mNullValue; - -public: - // To be used like: - // while (it.next()) { - // it.value(); it.key(); - // } - class Iterator { - public: - Iterator(const LruCache& cache): - mCache(cache), mIterator(mCache.mSet->begin()), mBeginReturned(false) { - } - - bool next() { - if (mIterator == mCache.mSet->end()) { - return false; - } - if (!mBeginReturned) { - // mIterator has been initialized to the beginning and - // hasn't been returned. Do not advance: - mBeginReturned = true; - } else { - std::advance(mIterator, 1); - } - bool ret = (mIterator != mCache.mSet->end()); - return ret; - } - - const TValue& value() const { - // All the elements in the set are of type Entry. See comment in the definition - // of LruCacheSet above. - return reinterpret_cast(*mIterator)->value; - } - - const TKey& key() const { - return (*mIterator)->getKey(); - } - private: - const LruCache& mCache; - typename LruCacheSet::iterator mIterator; - bool mBeginReturned; - }; + const TKey& key() const { return (*mIterator)->getKey(); } + + private: + const LruCache& mCache; + typename LruCacheSet::iterator mIterator; + bool mBeginReturned; + }; }; // Implementation is here, because it's fully templated template LruCache::LruCache(uint32_t maxCapacity) - : mSet(new LruCacheSet()) - , mListener(NULL) - , mOldest(NULL) - , mYoungest(NULL) - , mMaxCapacity(maxCapacity) - , mNullValue(0) { - mSet->max_load_factor(1.0); + : mSet(new LruCacheSet()), + mListener(NULL), + mOldest(NULL), + mYoungest(NULL), + mMaxCapacity(maxCapacity), + mNullValue(0) { + mSet->max_load_factor(1.0); }; template LruCache::~LruCache() { - // Need to delete created entries. - clear(); + // Need to delete created entries. 
+ clear(); }; -template +template void LruCache::setOnEntryRemovedListener(OnEntryRemoved* listener) { - mListener = listener; + mListener = listener; } template size_t LruCache::size() const { - return mSet->size(); + return mSet->size(); } template const TValue& LruCache::get(const TKey& key) { - typename LruCacheSet::const_iterator find_result = findByKey(key); - if (find_result == mSet->end()) { - return mNullValue; - } - // All the elements in the set are of type Entry. See comment in the definition - // of LruCacheSet above. - Entry *entry = reinterpret_cast(*find_result); - detachFromCache(*entry); - attachToCache(*entry); - return entry->value; + typename LruCacheSet::const_iterator find_result = findByKey(key); + if (find_result == mSet->end()) { + return mNullValue; + } + // All the elements in the set are of type Entry. See comment in the + // definition of LruCacheSet above. + Entry* entry = reinterpret_cast(*find_result); + detachFromCache(*entry); + attachToCache(*entry); + return entry->value; } template bool LruCache::put(const TKey& key, const TValue& value) { - if (mMaxCapacity != kUnlimitedCapacity && size() >= mMaxCapacity) { - removeOldest(); - } + if (mMaxCapacity != kUnlimitedCapacity && size() >= mMaxCapacity) { + removeOldest(); + } - if (findByKey(key) != mSet->end()) { - return false; - } + if (findByKey(key) != mSet->end()) { + return false; + } - Entry* newEntry = new Entry(key, value); - mSet->insert(newEntry); - attachToCache(*newEntry); - return true; + Entry* newEntry = new Entry(key, value); + mSet->insert(newEntry); + attachToCache(*newEntry); + return true; } template bool LruCache::remove(const TKey& key) { - typename LruCacheSet::const_iterator find_result = findByKey(key); - if (find_result == mSet->end()) { - return false; - } - // All the elements in the set are of type Entry. See comment in the definition - // of LruCacheSet above. 
- Entry* entry = reinterpret_cast(*find_result); - mSet->erase(entry); - if (mListener) { - (*mListener)(entry->key, entry->value); - } - detachFromCache(*entry); - delete entry; - return true; + typename LruCacheSet::const_iterator find_result = findByKey(key); + if (find_result == mSet->end()) { + return false; + } + // All the elements in the set are of type Entry. See comment in the + // definition of LruCacheSet above. + Entry* entry = reinterpret_cast(*find_result); + mSet->erase(entry); + if (mListener) { + (*mListener)(entry->key, entry->value); + } + detachFromCache(*entry); + delete entry; + return true; } template bool LruCache::removeOldest() { - if (mOldest != NULL) { - return remove(mOldest->key); - // TODO: should probably abort if false - } - return false; + if (mOldest != NULL) { + return remove(mOldest->key); + // TODO: should probably abort if false + } + return false; } template const TValue& LruCache::peekOldestValue() { - if (mOldest) { - return mOldest->value; - } - return mNullValue; + if (mOldest) { + return mOldest->value; + } + return mNullValue; } template void LruCache::clear() { - if (mListener) { - for (Entry* p = mOldest; p != NULL; p = p->child) { - (*mListener)(p->key, p->value); - } - } - mYoungest = NULL; - mOldest = NULL; - for (auto entry : *mSet.get()) { - delete entry; + if (mListener) { + for (Entry* p = mOldest; p != NULL; p = p->child) { + (*mListener)(p->key, p->value); } - mSet->clear(); + } + mYoungest = NULL; + mOldest = NULL; + for (auto entry : *mSet.get()) { + delete entry; + } + mSet->clear(); } template void LruCache::attachToCache(Entry& entry) { - if (mYoungest == NULL) { - mYoungest = mOldest = &entry; - } else { - entry.parent = mYoungest; - mYoungest->child = &entry; - mYoungest = &entry; - } + if (mYoungest == NULL) { + mYoungest = mOldest = &entry; + } else { + entry.parent = mYoungest; + mYoungest->child = &entry; + mYoungest = &entry; + } } template void LruCache::detachFromCache(Entry& entry) { - if 
(entry.parent != NULL) { - entry.parent->child = entry.child; - } else { - mOldest = entry.child; - } - if (entry.child != NULL) { - entry.child->parent = entry.parent; - } else { - mYoungest = entry.parent; - } - - entry.parent = NULL; - entry.child = NULL; + if (entry.parent != NULL) { + entry.parent->child = entry.child; + } else { + mOldest = entry.child; + } + if (entry.child != NULL) { + entry.child->parent = entry.parent; + } else { + mYoungest = entry.parent; + } + + entry.parent = NULL; + entry.child = NULL; } -} -#endif // ANDROID_UTILS_LRU_CACHE_H +} // namespace android +#endif // ANDROID_UTILS_LRU_CACHE_H diff --git a/third_party/txt/src/utils/TypeHelpers.h b/third_party/txt/src/utils/TypeHelpers.h index 28fbca508a37038830ef81e7615bd75e9922ef13..d0935ad0fc0384e698ad68371d5c4f730489d91d 100644 --- a/third_party/txt/src/utils/TypeHelpers.h +++ b/third_party/txt/src/utils/TypeHelpers.h @@ -32,61 +32,91 @@ namespace android { * Types traits */ -template struct trait_trivial_ctor { enum { value = false }; }; -template struct trait_trivial_dtor { enum { value = false }; }; -template struct trait_trivial_copy { enum { value = false }; }; -template struct trait_trivial_move { enum { value = false }; }; -template struct trait_pointer { enum { value = false }; }; -template struct trait_pointer { enum { value = true }; }; +template +struct trait_trivial_ctor { + enum { value = false }; +}; +template +struct trait_trivial_dtor { + enum { value = false }; +}; +template +struct trait_trivial_copy { + enum { value = false }; +}; +template +struct trait_trivial_move { + enum { value = false }; +}; +template +struct trait_pointer { + enum { value = false }; +}; +template +struct trait_pointer { + enum { value = true }; +}; template struct traits { - enum { - // whether this type is a pointer - is_pointer = trait_pointer::value, - // whether this type's constructor is a no-op - has_trivial_ctor = is_pointer || trait_trivial_ctor::value, - // whether this type's 
destructor is a no-op - has_trivial_dtor = is_pointer || trait_trivial_dtor::value, - // whether this type type can be copy-constructed with memcpy - has_trivial_copy = is_pointer || trait_trivial_copy::value, - // whether this type can be moved with memmove - has_trivial_move = is_pointer || trait_trivial_move::value - }; + enum { + // whether this type is a pointer + is_pointer = trait_pointer::value, + // whether this type's constructor is a no-op + has_trivial_ctor = is_pointer || trait_trivial_ctor::value, + // whether this type's destructor is a no-op + has_trivial_dtor = is_pointer || trait_trivial_dtor::value, + // whether this type type can be copy-constructed with memcpy + has_trivial_copy = is_pointer || trait_trivial_copy::value, + // whether this type can be moved with memmove + has_trivial_move = is_pointer || trait_trivial_move::value + }; }; template struct aggregate_traits { - enum { - is_pointer = false, - has_trivial_ctor = - traits::has_trivial_ctor && traits::has_trivial_ctor, - has_trivial_dtor = - traits::has_trivial_dtor && traits::has_trivial_dtor, - has_trivial_copy = - traits::has_trivial_copy && traits::has_trivial_copy, - has_trivial_move = - traits::has_trivial_move && traits::has_trivial_move - }; + enum { + is_pointer = false, + has_trivial_ctor = + traits::has_trivial_ctor && traits::has_trivial_ctor, + has_trivial_dtor = + traits::has_trivial_dtor && traits::has_trivial_dtor, + has_trivial_copy = + traits::has_trivial_copy && traits::has_trivial_copy, + has_trivial_move = + traits::has_trivial_move && traits::has_trivial_move + }; }; -#define ANDROID_TRIVIAL_CTOR_TRAIT( T ) \ - template<> struct trait_trivial_ctor< T > { enum { value = true }; }; - -#define ANDROID_TRIVIAL_DTOR_TRAIT( T ) \ - template<> struct trait_trivial_dtor< T > { enum { value = true }; }; - -#define ANDROID_TRIVIAL_COPY_TRAIT( T ) \ - template<> struct trait_trivial_copy< T > { enum { value = true }; }; - -#define ANDROID_TRIVIAL_MOVE_TRAIT( T ) \ - 
template<> struct trait_trivial_move< T > { enum { value = true }; }; - -#define ANDROID_BASIC_TYPES_TRAITS( T ) \ - ANDROID_TRIVIAL_CTOR_TRAIT( T ) \ - ANDROID_TRIVIAL_DTOR_TRAIT( T ) \ - ANDROID_TRIVIAL_COPY_TRAIT( T ) \ - ANDROID_TRIVIAL_MOVE_TRAIT( T ) +#define ANDROID_TRIVIAL_CTOR_TRAIT(T) \ + template <> \ + struct trait_trivial_ctor { \ + enum { value = true }; \ + }; + +#define ANDROID_TRIVIAL_DTOR_TRAIT(T) \ + template <> \ + struct trait_trivial_dtor { \ + enum { value = true }; \ + }; + +#define ANDROID_TRIVIAL_COPY_TRAIT(T) \ + template <> \ + struct trait_trivial_copy { \ + enum { value = true }; \ + }; + +#define ANDROID_TRIVIAL_MOVE_TRAIT(T) \ + template <> \ + struct trait_trivial_move { \ + enum { value = true }; \ + }; + +#define ANDROID_BASIC_TYPES_TRAITS(T) \ + ANDROID_TRIVIAL_CTOR_TRAIT(T) \ + ANDROID_TRIVIAL_DTOR_TRAIT(T) \ + ANDROID_TRIVIAL_COPY_TRAIT(T) \ + ANDROID_TRIVIAL_MOVE_TRAIT(T) // --------------------------------------------------------------------------- @@ -94,153 +124,151 @@ struct aggregate_traits { * basic types traits */ -ANDROID_BASIC_TYPES_TRAITS( void ) -ANDROID_BASIC_TYPES_TRAITS( bool ) -ANDROID_BASIC_TYPES_TRAITS( char ) -ANDROID_BASIC_TYPES_TRAITS( unsigned char ) -ANDROID_BASIC_TYPES_TRAITS( short ) -ANDROID_BASIC_TYPES_TRAITS( unsigned short ) -ANDROID_BASIC_TYPES_TRAITS( int ) -ANDROID_BASIC_TYPES_TRAITS( unsigned int ) -ANDROID_BASIC_TYPES_TRAITS( long ) -ANDROID_BASIC_TYPES_TRAITS( unsigned long ) -ANDROID_BASIC_TYPES_TRAITS( long long ) -ANDROID_BASIC_TYPES_TRAITS( unsigned long long ) -ANDROID_BASIC_TYPES_TRAITS( float ) -ANDROID_BASIC_TYPES_TRAITS( double ) +ANDROID_BASIC_TYPES_TRAITS(void) +ANDROID_BASIC_TYPES_TRAITS(bool) +ANDROID_BASIC_TYPES_TRAITS(char) +ANDROID_BASIC_TYPES_TRAITS(unsigned char) +ANDROID_BASIC_TYPES_TRAITS(short) +ANDROID_BASIC_TYPES_TRAITS(unsigned short) +ANDROID_BASIC_TYPES_TRAITS(int) +ANDROID_BASIC_TYPES_TRAITS(unsigned int) +ANDROID_BASIC_TYPES_TRAITS(long) 
+ANDROID_BASIC_TYPES_TRAITS(unsigned long) +ANDROID_BASIC_TYPES_TRAITS(long long) +ANDROID_BASIC_TYPES_TRAITS(unsigned long long) +ANDROID_BASIC_TYPES_TRAITS(float) +ANDROID_BASIC_TYPES_TRAITS(double) // --------------------------------------------------------------------------- - /* * compare and order types */ -template inline -int strictly_order_type(const TYPE& lhs, const TYPE& rhs) { - return (lhs < rhs) ? 1 : 0; +template +inline int strictly_order_type(const TYPE& lhs, const TYPE& rhs) { + return (lhs < rhs) ? 1 : 0; } -template inline -int compare_type(const TYPE& lhs, const TYPE& rhs) { - return strictly_order_type(rhs, lhs) - strictly_order_type(lhs, rhs); +template +inline int compare_type(const TYPE& lhs, const TYPE& rhs) { + return strictly_order_type(rhs, lhs) - strictly_order_type(lhs, rhs); } /* * create, destroy, copy and move types... */ -template inline -void construct_type(TYPE* p, size_t n) { - if (!traits::has_trivial_ctor) { - while (n > 0) { - n--; - new(p++) TYPE; - } +template +inline void construct_type(TYPE* p, size_t n) { + if (!traits::has_trivial_ctor) { + while (n > 0) { + n--; + new (p++) TYPE; } + } } -template inline -void destroy_type(TYPE* p, size_t n) { - if (!traits::has_trivial_dtor) { - while (n > 0) { - n--; - p->~TYPE(); - p++; - } +template +inline void destroy_type(TYPE* p, size_t n) { + if (!traits::has_trivial_dtor) { + while (n > 0) { + n--; + p->~TYPE(); + p++; } + } } -template -typename std::enable_if::has_trivial_copy>::type -inline -copy_type(TYPE* d, const TYPE* s, size_t n) { - memcpy(d,s,n*sizeof(TYPE)); +template +typename std::enable_if::has_trivial_copy>::type inline copy_type( + TYPE* d, + const TYPE* s, + size_t n) { + memcpy(d, s, n * sizeof(TYPE)); } -template -typename std::enable_if::has_trivial_copy>::type -inline -copy_type(TYPE* d, const TYPE* s, size_t n) { - while (n > 0) { - n--; - new(d) TYPE(*s); - d++, s++; - } +template +typename std::enable_if::has_trivial_copy>::type inline copy_type( + 
TYPE* d, + const TYPE* s, + size_t n) { + while (n > 0) { + n--; + new (d) TYPE(*s); + d++, s++; + } } -template inline -void splat_type(TYPE* where, const TYPE* what, size_t n) { - if (!traits::has_trivial_copy) { - while (n > 0) { - n--; - new(where) TYPE(*what); - where++; - } - } else { - while (n > 0) { - n--; - *where++ = *what; - } +template +inline void splat_type(TYPE* where, const TYPE* what, size_t n) { + if (!traits::has_trivial_copy) { + while (n > 0) { + n--; + new (where) TYPE(*what); + where++; } + } else { + while (n > 0) { + n--; + *where++ = *what; + } + } } -template -struct use_trivial_move : public std::integral_constant::has_trivial_dtor && traits::has_trivial_copy) - || traits::has_trivial_move -> {}; - -template -typename std::enable_if::value>::type -inline -move_forward_type(TYPE* d, const TYPE* s, size_t n = 1) { - memmove(d, s, n*sizeof(TYPE)); +template +struct use_trivial_move + : public std::integral_constant::has_trivial_dtor && + traits::has_trivial_copy) || + traits::has_trivial_move> {}; + +template +typename std::enable_if::value>:: + type inline move_forward_type(TYPE* d, const TYPE* s, size_t n = 1) { + memmove(d, s, n * sizeof(TYPE)); } -template -typename std::enable_if::value>::type -inline -move_forward_type(TYPE* d, const TYPE* s, size_t n = 1) { - d += n; - s += n; - while (n > 0) { - n--; - --d, --s; - if (!traits::has_trivial_copy) { - new(d) TYPE(*s); - } else { - *d = *s; - } - if (!traits::has_trivial_dtor) { - s->~TYPE(); - } +template +typename std::enable_if::value>:: + type inline move_forward_type(TYPE* d, const TYPE* s, size_t n = 1) { + d += n; + s += n; + while (n > 0) { + n--; + --d, --s; + if (!traits::has_trivial_copy) { + new (d) TYPE(*s); + } else { + *d = *s; + } + if (!traits::has_trivial_dtor) { + s->~TYPE(); } + } } -template -typename std::enable_if::value>::type -inline -move_backward_type(TYPE* d, const TYPE* s, size_t n = 1) { - memmove(d, s, n*sizeof(TYPE)); +template +typename 
std::enable_if::value>:: + type inline move_backward_type(TYPE* d, const TYPE* s, size_t n = 1) { + memmove(d, s, n * sizeof(TYPE)); } -template -typename std::enable_if::value>::type -inline -move_backward_type(TYPE* d, const TYPE* s, size_t n = 1) { - while (n > 0) { - n--; - if (!traits::has_trivial_copy) { - new(d) TYPE(*s); - } else { - *d = *s; - } - if (!traits::has_trivial_dtor) { - s->~TYPE(); - } - d++, s++; +template +typename std::enable_if::value>:: + type inline move_backward_type(TYPE* d, const TYPE* s, size_t n = 1) { + while (n > 0) { + n--; + if (!traits::has_trivial_copy) { + new (d) TYPE(*s); + } else { + *d = *s; } + if (!traits::has_trivial_dtor) { + s->~TYPE(); + } + d++, s++; + } } // --------------------------------------------------------------------------- @@ -251,43 +279,43 @@ move_backward_type(TYPE* d, const TYPE* s, size_t n = 1) { template struct key_value_pair_t { - typedef KEY key_t; - typedef VALUE value_t; - - KEY key; - VALUE value; - key_value_pair_t() { } - key_value_pair_t(const key_value_pair_t& o) : key(o.key), value(o.value) { } - key_value_pair_t& operator=(const key_value_pair_t& o) { - key = o.key; - value = o.value; - return *this; - } - key_value_pair_t(const KEY& k, const VALUE& v) : key(k), value(v) { } - explicit key_value_pair_t(const KEY& k) : key(k) { } - inline bool operator < (const key_value_pair_t& o) const { - return strictly_order_type(key, o.key); - } - inline const KEY& getKey() const { - return key; - } - inline const VALUE& getValue() const { - return value; - } + typedef KEY key_t; + typedef VALUE value_t; + + KEY key; + VALUE value; + key_value_pair_t() {} + key_value_pair_t(const key_value_pair_t& o) : key(o.key), value(o.value) {} + key_value_pair_t& operator=(const key_value_pair_t& o) { + key = o.key; + value = o.value; + return *this; + } + key_value_pair_t(const KEY& k, const VALUE& v) : key(k), value(v) {} + explicit key_value_pair_t(const KEY& k) : key(k) {} + inline bool operator<(const 
key_value_pair_t& o) const { + return strictly_order_type(key, o.key); + } + inline const KEY& getKey() const { return key; } + inline const VALUE& getValue() const { return value; } }; template -struct trait_trivial_ctor< key_value_pair_t > -{ enum { value = aggregate_traits::has_trivial_ctor }; }; +struct trait_trivial_ctor> { + enum { value = aggregate_traits::has_trivial_ctor }; +}; template -struct trait_trivial_dtor< key_value_pair_t > -{ enum { value = aggregate_traits::has_trivial_dtor }; }; +struct trait_trivial_dtor> { + enum { value = aggregate_traits::has_trivial_dtor }; +}; template -struct trait_trivial_copy< key_value_pair_t > -{ enum { value = aggregate_traits::has_trivial_copy }; }; +struct trait_trivial_copy> { + enum { value = aggregate_traits::has_trivial_copy }; +}; template -struct trait_trivial_move< key_value_pair_t > -{ enum { value = aggregate_traits::has_trivial_move }; }; +struct trait_trivial_move> { + enum { value = aggregate_traits::has_trivial_move }; +}; // --------------------------------------------------------------------------- @@ -300,18 +328,24 @@ template hash_t hash_type(const TKey& key); /* Built-in hash code specializations */ -#define ANDROID_INT32_HASH(T) \ - template <> inline hash_t hash_type(const T& value) { return hash_t(value); } -#define ANDROID_INT64_HASH(T) \ - template <> inline hash_t hash_type(const T& value) { \ - return hash_t((value >> 32) ^ value); } -#define ANDROID_REINTERPRET_HASH(T, R) \ - template <> inline hash_t hash_type(const T& value) { \ - R newValue; \ - static_assert(sizeof(newValue) == sizeof(value), "size mismatch"); \ - memcpy(&newValue, &value, sizeof(newValue)); \ - return hash_type(newValue); \ - } +#define ANDROID_INT32_HASH(T) \ + template <> \ + inline hash_t hash_type(const T& value) { \ + return hash_t(value); \ + } +#define ANDROID_INT64_HASH(T) \ + template <> \ + inline hash_t hash_type(const T& value) { \ + return hash_t((value >> 32) ^ value); \ + } +#define 
ANDROID_REINTERPRET_HASH(T, R) \ + template <> \ + inline hash_t hash_type(const T& value) { \ + R newValue; \ + static_assert(sizeof(newValue) == sizeof(value), "size mismatch"); \ + memcpy(&newValue, &value, sizeof(newValue)); \ + return hash_type(newValue); \ + } ANDROID_INT32_HASH(bool) ANDROID_INT32_HASH(int8_t) @@ -325,12 +359,13 @@ ANDROID_INT64_HASH(uint64_t) ANDROID_REINTERPRET_HASH(float, uint32_t) ANDROID_REINTERPRET_HASH(double, uint64_t) -template inline hash_t hash_type(T* const & value) { - return hash_type(uintptr_t(value)); +template +inline hash_t hash_type(T* const& value) { + return hash_type(uintptr_t(value)); } -}; // namespace android +}; // namespace android // --------------------------------------------------------------------------- -#endif // ANDROID_TYPE_HELPERS_H +#endif // ANDROID_TYPE_HELPERS_H diff --git a/third_party/txt/tests/old/perftests/FontCollection.cpp b/third_party/txt/tests/old/perftests/FontCollection.cpp index 55789f91d1bc9bc0a8fab3f0711eca2787d05846..1ec4741ddd208c6fbfffdf474bff3949336c41ba 100644 --- a/third_party/txt/tests/old/perftests/FontCollection.cpp +++ b/third_party/txt/tests/old/perftests/FontCollection.cpp @@ -17,10 +17,10 @@ #include -#include #include -#include #include +#include +#include namespace minikin { @@ -28,74 +28,83 @@ const char* SYSTEM_FONT_PATH = "/system/fonts/"; const char* SYSTEM_FONT_XML = "/system/etc/fonts.xml"; static void BM_FontCollection_construct(benchmark::State& state) { - std::vector> families = - getFontFamilies(SYSTEM_FONT_PATH, SYSTEM_FONT_XML); - while (state.KeepRunning()) { - std::make_shared(families); - } + std::vector> families = + getFontFamilies(SYSTEM_FONT_PATH, SYSTEM_FONT_XML); + while (state.KeepRunning()) { + std::make_shared(families); + } } BENCHMARK(BM_FontCollection_construct); static void BM_FontCollection_hasVariationSelector(benchmark::State& state) { - std::shared_ptr collection( - getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); + std::shared_ptr 
collection( + getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); - uint32_t baseCp = state.range(0); - uint32_t vsCp = state.range(1); + uint32_t baseCp = state.range(0); + uint32_t vsCp = state.range(1); - char titleBuffer[64]; - snprintf(titleBuffer, 64, "hasVariationSelector U+%04X,U+%04X", baseCp, vsCp); - state.SetLabel(titleBuffer); + char titleBuffer[64]; + snprintf(titleBuffer, 64, "hasVariationSelector U+%04X,U+%04X", baseCp, vsCp); + state.SetLabel(titleBuffer); - while (state.KeepRunning()) { - collection->hasVariationSelector(baseCp, vsCp); - } + while (state.KeepRunning()) { + collection->hasVariationSelector(baseCp, vsCp); + } } // TODO: Rewrite with BENCHMARK_CAPTURE for better test name. BENCHMARK(BM_FontCollection_hasVariationSelector) - ->ArgPair(0x2708, 0xFE0F) - ->ArgPair(0x2708, 0xFE0E) - ->ArgPair(0x3402, 0xE0100); + ->ArgPair(0x2708, 0xFE0F) + ->ArgPair(0x2708, 0xFE0E) + ->ArgPair(0x3402, 0xE0100); struct ItemizeTestCases { - std::string itemizeText; - std::string languageTag; - std::string labelText; + std::string itemizeText; + std::string languageTag; + std::string labelText; } ITEMIZE_TEST_CASES[] = { - { "'A' 'n' 'd' 'r' 'o' 'i' 'd'", "en", "English" }, - { "U+4E16", "zh-Hans", "CJK Ideograph" }, - { "U+4E16", "zh-Hans,zh-Hant,ja,en,es,pt,fr,de", "CJK Ideograph with many language fallback" }, - { "U+3402 U+E0100", "ja", "CJK Ideograph with variation selector" }, - { "'A' 'n' U+0E1A U+0E31 U+0645 U+062D U+0648", "en", "Mixture of English, Thai and Arabic" }, - { "U+2708 U+FE0E", "en", "Emoji with variation selector" }, - { "U+0031 U+FE0F U+20E3", "en", "KEYCAP" }, + {"'A' 'n' 'd' 'r' 'o' 'i' 'd'", "en", "English"}, + {"U+4E16", "zh-Hans", "CJK Ideograph"}, + {"U+4E16", "zh-Hans,zh-Hant,ja,en,es,pt,fr,de", + "CJK Ideograph with many language fallback"}, + {"U+3402 U+E0100", "ja", "CJK Ideograph with variation selector"}, + {"'A' 'n' U+0E1A U+0E31 U+0645 U+062D U+0648", "en", + "Mixture of English, Thai and Arabic"}, + {"U+2708 U+FE0E", 
"en", "Emoji with variation selector"}, + {"U+0031 U+FE0F U+20E3", "en", "KEYCAP"}, }; static void BM_FontCollection_itemize(benchmark::State& state) { - std::shared_ptr collection( - getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); - - size_t testIndex = state.range(0); - state.SetLabel("Itemize: " + ITEMIZE_TEST_CASES[testIndex].labelText); - - uint16_t buffer[64]; - size_t utf16_length = 0; - ParseUnicode( - buffer, 64, ITEMIZE_TEST_CASES[testIndex].itemizeText.c_str(), &utf16_length, nullptr); - std::vector result; - FontStyle style(FontStyle::registerLanguageList(ITEMIZE_TEST_CASES[testIndex].languageTag)); - - std::lock_guard _l(gMinikinLock); - while (state.KeepRunning()) { - result.clear(); - collection->itemize(buffer, utf16_length, style, &result); - } + std::shared_ptr collection( + getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); + + size_t testIndex = state.range(0); + state.SetLabel("Itemize: " + ITEMIZE_TEST_CASES[testIndex].labelText); + + uint16_t buffer[64]; + size_t utf16_length = 0; + ParseUnicode(buffer, 64, ITEMIZE_TEST_CASES[testIndex].itemizeText.c_str(), + &utf16_length, nullptr); + std::vector result; + FontStyle style(FontStyle::registerLanguageList( + ITEMIZE_TEST_CASES[testIndex].languageTag)); + + std::lock_guard _l(gMinikinLock); + while (state.KeepRunning()) { + result.clear(); + collection->itemize(buffer, utf16_length, style, &result); + } } // TODO: Rewrite with BENCHMARK_CAPTURE once it is available in Android. 
BENCHMARK(BM_FontCollection_itemize) - ->Arg(0)->Arg(1)->Arg(2)->Arg(3)->Arg(4)->Arg(5)->Arg(6); + ->Arg(0) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Arg(4) + ->Arg(5) + ->Arg(6); } // namespace minikin diff --git a/third_party/txt/tests/old/perftests/FontFamily.cpp b/third_party/txt/tests/old/perftests/FontFamily.cpp index 9ab61e1fa7ce57d79bf9c5c2080718a710738ba5..9331ce92c65d7015098c580e77b841acc3ef9127 100644 --- a/third_party/txt/tests/old/perftests/FontFamily.cpp +++ b/third_party/txt/tests/old/perftests/FontFamily.cpp @@ -21,13 +21,14 @@ namespace minikin { static void BM_FontFamily_create(benchmark::State& state) { - std::shared_ptr minikinFont = - std::make_shared("/system/fonts/NotoSansCJK-Regular.ttc", 0); + std::shared_ptr minikinFont = + std::make_shared( + "/system/fonts/NotoSansCJK-Regular.ttc", 0); - while (state.KeepRunning()) { - std::shared_ptr family = std::make_shared( - std::vector({Font(minikinFont, FontStyle())})); - } + while (state.KeepRunning()) { + std::shared_ptr family = std::make_shared( + std::vector({Font(minikinFont, FontStyle())})); + } } BENCHMARK(BM_FontFamily_create); diff --git a/third_party/txt/tests/old/perftests/FontLanguage.cpp b/third_party/txt/tests/old/perftests/FontLanguage.cpp index 6c9c84de88865645bfd88b2ec807c43aebe90f9d..8919699b9b255ae169a1bb41b7d723a463f21a74 100644 --- a/third_party/txt/tests/old/perftests/FontLanguage.cpp +++ b/third_party/txt/tests/old/perftests/FontLanguage.cpp @@ -20,23 +20,23 @@ namespace minikin { static void BM_FontLanguage_en_US(benchmark::State& state) { - while (state.KeepRunning()) { - FontLanguage language("en-US", 5); - } + while (state.KeepRunning()) { + FontLanguage language("en-US", 5); + } } BENCHMARK(BM_FontLanguage_en_US); static void BM_FontLanguage_en_Latn_US(benchmark::State& state) { - while (state.KeepRunning()) { - FontLanguage language("en-Latn-US", 10); - } + while (state.KeepRunning()) { + FontLanguage language("en-Latn-US", 10); + } } BENCHMARK(BM_FontLanguage_en_Latn_US); 
static void BM_FontLanguage_en_Latn_US_u_em_emoji(benchmark::State& state) { - while (state.KeepRunning()) { - FontLanguage language("en-Latn-US-u-em-emoji", 21); - } + while (state.KeepRunning()) { + FontLanguage language("en-Latn-US-u-em-emoji", 21); + } } BENCHMARK(BM_FontLanguage_en_Latn_US_u_em_emoji); diff --git a/third_party/txt/tests/old/perftests/GraphemeBreak.cpp b/third_party/txt/tests/old/perftests/GraphemeBreak.cpp index 830586f44e127c8f7cbb45fbf864734733b316d1..a7e4a01e6352f3e6f59c3dbe28a69b3ad55e2341 100644 --- a/third_party/txt/tests/old/perftests/GraphemeBreak.cpp +++ b/third_party/txt/tests/old/perftests/GraphemeBreak.cpp @@ -17,63 +17,65 @@ #include -#include "minikin/GraphemeBreak.h" #include "UnicodeUtils.h" +#include "minikin/GraphemeBreak.h" namespace minikin { const char* ASCII_TEST_STR = "'L' 'o' 'r' 'e' 'm' ' ' 'i' 'p' 's' 'u' 'm' '.'"; // U+261D: WHITE UP POINTING INDEX // U+1F3FD: EMOJI MODIFIER FITZPATRICK TYPE-4 -const char* EMOJI_TEST_STR = "U+261D U+1F3FD U+261D U+1F3FD U+261D U+1F3FD U+261D U+1F3FD"; +const char* EMOJI_TEST_STR = + "U+261D U+1F3FD U+261D U+1F3FD U+261D U+1F3FD U+261D U+1F3FD"; // U+1F1FA: REGIONAL INDICATOR SYMBOL LETTER U // U+1F1F8: REGIONAL INDICATOR SYMBOL LETTER S const char* FLAGS_TEST_STR = "U+1F1FA U+1F1F8 U+1F1FA U+1F1F8 U+1F1FA U+1F1F8"; // TODO: Migrate BENCHMARK_CAPTURE for parameterizing. 
static void BM_GraphemeBreak_Ascii(benchmark::State& state) { - size_t result_size; - uint16_t buffer[12]; - ParseUnicode(buffer, 12, ASCII_TEST_STR, &result_size, nullptr); - LOG_ALWAYS_FATAL_IF(result_size != 12); - const size_t testIndex = state.range(0); - while (state.KeepRunning()) { - GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex); - } + size_t result_size; + uint16_t buffer[12]; + ParseUnicode(buffer, 12, ASCII_TEST_STR, &result_size, nullptr); + LOG_ALWAYS_FATAL_IF(result_size != 12); + const size_t testIndex = state.range(0); + while (state.KeepRunning()) { + GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex); + } } BENCHMARK(BM_GraphemeBreak_Ascii) - ->Arg(0) // Begining of the text. - ->Arg(1) // Middle of the text. + ->Arg(0) // Begining of the text. + ->Arg(1) // Middle of the text. ->Arg(12); // End of the text. static void BM_GraphemeBreak_Emoji(benchmark::State& state) { - size_t result_size; - uint16_t buffer[12]; - ParseUnicode(buffer, 12, EMOJI_TEST_STR, &result_size, nullptr); - LOG_ALWAYS_FATAL_IF(result_size != 12); - const size_t testIndex = state.range(0); - while (state.KeepRunning()) { - GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex); - } + size_t result_size; + uint16_t buffer[12]; + ParseUnicode(buffer, 12, EMOJI_TEST_STR, &result_size, nullptr); + LOG_ALWAYS_FATAL_IF(result_size != 12); + const size_t testIndex = state.range(0); + while (state.KeepRunning()) { + GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex); + } } BENCHMARK(BM_GraphemeBreak_Emoji) - ->Arg(1) // Middle of emoji modifier sequence. - ->Arg(2) // Middle of the surrogate pairs. - ->Arg(3); // After emoji modifier sequence. Here is boundary of grapheme cluster. + ->Arg(1) // Middle of emoji modifier sequence. + ->Arg(2) // Middle of the surrogate pairs. + ->Arg(3); // After emoji modifier sequence. Here is boundary of grapheme + // cluster. 
static void BM_GraphemeBreak_Emoji_Flags(benchmark::State& state) { - size_t result_size; - uint16_t buffer[12]; - ParseUnicode(buffer, 12, FLAGS_TEST_STR, &result_size, nullptr); - LOG_ALWAYS_FATAL_IF(result_size != 12); - const size_t testIndex = state.range(0); - while (state.KeepRunning()) { - GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex); - } + size_t result_size; + uint16_t buffer[12]; + ParseUnicode(buffer, 12, FLAGS_TEST_STR, &result_size, nullptr); + LOG_ALWAYS_FATAL_IF(result_size != 12); + const size_t testIndex = state.range(0); + while (state.KeepRunning()) { + GraphemeBreak::isGraphemeBreak(nullptr, buffer, 0, result_size, testIndex); + } } BENCHMARK(BM_GraphemeBreak_Emoji_Flags) - ->Arg(2) // Middle of flag sequence. - ->Arg(4) // After flag sequence. Here is boundary of grapheme cluster. - ->Arg(10); // Middle of 3rd flag sequence. + ->Arg(2) // Middle of flag sequence. + ->Arg(4) // After flag sequence. Here is boundary of grapheme cluster. + ->Arg(10); // Middle of 3rd flag sequence. 
} // namespace minikin diff --git a/third_party/txt/tests/old/perftests/Hyphenator.cpp b/third_party/txt/tests/old/perftests/Hyphenator.cpp index ae6249875ad032dff13e2629c471003515dfd208..2c87c3555a1dba21eefa8d554a40affc8740a259 100644 --- a/third_party/txt/tests/old/perftests/Hyphenator.cpp +++ b/third_party/txt/tests/old/perftests/Hyphenator.cpp @@ -15,9 +15,9 @@ */ #include -#include #include #include +#include namespace minikin { @@ -27,29 +27,29 @@ const int enUsMinSuffix = 3; const icu::Locale& usLocale = icu::Locale::getUS(); static void BM_Hyphenator_short_word(benchmark::State& state) { - Hyphenator* hyphenator = Hyphenator::loadBinary( - readWholeFile(enUsHyph).data(), enUsMinPrefix, enUsMinSuffix); - std::vector word = utf8ToUtf16("hyphen"); - std::vector result; - while (state.KeepRunning()) { - hyphenator->hyphenate(&result, word.data(), word.size(), usLocale); - } - Hyphenator::loadBinary(nullptr, 2, 2); + Hyphenator* hyphenator = Hyphenator::loadBinary( + readWholeFile(enUsHyph).data(), enUsMinPrefix, enUsMinSuffix); + std::vector word = utf8ToUtf16("hyphen"); + std::vector result; + while (state.KeepRunning()) { + hyphenator->hyphenate(&result, word.data(), word.size(), usLocale); + } + Hyphenator::loadBinary(nullptr, 2, 2); } // TODO: Use BENCHMARK_CAPTURE for parametrise. 
BENCHMARK(BM_Hyphenator_short_word); static void BM_Hyphenator_long_word(benchmark::State& state) { - Hyphenator* hyphenator = Hyphenator::loadBinary( - readWholeFile(enUsHyph).data(), enUsMinPrefix, enUsMinSuffix); - std::vector word = utf8ToUtf16( - "Pneumonoultramicroscopicsilicovolcanoconiosis"); - std::vector result; - while (state.KeepRunning()) { - hyphenator->hyphenate(&result, word.data(), word.size(), usLocale); - } - Hyphenator::loadBinary(nullptr, 2, 2); + Hyphenator* hyphenator = Hyphenator::loadBinary( + readWholeFile(enUsHyph).data(), enUsMinPrefix, enUsMinSuffix); + std::vector word = + utf8ToUtf16("Pneumonoultramicroscopicsilicovolcanoconiosis"); + std::vector result; + while (state.KeepRunning()) { + hyphenator->hyphenate(&result, word.data(), word.size(), usLocale); + } + Hyphenator::loadBinary(nullptr, 2, 2); } // TODO: Use BENCHMARK_CAPTURE for parametrise. diff --git a/third_party/txt/tests/old/perftests/WordBreaker.cpp b/third_party/txt/tests/old/perftests/WordBreaker.cpp index f9ef2144db6706cdb64bcf19b6f13c21418c8023..70661e107611867a8e69ffffe54329b28b87d10d 100644 --- a/third_party/txt/tests/old/perftests/WordBreaker.cpp +++ b/third_party/txt/tests/old/perftests/WordBreaker.cpp @@ -15,22 +15,24 @@ */ #include -#include "minikin/WordBreaker.h" #include "UnicodeUtils.h" +#include "minikin/WordBreaker.h" namespace minikin { static void BM_WordBreaker_English(benchmark::State& state) { - const char* kLoremIpsum = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do " - "eiusmod tempor incididunt ut labore et dolore magna aliqua."; + const char* kLoremIpsum = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do " + "eiusmod tempor incididunt ut labore et dolore magna aliqua."; - WordBreaker wb; - wb.setLocale(icu::Locale::getEnglish()); - std::vector text = utf8ToUtf16(kLoremIpsum); - while (state.KeepRunning()) { - wb.setText(text.data(), text.size()); - while (wb.next() != -1) {} + WordBreaker wb; + 
wb.setLocale(icu::Locale::getEnglish()); + std::vector text = utf8ToUtf16(kLoremIpsum); + while (state.KeepRunning()) { + wb.setText(text.data(), text.size()); + while (wb.next() != -1) { } + } } BENCHMARK(BM_WordBreaker_English); diff --git a/third_party/txt/tests/old/perftests/main.cpp b/third_party/txt/tests/old/perftests/main.cpp index e6f9d14cda5659c11cffd394fb6be3481d9d0699..7110ddff4eb8677b1d420a69f23c2aa27b526cbc 100644 --- a/third_party/txt/tests/old/perftests/main.cpp +++ b/third_party/txt/tests/old/perftests/main.cpp @@ -21,26 +21,26 @@ #include #include -#include #include +#include int main(int argc, char** argv) { - const char* fn = "/system/usr/icu/" U_ICUDATA_NAME ".dat"; - int fd = open(fn, O_RDONLY); - LOG_ALWAYS_FATAL_IF(fd == -1); - struct stat st; - LOG_ALWAYS_FATAL_IF(fstat(fd, &st) != 0); - void* data = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + const char* fn = "/system/usr/icu/" U_ICUDATA_NAME ".dat"; + int fd = open(fn, O_RDONLY); + LOG_ALWAYS_FATAL_IF(fd == -1); + struct stat st; + LOG_ALWAYS_FATAL_IF(fstat(fd, &st) != 0); + void* data = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); - UErrorCode errorCode = U_ZERO_ERROR; - udata_setCommonData(data, &errorCode); - LOG_ALWAYS_FATAL_IF(U_FAILURE(errorCode)); - u_init(&errorCode); - LOG_ALWAYS_FATAL_IF(U_FAILURE(errorCode)); + UErrorCode errorCode = U_ZERO_ERROR; + udata_setCommonData(data, &errorCode); + LOG_ALWAYS_FATAL_IF(U_FAILURE(errorCode)); + u_init(&errorCode); + LOG_ALWAYS_FATAL_IF(U_FAILURE(errorCode)); - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); - u_cleanup(); - return 0; + u_cleanup(); + return 0; } diff --git a/third_party/txt/tests/old/stresstest/MultithreadTest.cpp b/third_party/txt/tests/old/stresstest/MultithreadTest.cpp index 08c94b99fe57cca8f30b08a55352539694ee9fc9..3475345e3d24c62f5721cea0a8b90b3a2c968a26 100644 --- 
a/third_party/txt/tests/old/stresstest/MultithreadTest.cpp +++ b/third_party/txt/tests/old/stresstest/MultithreadTest.cpp @@ -23,10 +23,10 @@ #include +#include "../util/FontTestUtils.h" #include "MinikinInternal.h" #include "minikin/FontCollection.h" #include "minikin/Layout.h" -#include "../util/FontTestUtils.h" namespace minikin { @@ -41,69 +41,72 @@ std::mutex gMutex; std::condition_variable gCv; bool gReady = false; -static std::vector generateTestText( - std::mt19937* mt, int lettersInWord, int wordsInText) { - std::uniform_int_distribution dist('A', 'Z'); - - std::vector text; - text.reserve((lettersInWord + 1) * wordsInText - 1); - for (int i = 0; i < wordsInText; ++i) { - if (i != 0) { - text.emplace_back(' '); - } - for (int j = 0; j < lettersInWord; ++j) { - text.emplace_back(dist(*mt)); - } +static std::vector generateTestText(std::mt19937* mt, + int lettersInWord, + int wordsInText) { + std::uniform_int_distribution dist('A', 'Z'); + + std::vector text; + text.reserve((lettersInWord + 1) * wordsInText - 1); + for (int i = 0; i < wordsInText; ++i) { + if (i != 0) { + text.emplace_back(' '); + } + for (int j = 0; j < lettersInWord; ++j) { + text.emplace_back(dist(*mt)); } - return text; + } + return text; } static void thread_main(int tid) { - { - // Wait until all threads are created. - std::unique_lock lock(gMutex); - gCv.wait(lock, [] { return gReady; }); - } - - std::mt19937 mt(tid); - MinikinPaint paint; - - for (int i = 0; i < COLLECTION_COUNT_PER_THREAD; ++i) { - std::shared_ptr collection( - getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); - - for (int j = 0; j < LAYOUT_COUNT_PER_COLLECTION; ++j) { - // Generates 10 of 3-letter words so that the word sometimes hit the cache. 
- Layout layout; - std::vector text = generateTestText(&mt, 3, 10); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), - paint, collection); - std::vector advances(text.size()); - layout.getAdvances(advances.data()); - for (size_t k = 0; k < advances.size(); ++k) { - // MinikinFontForTest always returns 10.0f for horizontal advance. - LOG_ALWAYS_FATAL_IF(advances[k] != 10.0f, "Memory corruption detected."); - } - } + { + // Wait until all threads are created. + std::unique_lock lock(gMutex); + gCv.wait(lock, [] { return gReady; }); + } + + std::mt19937 mt(tid); + MinikinPaint paint; + + for (int i = 0; i < COLLECTION_COUNT_PER_THREAD; ++i) { + std::shared_ptr collection( + getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); + + for (int j = 0; j < LAYOUT_COUNT_PER_COLLECTION; ++j) { + // Generates 10 of 3-letter words so that the word sometimes hit the + // cache. + Layout layout; + std::vector text = generateTestText(&mt, 3, 10); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, collection); + std::vector advances(text.size()); + layout.getAdvances(advances.data()); + for (size_t k = 0; k < advances.size(); ++k) { + // MinikinFontForTest always returns 10.0f for horizontal advance. 
+ LOG_ALWAYS_FATAL_IF(advances[k] != 10.0f, + "Memory corruption detected."); + } } + } } TEST(MultithreadTest, ThreadSafeStressTest) { - std::vector threads; - - { - std::unique_lock lock(gMutex); - threads.reserve(NUM_THREADS); - for (int i = 0; i < NUM_THREADS; ++i) { - threads.emplace_back(&thread_main, i); - } - gReady = true; - } - gCv.notify_all(); + std::vector threads; - for (auto& thread : threads) { - thread.join(); + { + std::unique_lock lock(gMutex); + threads.reserve(NUM_THREADS); + for (int i = 0; i < NUM_THREADS; ++i) { + threads.emplace_back(&thread_main, i); } + gReady = true; + } + gCv.notify_all(); + + for (auto& thread : threads) { + thread.join(); + } } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/CmapCoverageTest.cpp b/third_party/txt/tests/old/unittest/CmapCoverageTest.cpp index dd61940868a16fee5ad437c1af4a14a6b1896edd..2d046a38afaeb1d3b0b2cf6c2a37faf54426690c 100644 --- a/third_party/txt/tests/old/unittest/CmapCoverageTest.cpp +++ b/third_party/txt/tests/old/unittest/CmapCoverageTest.cpp @@ -16,609 +16,638 @@ #include -#include #include -#include +#include #include +#include namespace minikin { size_t writeU16(uint16_t x, uint8_t* out, size_t offset) { - out[offset] = x >> 8; - out[offset + 1] = x; - return offset + 2; + out[offset] = x >> 8; + out[offset + 1] = x; + return offset + 2; } size_t writeI16(int16_t sx, uint8_t* out, size_t offset) { - return writeU16(static_cast(sx), out, offset); + return writeU16(static_cast(sx), out, offset); } size_t writeU32(uint32_t x, uint8_t* out, size_t offset) { - out[offset] = x >> 24; - out[offset + 1] = x >> 16; - out[offset + 2] = x >> 8; - out[offset + 3] = x; - return offset + 4; + out[offset] = x >> 24; + out[offset + 1] = x >> 16; + out[offset + 2] = x >> 8; + out[offset + 3] = x; + return offset + 4; } -// Returns valid cmap format 4 table contents. All glyph ID is same value as code point. (e.g. -// 'a' (U+0061) is mapped to Glyph ID = 0x0061). 
-// 'range' should be specified with inclusive-inclusive values. -static std::vector buildCmapFormat4Table(const std::vector& ranges) { - uint16_t segmentCount = ranges.size() / 2 + 1 /* +1 for end marker */; - - const size_t numOfUint16 = - 8 /* format, length, languages, segCountX2, searchRange, entrySelector, rangeShift, pad */ + - segmentCount * 4 /* endCount, startCount, idRange, idRangeOffset */; - const size_t finalLength = sizeof(uint16_t) * numOfUint16; - - std::vector out(finalLength); - size_t head = 0; - head = writeU16(4, out.data(), head); // format - head = writeU16(finalLength, out.data(), head); // length - head = writeU16(0, out.data(), head); // langauge - - const uint16_t searchRange = 2 * (1 << static_cast(floor(log2(segmentCount)))); - - head = writeU16(segmentCount * 2, out.data(), head); // segCountX2 - head = writeU16(searchRange, out.data(), head); // searchRange - head = writeU16(__builtin_ctz(searchRange) - 1, out.data(), head); // entrySelector - head = writeU16(segmentCount * 2 - searchRange, out.data(), head); // rangeShift - - size_t endCountHead = head; - size_t startCountHead = head + segmentCount * sizeof(uint16_t) + 2 /* padding */; - size_t idDeltaHead = startCountHead + segmentCount * sizeof(uint16_t); - size_t idRangeOffsetHead = idDeltaHead + segmentCount * sizeof(uint16_t); - - for (size_t i = 0; i < ranges.size() / 2; ++i) { - const uint16_t begin = ranges[i * 2]; - const uint16_t end = ranges[i * 2 + 1]; - startCountHead = writeU16(begin, out.data(), startCountHead); - endCountHead = writeU16(end, out.data(), endCountHead); - // map glyph ID as the same value of the code point. 
- idDeltaHead = writeU16(0, out.data(), idDeltaHead); - idRangeOffsetHead = writeU16(0 /* we don't use this */, out.data(), idRangeOffsetHead); - } - - // fill end marker - endCountHead = writeU16(0xFFFF, out.data(), endCountHead); - startCountHead = writeU16(0xFFFF, out.data(), startCountHead); - idDeltaHead = writeU16(1, out.data(), idDeltaHead); - idRangeOffsetHead = writeU16(0, out.data(), idRangeOffsetHead); - LOG_ALWAYS_FATAL_IF(endCountHead > finalLength); - LOG_ALWAYS_FATAL_IF(startCountHead > finalLength); - LOG_ALWAYS_FATAL_IF(idDeltaHead > finalLength); - LOG_ALWAYS_FATAL_IF(idRangeOffsetHead != finalLength); - return out; +// Returns valid cmap format 4 table contents. All glyph ID is same value as +// code point. (e.g. 'a' (U+0061) is mapped to Glyph ID = 0x0061). 'range' +// should be specified with inclusive-inclusive values. +static std::vector buildCmapFormat4Table( + const std::vector& ranges) { + uint16_t segmentCount = ranges.size() / 2 + 1 /* +1 for end marker */; + + const size_t numOfUint16 = + 8 /* format, length, languages, segCountX2, searchRange, entrySelector, rangeShift, pad */ + + segmentCount * 4 /* endCount, startCount, idRange, idRangeOffset */; + const size_t finalLength = sizeof(uint16_t) * numOfUint16; + + std::vector out(finalLength); + size_t head = 0; + head = writeU16(4, out.data(), head); // format + head = writeU16(finalLength, out.data(), head); // length + head = writeU16(0, out.data(), head); // langauge + + const uint16_t searchRange = + 2 * (1 << static_cast(floor(log2(segmentCount)))); + + head = writeU16(segmentCount * 2, out.data(), head); // segCountX2 + head = writeU16(searchRange, out.data(), head); // searchRange + head = writeU16(__builtin_ctz(searchRange) - 1, out.data(), + head); // entrySelector + head = + writeU16(segmentCount * 2 - searchRange, out.data(), head); // rangeShift + + size_t endCountHead = head; + size_t startCountHead = + head + segmentCount * sizeof(uint16_t) + 2 /* padding */; + size_t 
idDeltaHead = startCountHead + segmentCount * sizeof(uint16_t); + size_t idRangeOffsetHead = idDeltaHead + segmentCount * sizeof(uint16_t); + + for (size_t i = 0; i < ranges.size() / 2; ++i) { + const uint16_t begin = ranges[i * 2]; + const uint16_t end = ranges[i * 2 + 1]; + startCountHead = writeU16(begin, out.data(), startCountHead); + endCountHead = writeU16(end, out.data(), endCountHead); + // map glyph ID as the same value of the code point. + idDeltaHead = writeU16(0, out.data(), idDeltaHead); + idRangeOffsetHead = + writeU16(0 /* we don't use this */, out.data(), idRangeOffsetHead); + } + + // fill end marker + endCountHead = writeU16(0xFFFF, out.data(), endCountHead); + startCountHead = writeU16(0xFFFF, out.data(), startCountHead); + idDeltaHead = writeU16(1, out.data(), idDeltaHead); + idRangeOffsetHead = writeU16(0, out.data(), idRangeOffsetHead); + LOG_ALWAYS_FATAL_IF(endCountHead > finalLength); + LOG_ALWAYS_FATAL_IF(startCountHead > finalLength); + LOG_ALWAYS_FATAL_IF(idDeltaHead > finalLength); + LOG_ALWAYS_FATAL_IF(idRangeOffsetHead != finalLength); + return out; } -// Returns valid cmap format 4 table contents. All glyph ID is same value as code point. (e.g. -// 'a' (U+0061) is mapped to Glyph ID = 0x0061). -// 'range' should be specified with inclusive-inclusive values. 
-static std::vector buildCmapFormat12Table(const std::vector& ranges) { - uint32_t numGroups = ranges.size() / 2; - - const size_t finalLength = 2 /* format */ + 2 /* reserved */ + 4 /* length */ + - 4 /* languages */ + 4 /* numGroups */ + 12 /* size of a group */ * numGroups; - - std::vector out(finalLength); - size_t head = 0; - head = writeU16(12, out.data(), head); // format - head = writeU16(0, out.data(), head); // reserved - head = writeU32(finalLength, out.data(), head); // length - head = writeU32(0, out.data(), head); // langauge - head = writeU32(numGroups, out.data(), head); // numGroups - - for (uint32_t i = 0; i < numGroups; ++i) { - const uint32_t start = ranges[2 * i]; - const uint32_t end = ranges[2 * i + 1]; - head = writeU32(start, out.data(), head); - head = writeU32(end, out.data(), head); - // map glyph ID as the same value of the code point. - // TODO: Use glyph IDs lower than 65535. - // Cmap can store 32 bit glyph ID but due to the size of numGlyph, a font file can contain - // up to 65535 glyphs in a file. - head = writeU32(start, out.data(), head); - } - - LOG_ALWAYS_FATAL_IF(head != finalLength); - return out; +// Returns valid cmap format 4 table contents. All glyph ID is same value as +// code point. (e.g. 'a' (U+0061) is mapped to Glyph ID = 0x0061). 'range' +// should be specified with inclusive-inclusive values. 
+static std::vector buildCmapFormat12Table( + const std::vector& ranges) { + uint32_t numGroups = ranges.size() / 2; + + const size_t finalLength = + 2 /* format */ + 2 /* reserved */ + 4 /* length */ + 4 /* languages */ + + 4 /* numGroups */ + 12 /* size of a group */ * numGroups; + + std::vector out(finalLength); + size_t head = 0; + head = writeU16(12, out.data(), head); // format + head = writeU16(0, out.data(), head); // reserved + head = writeU32(finalLength, out.data(), head); // length + head = writeU32(0, out.data(), head); // langauge + head = writeU32(numGroups, out.data(), head); // numGroups + + for (uint32_t i = 0; i < numGroups; ++i) { + const uint32_t start = ranges[2 * i]; + const uint32_t end = ranges[2 * i + 1]; + head = writeU32(start, out.data(), head); + head = writeU32(end, out.data(), head); + // map glyph ID as the same value of the code point. + // TODO: Use glyph IDs lower than 65535. + // Cmap can store 32 bit glyph ID but due to the size of numGlyph, a font + // file can contain up to 65535 glyphs in a file. + head = writeU32(start, out.data(), head); + } + + LOG_ALWAYS_FATAL_IF(head != finalLength); + return out; } class CmapBuilder { -public: - static constexpr size_t kEncodingTableHead = 4; - static constexpr size_t kEncodingTableSize = 8; - - CmapBuilder(int numTables) : mNumTables(numTables), mCurrentTableIndex(0) { - const size_t headerSize = - 2 /* version */ + 2 /* numTables */ + kEncodingTableSize * numTables; - out.resize(headerSize); - writeU16(0, out.data(), 0); - writeU16(numTables, out.data(), 2); - } - - void appendTable(uint16_t platformId, uint16_t encodingId, - const std::vector& table) { - appendEncodingTable(platformId, encodingId, out.size()); - out.insert(out.end(), table.begin(), table.end()); - } - - // TODO: Introduce Format 14 table builder. - - std::vector build() { - LOG_ALWAYS_FATAL_IF(mCurrentTableIndex != mNumTables); - return out; - } - - // Helper functions. 
- static std::vector buildSingleFormat4Cmap(uint16_t platformId, uint16_t encodingId, - const std::vector& ranges) { - CmapBuilder builder(1); - builder.appendTable(platformId, encodingId, buildCmapFormat4Table(ranges)); - return builder.build(); - } - - static std::vector buildSingleFormat12Cmap(uint16_t platformId, uint16_t encodingId, - const std::vector& ranges) { - CmapBuilder builder(1); - builder.appendTable(platformId, encodingId, buildCmapFormat12Table(ranges)); - return builder.build(); - } - -private: - void appendEncodingTable(uint16_t platformId, uint16_t encodingId, uint32_t offset) { - LOG_ALWAYS_FATAL_IF(mCurrentTableIndex == mNumTables); - - const size_t currentEncodingTableHead = - kEncodingTableHead + mCurrentTableIndex * kEncodingTableSize; - size_t head = writeU16(platformId, out.data(), currentEncodingTableHead); - head = writeU16(encodingId, out.data(), head); - head = writeU32(offset, out.data(), head); - LOG_ALWAYS_FATAL_IF((head - currentEncodingTableHead) != kEncodingTableSize); - mCurrentTableIndex++; - } - - int mNumTables; - int mCurrentTableIndex; - std::vector out; + public: + static constexpr size_t kEncodingTableHead = 4; + static constexpr size_t kEncodingTableSize = 8; + + CmapBuilder(int numTables) : mNumTables(numTables), mCurrentTableIndex(0) { + const size_t headerSize = + 2 /* version */ + 2 /* numTables */ + kEncodingTableSize * numTables; + out.resize(headerSize); + writeU16(0, out.data(), 0); + writeU16(numTables, out.data(), 2); + } + + void appendTable(uint16_t platformId, + uint16_t encodingId, + const std::vector& table) { + appendEncodingTable(platformId, encodingId, out.size()); + out.insert(out.end(), table.begin(), table.end()); + } + + // TODO: Introduce Format 14 table builder. + + std::vector build() { + LOG_ALWAYS_FATAL_IF(mCurrentTableIndex != mNumTables); + return out; + } + + // Helper functions. 
+ static std::vector buildSingleFormat4Cmap( + uint16_t platformId, + uint16_t encodingId, + const std::vector& ranges) { + CmapBuilder builder(1); + builder.appendTable(platformId, encodingId, buildCmapFormat4Table(ranges)); + return builder.build(); + } + + static std::vector buildSingleFormat12Cmap( + uint16_t platformId, + uint16_t encodingId, + const std::vector& ranges) { + CmapBuilder builder(1); + builder.appendTable(platformId, encodingId, buildCmapFormat12Table(ranges)); + return builder.build(); + } + + private: + void appendEncodingTable(uint16_t platformId, + uint16_t encodingId, + uint32_t offset) { + LOG_ALWAYS_FATAL_IF(mCurrentTableIndex == mNumTables); + + const size_t currentEncodingTableHead = + kEncodingTableHead + mCurrentTableIndex * kEncodingTableSize; + size_t head = writeU16(platformId, out.data(), currentEncodingTableHead); + head = writeU16(encodingId, out.data(), head); + head = writeU32(offset, out.data(), head); + LOG_ALWAYS_FATAL_IF((head - currentEncodingTableHead) != + kEncodingTableSize); + mCurrentTableIndex++; + } + + int mNumTables; + int mCurrentTableIndex; + std::vector out; }; TEST(CmapCoverageTest, SingleFormat4_brokenCmap) { - bool has_cmap_format_14_subtable = false; - { - SCOPED_TRACE("Reading beyond buffer size - Too small cmap size"); - std::vector cmap = - CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector({'a', 'a'})); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), 3 /* too small */, &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Reading beyond buffer size - space needed for tables goes beyond cmap size"); - std::vector cmap = - CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector({'a', 'a'})); - - writeU16(1000, cmap.data(), 2 /* offset of num tables in cmap header */); - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, 
coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Reading beyond buffer size - Invalid offset in encoding table"); - std::vector cmap = - CmapBuilder::buildSingleFormat4Cmap(0, 0, std::vector({'a', 'a'})); - - writeU16(1000, cmap.data(), 8 /* offset of the offset in the first encoding record */); - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + { + SCOPED_TRACE("Reading beyond buffer size - Too small cmap size"); + std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( + 0, 0, std::vector({'a', 'a'})); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), 3 /* too small */, &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE( + "Reading beyond buffer size - space needed for tables goes beyond cmap " + "size"); + std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( + 0, 0, std::vector({'a', 'a'})); + + writeU16(1000, cmap.data(), 2 /* offset of num tables in cmap header */); + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE( + "Reading beyond buffer size - Invalid offset in encoding table"); + std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( + 0, 0, std::vector({'a', 'a'})); + + writeU16(1000, cmap.data(), + 8 /* offset of the offset in the first encoding record */); + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, SingleFormat4) { - bool has_cmap_format_14_subtable = false; - struct TestCast { - 
std::string testTitle; - uint16_t platformId; - uint16_t encodingId; - } TEST_CASES[] = { - { "Platform 0, Encoding 0", 0, 0 }, - { "Platform 0, Encoding 1", 0, 1 }, - { "Platform 0, Encoding 2", 0, 2 }, - { "Platform 0, Encoding 3", 0, 3 }, - { "Platform 3, Encoding 1", 3, 1 }, - }; - - for (const auto& testCase : TEST_CASES) { - SCOPED_TRACE(testCase.testTitle.c_str()); - std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( - testCase.platformId, testCase.encodingId, std::vector({'a', 'a'})); - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); - EXPECT_FALSE(coverage.get('b')); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + struct TestCast { + std::string testTitle; + uint16_t platformId; + uint16_t encodingId; + } TEST_CASES[] = { + {"Platform 0, Encoding 0", 0, 0}, {"Platform 0, Encoding 1", 0, 1}, + {"Platform 0, Encoding 2", 0, 2}, {"Platform 0, Encoding 3", 0, 3}, + {"Platform 3, Encoding 1", 3, 1}, + }; + + for (const auto& testCase : TEST_CASES) { + SCOPED_TRACE(testCase.testTitle.c_str()); + std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( + testCase.platformId, testCase.encodingId, + std::vector({'a', 'a'})); + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); + EXPECT_FALSE(coverage.get('b')); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, SingleFormat12) { - bool has_cmap_format_14_subtable = false; - - struct TestCast { - std::string testTitle; - uint16_t platformId; - uint16_t encodingId; - } TEST_CASES[] = { - { "Platform 0, Encoding 4", 0, 4 }, - { "Platform 0, Encoding 6", 0, 6 }, - { "Platform 3, Encoding 10", 3, 10 }, - }; - - for (const auto& testCase : TEST_CASES) { - SCOPED_TRACE(testCase.testTitle.c_str()); - std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( - 
testCase.platformId, testCase.encodingId, std::vector({'a', 'a'})); - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); - EXPECT_FALSE(coverage.get('b')); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + + struct TestCast { + std::string testTitle; + uint16_t platformId; + uint16_t encodingId; + } TEST_CASES[] = { + {"Platform 0, Encoding 4", 0, 4}, + {"Platform 0, Encoding 6", 0, 6}, + {"Platform 3, Encoding 10", 3, 10}, + }; + + for (const auto& testCase : TEST_CASES) { + SCOPED_TRACE(testCase.testTitle.c_str()); + std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( + testCase.platformId, testCase.encodingId, + std::vector({'a', 'a'})); + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); + EXPECT_FALSE(coverage.get('b')); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, Format12_beyondTheUnicodeLimit) { - bool has_cmap_format_14_subtable = false; - { - SCOPED_TRACE("Starting range is out of Unicode code point. Should be ignored."); - std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( - 0, 0, std::vector({'a', 'a', 0x110000, 0x110000})); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); - EXPECT_FALSE(coverage.get(0x110000)); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Ending range is out of Unicode code point. 
Should be ignored."); - std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( - 0, 0, std::vector({'a', 'a', 0x10FF00, 0x110000})); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); - EXPECT_TRUE(coverage.get(0x10FF00)); - EXPECT_TRUE(coverage.get(0x10FFFF)); - EXPECT_FALSE(coverage.get(0x110000)); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + { + SCOPED_TRACE( + "Starting range is out of Unicode code point. Should be ignored."); + std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( + 0, 0, std::vector({'a', 'a', 0x110000, 0x110000})); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); + EXPECT_FALSE(coverage.get(0x110000)); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE( + "Ending range is out of Unicode code point. Should be ignored."); + std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( + 0, 0, std::vector({'a', 'a', 0x10FF00, 0x110000})); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); + EXPECT_TRUE(coverage.get(0x10FF00)); + EXPECT_TRUE(coverage.get(0x10FFFF)); + EXPECT_FALSE(coverage.get(0x110000)); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, notSupportedEncodings) { - bool has_cmap_format_14_subtable = false; - - struct TestCast { - std::string testTitle; - uint16_t platformId; - uint16_t encodingId; - } TEST_CASES[] = { - // Any encodings with platform 2 is not supported. - { "Platform 2, Encoding 0", 2, 0 }, - { "Platform 2, Encoding 1", 2, 1 }, - { "Platform 2, Encoding 2", 2, 2 }, - { "Platform 2, Encoding 3", 2, 3 }, - // UCS-2 or UCS-4 are supported on Platform == 3. Others are not supported. 
- { "Platform 3, Encoding 0", 3, 0 }, // Symbol - { "Platform 3, Encoding 2", 3, 2 }, // ShiftJIS - { "Platform 3, Encoding 3", 3, 3 }, // RPC - { "Platform 3, Encoding 4", 3, 4 }, // Big5 - { "Platform 3, Encoding 5", 3, 5 }, // Wansung - { "Platform 3, Encoding 6", 3, 6 }, // Johab - { "Platform 3, Encoding 7", 3, 7 }, // Reserved - { "Platform 3, Encoding 8", 3, 8 }, // Reserved - { "Platform 3, Encoding 9", 3, 9 }, // Reserved - // Uknown platforms - { "Platform 4, Encoding 0", 4, 0 }, - { "Platform 5, Encoding 1", 5, 1 }, - { "Platform 6, Encoding 0", 6, 0 }, - { "Platform 7, Encoding 1", 7, 1 }, - }; - - for (const auto& testCase : TEST_CASES) { - SCOPED_TRACE(testCase.testTitle.c_str()); - CmapBuilder builder(1); - std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( - testCase.platformId, testCase.encodingId, std::vector({'a', 'a'})); - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + + struct TestCast { + std::string testTitle; + uint16_t platformId; + uint16_t encodingId; + } TEST_CASES[] = { + // Any encodings with platform 2 is not supported. + {"Platform 2, Encoding 0", 2, 0}, + {"Platform 2, Encoding 1", 2, 1}, + {"Platform 2, Encoding 2", 2, 2}, + {"Platform 2, Encoding 3", 2, 3}, + // UCS-2 or UCS-4 are supported on Platform == 3. Others are not + // supported. 
+ {"Platform 3, Encoding 0", 3, 0}, // Symbol + {"Platform 3, Encoding 2", 3, 2}, // ShiftJIS + {"Platform 3, Encoding 3", 3, 3}, // RPC + {"Platform 3, Encoding 4", 3, 4}, // Big5 + {"Platform 3, Encoding 5", 3, 5}, // Wansung + {"Platform 3, Encoding 6", 3, 6}, // Johab + {"Platform 3, Encoding 7", 3, 7}, // Reserved + {"Platform 3, Encoding 8", 3, 8}, // Reserved + {"Platform 3, Encoding 9", 3, 9}, // Reserved + // Uknown platforms + {"Platform 4, Encoding 0", 4, 0}, + {"Platform 5, Encoding 1", 5, 1}, + {"Platform 6, Encoding 0", 6, 0}, + {"Platform 7, Encoding 1", 7, 1}, + }; + + for (const auto& testCase : TEST_CASES) { + SCOPED_TRACE(testCase.testTitle.c_str()); + CmapBuilder builder(1); + std::vector cmap = CmapBuilder::buildSingleFormat4Cmap( + testCase.platformId, testCase.encodingId, + std::vector({'a', 'a'})); + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, brokenFormat4Table) { - bool has_cmap_format_14_subtable = false; - { - SCOPED_TRACE("Too small table cmap size"); - std::vector table = buildCmapFormat4Table(std::vector({'a', 'a'})); - table.resize(2); // Remove trailing data. - - CmapBuilder builder(1); - builder.appendTable(0, 0, table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Too many segments"); - std::vector table = buildCmapFormat4Table(std::vector({'a', 'a'})); - writeU16(5000, table.data(), 6 /* segment count offset */); // 5000 segments. 
- CmapBuilder builder(1); - builder.appendTable(0, 0, table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Inversed range"); - std::vector table = buildCmapFormat4Table(std::vector({'b', 'b'})); - // Put smaller end code point to inverse the range. - writeU16('a', table.data(), 14 /* the first element of endCount offset */); - CmapBuilder builder(1); - builder.appendTable(0, 0, table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + { + SCOPED_TRACE("Too small table cmap size"); + std::vector table = + buildCmapFormat4Table(std::vector({'a', 'a'})); + table.resize(2); // Remove trailing data. + + CmapBuilder builder(1); + builder.appendTable(0, 0, table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Too many segments"); + std::vector table = + buildCmapFormat4Table(std::vector({'a', 'a'})); + writeU16(5000, table.data(), + 6 /* segment count offset */); // 5000 segments. 
+ CmapBuilder builder(1); + builder.appendTable(0, 0, table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Inversed range"); + std::vector table = + buildCmapFormat4Table(std::vector({'b', 'b'})); + // Put smaller end code point to inverse the range. + writeU16('a', table.data(), 14 /* the first element of endCount offset */); + CmapBuilder builder(1); + builder.appendTable(0, 0, table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, brokenFormat12Table) { - bool has_cmap_format_14_subtable = false; - { - SCOPED_TRACE("Too small cmap size"); - std::vector table = buildCmapFormat12Table(std::vector({'a', 'a'})); - table.resize(2); // Remove trailing data. - - CmapBuilder builder(1); - builder.appendTable(0, 0, table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Too many groups"); - std::vector table = buildCmapFormat12Table(std::vector({'a', 'a'})); - writeU32(5000, table.data(), 12 /* num group offset */); // 5000 groups. 
- - CmapBuilder builder(1); - builder.appendTable(0, 0, table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Inversed range."); - std::vector table = buildCmapFormat12Table(std::vector({'a', 'a'})); - // Put larger start code point to inverse the range. - writeU32('b', table.data(), 16 /* start code point offset in the first group */); - - CmapBuilder builder(1); - builder.appendTable(0, 0, table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Too large code point"); - std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( - 0, 0, std::vector({0x110000, 0x110000})); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_EQ(0U, coverage.length()); - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + { + SCOPED_TRACE("Too small cmap size"); + std::vector table = + buildCmapFormat12Table(std::vector({'a', 'a'})); + table.resize(2); // Remove trailing data. + + CmapBuilder builder(1); + builder.appendTable(0, 0, table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Too many groups"); + std::vector table = + buildCmapFormat12Table(std::vector({'a', 'a'})); + writeU32(5000, table.data(), 12 /* num group offset */); // 5000 groups. 
+ + CmapBuilder builder(1); + builder.appendTable(0, 0, table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Inversed range."); + std::vector table = + buildCmapFormat12Table(std::vector({'a', 'a'})); + // Put larger start code point to inverse the range. + writeU32('b', table.data(), + 16 /* start code point offset in the first group */); + + CmapBuilder builder(1); + builder.appendTable(0, 0, table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Too large code point"); + std::vector cmap = CmapBuilder::buildSingleFormat12Cmap( + 0, 0, std::vector({0x110000, 0x110000})); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_EQ(0U, coverage.length()); + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, TableSelection_Priority) { - bool has_cmap_format_14_subtable = false; - std::vector highestFormat12Table = - buildCmapFormat12Table(std::vector({'a', 'a'})); - std::vector highestFormat4Table = - buildCmapFormat4Table(std::vector({'a', 'a'})); - std::vector format4 = buildCmapFormat4Table(std::vector({'b', 'b'})); - std::vector format12 = buildCmapFormat12Table(std::vector({'b', 'b'})); - - { - SCOPED_TRACE("(platform, encoding) = (3, 10) is the highest priority."); - - struct LowerPriorityTable { - uint16_t platformId; - uint16_t encodingId; - const std::vector& table; - } LOWER_PRIORITY_TABLES[] = { - { 0, 0, format4 }, - { 0, 1, format4 }, - { 0, 2, format4 }, - { 0, 3, format4 }, - { 0, 4, format12 }, - { 0, 6, format12 }, - { 3, 1, format4 }, - }; - - for 
(const auto& table : LOWER_PRIORITY_TABLES) { - CmapBuilder builder(2); - builder.appendTable(table.platformId, table.encodingId, table.table); - builder.appendTable(3, 10, highestFormat12Table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from highest table - EXPECT_FALSE(coverage.get('b')); // should not use other table. - EXPECT_FALSE(has_cmap_format_14_subtable); - } + bool has_cmap_format_14_subtable = false; + std::vector highestFormat12Table = + buildCmapFormat12Table(std::vector({'a', 'a'})); + std::vector highestFormat4Table = + buildCmapFormat4Table(std::vector({'a', 'a'})); + std::vector format4 = + buildCmapFormat4Table(std::vector({'b', 'b'})); + std::vector format12 = + buildCmapFormat12Table(std::vector({'b', 'b'})); + + { + SCOPED_TRACE("(platform, encoding) = (3, 10) is the highest priority."); + + struct LowerPriorityTable { + uint16_t platformId; + uint16_t encodingId; + const std::vector& table; + } LOWER_PRIORITY_TABLES[] = { + {0, 0, format4}, {0, 1, format4}, {0, 2, format4}, {0, 3, format4}, + {0, 4, format12}, {0, 6, format12}, {3, 1, format4}, + }; + + for (const auto& table : LOWER_PRIORITY_TABLES) { + CmapBuilder builder(2); + builder.appendTable(table.platformId, table.encodingId, table.table); + builder.appendTable(3, 10, highestFormat12Table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from highest table + EXPECT_FALSE(coverage.get('b')); // should not use other table. 
+ EXPECT_FALSE(has_cmap_format_14_subtable); } - { - SCOPED_TRACE("(platform, encoding) = (3, 1) case"); - - struct LowerPriorityTable { - uint16_t platformId; - uint16_t encodingId; - const std::vector& table; - } LOWER_PRIORITY_TABLES[] = { - { 0, 0, format4 }, - { 0, 1, format4 }, - { 0, 2, format4 }, - { 0, 3, format4 }, - }; - - for (const auto& table : LOWER_PRIORITY_TABLES) { - CmapBuilder builder(2); - builder.appendTable(table.platformId, table.encodingId, table.table); - builder.appendTable(3, 1, highestFormat4Table); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from highest table - EXPECT_FALSE(coverage.get('b')); // should not use other table. - EXPECT_FALSE(has_cmap_format_14_subtable); - } + } + { + SCOPED_TRACE("(platform, encoding) = (3, 1) case"); + + struct LowerPriorityTable { + uint16_t platformId; + uint16_t encodingId; + const std::vector& table; + } LOWER_PRIORITY_TABLES[] = { + {0, 0, format4}, + {0, 1, format4}, + {0, 2, format4}, + {0, 3, format4}, + }; + + for (const auto& table : LOWER_PRIORITY_TABLES) { + CmapBuilder builder(2); + builder.appendTable(table.platformId, table.encodingId, table.table); + builder.appendTable(3, 1, highestFormat4Table); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from highest table + EXPECT_FALSE(coverage.get('b')); // should not use other table. 
+ EXPECT_FALSE(has_cmap_format_14_subtable); } + } } TEST(CmapCoverageTest, TableSelection_SkipBrokenFormat4Table) { - SparseBitSet coverage; - bool has_cmap_format_14_subtable = false; - std::vector validTable = - buildCmapFormat4Table(std::vector({'a', 'a'})); - { - SCOPED_TRACE("Unsupported format"); - CmapBuilder builder(2); - std::vector table = - buildCmapFormat4Table(std::vector({'b', 'b'})); - writeU16(0, table.data(), 0 /* format offset */); - builder.appendTable(3, 1, table); - builder.appendTable(0, 0, validTable); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from valid table - EXPECT_FALSE(coverage.get('b')); // should not use invalid table. - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Invalid language"); - CmapBuilder builder(2); - std::vector table = - buildCmapFormat4Table(std::vector({'b', 'b'})); - writeU16(1, table.data(), 4 /* language offset */); - builder.appendTable(3, 1, table); - builder.appendTable(0, 0, validTable); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from valid table - EXPECT_FALSE(coverage.get('b')); // should not use invalid table. 
- EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Invalid length"); - CmapBuilder builder(2); - std::vector table = - buildCmapFormat4Table(std::vector({'b', 'b'})); - writeU16(5000, table.data(), 2 /* length offset */); - builder.appendTable(3, 1, table); - builder.appendTable(0, 0, validTable); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from valid table - EXPECT_FALSE(coverage.get('b')); // should not use invalid table. - EXPECT_FALSE(has_cmap_format_14_subtable); - } + SparseBitSet coverage; + bool has_cmap_format_14_subtable = false; + std::vector validTable = + buildCmapFormat4Table(std::vector({'a', 'a'})); + { + SCOPED_TRACE("Unsupported format"); + CmapBuilder builder(2); + std::vector table = + buildCmapFormat4Table(std::vector({'b', 'b'})); + writeU16(0, table.data(), 0 /* format offset */); + builder.appendTable(3, 1, table); + builder.appendTable(0, 0, validTable); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from valid table + EXPECT_FALSE(coverage.get('b')); // should not use invalid table. + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Invalid language"); + CmapBuilder builder(2); + std::vector table = + buildCmapFormat4Table(std::vector({'b', 'b'})); + writeU16(1, table.data(), 4 /* language offset */); + builder.appendTable(3, 1, table); + builder.appendTable(0, 0, validTable); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from valid table + EXPECT_FALSE(coverage.get('b')); // should not use invalid table. 
+ EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Invalid length"); + CmapBuilder builder(2); + std::vector table = + buildCmapFormat4Table(std::vector({'b', 'b'})); + writeU16(5000, table.data(), 2 /* length offset */); + builder.appendTable(3, 1, table); + builder.appendTable(0, 0, validTable); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from valid table + EXPECT_FALSE(coverage.get('b')); // should not use invalid table. + EXPECT_FALSE(has_cmap_format_14_subtable); + } } TEST(CmapCoverageTest, TableSelection_SkipBrokenFormat12Table) { - SparseBitSet coverage; - bool has_cmap_format_14_subtable = false; - std::vector validTable = - buildCmapFormat12Table(std::vector({'a', 'a'})); - { - SCOPED_TRACE("Unsupported format"); - CmapBuilder builder(2); - std::vector table = - buildCmapFormat12Table(std::vector({'b', 'b'})); - writeU16(0, table.data(), 0 /* format offset */); - builder.appendTable(3, 1, table); - builder.appendTable(0, 0, validTable); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from valid table - EXPECT_FALSE(coverage.get('b')); // should not use invalid table. 
- EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Invalid language"); - CmapBuilder builder(2); - std::vector table = - buildCmapFormat12Table(std::vector({'b', 'b'})); - writeU32(1, table.data(), 8 /* language offset */); - builder.appendTable(3, 1, table); - builder.appendTable(0, 0, validTable); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from valid table - EXPECT_FALSE(coverage.get('b')); // should not use invalid table. - EXPECT_FALSE(has_cmap_format_14_subtable); - } - { - SCOPED_TRACE("Invalid length"); - CmapBuilder builder(2); - std::vector table = - buildCmapFormat12Table(std::vector({'b', 'b'})); - writeU32(5000, table.data(), 4 /* length offset */); - builder.appendTable(3, 1, table); - builder.appendTable(0, 0, validTable); - std::vector cmap = builder.build(); - - SparseBitSet coverage = CmapCoverage::getCoverage( - cmap.data(), cmap.size(), &has_cmap_format_14_subtable); - EXPECT_TRUE(coverage.get('a')); // comes from valid table - EXPECT_FALSE(coverage.get('b')); // should not use invalid table. - EXPECT_FALSE(has_cmap_format_14_subtable); - } + SparseBitSet coverage; + bool has_cmap_format_14_subtable = false; + std::vector validTable = + buildCmapFormat12Table(std::vector({'a', 'a'})); + { + SCOPED_TRACE("Unsupported format"); + CmapBuilder builder(2); + std::vector table = + buildCmapFormat12Table(std::vector({'b', 'b'})); + writeU16(0, table.data(), 0 /* format offset */); + builder.appendTable(3, 1, table); + builder.appendTable(0, 0, validTable); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from valid table + EXPECT_FALSE(coverage.get('b')); // should not use invalid table. 
+ EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Invalid language"); + CmapBuilder builder(2); + std::vector table = + buildCmapFormat12Table(std::vector({'b', 'b'})); + writeU32(1, table.data(), 8 /* language offset */); + builder.appendTable(3, 1, table); + builder.appendTable(0, 0, validTable); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from valid table + EXPECT_FALSE(coverage.get('b')); // should not use invalid table. + EXPECT_FALSE(has_cmap_format_14_subtable); + } + { + SCOPED_TRACE("Invalid length"); + CmapBuilder builder(2); + std::vector table = + buildCmapFormat12Table(std::vector({'b', 'b'})); + writeU32(5000, table.data(), 4 /* length offset */); + builder.appendTable(3, 1, table); + builder.appendTable(0, 0, validTable); + std::vector cmap = builder.build(); + + SparseBitSet coverage = CmapCoverage::getCoverage( + cmap.data(), cmap.size(), &has_cmap_format_14_subtable); + EXPECT_TRUE(coverage.get('a')); // comes from valid table + EXPECT_FALSE(coverage.get('b')); // should not use invalid table. 
+ EXPECT_FALSE(has_cmap_format_14_subtable); + } } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/EmojiTest.cpp b/third_party/txt/tests/old/unittest/EmojiTest.cpp index 04a41732ca8b8a757fe22f4d46845d25bdecb574..91512ddee5e484318c58bc9cb51ae16ee8412762 100644 --- a/third_party/txt/tests/old/unittest/EmojiTest.cpp +++ b/third_party/txt/tests/old/unittest/EmojiTest.cpp @@ -23,66 +23,70 @@ namespace minikin { TEST(EmojiTest, isEmojiTest) { - EXPECT_TRUE(isEmoji(0x0023)); // NUMBER SIGN - EXPECT_TRUE(isEmoji(0x0035)); // DIGIT FIVE - // EXPECT_TRUE(isEmoji(0x2640)); // FEMALE SIGN - // EXPECT_TRUE(isEmoji(0x2642)); // MALE SIGN - // EXPECT_TRUE(isEmoji(0x2695)); // STAFF OF AESCULAPIUS - EXPECT_TRUE(isEmoji(0x1F0CF)); // PLAYING CARD BLACK JOKER - EXPECT_TRUE(isEmoji(0x1F1E9)); // REGIONAL INDICATOR SYMBOL LETTER D - EXPECT_TRUE(isEmoji(0x1F6F7)); // SLED - EXPECT_TRUE(isEmoji(0x1F9E6)); // SOCKS + EXPECT_TRUE(isEmoji(0x0023)); // NUMBER SIGN + EXPECT_TRUE(isEmoji(0x0035)); // DIGIT FIVE + // EXPECT_TRUE(isEmoji(0x2640)); // FEMALE SIGN + // EXPECT_TRUE(isEmoji(0x2642)); // MALE SIGN + // EXPECT_TRUE(isEmoji(0x2695)); // STAFF OF AESCULAPIUS + EXPECT_TRUE(isEmoji(0x1F0CF)); // PLAYING CARD BLACK JOKER + EXPECT_TRUE(isEmoji(0x1F1E9)); // REGIONAL INDICATOR SYMBOL LETTER D + EXPECT_TRUE(isEmoji(0x1F6F7)); // SLED + EXPECT_TRUE(isEmoji(0x1F9E6)); // SOCKS - EXPECT_FALSE(isEmoji(0x0000)); // - EXPECT_FALSE(isEmoji(0x0061)); // LATIN SMALL LETTER A - EXPECT_FALSE(isEmoji(0x1F93B)); // MODERN PENTATHLON - EXPECT_FALSE(isEmoji(0x1F946)); // RIFLE - EXPECT_FALSE(isEmoji(0x29E3D)); // A han character. + EXPECT_FALSE(isEmoji(0x0000)); // + EXPECT_FALSE(isEmoji(0x0061)); // LATIN SMALL LETTER A + EXPECT_FALSE(isEmoji(0x1F93B)); // MODERN PENTATHLON + EXPECT_FALSE(isEmoji(0x1F946)); // RIFLE + EXPECT_FALSE(isEmoji(0x29E3D)); // A han character. 
} TEST(EmojiTest, isEmojiModifierTest) { - EXPECT_TRUE(isEmojiModifier(0x1F3FB)); // EMOJI MODIFIER FITZPATRICK TYPE-1-2 - EXPECT_TRUE(isEmojiModifier(0x1F3FC)); // EMOJI MODIFIER FITZPATRICK TYPE-3 - EXPECT_TRUE(isEmojiModifier(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4 - EXPECT_TRUE(isEmojiModifier(0x1F3FE)); // EMOJI MODIFIER FITZPATRICK TYPE-5 - EXPECT_TRUE(isEmojiModifier(0x1F3FF)); // EMOJI MODIFIER FITZPATRICK TYPE-6 + EXPECT_TRUE(isEmojiModifier(0x1F3FB)); // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + EXPECT_TRUE(isEmojiModifier(0x1F3FC)); // EMOJI MODIFIER FITZPATRICK TYPE-3 + EXPECT_TRUE(isEmojiModifier(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4 + EXPECT_TRUE(isEmojiModifier(0x1F3FE)); // EMOJI MODIFIER FITZPATRICK TYPE-5 + EXPECT_TRUE(isEmojiModifier(0x1F3FF)); // EMOJI MODIFIER FITZPATRICK TYPE-6 - EXPECT_FALSE(isEmojiModifier(0x0000)); // - EXPECT_FALSE(isEmojiModifier(0x1F3FA)); // AMPHORA - EXPECT_FALSE(isEmojiModifier(0x1F400)); // RAT - EXPECT_FALSE(isEmojiModifier(0x29E3D)); // A han character. + EXPECT_FALSE(isEmojiModifier(0x0000)); // + EXPECT_FALSE(isEmojiModifier(0x1F3FA)); // AMPHORA + EXPECT_FALSE(isEmojiModifier(0x1F400)); // RAT + EXPECT_FALSE(isEmojiModifier(0x29E3D)); // A han character. 
} TEST(EmojiTest, isEmojiBaseTest) { - EXPECT_TRUE(isEmojiBase(0x261D)); // WHITE UP POINTING INDEX - EXPECT_TRUE(isEmojiBase(0x270D)); // WRITING HAND - EXPECT_TRUE(isEmojiBase(0x1F385)); // FATHER CHRISTMAS - // EXPECT_TRUE(isEmojiBase(0x1F3C2)); // SNOWBOARDER - // EXPECT_TRUE(isEmojiBase(0x1F3C7)); // HORSE RACING - // EXPECT_TRUE(isEmojiBase(0x1F3CC)); // GOLFER - // EXPECT_TRUE(isEmojiBase(0x1F574)); // MAN IN BUSINESS SUIT LEVITATING - // EXPECT_TRUE(isEmojiBase(0x1F6CC)); // SLEEPING ACCOMMODATION - EXPECT_TRUE(isEmojiBase(0x1F91D)); // HANDSHAKE (removed from Emoji 4.0, but we need it) - EXPECT_TRUE(isEmojiBase(0x1F91F)); // I LOVE YOU HAND SIGN - EXPECT_TRUE(isEmojiBase(0x1F931)); // BREAST-FEEDING - EXPECT_TRUE(isEmojiBase(0x1F932)); // PALMS UP TOGETHER - EXPECT_TRUE(isEmojiBase(0x1F93C)); // WRESTLERS (removed from Emoji 4.0, but we need it) - EXPECT_TRUE(isEmojiBase(0x1F9D1)); // ADULT - EXPECT_TRUE(isEmojiBase(0x1F9DD)); // ELF + EXPECT_TRUE(isEmojiBase(0x261D)); // WHITE UP POINTING INDEX + EXPECT_TRUE(isEmojiBase(0x270D)); // WRITING HAND + EXPECT_TRUE(isEmojiBase(0x1F385)); // FATHER CHRISTMAS + // EXPECT_TRUE(isEmojiBase(0x1F3C2)); // SNOWBOARDER + // EXPECT_TRUE(isEmojiBase(0x1F3C7)); // HORSE RACING + // EXPECT_TRUE(isEmojiBase(0x1F3CC)); // GOLFER + // EXPECT_TRUE(isEmojiBase(0x1F574)); // MAN IN BUSINESS SUIT LEVITATING + // EXPECT_TRUE(isEmojiBase(0x1F6CC)); // SLEEPING ACCOMMODATION + EXPECT_TRUE(isEmojiBase( + 0x1F91D)); // HANDSHAKE (removed from Emoji 4.0, but we need it) + EXPECT_TRUE(isEmojiBase(0x1F91F)); // I LOVE YOU HAND SIGN + EXPECT_TRUE(isEmojiBase(0x1F931)); // BREAST-FEEDING + EXPECT_TRUE(isEmojiBase(0x1F932)); // PALMS UP TOGETHER + EXPECT_TRUE(isEmojiBase( + 0x1F93C)); // WRESTLERS (removed from Emoji 4.0, but we need it) + EXPECT_TRUE(isEmojiBase(0x1F9D1)); // ADULT + EXPECT_TRUE(isEmojiBase(0x1F9DD)); // ELF - EXPECT_FALSE(isEmojiBase(0x0000)); // - EXPECT_FALSE(isEmojiBase(0x261C)); // WHITE LEFT POINTING INDEX - 
EXPECT_FALSE(isEmojiBase(0x1F384)); // CHRISTMAS TREE - EXPECT_FALSE(isEmojiBase(0x1F9DE)); // GENIE - EXPECT_FALSE(isEmojiBase(0x29E3D)); // A han character. + EXPECT_FALSE(isEmojiBase(0x0000)); // + EXPECT_FALSE(isEmojiBase(0x261C)); // WHITE LEFT POINTING INDEX + EXPECT_FALSE(isEmojiBase(0x1F384)); // CHRISTMAS TREE + EXPECT_FALSE(isEmojiBase(0x1F9DE)); // GENIE + EXPECT_FALSE(isEmojiBase(0x29E3D)); // A han character. } TEST(EmojiTest, emojiBidiOverrideTest) { - EXPECT_EQ(U_RIGHT_TO_LEFT, emojiBidiOverride(nullptr, 0x05D0)); // HEBREW LETTER ALEF - EXPECT_EQ(U_LEFT_TO_RIGHT, - emojiBidiOverride(nullptr, 0x1F170)); // NEGATIVE SQUARED LATIN CAPITAL LETTER A - EXPECT_EQ(U_OTHER_NEUTRAL, emojiBidiOverride(nullptr, 0x1F6F7)); // SLED - EXPECT_EQ(U_OTHER_NEUTRAL, emojiBidiOverride(nullptr, 0x1F9E6)); // SOCKS + EXPECT_EQ(U_RIGHT_TO_LEFT, + emojiBidiOverride(nullptr, 0x05D0)); // HEBREW LETTER ALEF + EXPECT_EQ(U_LEFT_TO_RIGHT, + emojiBidiOverride( + nullptr, 0x1F170)); // NEGATIVE SQUARED LATIN CAPITAL LETTER A + EXPECT_EQ(U_OTHER_NEUTRAL, emojiBidiOverride(nullptr, 0x1F6F7)); // SLED + EXPECT_EQ(U_OTHER_NEUTRAL, emojiBidiOverride(nullptr, 0x1F9E6)); // SOCKS } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/FontCollectionItemizeTest.cpp b/third_party/txt/tests/old/unittest/FontCollectionItemizeTest.cpp index 6715227818ff50538a301ebe474a175eb675b51c..0c98df51da8f5ac57c8343b09cff929d32c1d5f2 100644 --- a/third_party/txt/tests/old/unittest/FontCollectionItemizeTest.cpp +++ b/third_party/txt/tests/old/unittest/FontCollectionItemizeTest.cpp @@ -18,13 +18,13 @@ #include -#include "lib/txt/libs/minikin/FontLanguageListCache.h" -#include "lib/txt/libs/minikin/FontLanguage.h" #include "FontTestUtils.h" #include "ICUTestBase.h" #include "MinikinFontForTest.h" -#include "lib/txt/libs/minikin/MinikinInternal.h" #include "UnicodeUtils.h" +#include "lib/txt/libs/minikin/FontLanguage.h" +#include "lib/txt/libs/minikin/FontLanguageListCache.h" +#include 
"lib/txt/libs/minikin/MinikinInternal.h" #include "minikin/FontFamily.h" namespace minikin { @@ -41,1435 +41,1510 @@ const char kZH_HansFont[] = kTestFontDir "ZhHans.ttf"; const char kZH_HantFont[] = kTestFontDir "ZhHant.ttf"; const char kEmojiXmlFile[] = kTestFontDir "emoji.xml"; -const char kNoGlyphFont[] = kTestFontDir "NoGlyphFont.ttf"; +const char kNoGlyphFont[] = kTestFontDir "NoGlyphFont.ttf"; const char kColorEmojiFont[] = kTestFontDir "ColorEmojiFont.ttf"; const char kTextEmojiFont[] = kTestFontDir "TextEmojiFont.ttf"; const char kMixedEmojiFont[] = kTestFontDir "ColorTextMixedEmojiFont.ttf"; -const char kHasCmapFormat14Font[] = kTestFontDir "NoCmapFormat14.ttf"; -const char kNoCmapFormat14Font[] = kTestFontDir "VariationSelectorTest-Regular.ttf"; +const char kHasCmapFormat14Font[] = kTestFontDir "NoCmapFormat14.ttf"; +const char kNoCmapFormat14Font[] = + kTestFontDir "VariationSelectorTest-Regular.ttf"; typedef ICUTestBase FontCollectionItemizeTest; // Utility function for calling itemize function. -void itemize(const std::shared_ptr& collection, const char* str, FontStyle style, - std::vector* result) { - const size_t BUF_SIZE = 256; - uint16_t buf[BUF_SIZE]; - size_t len; - - result->clear(); - ParseUnicode(buf, BUF_SIZE, str, &len, NULL); - std::lock_guard _l(gMinikinLock); - collection->itemize(buf, len, style, result); +void itemize(const std::shared_ptr& collection, + const char* str, + FontStyle style, + std::vector* result) { + const size_t BUF_SIZE = 256; + uint16_t buf[BUF_SIZE]; + size_t len; + + result->clear(); + ParseUnicode(buf, BUF_SIZE, str, &len, NULL); + std::lock_guard _l(gMinikinLock); + collection->itemize(buf, len, style, result); } // Utility function to obtain font path associated with run. 
const std::string& getFontPath(const FontCollection::Run& run) { - EXPECT_NE(nullptr, run.fakedFont.font); - return ((MinikinFontForTest*)run.fakedFont.font)->fontPath(); + EXPECT_NE(nullptr, run.fakedFont.font); + return ((MinikinFontForTest*)run.fakedFont.font)->fontPath(); } // Utility function to obtain FontLanguages from string. -const FontLanguages& registerAndGetFontLanguages(const std::string& lang_string) { - std::lock_guard _l(gMinikinLock); - return FontLanguageListCache::getById(FontLanguageListCache::getId(lang_string)); +const FontLanguages& registerAndGetFontLanguages( + const std::string& lang_string) { + std::lock_guard _l(gMinikinLock); + return FontLanguageListCache::getById( + FontLanguageListCache::getId(lang_string)); } TEST_F(FontCollectionItemizeTest, itemize_latin) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - const FontStyle kRegularStyle = FontStyle(); - const FontStyle kItalicStyle = FontStyle(4, true); - const FontStyle kBoldStyle = FontStyle(7, false); - const FontStyle kBoldItalicStyle = FontStyle(7, true); - - itemize(collection, "'a' 'b' 'c' 'd' 'e'", kRegularStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - itemize(collection, "'a' 'b' 'c' 'd' 'e'", kItalicStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kLatinItalicFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - itemize(collection, "'a' 'b' 'c' 'd' 'e'", kBoldStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kLatinBoldFont, getFontPath(runs[0])); - 
EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - itemize(collection, "'a' 'b' 'c' 'd' 'e'", kBoldItalicStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kLatinBoldItalicFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // Continue if the specific characters (e.g. hyphen, comma, etc.) is - // followed. - itemize(collection, "'a' ',' '-' 'd' '!'", kRegularStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - itemize(collection, "'a' ',' '-' 'd' '!'", kRegularStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // U+0301(COMBINING ACUTE ACCENT) must be in the same run with preceding - // chars if the font supports it. 
- itemize(collection, "'a' U+0301", kRegularStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + const FontStyle kRegularStyle = FontStyle(); + const FontStyle kItalicStyle = FontStyle(4, true); + const FontStyle kBoldStyle = FontStyle(7, false); + const FontStyle kBoldItalicStyle = FontStyle(7, true); + + itemize(collection, "'a' 'b' 'c' 'd' 'e'", kRegularStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + itemize(collection, "'a' 'b' 'c' 'd' 'e'", kItalicStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kLatinItalicFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + itemize(collection, "'a' 'b' 'c' 'd' 'e'", kBoldStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kLatinBoldFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + itemize(collection, "'a' 'b' 'c' 'd' 'e'", kBoldItalicStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kLatinBoldItalicFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // Continue if the specific characters (e.g. hyphen, comma, etc.) 
is + // followed. + itemize(collection, "'a' ',' '-' 'd' '!'", kRegularStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + itemize(collection, "'a' ',' '-' 'd' '!'", kRegularStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // U+0301(COMBINING ACUTE ACCENT) must be in the same run with preceding + // chars if the font supports it. + itemize(collection, "'a' U+0301", kRegularStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); } TEST_F(FontCollectionItemizeTest, itemize_emoji) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - itemize(collection, "U+1F469 U+1F467", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // U+20E3(COMBINING ENCLOSING KEYCAP) must be in the same run with preceding - // character if the font supports. 
- itemize(collection, "'0' U+20E3", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - itemize(collection, "U+1F470 U+20E3", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - itemize(collection, "U+242EE U+1F470 U+20E3", FontStyle(), &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(2, runs[1].start); - EXPECT_EQ(5, runs[1].end); - EXPECT_EQ(kEmojiFont, getFontPath(runs[1])); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); - - // Currently there is no fonts which has a glyph for 'a' + U+20E3, so they - // are splitted into two. 
- itemize(collection, "'a' U+20E3", FontStyle(), &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(2, runs[1].end); - EXPECT_EQ(kEmojiFont, getFontPath(runs[1])); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + itemize(collection, "U+1F469 U+1F467", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // U+20E3(COMBINING ENCLOSING KEYCAP) must be in the same run with preceding + // character if the font supports. 
+ itemize(collection, "'0' U+20E3", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + itemize(collection, "U+1F470 U+20E3", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + itemize(collection, "U+242EE U+1F470 U+20E3", FontStyle(), &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(5, runs[1].end); + EXPECT_EQ(kEmojiFont, getFontPath(runs[1])); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); + + // Currently no font has a glyph for 'a' + U+20E3, so the text is + // split into two runs.
+ itemize(collection, "'a' U+20E3", FontStyle(), &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(2, runs[1].end); + EXPECT_EQ(kEmojiFont, getFontPath(runs[1])); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); } TEST_F(FontCollectionItemizeTest, itemize_non_latin) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - FontStyle kJAStyle = FontStyle(FontStyle::registerLanguageList("ja_JP")); - FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US")); - FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans")); - - // All Japanese Hiragana characters. - itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kUSStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // All Korean Hangul characters. - itemize(collection, "U+B300 U+D55C U+BBFC U+AD6D", kUSStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kKOFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // All Han characters ja, zh-Hans font having. - // Japanese font should be selected if the specified language is Japanese. 
- itemize(collection, "U+81ED U+82B1 U+5FCD", kJAStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // Simplified Chinese font should be selected if the specified language is Simplified - // Chinese. - itemize(collection, "U+81ED U+82B1 U+5FCD", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // Fallbacks to other fonts if there is no glyph in the specified language's - // font. There is no character U+4F60 in Japanese. - itemize(collection, "U+81ED U+4F60 U+5FCD", kJAStyle, &runs); - ASSERT_EQ(3U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(2, runs[1].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[1])); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(2, runs[2].start); - EXPECT_EQ(3, runs[2].end); - EXPECT_EQ(kJAFont, getFontPath(runs[2])); - EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic()); - - // Tone mark. 
- itemize(collection, "U+4444 U+302D", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // Both zh-Hant and ja fonts support U+242EE, but zh-Hans doesn't. - // Here, ja and zh-Hant font should have the same score but ja should be selected since it is - // listed before zh-Hant. - itemize(collection, "U+242EE", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + FontStyle kJAStyle = FontStyle(FontStyle::registerLanguageList("ja_JP")); + FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US")); + FontStyle kZH_HansStyle = + FontStyle(FontStyle::registerLanguageList("zh_Hans")); + + // All Japanese Hiragana characters. + itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kUSStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // All Korean Hangul characters. + itemize(collection, "U+B300 U+D55C U+BBFC U+AD6D", kUSStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kKOFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // All Han characters ja, zh-Hans font having. + // Japanese font should be selected if the specified language is Japanese. 
+ itemize(collection, "U+81ED U+82B1 U+5FCD", kJAStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // Simplified Chinese font should be selected if the specified language is + // Simplified Chinese. + itemize(collection, "U+81ED U+82B1 U+5FCD", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // Fallbacks to other fonts if there is no glyph in the specified language's + // font. There is no character U+4F60 in Japanese. + itemize(collection, "U+81ED U+4F60 U+5FCD", kJAStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(2, runs[1].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[1])); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(2, runs[2].start); + EXPECT_EQ(3, runs[2].end); + EXPECT_EQ(kJAFont, getFontPath(runs[2])); + EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic()); + + // Tone mark. 
+ itemize(collection, "U+4444 U+302D", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // Both zh-Hant and ja fonts support U+242EE, but zh-Hans doesn't. + // Here, ja and zh-Hant font should have the same score but ja should be + // selected since it is listed before zh-Hant. + itemize(collection, "U+242EE", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); } TEST_F(FontCollectionItemizeTest, itemize_mixed) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US")); - - itemize(collection, "'a' U+4F60 'b' U+4F60 'c'", kUSStyle, &runs); - ASSERT_EQ(5U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(2, runs[1].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[1])); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(2, runs[2].start); - EXPECT_EQ(3, runs[2].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[2])); - EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(3, runs[3].start); - EXPECT_EQ(4, runs[3].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[3])); - EXPECT_FALSE(runs[3].fakedFont.fakery.isFakeBold()); - 
EXPECT_FALSE(runs[3].fakedFont.fakery.isFakeItalic()); - - EXPECT_EQ(4, runs[4].start); - EXPECT_EQ(5, runs[4].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[4])); - EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeItalic()); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + FontStyle kUSStyle = FontStyle(FontStyle::registerLanguageList("en_US")); + + itemize(collection, "'a' U+4F60 'b' U+4F60 'c'", kUSStyle, &runs); + ASSERT_EQ(5U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(2, runs[1].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[1])); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[1].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(2, runs[2].start); + EXPECT_EQ(3, runs[2].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[2])); + EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[2].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(3, runs[3].start); + EXPECT_EQ(4, runs[3].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[3])); + EXPECT_FALSE(runs[3].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[3].fakedFont.fakery.isFakeItalic()); + + EXPECT_EQ(4, runs[4].start); + EXPECT_EQ(5, runs[4].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[4])); + EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[4].fakedFont.fakery.isFakeItalic()); } TEST_F(FontCollectionItemizeTest, itemize_variationSelector) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - // A glyph for U+4FAE is provided by both Japanese font and Simplified - // Chinese font. 
Also a glyph for U+242EE is provided by both Japanese and - // Traditional Chinese font. To avoid effects of device default locale, - // explicitly specify the locale. - FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans")); - FontStyle kZH_HantStyle = FontStyle(FontStyle::registerLanguageList("zh_Hant")); - - // U+4FAE is available in both zh_Hans and ja font, but U+4FAE,U+FE00 is - // only available in ja font. - itemize(collection, "U+4FAE", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - - itemize(collection, "U+4FAE U+FE00", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - itemize(collection, "U+4FAE U+4FAE U+FE00", kZH_HansStyle, &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(3, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - - itemize(collection, "U+4FAE U+4FAE U+FE00 U+4FAE", kZH_HansStyle, &runs); - ASSERT_EQ(3U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(3, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - EXPECT_EQ(3, runs[2].start); - EXPECT_EQ(4, runs[2].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[2])); - - // Validation selector after validation selector. - itemize(collection, "U+4FAE U+FE00 U+FE00", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - - // No font supports U+242EE U+FE0E. 
- itemize(collection, "U+4FAE U+FE0E", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - - // Surrogate pairs handling. - // U+242EE is available in ja font and zh_Hant font. - // U+242EE U+FE00 is available only in ja font. - itemize(collection, "U+242EE", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - - itemize(collection, "U+242EE U+FE00", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - itemize(collection, "U+242EE U+242EE U+FE00", kZH_HantStyle, &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - EXPECT_EQ(2, runs[1].start); - EXPECT_EQ(5, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - - itemize(collection, "U+242EE U+242EE U+FE00 U+242EE", kZH_HantStyle, &runs); - ASSERT_EQ(3U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - EXPECT_EQ(2, runs[1].start); - EXPECT_EQ(5, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - EXPECT_EQ(5, runs[2].start); - EXPECT_EQ(7, runs[2].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[2])); - - // Validation selector after validation selector. 
- itemize(collection, "U+242EE U+FE00 U+FE00", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - // No font supports U+242EE U+FE0E - itemize(collection, "U+242EE U+FE0E", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - - // Isolated variation selector supplement. - itemize(collection, "U+FE00", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0])); - - itemize(collection, "U+FE00", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0])); - - // First font family (Regular.ttf) supports U+203C but doesn't support U+203C U+FE0F. - // Emoji.ttf font supports U+203C U+FE0F. Emoji.ttf should be selected. - itemize(collection, "U+203C U+FE0F", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); - - // First font family (Regular.ttf) supports U+203C U+FE0E. - itemize(collection, "U+203C U+FE0E", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kLatinFont, getFontPath(runs[0])); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + // A glyph for U+4FAE is provided by both Japanese font and Simplified + // Chinese font. Also a glyph for U+242EE is provided by both Japanese and + // Traditional Chinese font. To avoid effects of device default locale, + // explicitly specify the locale. 
+ FontStyle kZH_HansStyle = + FontStyle(FontStyle::registerLanguageList("zh_Hans")); + FontStyle kZH_HantStyle = + FontStyle(FontStyle::registerLanguageList("zh_Hant")); + + // U+4FAE is available in both zh_Hans and ja font, but U+4FAE,U+FE00 is + // only available in ja font. + itemize(collection, "U+4FAE", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + itemize(collection, "U+4FAE U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection, "U+4FAE U+4FAE U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(3, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection, "U+4FAE U+4FAE U+FE00 U+4FAE", kZH_HansStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(3, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(3, runs[2].start); + EXPECT_EQ(4, runs[2].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[2])); + + // Validation selector after validation selector. + itemize(collection, "U+4FAE U+FE00 U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+4FAE U+FE0E. + itemize(collection, "U+4FAE U+FE0E", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + // Surrogate pairs handling. 
+ // U+242EE is available in ja font and zh_Hant font. + // U+242EE U+FE00 is available only in ja font. + itemize(collection, "U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + itemize(collection, "U+242EE U+FE00", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection, "U+242EE U+242EE U+FE00", kZH_HantStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(5, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection, "U+242EE U+242EE U+FE00 U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(5, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(5, runs[2].start); + EXPECT_EQ(7, runs[2].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[2])); + + // Validation selector after validation selector. + itemize(collection, "U+242EE U+FE00 U+FE00", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+242EE U+FE0E + itemize(collection, "U+242EE U+FE0E", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + // Isolated variation selector supplement. 
+ itemize(collection, "U+FE00", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_TRUE(runs[0].fakedFont.font == nullptr || + kLatinFont == getFontPath(runs[0])); + + itemize(collection, "U+FE00", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_TRUE(runs[0].fakedFont.font == nullptr || + kLatinFont == getFontPath(runs[0])); + + // First font family (Regular.ttf) supports U+203C but doesn't support U+203C + // U+FE0F. Emoji.ttf font supports U+203C U+FE0F. Emoji.ttf should be + // selected. + itemize(collection, "U+203C U+FE0F", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kEmojiFont, getFontPath(runs[0])); + + // First font family (Regular.ttf) supports U+203C U+FE0E. + itemize(collection, "U+203C U+FE0E", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kLatinFont, getFontPath(runs[0])); } TEST_F(FontCollectionItemizeTest, itemize_variationSelectorSupplement) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - // A glyph for U+845B is provided by both Japanese font and Simplified - // Chinese font. Also a glyph for U+242EE is provided by both Japanese and - // Traditional Chinese font. To avoid effects of device default locale, - // explicitly specify the locale. - FontStyle kZH_HansStyle = FontStyle(FontStyle::registerLanguageList("zh_Hans")); - FontStyle kZH_HantStyle = FontStyle(FontStyle::registerLanguageList("zh_Hant")); - - // U+845B is available in both zh_Hans and ja font, but U+845B,U+E0100 is - // only available in ja font. 
- itemize(collection, "U+845B", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - - itemize(collection, "U+845B U+E0100", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - itemize(collection, "U+845B U+845B U+E0100", kZH_HansStyle, &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(4, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - - itemize(collection, "U+845B U+845B U+E0100 U+845B", kZH_HansStyle, &runs); - ASSERT_EQ(3U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - EXPECT_EQ(1, runs[1].start); - EXPECT_EQ(4, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - EXPECT_EQ(4, runs[2].start); - EXPECT_EQ(5, runs[2].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[2])); - - // Validation selector after validation selector. - itemize(collection, "U+845B U+E0100 U+E0100", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - // No font supports U+845B U+E01E0. - itemize(collection, "U+845B U+E01E0", kZH_HansStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); - - // Isolated variation selector supplement - // Surrogate pairs handling. - // U+242EE is available in ja font and zh_Hant font. - // U+242EE U+E0100 is available only in ja font. 
- itemize(collection, "U+242EE", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - - itemize(collection, "U+242EE U+E0101", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - itemize(collection, "U+242EE U+242EE U+E0101", kZH_HantStyle, &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - EXPECT_EQ(2, runs[1].start); - EXPECT_EQ(6, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - - itemize(collection, "U+242EE U+242EE U+E0101 U+242EE", kZH_HantStyle, &runs); - ASSERT_EQ(3U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - EXPECT_EQ(2, runs[1].start); - EXPECT_EQ(6, runs[1].end); - EXPECT_EQ(kJAFont, getFontPath(runs[1])); - EXPECT_EQ(6, runs[2].start); - EXPECT_EQ(8, runs[2].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[2])); - - // Validation selector after validation selector. - itemize(collection, "U+242EE U+E0100 U+E0100", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(6, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - - // No font supports U+242EE U+E01E0. - itemize(collection, "U+242EE U+E01E0", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); - - // Isolated variation selector supplement. 
- itemize(collection, "U+E0100", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0])); - - itemize(collection, "U+E0100", kZH_HantStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_TRUE(runs[0].fakedFont.font == nullptr || kLatinFont == getFontPath(runs[0])); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + // A glyph for U+845B is provided by both Japanese font and Simplified + // Chinese font. Also a glyph for U+242EE is provided by both Japanese and + // Traditional Chinese font. To avoid effects of device default locale, + // explicitly specify the locale. + FontStyle kZH_HansStyle = + FontStyle(FontStyle::registerLanguageList("zh_Hans")); + FontStyle kZH_HantStyle = + FontStyle(FontStyle::registerLanguageList("zh_Hant")); + + // U+845B is available in both zh_Hans and ja font, but U+845B,U+E0100 is + // only available in ja font. 
+ itemize(collection, "U+845B", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + itemize(collection, "U+845B U+E0100", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection, "U+845B U+845B U+E0100", kZH_HansStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(4, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection, "U+845B U+845B U+E0100 U+845B", kZH_HansStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + EXPECT_EQ(1, runs[1].start); + EXPECT_EQ(4, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(4, runs[2].start); + EXPECT_EQ(5, runs[2].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[2])); + + // Validation selector after validation selector. + itemize(collection, "U+845B U+E0100 U+E0100", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+845B U+E01E0. + itemize(collection, "U+845B U+E01E0", kZH_HansStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kZH_HansFont, getFontPath(runs[0])); + + // Isolated variation selector supplement + // Surrogate pairs handling. + // U+242EE is available in ja font and zh_Hant font. + // U+242EE U+E0100 is available only in ja font. 
+ itemize(collection, "U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + itemize(collection, "U+242EE U+E0101", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + itemize(collection, "U+242EE U+242EE U+E0101", kZH_HantStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(6, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + + itemize(collection, "U+242EE U+242EE U+E0101 U+242EE", kZH_HantStyle, &runs); + ASSERT_EQ(3U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(6, runs[1].end); + EXPECT_EQ(kJAFont, getFontPath(runs[1])); + EXPECT_EQ(6, runs[2].start); + EXPECT_EQ(8, runs[2].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[2])); + + // Validation selector after validation selector. + itemize(collection, "U+242EE U+E0100 U+E0100", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(6, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + + // No font supports U+242EE U+E01E0. + itemize(collection, "U+242EE U+E01E0", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kZH_HantFont, getFontPath(runs[0])); + + // Isolated variation selector supplement. 
+ itemize(collection, "U+E0100", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_TRUE(runs[0].fakedFont.font == nullptr || + kLatinFont == getFontPath(runs[0])); + + itemize(collection, "U+E0100", kZH_HantStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_TRUE(runs[0].fakedFont.font == nullptr || + kLatinFont == getFontPath(runs[0])); } TEST_F(FontCollectionItemizeTest, itemize_no_crash) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - // Broken Surrogate pairs. Check only not crashing. - itemize(collection, "'a' U+D83D 'a'", FontStyle(), &runs); - itemize(collection, "'a' U+DC69 'a'", FontStyle(), &runs); - itemize(collection, "'a' U+D83D U+D83D 'a'", FontStyle(), &runs); - itemize(collection, "'a' U+DC69 U+DC69 'a'", FontStyle(), &runs); - - // Isolated variation selector. Check only not crashing. - itemize(collection, "U+FE00 U+FE00", FontStyle(), &runs); - itemize(collection, "U+E0100 U+E0100", FontStyle(), &runs); - itemize(collection, "U+FE00 U+E0100", FontStyle(), &runs); - itemize(collection, "U+E0100 U+FE00", FontStyle(), &runs); - - // Tone mark only. Check only not crashing. - itemize(collection, "U+302D", FontStyle(), &runs); - itemize(collection, "U+302D U+302D", FontStyle(), &runs); - - // Tone mark and variation selector mixed. Check only not crashing. - itemize(collection, "U+FE00 U+302D U+E0100", FontStyle(), &runs); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + // Broken Surrogate pairs. Check only not crashing. 
+ itemize(collection, "'a' U+D83D 'a'", FontStyle(), &runs); + itemize(collection, "'a' U+DC69 'a'", FontStyle(), &runs); + itemize(collection, "'a' U+D83D U+D83D 'a'", FontStyle(), &runs); + itemize(collection, "'a' U+DC69 U+DC69 'a'", FontStyle(), &runs); + + // Isolated variation selector. Check only not crashing. + itemize(collection, "U+FE00 U+FE00", FontStyle(), &runs); + itemize(collection, "U+E0100 U+E0100", FontStyle(), &runs); + itemize(collection, "U+FE00 U+E0100", FontStyle(), &runs); + itemize(collection, "U+E0100 U+FE00", FontStyle(), &runs); + + // Tone mark only. Check only not crashing. + itemize(collection, "U+302D", FontStyle(), &runs); + itemize(collection, "U+302D U+302D", FontStyle(), &runs); + + // Tone mark and variation selector mixed. Check only not crashing. + itemize(collection, "U+FE00 U+302D U+E0100", FontStyle(), &runs); } TEST_F(FontCollectionItemizeTest, itemize_fakery) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - std::vector runs; - - FontStyle kJABoldStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, false); - FontStyle kJAItalicStyle = FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 5, true); - FontStyle kJABoldItalicStyle = - FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, true); - - // Currently there is no italic or bold font for Japanese. FontFakery has - // the differences between desired and actual font style. - - // All Japanese Hiragana characters. - itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); - - // All Japanese Hiragana characters. 
- itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJAItalicStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic()); - - // All Japanese Hiragana characters. - itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldItalicStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kJAFont, getFontPath(runs[0])); - EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeBold()); - EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic()); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + std::vector runs; + + FontStyle kJABoldStyle = + FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, false); + FontStyle kJAItalicStyle = + FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 5, true); + FontStyle kJABoldItalicStyle = + FontStyle(FontStyle::registerLanguageList("ja_JP"), 0, 7, true); + + // Currently there is no italic or bold font for Japanese. FontFakery has + // the differences between desired and actual font style. + + // All Japanese Hiragana characters. + itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldStyle, + &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeItalic()); + + // All Japanese Hiragana characters. 
+ itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJAItalicStyle, + &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_FALSE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic()); + + // All Japanese Hiragana characters. + itemize(collection, "U+3042 U+3044 U+3046 U+3048 U+304A", kJABoldItalicStyle, + &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kJAFont, getFontPath(runs[0])); + EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeBold()); + EXPECT_TRUE(runs[0].fakedFont.fakery.isFakeItalic()); } TEST_F(FontCollectionItemizeTest, itemize_vs_sequence_but_no_base_char) { - // kVSTestFont supports U+717D U+FE02 but doesn't support U+717D. - // kVSTestFont should be selected for U+717D U+FE02 even if it does not support the base code - // point. - const std::string kVSTestFont = kTestFontDir "VariationSelectorTest-Regular.ttf"; - - std::vector> families; - std::shared_ptr font(new MinikinFontForTest(kLatinFont)); - std::shared_ptr family1(new FontFamily(VARIANT_DEFAULT, - std::vector{ Font(font, FontStyle()) })); - families.push_back(family1); - - std::shared_ptr font2(new MinikinFontForTest(kVSTestFont)); - std::shared_ptr family2(new FontFamily(VARIANT_DEFAULT, - std::vector{ Font(font2, FontStyle()) })); - families.push_back(family2); - - std::shared_ptr collection(new FontCollection(families)); - - std::vector runs; - - itemize(collection, "U+717D U+FE02", FontStyle(), &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kVSTestFont, getFontPath(runs[0])); + // kVSTestFont supports U+717D U+FE02 but doesn't support U+717D. + // kVSTestFont should be selected for U+717D U+FE02 even if it does not + // support the base code point. 
+ const std::string kVSTestFont = + kTestFontDir "VariationSelectorTest-Regular.ttf"; + + std::vector> families; + std::shared_ptr font(new MinikinFontForTest(kLatinFont)); + std::shared_ptr family1(new FontFamily( + VARIANT_DEFAULT, std::vector{Font(font, FontStyle())})); + families.push_back(family1); + + std::shared_ptr font2(new MinikinFontForTest(kVSTestFont)); + std::shared_ptr family2(new FontFamily( + VARIANT_DEFAULT, std::vector{Font(font2, FontStyle())})); + families.push_back(family2); + + std::shared_ptr collection(new FontCollection(families)); + + std::vector runs; + + itemize(collection, "U+717D U+FE02", FontStyle(), &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kVSTestFont, getFontPath(runs[0])); } TEST_F(FontCollectionItemizeTest, itemize_LanguageScore) { - struct TestCase { - std::string userPreferredLanguages; - std::vector fontLanguages; - int selectedFontIndex; - } testCases[] = { - // Font can specify empty language. - { "und", { "", "" }, 0 }, - { "und", { "", "en-Latn" }, 0 }, - { "en-Latn", { "", "" }, 0 }, - { "en-Latn", { "", "en-Latn" }, 1 }, - - // Single user preferred language. 
- // Exact match case - { "en-Latn", { "en-Latn", "ja-Jpan" }, 0 }, - { "ja-Jpan", { "en-Latn", "ja-Jpan" }, 1 }, - { "en-Latn", { "en-Latn", "nl-Latn", "es-Latn" }, 0 }, - { "nl-Latn", { "en-Latn", "nl-Latn", "es-Latn" }, 1 }, - { "es-Latn", { "en-Latn", "nl-Latn", "es-Latn" }, 2 }, - { "es-Latn", { "en-Latn", "en-Latn", "nl-Latn" }, 0 }, - - // Exact script match case - { "en-Latn", { "nl-Latn", "e-Latn" }, 0 }, - { "en-Arab", { "nl-Latn", "ar-Arab" }, 1 }, - { "en-Latn", { "be-Latn", "ar-Arab", "d-Beng" }, 0 }, - { "en-Arab", { "be-Latn", "ar-Arab", "d-Beng" }, 1 }, - { "en-Beng", { "be-Latn", "ar-Arab", "d-Beng" }, 2 }, - { "en-Beng", { "be-Latn", "ar-Beng", "d-Beng" }, 1 }, - { "zh-Hant", { "zh-Hant", "zh-Hans" }, 0 }, - { "zh-Hans", { "zh-Hant", "zh-Hans" }, 1 }, - - // Subscript match case, e.g. Jpan supports Hira. - { "en-Hira", { "ja-Jpan" }, 0 }, - { "zh-Hani", { "zh-Hans", "zh-Hant" }, 0 }, - { "zh-Hani", { "zh-Hant", "zh-Hans" }, 0 }, - { "en-Hira", { "zh-Hant", "ja-Jpan", "ja-Jpan" }, 1 }, - - // Language match case - { "ja-Latn", { "zh-Latn", "ja-Latn" }, 1 }, - { "zh-Latn", { "zh-Latn", "ja-Latn" }, 0 }, - { "ja-Latn", { "zh-Latn", "ja-Latn" }, 1 }, - { "ja-Latn", { "zh-Latn", "ja-Latn", "ja-Latn" }, 1 }, - - // Mixed case - // Script/subscript match is strongest. - { "ja-Jpan", { "en-Latn", "ja-Latn", "en-Jpan" }, 2 }, - { "ja-Hira", { "en-Latn", "ja-Latn", "en-Jpan" }, 2 }, - { "ja-Hira", { "en-Latn", "ja-Latn", "en-Jpan", "en-Jpan" }, 2 }, - - // Language match only happens if the script matches. - { "ja-Hira", { "en-Latn", "ja-Latn" }, 0 }, - { "ja-Hira", { "en-Jpan", "ja-Jpan" }, 1 }, - - // Multiple languages. - // Even if all fonts have the same score, use the 2nd language for better selection. 
- { "en-Latn,ja-Jpan", { "zh-Hant", "zh-Hans", "ja-Jpan" }, 2 }, - { "en-Latn,nl-Latn", { "es-Latn", "be-Latn", "nl-Latn" }, 2 }, - { "en-Latn,br-Latn,nl-Latn", { "es-Latn", "be-Latn", "nl-Latn" }, 2 }, - { "en-Latn,br-Latn,nl-Latn", { "es-Latn", "be-Latn", "nl-Latn", "nl-Latn" }, 2 }, - - // Script score. - { "en-Latn,ja-Jpan", { "en-Arab", "en-Jpan" }, 1 }, - { "en-Latn,ja-Jpan", { "en-Arab", "en-Jpan", "en-Jpan" }, 1 }, - - // Language match case - { "en-Latn,ja-Latn", { "bd-Latn", "ja-Latn" }, 1 }, - { "en-Latn,ja-Latn", { "bd-Latn", "ja-Latn", "ja-Latn" }, 1 }, - - // Language match only happens if the script matches. - { "en-Latn,ar-Arab", { "en-Beng", "ar-Arab" }, 1 }, - - // Multiple languages in the font settings. - { "ko-Jamo", { "ja-Jpan", "ko-Kore", "ko-Kore,ko-Jamo"}, 2 }, - { "en-Latn", { "ja-Jpan", "en-Latn,ja-Jpan"}, 1 }, - { "en-Latn", { "ja-Jpan", "ja-Jpan,en-Latn"}, 1 }, - { "en-Latn", { "ja-Jpan,zh-Hant", "en-Latn,ja-Jpan", "en-Latn"}, 1 }, - { "en-Latn", { "zh-Hant,ja-Jpan", "ja-Jpan,en-Latn", "en-Latn"}, 1 }, - - // Kore = Hang + Hani, etc. - { "ko-Kore", { "ko-Hang", "ko-Jamo,ko-Hani", "ko-Hang,ko-Hani"}, 2 }, - { "ja-Hrkt", { "ja-Hira", "ja-Kana", "ja-Hira,ja-Kana"}, 2 }, - { "ja-Jpan", { "ja-Hira", "ja-Kana", "ja-Hani", "ja-Hira,ja-Kana,ja-Hani"}, 3 }, - { "zh-Hanb", { "zh-Hant", "zh-Bopo", "zh-Hant,zh-Bopo"}, 2 }, - { "zh-Hanb", { "ja-Hanb", "zh-Hant,zh-Bopo"}, 1 }, - - // Language match with unified subscript bits. - { "zh-Hanb", { "zh-Hant", "zh-Bopo", "ja-Hant,ja-Bopo", "zh-Hant,zh-Bopo"}, 3 }, - { "zh-Hanb", { "zh-Hant", "zh-Bopo", "ja-Hant,zh-Bopo", "zh-Hant,zh-Bopo"}, 3 }, - - // Two elements subtag matching: language and subtag or language or script. - { "ja-Kana-u-em-emoji", { "zh-Hant", "ja-Kana"}, 1 }, - { "ja-Kana-u-em-emoji", { "zh-Hant", "ja-Kana", "ja-Zsye"}, 2 }, - { "ja-Zsym-u-em-emoji", { "ja-Kana", "ja-Zsym", "ja-Zsye"}, 2 }, - - // One element subtag matching: subtag only or script only. 
- { "en-Latn-u-em-emoji", { "ja-Latn", "ja-Zsye"}, 1 }, - { "en-Zsym-u-em-emoji", { "ja-Zsym", "ja-Zsye"}, 1 }, - { "en-Zsye-u-em-text", { "ja-Zsym", "ja-Zsye"}, 0 }, - - // Multiple languages list with subtags. - { "en-Latn,ja-Jpan-u-em-text", { "en-Latn", "en-Zsye", "en-Zsym"}, 0 }, - { "en-Latn,en-Zsye,ja-Jpan-u-em-text", { "zh", "en-Zsye", "en-Zsym"}, 1 }, - }; - - for (auto testCase : testCases) { - std::string fontLanguagesStr = "{"; - for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) { - if (i != 0) { - fontLanguagesStr += ", "; - } - fontLanguagesStr += "\"" + testCase.fontLanguages[i] + "\""; - } - fontLanguagesStr += "}"; - SCOPED_TRACE("Test of user preferred languages: \"" + testCase.userPreferredLanguages + - "\" with font languages: " + fontLanguagesStr); - - std::vector> families; - - // Prepare first font which doesn't supports U+9AA8 - std::shared_ptr firstFamilyMinikinFont( - new MinikinFontForTest(kNoGlyphFont)); - std::shared_ptr firstFamily(new FontFamily( - FontStyle::registerLanguageList("und"), 0 /* variant */, - std::vector({ Font(firstFamilyMinikinFont, FontStyle()) }))); - families.push_back(firstFamily); - - // Prepare font families - // Each font family is associated with a specified language. All font families except for - // the first font support U+9AA8. 
- std::unordered_map fontLangIdxMap; - - for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) { - std::shared_ptr minikin_font(new MinikinFontForTest(kJAFont)); - std::shared_ptr family(new FontFamily( - FontStyle::registerLanguageList(testCase.fontLanguages[i]), 0 /* variant */, - std::vector({ Font(minikin_font, FontStyle()) }))); - families.push_back(family); - fontLangIdxMap.insert(std::make_pair(minikin_font.get(), i)); - } - std::shared_ptr collection(new FontCollection(families)); - // Do itemize - const FontStyle style = FontStyle( - FontStyle::registerLanguageList(testCase.userPreferredLanguages)); - std::vector runs; - itemize(collection, "U+9AA8", style, &runs); - ASSERT_EQ(1U, runs.size()); - ASSERT_NE(nullptr, runs[0].fakedFont.font); - - // First family doesn't support U+9AA8 and others support it, so the first font should not - // be selected. - EXPECT_NE(firstFamilyMinikinFont.get(), runs[0].fakedFont.font); - - // Lookup used font family by MinikinFont*. - const int usedLangIndex = fontLangIdxMap[runs[0].fakedFont.font]; - EXPECT_EQ(testCase.selectedFontIndex, usedLangIndex); + struct TestCase { + std::string userPreferredLanguages; + std::vector fontLanguages; + int selectedFontIndex; + } testCases[] = { + // Font can specify empty language. + {"und", {"", ""}, 0}, + {"und", {"", "en-Latn"}, 0}, + {"en-Latn", {"", ""}, 0}, + {"en-Latn", {"", "en-Latn"}, 1}, + + // Single user preferred language. 
+ // Exact match case + {"en-Latn", {"en-Latn", "ja-Jpan"}, 0}, + {"ja-Jpan", {"en-Latn", "ja-Jpan"}, 1}, + {"en-Latn", {"en-Latn", "nl-Latn", "es-Latn"}, 0}, + {"nl-Latn", {"en-Latn", "nl-Latn", "es-Latn"}, 1}, + {"es-Latn", {"en-Latn", "nl-Latn", "es-Latn"}, 2}, + {"es-Latn", {"en-Latn", "en-Latn", "nl-Latn"}, 0}, + + // Exact script match case + {"en-Latn", {"nl-Latn", "e-Latn"}, 0}, + {"en-Arab", {"nl-Latn", "ar-Arab"}, 1}, + {"en-Latn", {"be-Latn", "ar-Arab", "d-Beng"}, 0}, + {"en-Arab", {"be-Latn", "ar-Arab", "d-Beng"}, 1}, + {"en-Beng", {"be-Latn", "ar-Arab", "d-Beng"}, 2}, + {"en-Beng", {"be-Latn", "ar-Beng", "d-Beng"}, 1}, + {"zh-Hant", {"zh-Hant", "zh-Hans"}, 0}, + {"zh-Hans", {"zh-Hant", "zh-Hans"}, 1}, + + // Subscript match case, e.g. Jpan supports Hira. + {"en-Hira", {"ja-Jpan"}, 0}, + {"zh-Hani", {"zh-Hans", "zh-Hant"}, 0}, + {"zh-Hani", {"zh-Hant", "zh-Hans"}, 0}, + {"en-Hira", {"zh-Hant", "ja-Jpan", "ja-Jpan"}, 1}, + + // Language match case + {"ja-Latn", {"zh-Latn", "ja-Latn"}, 1}, + {"zh-Latn", {"zh-Latn", "ja-Latn"}, 0}, + {"ja-Latn", {"zh-Latn", "ja-Latn"}, 1}, + {"ja-Latn", {"zh-Latn", "ja-Latn", "ja-Latn"}, 1}, + + // Mixed case + // Script/subscript match is strongest. + {"ja-Jpan", {"en-Latn", "ja-Latn", "en-Jpan"}, 2}, + {"ja-Hira", {"en-Latn", "ja-Latn", "en-Jpan"}, 2}, + {"ja-Hira", {"en-Latn", "ja-Latn", "en-Jpan", "en-Jpan"}, 2}, + + // Language match only happens if the script matches. + {"ja-Hira", {"en-Latn", "ja-Latn"}, 0}, + {"ja-Hira", {"en-Jpan", "ja-Jpan"}, 1}, + + // Multiple languages. + // Even if all fonts have the same score, use the 2nd language for better + // selection. + {"en-Latn,ja-Jpan", {"zh-Hant", "zh-Hans", "ja-Jpan"}, 2}, + {"en-Latn,nl-Latn", {"es-Latn", "be-Latn", "nl-Latn"}, 2}, + {"en-Latn,br-Latn,nl-Latn", {"es-Latn", "be-Latn", "nl-Latn"}, 2}, + {"en-Latn,br-Latn,nl-Latn", + {"es-Latn", "be-Latn", "nl-Latn", "nl-Latn"}, + 2}, + + // Script score. 
+ {"en-Latn,ja-Jpan", {"en-Arab", "en-Jpan"}, 1}, + {"en-Latn,ja-Jpan", {"en-Arab", "en-Jpan", "en-Jpan"}, 1}, + + // Language match case + {"en-Latn,ja-Latn", {"bd-Latn", "ja-Latn"}, 1}, + {"en-Latn,ja-Latn", {"bd-Latn", "ja-Latn", "ja-Latn"}, 1}, + + // Language match only happens if the script matches. + {"en-Latn,ar-Arab", {"en-Beng", "ar-Arab"}, 1}, + + // Multiple languages in the font settings. + {"ko-Jamo", {"ja-Jpan", "ko-Kore", "ko-Kore,ko-Jamo"}, 2}, + {"en-Latn", {"ja-Jpan", "en-Latn,ja-Jpan"}, 1}, + {"en-Latn", {"ja-Jpan", "ja-Jpan,en-Latn"}, 1}, + {"en-Latn", {"ja-Jpan,zh-Hant", "en-Latn,ja-Jpan", "en-Latn"}, 1}, + {"en-Latn", {"zh-Hant,ja-Jpan", "ja-Jpan,en-Latn", "en-Latn"}, 1}, + + // Kore = Hang + Hani, etc. + {"ko-Kore", {"ko-Hang", "ko-Jamo,ko-Hani", "ko-Hang,ko-Hani"}, 2}, + {"ja-Hrkt", {"ja-Hira", "ja-Kana", "ja-Hira,ja-Kana"}, 2}, + {"ja-Jpan", + {"ja-Hira", "ja-Kana", "ja-Hani", "ja-Hira,ja-Kana,ja-Hani"}, + 3}, + {"zh-Hanb", {"zh-Hant", "zh-Bopo", "zh-Hant,zh-Bopo"}, 2}, + {"zh-Hanb", {"ja-Hanb", "zh-Hant,zh-Bopo"}, 1}, + + // Language match with unified subscript bits. + {"zh-Hanb", + {"zh-Hant", "zh-Bopo", "ja-Hant,ja-Bopo", "zh-Hant,zh-Bopo"}, + 3}, + {"zh-Hanb", + {"zh-Hant", "zh-Bopo", "ja-Hant,zh-Bopo", "zh-Hant,zh-Bopo"}, + 3}, + + // Two elements subtag matching: language and subtag or language or + // script. + {"ja-Kana-u-em-emoji", {"zh-Hant", "ja-Kana"}, 1}, + {"ja-Kana-u-em-emoji", {"zh-Hant", "ja-Kana", "ja-Zsye"}, 2}, + {"ja-Zsym-u-em-emoji", {"ja-Kana", "ja-Zsym", "ja-Zsye"}, 2}, + + // One element subtag matching: subtag only or script only. + {"en-Latn-u-em-emoji", {"ja-Latn", "ja-Zsye"}, 1}, + {"en-Zsym-u-em-emoji", {"ja-Zsym", "ja-Zsye"}, 1}, + {"en-Zsye-u-em-text", {"ja-Zsym", "ja-Zsye"}, 0}, + + // Multiple languages list with subtags. 
+ {"en-Latn,ja-Jpan-u-em-text", {"en-Latn", "en-Zsye", "en-Zsym"}, 0}, + {"en-Latn,en-Zsye,ja-Jpan-u-em-text", {"zh", "en-Zsye", "en-Zsym"}, 1}, + }; + + for (auto testCase : testCases) { + std::string fontLanguagesStr = "{"; + for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) { + if (i != 0) { + fontLanguagesStr += ", "; + } + fontLanguagesStr += "\"" + testCase.fontLanguages[i] + "\""; } -} + fontLanguagesStr += "}"; + SCOPED_TRACE("Test of user preferred languages: \"" + + testCase.userPreferredLanguages + + "\" with font languages: " + fontLanguagesStr); -TEST_F(FontCollectionItemizeTest, itemize_LanguageAndCoverage) { - struct TestCase { - std::string testString; - std::string requestedLanguages; - std::string expectedFont; - } testCases[] = { - // Following test cases verify that following rules in font fallback chain. - // - If the first font in the collection supports the given character or variation sequence, - // it should be selected. - // - If the font doesn't support the given character, variation sequence or its base - // character, it should not be selected. - // - If two or more fonts match the requested languages, the font matches with the highest - // priority language should be selected. - // - If two or more fonts get the same score, the font listed earlier in the XML file - // (here, kItemizeFontXml) should be selected. - - // Regardless of language, the first font is always selected if it covers the code point. - { "'a'", "", kLatinFont}, - { "'a'", "en-Latn", kLatinFont}, - { "'a'", "ja-Jpan", kLatinFont}, - { "'a'", "ja-Jpan,en-Latn", kLatinFont}, - { "'a'", "zh-Hans,zh-Hant,en-Latn,ja-Jpan,fr-Latn", kLatinFont}, - - // U+81ED is supported by both the ja font and zh-Hans font. - { "U+81ED", "", kZH_HansFont }, // zh-Hans font is listed before ja font. - { "U+81ED", "en-Latn", kZH_HansFont }, // zh-Hans font is listed before ja font. 
- { "U+81ED", "ja-Jpan", kJAFont }, - { "U+81ED", "zh-Hans", kZH_HansFont }, - - { "U+81ED", "ja-Jpan,en-Latn", kJAFont }, - { "U+81ED", "en-Latn,ja-Jpan", kJAFont }, - { "U+81ED", "en-Latn,zh-Hans", kZH_HansFont }, - { "U+81ED", "zh-Hans,en-Latn", kZH_HansFont }, - { "U+81ED", "ja-Jpan,zh-Hans", kJAFont }, - { "U+81ED", "zh-Hans,ja-Jpan", kZH_HansFont }, - - { "U+81ED", "en-Latn,zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+81ED", "en-Latn,ja-Jpan,zh-Hans", kJAFont }, - { "U+81ED", "en-Latn,zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+81ED", "ja-Jpan,en-Latn,zh-Hans", kJAFont }, - { "U+81ED", "ja-Jpan,zh-Hans,en-Latn", kJAFont }, - { "U+81ED", "zh-Hans,en-Latn,ja-Jpan", kZH_HansFont }, - { "U+81ED", "zh-Hans,ja-Jpan,en-Latn", kZH_HansFont }, - - // U+304A is only supported by ja font. - { "U+304A", "", kJAFont }, - { "U+304A", "ja-Jpan", kJAFont }, - { "U+304A", "zh-Hant", kJAFont }, - { "U+304A", "zh-Hans", kJAFont }, - - { "U+304A", "ja-Jpan,zh-Hant", kJAFont }, - { "U+304A", "zh-Hant,ja-Jpan", kJAFont }, - { "U+304A", "zh-Hans,zh-Hant", kJAFont }, - { "U+304A", "zh-Hant,zh-Hans", kJAFont }, - { "U+304A", "zh-Hans,ja-Jpan", kJAFont }, - { "U+304A", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+304A", "zh-Hans,ja-Jpan,zh-Hant", kJAFont }, - { "U+304A", "zh-Hans,zh-Hant,ja-Jpan", kJAFont }, - { "U+304A", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+304A", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+304A", "zh-Hant,zh-Hans,ja-Jpan", kJAFont }, - { "U+304A", "zh-Hant,ja-Jpan,zh-Hans", kJAFont }, - - // U+242EE is supported by both ja font and zh-Hant fonts but not by zh-Hans font. - { "U+242EE", "", kJAFont }, // ja font is listed before zh-Hant font. 
- { "U+242EE", "ja-Jpan", kJAFont }, - { "U+242EE", "zh-Hans", kJAFont }, - { "U+242EE", "zh-Hant", kZH_HantFont }, - - { "U+242EE", "ja-Jpan,zh-Hant", kJAFont }, - { "U+242EE", "zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+242EE", "zh-Hans,zh-Hant", kZH_HantFont }, - { "U+242EE", "zh-Hant,zh-Hans", kZH_HantFont }, - { "U+242EE", "zh-Hans,ja-Jpan", kJAFont }, - { "U+242EE", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+242EE", "zh-Hans,ja-Jpan,zh-Hant", kJAFont }, - { "U+242EE", "zh-Hans,zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+242EE", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+242EE", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+242EE", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont }, - { "U+242EE", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont }, - - // U+9AA8 is supported by all ja-Jpan, zh-Hans, zh-Hant fonts. - { "U+9AA8", "", kZH_HansFont }, // zh-Hans font is listed before ja and zh-Hant fonts. - { "U+9AA8", "ja-Jpan", kJAFont }, - { "U+9AA8", "zh-Hans", kZH_HansFont }, - { "U+9AA8", "zh-Hant", kZH_HantFont }, - - { "U+9AA8", "ja-Jpan,zh-Hant", kJAFont }, - { "U+9AA8", "zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+9AA8", "zh-Hans,zh-Hant", kZH_HansFont }, - { "U+9AA8", "zh-Hant,zh-Hans", kZH_HantFont }, - { "U+9AA8", "zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+9AA8", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+9AA8", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+9AA8", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+9AA8", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+9AA8", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+9AA8", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont }, - { "U+9AA8", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont }, - - // U+242EE U+FE00 is supported by ja font but not by zh-Hans or zh-Hant fonts. 
- { "U+242EE U+FE00", "", kJAFont }, - { "U+242EE U+FE00", "ja-Jpan", kJAFont }, - { "U+242EE U+FE00", "zh-Hant", kJAFont }, - { "U+242EE U+FE00", "zh-Hans", kJAFont }, - - { "U+242EE U+FE00", "ja-Jpan,zh-Hant", kJAFont }, - { "U+242EE U+FE00", "zh-Hant,ja-Jpan", kJAFont }, - { "U+242EE U+FE00", "zh-Hans,zh-Hant", kJAFont }, - { "U+242EE U+FE00", "zh-Hant,zh-Hans", kJAFont }, - { "U+242EE U+FE00", "zh-Hans,ja-Jpan", kJAFont }, - { "U+242EE U+FE00", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+242EE U+FE00", "zh-Hans,ja-Jpan,zh-Hant", kJAFont }, - { "U+242EE U+FE00", "zh-Hans,zh-Hant,ja-Jpan", kJAFont }, - { "U+242EE U+FE00", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+242EE U+FE00", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+242EE U+FE00", "zh-Hant,zh-Hans,ja-Jpan", kJAFont }, - { "U+242EE U+FE00", "zh-Hant,ja-Jpan,zh-Hans", kJAFont }, - - // U+3402 U+E0100 is supported by both zh-Hans and zh-Hant but not by ja font. - { "U+3402 U+E0100", "", kZH_HansFont }, // zh-Hans font is listed before zh-Hant font. - { "U+3402 U+E0100", "ja-Jpan", kZH_HansFont }, // zh-Hans font is listed before zh-Hant font. 
- { "U+3402 U+E0100", "zh-Hant", kZH_HantFont }, - { "U+3402 U+E0100", "zh-Hans", kZH_HansFont }, - - { "U+3402 U+E0100", "ja-Jpan,zh-Hant", kZH_HantFont }, - { "U+3402 U+E0100", "zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+3402 U+E0100", "zh-Hans,zh-Hant", kZH_HansFont }, - { "U+3402 U+E0100", "zh-Hant,zh-Hans", kZH_HantFont }, - { "U+3402 U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+3402 U+E0100", "ja-Jpan,zh-Hans", kZH_HansFont }, - - { "U+3402 U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+3402 U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+3402 U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kZH_HansFont }, - { "U+3402 U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kZH_HantFont }, - { "U+3402 U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont }, - { "U+3402 U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont }, - - // No font supports U+4444 U+FE00 but only zh-Hans supports its base character U+4444. - { "U+4444 U+FE00", "", kZH_HansFont }, - { "U+4444 U+FE00", "ja-Jpan", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hant", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hans", kZH_HansFont }, - - { "U+4444 U+FE00", "ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hans,zh-Hant", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hant,zh-Hans", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+4444 U+FE00", "ja-Jpan,zh-Hans", kZH_HansFont }, - - { "U+4444 U+FE00", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+4444 U+FE00", "ja-Jpan,zh-Hans,zh-Hant", kZH_HansFont }, - { "U+4444 U+FE00", "ja-Jpan,zh-Hant,zh-Hans", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hant,zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+4444 U+FE00", "zh-Hant,ja-Jpan,zh-Hans", kZH_HansFont }, - - // No font supports U+81ED U+E0100 but ja and zh-Hans support its base character U+81ED. - // zh-Hans font is listed before ja font. 
- { "U+81ED U+E0100", "", kZH_HansFont }, - { "U+81ED U+E0100", "ja-Jpan", kJAFont }, - { "U+81ED U+E0100", "zh-Hant", kZH_HansFont }, - { "U+81ED U+E0100", "zh-Hans", kZH_HansFont }, - - { "U+81ED U+E0100", "ja-Jpan,zh-Hant", kJAFont }, - { "U+81ED U+E0100", "zh-Hant,ja-Jpan", kJAFont }, - { "U+81ED U+E0100", "zh-Hans,zh-Hant", kZH_HansFont }, - { "U+81ED U+E0100", "zh-Hant,zh-Hans", kZH_HansFont }, - { "U+81ED U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+81ED U+E0100", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+81ED U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+81ED U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+81ED U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+81ED U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+81ED U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+81ED U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kJAFont }, - - // No font supports U+9AA8 U+E0100 but all zh-Hans zh-hant ja fonts support its base - // character U+9AA8. - // zh-Hans font is listed before ja and zh-Hant fonts. 
- { "U+9AA8 U+E0100", "", kZH_HansFont }, - { "U+9AA8 U+E0100", "ja-Jpan", kJAFont }, - { "U+9AA8 U+E0100", "zh-Hans", kZH_HansFont }, - { "U+9AA8 U+E0100", "zh-Hant", kZH_HantFont }, - - { "U+9AA8 U+E0100", "ja-Jpan,zh-Hant", kJAFont }, - { "U+9AA8 U+E0100", "zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+9AA8 U+E0100", "zh-Hans,zh-Hant", kZH_HansFont }, - { "U+9AA8 U+E0100", "zh-Hant,zh-Hans", kZH_HantFont }, - { "U+9AA8 U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+9AA8 U+E0100", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+9AA8 U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+9AA8 U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+9AA8 U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+9AA8 U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+9AA8 U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont }, - { "U+9AA8 U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont }, - - // All zh-Hans,zh-Hant,ja fonts support U+35A8 U+E0100 and its base character U+35A8. - // zh-Hans font is listed before ja and zh-Hant fonts. - { "U+35A8", "", kZH_HansFont }, - { "U+35A8", "ja-Jpan", kJAFont }, - { "U+35A8", "zh-Hans", kZH_HansFont }, - { "U+35A8", "zh-Hant", kZH_HantFont }, - - { "U+35A8", "ja-Jpan,zh-Hant", kJAFont }, - { "U+35A8", "zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+35A8", "zh-Hans,zh-Hant", kZH_HansFont }, - { "U+35A8", "zh-Hant,zh-Hans", kZH_HantFont }, - { "U+35A8", "zh-Hans,ja-Jpan", kZH_HansFont }, - { "U+35A8", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+35A8", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont }, - { "U+35A8", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont }, - { "U+35A8", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+35A8", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+35A8", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont }, - { "U+35A8", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont }, - - // All zh-Hans,zh-Hant,ja fonts support U+35B6 U+E0100, but zh-Hant and ja fonts support its - // base character U+35B6. - // ja font is listed before zh-Hant font. 
- { "U+35B6", "", kJAFont }, - { "U+35B6", "ja-Jpan", kJAFont }, - { "U+35B6", "zh-Hant", kZH_HantFont }, - { "U+35B6", "zh-Hans", kJAFont }, - - { "U+35B6", "ja-Jpan,zh-Hant", kJAFont }, - { "U+35B6", "zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+35B6", "zh-Hans,zh-Hant", kZH_HantFont }, - { "U+35B6", "zh-Hant,zh-Hans", kZH_HantFont }, - { "U+35B6", "zh-Hans,ja-Jpan", kJAFont }, - { "U+35B6", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+35B6", "zh-Hans,ja-Jpan,zh-Hant", kJAFont }, - { "U+35B6", "zh-Hans,zh-Hant,ja-Jpan", kZH_HantFont }, - { "U+35B6", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+35B6", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+35B6", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont }, - { "U+35B6", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont }, - - // All zh-Hans,zh-Hant,ja fonts support U+35C5 U+E0100, but only ja font supports its base - // character U+35C5. - { "U+35C5", "", kJAFont }, - { "U+35C5", "ja-Jpan", kJAFont }, - { "U+35C5", "zh-Hant", kJAFont }, - { "U+35C5", "zh-Hans", kJAFont }, - - { "U+35C5", "ja-Jpan,zh-Hant", kJAFont }, - { "U+35C5", "zh-Hant,ja-Jpan", kJAFont }, - { "U+35C5", "zh-Hans,zh-Hant", kJAFont }, - { "U+35C5", "zh-Hant,zh-Hans", kJAFont }, - { "U+35C5", "zh-Hans,ja-Jpan", kJAFont }, - { "U+35C5", "ja-Jpan,zh-Hans", kJAFont }, - - { "U+35C5", "zh-Hans,ja-Jpan,zh-Hant", kJAFont }, - { "U+35C5", "zh-Hans,zh-Hant,ja-Jpan", kJAFont }, - { "U+35C5", "ja-Jpan,zh-Hans,zh-Hant", kJAFont }, - { "U+35C5", "ja-Jpan,zh-Hant,zh-Hans", kJAFont }, - { "U+35C5", "zh-Hant,zh-Hans,ja-Jpan", kJAFont }, - { "U+35C5", "zh-Hant,ja-Jpan,zh-Hans", kJAFont }, - - // None of ja-Jpan, zh-Hant, zh-Hans font supports U+1F469. Emoji font supports it. 
- { "U+1F469", "", kEmojiFont }, - { "U+1F469", "ja-Jpan", kEmojiFont }, - { "U+1F469", "zh-Hant", kEmojiFont }, - { "U+1F469", "zh-Hans", kEmojiFont }, - - { "U+1F469", "ja-Jpan,zh-Hant", kEmojiFont }, - { "U+1F469", "zh-Hant,ja-Jpan", kEmojiFont }, - { "U+1F469", "zh-Hans,zh-Hant", kEmojiFont }, - { "U+1F469", "zh-Hant,zh-Hans", kEmojiFont }, - { "U+1F469", "zh-Hans,ja-Jpan", kEmojiFont }, - { "U+1F469", "ja-Jpan,zh-Hans", kEmojiFont }, - - { "U+1F469", "zh-Hans,ja-Jpan,zh-Hant", kEmojiFont }, - { "U+1F469", "zh-Hans,zh-Hant,ja-Jpan", kEmojiFont }, - { "U+1F469", "ja-Jpan,zh-Hans,zh-Hant", kEmojiFont }, - { "U+1F469", "ja-Jpan,zh-Hant,zh-Hans", kEmojiFont }, - { "U+1F469", "zh-Hant,zh-Hans,ja-Jpan", kEmojiFont }, - { "U+1F469", "zh-Hant,ja-Jpan,zh-Hans", kEmojiFont }, - }; - - std::shared_ptr collection(getFontCollection(kTestFontDir, kItemizeFontXml)); - - for (auto testCase : testCases) { - SCOPED_TRACE("Test for \"" + testCase.testString + "\" with languages " + - testCase.requestedLanguages); - - std::vector runs; - const FontStyle style = - FontStyle(FontStyle::registerLanguageList(testCase.requestedLanguages)); - itemize(collection, testCase.testString.c_str(), style, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(testCase.expectedFont, getFontPath(runs[0])); - } -} + std::vector> families; -TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0E) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); + // Prepare first font which doesn't support U+9AA8 + std::shared_ptr firstFamilyMinikinFont( + new MinikinFontForTest(kNoGlyphFont)); + std::shared_ptr firstFamily(new FontFamily( + FontStyle::registerLanguageList("und"), 0 /* variant */, + std::vector({Font(firstFamilyMinikinFont, FontStyle())}))); + families.push_back(firstFamily); + + // Prepare font families + // Each font family is associated with a specified language. All font + // families except for the first font support U+9AA8.
+ std::unordered_map fontLangIdxMap; + + for (size_t i = 0; i < testCase.fontLanguages.size(); ++i) { + std::shared_ptr minikin_font( + new MinikinFontForTest(kJAFont)); + std::shared_ptr family(new FontFamily( + FontStyle::registerLanguageList(testCase.fontLanguages[i]), + 0 /* variant */, + std::vector({Font(minikin_font, FontStyle())}))); + families.push_back(family); + fontLangIdxMap.insert(std::make_pair(minikin_font.get(), i)); + } + std::shared_ptr collection(new FontCollection(families)); + // Do itemize + const FontStyle style = FontStyle( + FontStyle::registerLanguageList(testCase.userPreferredLanguages)); std::vector runs; - - const FontStyle kDefaultFontStyle; - - // U+00A9 is a text default emoji which is only available in TextEmojiFont.ttf. - // TextEmojiFont.ttf should be selected. - itemize(collection, "U+00A9 U+FE0E", kDefaultFontStyle, &runs); + itemize(collection, "U+9AA8", style, &runs); ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + ASSERT_NE(nullptr, runs[0].fakedFont.font); - // U+00A9 is a text default emoji which is only available in ColorEmojiFont.ttf. - // ColorEmojiFont.ttf should be selected. - itemize(collection, "U+00AE U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - // Text emoji is specified but it is not available. Use color emoji instead. - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // U+203C is a text default emoji which is available in both TextEmojiFont.ttf and - // ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected. - itemize(collection, "U+203C U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); - - // U+2049 is a text default emoji which is not available either TextEmojiFont.ttf or - // ColorEmojiFont.ttf. 
No font should be selected. - itemize(collection, "U+2049 U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + // First family doesn't support U+9AA8 and others support it, so the first + // font should not be selected. + EXPECT_NE(firstFamilyMinikinFont.get(), runs[0].fakedFont.font); - // U+231A is a emoji default emoji which is available only in TextEmojifFont. - // TextEmojiFont.ttf sohuld be selected. - itemize(collection, "U+231A U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + // Lookup used font family by MinikinFont*. + const int usedLangIndex = fontLangIdxMap[runs[0].fakedFont.font]; + EXPECT_EQ(testCase.selectedFontIndex, usedLangIndex); + } +} - // U+231B is a emoji default emoji which is available only in ColorEmojiFont.ttf. - // ColorEmojiFont.ttf should be selected. - itemize(collection, "U+231B U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - // Text emoji is specified but it is not available. Use color emoji instead. - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // U+23E9 is a emoji default emoji which is available in both TextEmojiFont.ttf and - // ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected even if U+23E9 is emoji default - // emoji since U+FE0E is appended. 
- itemize(collection, "U+23E9 U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); +TEST_F(FontCollectionItemizeTest, itemize_LanguageAndCoverage) { + struct TestCase { + std::string testString; + std::string requestedLanguages; + std::string expectedFont; + } testCases[] = { + // Following test cases verify that following rules in font fallback + // chain. + // - If the first font in the collection supports the given character or + // variation sequence, + // it should be selected. + // - If the font doesn't support the given character, variation sequence + // or its base + // character, it should not be selected. + // - If two or more fonts match the requested languages, the font matches + // with the highest + // priority language should be selected. + // - If two or more fonts get the same score, the font listed earlier in + // the XML file + // (here, kItemizeFontXml) should be selected. + + // Regardless of language, the first font is always selected if it covers + // the code point. + {"'a'", "", kLatinFont}, + {"'a'", "en-Latn", kLatinFont}, + {"'a'", "ja-Jpan", kLatinFont}, + {"'a'", "ja-Jpan,en-Latn", kLatinFont}, + {"'a'", "zh-Hans,zh-Hant,en-Latn,ja-Jpan,fr-Latn", kLatinFont}, + + // U+81ED is supported by both the ja font and zh-Hans font. + {"U+81ED", "", kZH_HansFont}, // zh-Hans font is listed before ja font. + {"U+81ED", "en-Latn", + kZH_HansFont}, // zh-Hans font is listed before ja font. 
+ {"U+81ED", "ja-Jpan", kJAFont}, + {"U+81ED", "zh-Hans", kZH_HansFont}, + + {"U+81ED", "ja-Jpan,en-Latn", kJAFont}, + {"U+81ED", "en-Latn,ja-Jpan", kJAFont}, + {"U+81ED", "en-Latn,zh-Hans", kZH_HansFont}, + {"U+81ED", "zh-Hans,en-Latn", kZH_HansFont}, + {"U+81ED", "ja-Jpan,zh-Hans", kJAFont}, + {"U+81ED", "zh-Hans,ja-Jpan", kZH_HansFont}, + + {"U+81ED", "en-Latn,zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+81ED", "en-Latn,ja-Jpan,zh-Hans", kJAFont}, + {"U+81ED", "en-Latn,zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+81ED", "ja-Jpan,en-Latn,zh-Hans", kJAFont}, + {"U+81ED", "ja-Jpan,zh-Hans,en-Latn", kJAFont}, + {"U+81ED", "zh-Hans,en-Latn,ja-Jpan", kZH_HansFont}, + {"U+81ED", "zh-Hans,ja-Jpan,en-Latn", kZH_HansFont}, + + // U+304A is only supported by ja font. + {"U+304A", "", kJAFont}, + {"U+304A", "ja-Jpan", kJAFont}, + {"U+304A", "zh-Hant", kJAFont}, + {"U+304A", "zh-Hans", kJAFont}, + + {"U+304A", "ja-Jpan,zh-Hant", kJAFont}, + {"U+304A", "zh-Hant,ja-Jpan", kJAFont}, + {"U+304A", "zh-Hans,zh-Hant", kJAFont}, + {"U+304A", "zh-Hant,zh-Hans", kJAFont}, + {"U+304A", "zh-Hans,ja-Jpan", kJAFont}, + {"U+304A", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+304A", "zh-Hans,ja-Jpan,zh-Hant", kJAFont}, + {"U+304A", "zh-Hans,zh-Hant,ja-Jpan", kJAFont}, + {"U+304A", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+304A", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+304A", "zh-Hant,zh-Hans,ja-Jpan", kJAFont}, + {"U+304A", "zh-Hant,ja-Jpan,zh-Hans", kJAFont}, + + // U+242EE is supported by both ja font and zh-Hant fonts but not by + // zh-Hans font. + {"U+242EE", "", kJAFont}, // ja font is listed before zh-Hant font. 
+ {"U+242EE", "ja-Jpan", kJAFont}, + {"U+242EE", "zh-Hans", kJAFont}, + {"U+242EE", "zh-Hant", kZH_HantFont}, + + {"U+242EE", "ja-Jpan,zh-Hant", kJAFont}, + {"U+242EE", "zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+242EE", "zh-Hans,zh-Hant", kZH_HantFont}, + {"U+242EE", "zh-Hant,zh-Hans", kZH_HantFont}, + {"U+242EE", "zh-Hans,ja-Jpan", kJAFont}, + {"U+242EE", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+242EE", "zh-Hans,ja-Jpan,zh-Hant", kJAFont}, + {"U+242EE", "zh-Hans,zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+242EE", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+242EE", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+242EE", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont}, + {"U+242EE", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont}, + + // U+9AA8 is supported by all ja-Jpan, zh-Hans, zh-Hant fonts. + {"U+9AA8", "", + kZH_HansFont}, // zh-Hans font is listed before ja and zh-Hant fonts. + {"U+9AA8", "ja-Jpan", kJAFont}, + {"U+9AA8", "zh-Hans", kZH_HansFont}, + {"U+9AA8", "zh-Hant", kZH_HantFont}, + + {"U+9AA8", "ja-Jpan,zh-Hant", kJAFont}, + {"U+9AA8", "zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+9AA8", "zh-Hans,zh-Hant", kZH_HansFont}, + {"U+9AA8", "zh-Hant,zh-Hans", kZH_HantFont}, + {"U+9AA8", "zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+9AA8", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+9AA8", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+9AA8", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+9AA8", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+9AA8", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+9AA8", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont}, + {"U+9AA8", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont}, + + // U+242EE U+FE00 is supported by ja font but not by zh-Hans or zh-Hant + // fonts. 
+ {"U+242EE U+FE00", "", kJAFont}, + {"U+242EE U+FE00", "ja-Jpan", kJAFont}, + {"U+242EE U+FE00", "zh-Hant", kJAFont}, + {"U+242EE U+FE00", "zh-Hans", kJAFont}, + + {"U+242EE U+FE00", "ja-Jpan,zh-Hant", kJAFont}, + {"U+242EE U+FE00", "zh-Hant,ja-Jpan", kJAFont}, + {"U+242EE U+FE00", "zh-Hans,zh-Hant", kJAFont}, + {"U+242EE U+FE00", "zh-Hant,zh-Hans", kJAFont}, + {"U+242EE U+FE00", "zh-Hans,ja-Jpan", kJAFont}, + {"U+242EE U+FE00", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+242EE U+FE00", "zh-Hans,ja-Jpan,zh-Hant", kJAFont}, + {"U+242EE U+FE00", "zh-Hans,zh-Hant,ja-Jpan", kJAFont}, + {"U+242EE U+FE00", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+242EE U+FE00", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+242EE U+FE00", "zh-Hant,zh-Hans,ja-Jpan", kJAFont}, + {"U+242EE U+FE00", "zh-Hant,ja-Jpan,zh-Hans", kJAFont}, + + // U+3402 U+E0100 is supported by both zh-Hans and zh-Hant but not by ja + // font. + {"U+3402 U+E0100", "", + kZH_HansFont}, // zh-Hans font is listed before zh-Hant font. + {"U+3402 U+E0100", "ja-Jpan", + kZH_HansFont}, // zh-Hans font is listed before zh-Hant font. + {"U+3402 U+E0100", "zh-Hant", kZH_HantFont}, + {"U+3402 U+E0100", "zh-Hans", kZH_HansFont}, + + {"U+3402 U+E0100", "ja-Jpan,zh-Hant", kZH_HantFont}, + {"U+3402 U+E0100", "zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+3402 U+E0100", "zh-Hans,zh-Hant", kZH_HansFont}, + {"U+3402 U+E0100", "zh-Hant,zh-Hans", kZH_HantFont}, + {"U+3402 U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+3402 U+E0100", "ja-Jpan,zh-Hans", kZH_HansFont}, + + {"U+3402 U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+3402 U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+3402 U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kZH_HansFont}, + {"U+3402 U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kZH_HantFont}, + {"U+3402 U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont}, + {"U+3402 U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont}, + + // No font supports U+4444 U+FE00 but only zh-Hans supports its base + // character U+4444. 
+ {"U+4444 U+FE00", "", kZH_HansFont}, + {"U+4444 U+FE00", "ja-Jpan", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hant", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hans", kZH_HansFont}, + + {"U+4444 U+FE00", "ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hans,zh-Hant", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hant,zh-Hans", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+4444 U+FE00", "ja-Jpan,zh-Hans", kZH_HansFont}, + + {"U+4444 U+FE00", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+4444 U+FE00", "ja-Jpan,zh-Hans,zh-Hant", kZH_HansFont}, + {"U+4444 U+FE00", "ja-Jpan,zh-Hant,zh-Hans", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hant,zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+4444 U+FE00", "zh-Hant,ja-Jpan,zh-Hans", kZH_HansFont}, + + // No font supports U+81ED U+E0100 but ja and zh-Hans support its base + // character U+81ED. + // zh-Hans font is listed before ja font. + {"U+81ED U+E0100", "", kZH_HansFont}, + {"U+81ED U+E0100", "ja-Jpan", kJAFont}, + {"U+81ED U+E0100", "zh-Hant", kZH_HansFont}, + {"U+81ED U+E0100", "zh-Hans", kZH_HansFont}, + + {"U+81ED U+E0100", "ja-Jpan,zh-Hant", kJAFont}, + {"U+81ED U+E0100", "zh-Hant,ja-Jpan", kJAFont}, + {"U+81ED U+E0100", "zh-Hans,zh-Hant", kZH_HansFont}, + {"U+81ED U+E0100", "zh-Hant,zh-Hans", kZH_HansFont}, + {"U+81ED U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+81ED U+E0100", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+81ED U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+81ED U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+81ED U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+81ED U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+81ED U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+81ED U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kJAFont}, + + // No font supports U+9AA8 U+E0100 but all zh-Hans zh-hant ja fonts + // support its base + // character U+9AA8. 
+ // zh-Hans font is listed before ja and zh-Hant fonts. + {"U+9AA8 U+E0100", "", kZH_HansFont}, + {"U+9AA8 U+E0100", "ja-Jpan", kJAFont}, + {"U+9AA8 U+E0100", "zh-Hans", kZH_HansFont}, + {"U+9AA8 U+E0100", "zh-Hant", kZH_HantFont}, + + {"U+9AA8 U+E0100", "ja-Jpan,zh-Hant", kJAFont}, + {"U+9AA8 U+E0100", "zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+9AA8 U+E0100", "zh-Hans,zh-Hant", kZH_HansFont}, + {"U+9AA8 U+E0100", "zh-Hant,zh-Hans", kZH_HantFont}, + {"U+9AA8 U+E0100", "zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+9AA8 U+E0100", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+9AA8 U+E0100", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+9AA8 U+E0100", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+9AA8 U+E0100", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+9AA8 U+E0100", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+9AA8 U+E0100", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont}, + {"U+9AA8 U+E0100", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont}, + + // All zh-Hans,zh-Hant,ja fonts support U+35A8 U+E0100 and its base + // character U+35A8. + // zh-Hans font is listed before ja and zh-Hant fonts. + {"U+35A8", "", kZH_HansFont}, + {"U+35A8", "ja-Jpan", kJAFont}, + {"U+35A8", "zh-Hans", kZH_HansFont}, + {"U+35A8", "zh-Hant", kZH_HantFont}, + + {"U+35A8", "ja-Jpan,zh-Hant", kJAFont}, + {"U+35A8", "zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+35A8", "zh-Hans,zh-Hant", kZH_HansFont}, + {"U+35A8", "zh-Hant,zh-Hans", kZH_HantFont}, + {"U+35A8", "zh-Hans,ja-Jpan", kZH_HansFont}, + {"U+35A8", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+35A8", "zh-Hans,ja-Jpan,zh-Hant", kZH_HansFont}, + {"U+35A8", "zh-Hans,zh-Hant,ja-Jpan", kZH_HansFont}, + {"U+35A8", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+35A8", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+35A8", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont}, + {"U+35A8", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont}, + + // All zh-Hans,zh-Hant,ja fonts support U+35B6 U+E0100, but zh-Hant and ja + // fonts support its + // base character U+35B6. + // ja font is listed before zh-Hant font. 
+ {"U+35B6", "", kJAFont}, + {"U+35B6", "ja-Jpan", kJAFont}, + {"U+35B6", "zh-Hant", kZH_HantFont}, + {"U+35B6", "zh-Hans", kJAFont}, + + {"U+35B6", "ja-Jpan,zh-Hant", kJAFont}, + {"U+35B6", "zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+35B6", "zh-Hans,zh-Hant", kZH_HantFont}, + {"U+35B6", "zh-Hant,zh-Hans", kZH_HantFont}, + {"U+35B6", "zh-Hans,ja-Jpan", kJAFont}, + {"U+35B6", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+35B6", "zh-Hans,ja-Jpan,zh-Hant", kJAFont}, + {"U+35B6", "zh-Hans,zh-Hant,ja-Jpan", kZH_HantFont}, + {"U+35B6", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+35B6", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+35B6", "zh-Hant,zh-Hans,ja-Jpan", kZH_HantFont}, + {"U+35B6", "zh-Hant,ja-Jpan,zh-Hans", kZH_HantFont}, + + // All zh-Hans,zh-Hant,ja fonts support U+35C5 U+E0100, but only ja font + // supports its base + // character U+35C5. + {"U+35C5", "", kJAFont}, + {"U+35C5", "ja-Jpan", kJAFont}, + {"U+35C5", "zh-Hant", kJAFont}, + {"U+35C5", "zh-Hans", kJAFont}, + + {"U+35C5", "ja-Jpan,zh-Hant", kJAFont}, + {"U+35C5", "zh-Hant,ja-Jpan", kJAFont}, + {"U+35C5", "zh-Hans,zh-Hant", kJAFont}, + {"U+35C5", "zh-Hant,zh-Hans", kJAFont}, + {"U+35C5", "zh-Hans,ja-Jpan", kJAFont}, + {"U+35C5", "ja-Jpan,zh-Hans", kJAFont}, + + {"U+35C5", "zh-Hans,ja-Jpan,zh-Hant", kJAFont}, + {"U+35C5", "zh-Hans,zh-Hant,ja-Jpan", kJAFont}, + {"U+35C5", "ja-Jpan,zh-Hans,zh-Hant", kJAFont}, + {"U+35C5", "ja-Jpan,zh-Hant,zh-Hans", kJAFont}, + {"U+35C5", "zh-Hant,zh-Hans,ja-Jpan", kJAFont}, + {"U+35C5", "zh-Hant,ja-Jpan,zh-Hans", kJAFont}, + + // None of ja-Jpan, zh-Hant, zh-Hans font supports U+1F469. Emoji font + // supports it. 
+ {"U+1F469", "", kEmojiFont}, + {"U+1F469", "ja-Jpan", kEmojiFont}, + {"U+1F469", "zh-Hant", kEmojiFont}, + {"U+1F469", "zh-Hans", kEmojiFont}, + + {"U+1F469", "ja-Jpan,zh-Hant", kEmojiFont}, + {"U+1F469", "zh-Hant,ja-Jpan", kEmojiFont}, + {"U+1F469", "zh-Hans,zh-Hant", kEmojiFont}, + {"U+1F469", "zh-Hant,zh-Hans", kEmojiFont}, + {"U+1F469", "zh-Hans,ja-Jpan", kEmojiFont}, + {"U+1F469", "ja-Jpan,zh-Hans", kEmojiFont}, + + {"U+1F469", "zh-Hans,ja-Jpan,zh-Hant", kEmojiFont}, + {"U+1F469", "zh-Hans,zh-Hant,ja-Jpan", kEmojiFont}, + {"U+1F469", "ja-Jpan,zh-Hans,zh-Hant", kEmojiFont}, + {"U+1F469", "ja-Jpan,zh-Hant,zh-Hans", kEmojiFont}, + {"U+1F469", "zh-Hant,zh-Hans,ja-Jpan", kEmojiFont}, + {"U+1F469", "zh-Hant,ja-Jpan,zh-Hans", kEmojiFont}, + }; + + std::shared_ptr collection( + getFontCollection(kTestFontDir, kItemizeFontXml)); + + for (auto testCase : testCases) { + SCOPED_TRACE("Test for \"" + testCase.testString + "\" with languages " + + testCase.requestedLanguages); - // U+23EA is a emoji default emoji but which is not available in either TextEmojiFont.ttf or - // ColorEmojiFont.ttf. No font should be selected. - itemize(collection, "U+23EA U+FE0E", kDefaultFontStyle, &runs); + std::vector runs; + const FontStyle style = + FontStyle(FontStyle::registerLanguageList(testCase.requestedLanguages)); + itemize(collection, testCase.testString.c_str(), style, &runs); ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + EXPECT_EQ(testCase.expectedFont, getFontPath(runs[0])); + } +} - // U+26FA U+FE0E is specified but ColorTextMixedEmojiFont has a variation sequence U+26F9 U+FE0F - // in its cmap, so ColorTextMixedEmojiFont should be selected instaed of ColorEmojiFont. 
- itemize(collection, "U+26FA U+FE0E", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kMixedEmojiFont, getFontPath(runs[0])); +TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0E) { + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + std::vector runs; + + const FontStyle kDefaultFontStyle; + + // U+00A9 is a text default emoji which is only available in + // TextEmojiFont.ttf. TextEmojiFont.ttf should be selected. + itemize(collection, "U+00A9 U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // U+00A9 is a text default emoji which is only available in + // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected. + itemize(collection, "U+00AE U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + // Text emoji is specified but it is not available. Use color emoji instead. + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // U+203C is a text default emoji which is available in both TextEmojiFont.ttf + // and ColorEmojiFont.ttf. TextEmojiFont.ttf should be selected. + itemize(collection, "U+203C U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // U+2049 is a text default emoji which is not available either + // TextEmojiFont.ttf or ColorEmojiFont.ttf. No font should be selected. + itemize(collection, "U+2049 U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + + // U+231A is a emoji default emoji which is available only in TextEmojifFont. + // TextEmojiFont.ttf sohuld be selected. 
+ itemize(collection, "U+231A U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // U+231B is a emoji default emoji which is available only in + // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected. + itemize(collection, "U+231B U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + // Text emoji is specified but it is not available. Use color emoji instead. + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // U+23E9 is a emoji default emoji which is available in both + // TextEmojiFont.ttf and ColorEmojiFont.ttf. TextEmojiFont.ttf should be + // selected even if U+23E9 is emoji default emoji since U+FE0E is appended. + itemize(collection, "U+23E9 U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // U+23EA is a emoji default emoji but which is not available in either + // TextEmojiFont.ttf or ColorEmojiFont.ttf. No font should be selected. + itemize(collection, "U+23EA U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + + // U+26FA U+FE0E is specified but ColorTextMixedEmojiFont has a variation + // sequence U+26F9 U+FE0F in its cmap, so ColorTextMixedEmojiFont should be + // selected instaed of ColorEmojiFont. 
+ itemize(collection, "U+26FA U+FE0E", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kMixedEmojiFont, getFontPath(runs[0])); } TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_withFE0F) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); - std::vector runs; - - const FontStyle kDefaultFontStyle; - - // U+00A9 is a text default emoji which is available only in TextEmojiFont.ttf. - // TextEmojiFont.ttf shoudl be selected. - itemize(collection, "U+00A9 U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - // Color emoji is specified but it is not available. Use text representaion instead. - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); - - // U+00AE is a text default emoji which is available only in ColorEmojiFont.ttf. - // ColorEmojiFont.ttf should be selected. - itemize(collection, "U+00AE U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // U+203C is a text default emoji which is available in both TextEmojiFont.ttf and - // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected even if U+203C is a text default - // emoji since U+FF0F is appended. - itemize(collection, "U+203C U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // U+2049 is a text default emoji which is not available in either TextEmojiFont.ttf or - // ColorEmojiFont.ttf. No font should be selected. 
- itemize(collection, "U+2049 U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); - - // U+231A is a emoji default emoji which is available only in TextEmojiFont.ttf. - // TextEmojiFont.ttf should be selected. - itemize(collection, "U+231A U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - // Color emoji is specified but it is not available. Use text representation instead. - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); - - // U+231B is a emoji default emoji which is available only in ColorEmojiFont.ttf. - // ColorEmojiFont.ttf should be selected. - itemize(collection, "U+231B U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // U+23E9 is a emoji default emoji which is available in both TextEmojiFont.ttf and - // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected. - itemize(collection, "U+23E9 U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // U+23EA is a emoji default emoji which is not available in either TextEmojiFont.ttf or - // ColorEmojiFont.ttf. No font should be selected. - itemize(collection, "U+23EA U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); - - // U+26F9 U+FE0F is specified but ColorTextMixedEmojiFont has a variation sequence U+26F9 U+FE0F - // in its cmap, so ColorTextMixedEmojiFont should be selected instaed of ColorEmojiFont. 
- itemize(collection, "U+26F9 U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kMixedEmojiFont, getFontPath(runs[0])); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + std::vector runs; + + const FontStyle kDefaultFontStyle; + + // U+00A9 is a text default emoji which is available only in + // TextEmojiFont.ttf. TextEmojiFont.ttf shoudl be selected. + itemize(collection, "U+00A9 U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + // Color emoji is specified but it is not available. Use text representaion + // instead. + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // U+00AE is a text default emoji which is available only in + // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected. + itemize(collection, "U+00AE U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // U+203C is a text default emoji which is available in both TextEmojiFont.ttf + // and ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected even if + // U+203C is a text default emoji since U+FF0F is appended. + itemize(collection, "U+203C U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // U+2049 is a text default emoji which is not available in either + // TextEmojiFont.ttf or ColorEmojiFont.ttf. No font should be selected. + itemize(collection, "U+2049 U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + + // U+231A is a emoji default emoji which is available only in + // TextEmojiFont.ttf. 
TextEmojiFont.ttf should be selected. + itemize(collection, "U+231A U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + // Color emoji is specified but it is not available. Use text representation + // instead. + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // U+231B is a emoji default emoji which is available only in + // ColorEmojiFont.ttf. ColorEmojiFont.ttf should be selected. + itemize(collection, "U+231B U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // U+23E9 is a emoji default emoji which is available in both + // TextEmojiFont.ttf and ColorEmojiFont.ttf. ColorEmojiFont.ttf should be + // selected. + itemize(collection, "U+23E9 U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // U+23EA is a emoji default emoji which is not available in either + // TextEmojiFont.ttf or ColorEmojiFont.ttf. No font should be selected. + itemize(collection, "U+23EA U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + + // U+26F9 U+FE0F is specified but ColorTextMixedEmojiFont has a variation + // sequence U+26F9 U+FE0F in its cmap, so ColorTextMixedEmojiFont should be + // selected instaed of ColorEmojiFont. 
+ itemize(collection, "U+26F9 U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kMixedEmojiFont, getFontPath(runs[0])); } TEST_F(FontCollectionItemizeTest, itemize_emojiSelection_with_skinTone) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); - std::vector runs; - - const FontStyle kDefaultFontStyle; - - // TextEmoji font is selected since it is listed before ColorEmoji font. - itemize(collection, "U+261D", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(1, runs[0].end); - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); - - // If skin tone is specified, it should be colored. - itemize(collection, "U+261D U+1F3FD", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(3, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // Still color font is selected if an emoji variation selector is specified. - itemize(collection, "U+261D U+FE0F U+1F3FD", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - // Text font should be selected if a text variation selector is specified and skin tone is - // rendered by itself. - itemize(collection, "U+261D U+FE0E U+1F3FD", kDefaultFontStyle, &runs); - ASSERT_EQ(2U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); - EXPECT_EQ(2, runs[1].start); - EXPECT_EQ(4, runs[1].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[1])); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + std::vector runs; + + const FontStyle kDefaultFontStyle; + + // TextEmoji font is selected since it is listed before ColorEmoji font. 
+ itemize(collection, "U+261D", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(1, runs[0].end); + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + + // If skin tone is specified, it should be colored. + itemize(collection, "U+261D U+1F3FD", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(3, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // Still color font is selected if an emoji variation selector is specified. + itemize(collection, "U+261D U+FE0F U+1F3FD", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + // Text font should be selected if a text variation selector is specified and + // skin tone is rendered by itself. + itemize(collection, "U+261D U+FE0E U+1F3FD", kDefaultFontStyle, &runs); + ASSERT_EQ(2U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kTextEmojiFont, getFontPath(runs[0])); + EXPECT_EQ(2, runs[1].start); + EXPECT_EQ(4, runs[1].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[1])); } TEST_F(FontCollectionItemizeTest, itemize_PrivateUseArea) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); - std::vector runs; - - const FontStyle kDefaultFontStyle; - - // Should not set nullptr to the result run. 
(Issue 26808815) - itemize(collection, "U+FEE10", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(2, runs[0].end); - EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); - - itemize(collection, "U+FEE40 U+FE4C5", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + std::vector runs; + + const FontStyle kDefaultFontStyle; + + // Should not set nullptr to the result run. (Issue 26808815) + itemize(collection, "U+FEE10", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(2, runs[0].end); + EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); + + itemize(collection, "U+FEE40 U+FE4C5", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kNoGlyphFont, getFontPath(runs[0])); } TEST_F(FontCollectionItemizeTest, itemize_genderBalancedEmoji) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); - std::vector runs; - - const FontStyle kDefaultFontStyle; - - itemize(collection, "U+1F469 U+200D U+1F373", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - itemize(collection, "U+1F469 U+200D U+2695 U+FE0F", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(5, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); - - itemize(collection, "U+1F469 U+200D U+2695", kDefaultFontStyle, &runs); - ASSERT_EQ(1U, runs.size()); - EXPECT_EQ(0, runs[0].start); - EXPECT_EQ(4, runs[0].end); - EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + std::vector 
runs; + + const FontStyle kDefaultFontStyle; + + itemize(collection, "U+1F469 U+200D U+1F373", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + itemize(collection, "U+1F469 U+200D U+2695 U+FE0F", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(5, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); + + itemize(collection, "U+1F469 U+200D U+2695", kDefaultFontStyle, &runs); + ASSERT_EQ(1U, runs.size()); + EXPECT_EQ(0, runs[0].start); + EXPECT_EQ(4, runs[0].end); + EXPECT_EQ(kColorEmojiFont, getFontPath(runs[0])); } // For b/29585939 TEST_F(FontCollectionItemizeTest, itemizeShouldKeepOrderForVS) { - const FontStyle kDefaultFontStyle; - - std::shared_ptr dummyFont(new MinikinFontForTest(kNoGlyphFont)); - std::shared_ptr fontA(new MinikinFontForTest(kZH_HansFont)); - std::shared_ptr fontB(new MinikinFontForTest(kZH_HansFont)); - - std::shared_ptr dummyFamily(new FontFamily( - std::vector({ Font(dummyFont, FontStyle()) }))); - std::shared_ptr familyA(new FontFamily( - std::vector({ Font(fontA, FontStyle()) }))); - std::shared_ptr familyB(new FontFamily( - std::vector({ Font(fontB, FontStyle()) }))); - - std::vector> families = - { dummyFamily, familyA, familyB }; - std::vector> reversedFamilies = - { dummyFamily, familyB, familyA }; - - std::shared_ptr collection(new FontCollection(families)); - std::shared_ptr reversedCollection(new FontCollection(reversedFamilies)); - - // Both fontA/fontB support U+35A8 but don't support U+35A8 U+E0100. The first font should be - // selected. 
- std::vector runs; - itemize(collection, "U+35A8 U+E0100", kDefaultFontStyle, &runs); - EXPECT_EQ(fontA.get(), runs[0].fakedFont.font); - - itemize(reversedCollection, "U+35A8 U+E0100", kDefaultFontStyle, &runs); - EXPECT_EQ(fontB.get(), runs[0].fakedFont.font); + const FontStyle kDefaultFontStyle; + + std::shared_ptr dummyFont(new MinikinFontForTest(kNoGlyphFont)); + std::shared_ptr fontA(new MinikinFontForTest(kZH_HansFont)); + std::shared_ptr fontB(new MinikinFontForTest(kZH_HansFont)); + + std::shared_ptr dummyFamily( + new FontFamily(std::vector({Font(dummyFont, FontStyle())}))); + std::shared_ptr familyA( + new FontFamily(std::vector({Font(fontA, FontStyle())}))); + std::shared_ptr familyB( + new FontFamily(std::vector({Font(fontB, FontStyle())}))); + + std::vector> families = {dummyFamily, familyA, + familyB}; + std::vector> reversedFamilies = { + dummyFamily, familyB, familyA}; + + std::shared_ptr collection(new FontCollection(families)); + std::shared_ptr reversedCollection( + new FontCollection(reversedFamilies)); + + // Both fontA/fontB support U+35A8 but don't support U+35A8 U+E0100. The first + // font should be selected. 
+ std::vector runs; + itemize(collection, "U+35A8 U+E0100", kDefaultFontStyle, &runs); + EXPECT_EQ(fontA.get(), runs[0].fakedFont.font); + + itemize(reversedCollection, "U+35A8 U+E0100", kDefaultFontStyle, &runs); + EXPECT_EQ(fontB.get(), runs[0].fakedFont.font); } // For b/29585939 TEST_F(FontCollectionItemizeTest, itemizeShouldKeepOrderForVS2) { - const FontStyle kDefaultFontStyle; - - std::shared_ptr dummyFont(new MinikinFontForTest(kNoGlyphFont)); - std::shared_ptr hasCmapFormat14Font( - new MinikinFontForTest(kHasCmapFormat14Font)); - std::shared_ptr noCmapFormat14Font( - new MinikinFontForTest(kNoCmapFormat14Font)); - - std::shared_ptr dummyFamily(new FontFamily( - std::vector({ Font(dummyFont, FontStyle()) }))); - std::shared_ptr hasCmapFormat14Family(new FontFamily( - std::vector({ Font(hasCmapFormat14Font, FontStyle()) }))); - std::shared_ptr noCmapFormat14Family(new FontFamily( - std::vector({ Font(noCmapFormat14Font, FontStyle()) }))); - - std::vector> families = - { dummyFamily, hasCmapFormat14Family, noCmapFormat14Family }; - std::vector> reversedFamilies = - { dummyFamily, noCmapFormat14Family, hasCmapFormat14Family }; - - std::shared_ptr collection(new FontCollection(families)); - std::shared_ptr reversedCollection(new FontCollection(reversedFamilies)); - - // Both hasCmapFormat14Font/noCmapFormat14Font support U+5380 but don't support U+5380 U+E0100. - // The first font should be selected. 
- std::vector runs; - itemize(collection, "U+5380 U+E0100", kDefaultFontStyle, &runs); - EXPECT_EQ(hasCmapFormat14Font.get(), runs[0].fakedFont.font); - - itemize(reversedCollection, "U+5380 U+E0100", kDefaultFontStyle, &runs); - EXPECT_EQ(noCmapFormat14Font.get(), runs[0].fakedFont.font); + const FontStyle kDefaultFontStyle; + + std::shared_ptr dummyFont(new MinikinFontForTest(kNoGlyphFont)); + std::shared_ptr hasCmapFormat14Font( + new MinikinFontForTest(kHasCmapFormat14Font)); + std::shared_ptr noCmapFormat14Font( + new MinikinFontForTest(kNoCmapFormat14Font)); + + std::shared_ptr dummyFamily( + new FontFamily(std::vector({Font(dummyFont, FontStyle())}))); + std::shared_ptr hasCmapFormat14Family(new FontFamily( + std::vector({Font(hasCmapFormat14Font, FontStyle())}))); + std::shared_ptr noCmapFormat14Family(new FontFamily( + std::vector({Font(noCmapFormat14Font, FontStyle())}))); + + std::vector> families = { + dummyFamily, hasCmapFormat14Family, noCmapFormat14Family}; + std::vector> reversedFamilies = { + dummyFamily, noCmapFormat14Family, hasCmapFormat14Family}; + + std::shared_ptr collection(new FontCollection(families)); + std::shared_ptr reversedCollection( + new FontCollection(reversedFamilies)); + + // Both hasCmapFormat14Font/noCmapFormat14Font support U+5380 but don't + // support U+5380 U+E0100. The first font should be selected. 
+ std::vector runs; + itemize(collection, "U+5380 U+E0100", kDefaultFontStyle, &runs); + EXPECT_EQ(hasCmapFormat14Font.get(), runs[0].fakedFont.font); + + itemize(reversedCollection, "U+5380 U+E0100", kDefaultFontStyle, &runs); + EXPECT_EQ(noCmapFormat14Font.get(), runs[0].fakedFont.font); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/FontCollectionTest.cpp b/third_party/txt/tests/old/unittest/FontCollectionTest.cpp index 9480079dac90b2539da8c6b357262476d3270dfd..fb2f11180a83a4b5e41979ed13fcd82e83393ee8 100644 --- a/third_party/txt/tests/old/unittest/FontCollectionTest.cpp +++ b/third_party/txt/tests/old/unittest/FontCollectionTest.cpp @@ -40,160 +40,169 @@ namespace minikin { // U+717D U+E0103 (VS20) const char kVsTestFont[] = kTestFontDir "/VariationSelectorTest-Regular.ttf"; -void expectVSGlyphs(const FontCollection* fc, uint32_t codepoint, const std::set& vsSet) { - for (uint32_t vs = 0xFE00; vs <= 0xE01EF; ++vs) { - // Move to variation selectors supplements after variation selectors. - if (vs == 0xFF00) { - vs = 0xE0100; - } - if (vsSet.find(vs) == vsSet.end()) { - EXPECT_FALSE(fc->hasVariationSelector(codepoint, vs)) - << "Glyph for U+" << std::hex << codepoint << " U+" << vs; - } else { - EXPECT_TRUE(fc->hasVariationSelector(codepoint, vs)) - << "Glyph for U+" << std::hex << codepoint << " U+" << vs; - } +void expectVSGlyphs(const FontCollection* fc, + uint32_t codepoint, + const std::set& vsSet) { + for (uint32_t vs = 0xFE00; vs <= 0xE01EF; ++vs) { + // Move to variation selectors supplements after variation selectors. 
+ if (vs == 0xFF00) { + vs = 0xE0100; } + if (vsSet.find(vs) == vsSet.end()) { + EXPECT_FALSE(fc->hasVariationSelector(codepoint, vs)) + << "Glyph for U+" << std::hex << codepoint << " U+" << vs; + } else { + EXPECT_TRUE(fc->hasVariationSelector(codepoint, vs)) + << "Glyph for U+" << std::hex << codepoint << " U+" << vs; + } + } } TEST(FontCollectionTest, hasVariationSelectorTest) { std::shared_ptr font(new MinikinFontForTest(kVsTestFont)); - std::shared_ptr family(new FontFamily( - std::vector({ Font(font, FontStyle()) }))); - std::vector> families({ family }); + std::shared_ptr family( + new FontFamily(std::vector({Font(font, FontStyle())}))); + std::vector> families({family}); std::shared_ptr fc(new FontCollection(families)); EXPECT_FALSE(fc->hasVariationSelector(0x82A6, 0)); - expectVSGlyphs(fc.get(), 0x82A6, std::set({0xFE00, 0xFE0E, 0xE0100, 0xE0101, 0xE0102})); + expectVSGlyphs( + fc.get(), 0x82A6, + std::set({0xFE00, 0xFE0E, 0xE0100, 0xE0101, 0xE0102})); EXPECT_FALSE(fc->hasVariationSelector(0x845B, 0)); - expectVSGlyphs(fc.get(), 0x845B, std::set({0xFE01, 0xFE0E, 0xE0101, 0xE0102, 0xE0103})); + expectVSGlyphs( + fc.get(), 0x845B, + std::set({0xFE01, 0xFE0E, 0xE0101, 0xE0102, 0xE0103})); EXPECT_FALSE(fc->hasVariationSelector(0x537F, 0)); expectVSGlyphs(fc.get(), 0x537F, std::set({0xFE0E})); EXPECT_FALSE(fc->hasVariationSelector(0x717D, 0)); - expectVSGlyphs(fc.get(), 0x717D, std::set({0xFE02, 0xE0102, 0xE0103})); + expectVSGlyphs(fc.get(), 0x717D, + std::set({0xFE02, 0xE0102, 0xE0103})); } const char kEmojiXmlFile[] = kTestFontDir "emoji.xml"; TEST(FontCollectionTest, hasVariationSelectorTest_emoji) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); - - // Both text/color font have cmap format 14 subtable entry for VS15/VS16 respectively. 
- EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0E)); - EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0F)); - - // The text font has cmap format 14 subtable entry for VS15 but the color font doesn't have for - // VS16 - EXPECT_TRUE(collection->hasVariationSelector(0x2626, 0xFE0E)); - EXPECT_FALSE(collection->hasVariationSelector(0x2626, 0xFE0F)); - - // The color font has cmap format 14 subtable entry for VS16 but the text font doesn't have for - // VS15. - EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0E)); - EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0F)); - - // Neither text/color font have cmap format 14 subtable entry for VS15/VS16. - EXPECT_TRUE(collection->hasVariationSelector(0x262E, 0xFE0E)); - EXPECT_FALSE(collection->hasVariationSelector(0x262E, 0xFE0F)); - - // Text font doesn't support U+1F3FD. Only the color emoji fonts has. So VS15 is not supported. - EXPECT_FALSE(collection->hasVariationSelector(0x1F3FD, 0xFE0E)); - - // Text font doesn't have U+262F U+FE0E or even its base code point U+262F. - EXPECT_FALSE(collection->hasVariationSelector(0x262F, 0xFE0E)); - - // None of the fonts support U+2229. - EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0E)); - EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0F)); - + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + + // Both text/color font have cmap format 14 subtable entry for VS15/VS16 + // respectively. + EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0E)); + EXPECT_TRUE(collection->hasVariationSelector(0x2623, 0xFE0F)); + + // The text font has cmap format 14 subtable entry for VS15 but the color font + // doesn't have for VS16 + EXPECT_TRUE(collection->hasVariationSelector(0x2626, 0xFE0E)); + EXPECT_FALSE(collection->hasVariationSelector(0x2626, 0xFE0F)); + + // The color font has cmap format 14 subtable entry for VS16 but the text font + // doesn't have for VS15. 
+ EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0E)); + EXPECT_TRUE(collection->hasVariationSelector(0x262A, 0xFE0F)); + + // Neither text/color font have cmap format 14 subtable entry for VS15/VS16. + EXPECT_TRUE(collection->hasVariationSelector(0x262E, 0xFE0E)); + EXPECT_FALSE(collection->hasVariationSelector(0x262E, 0xFE0F)); + + // Text font doesn't support U+1F3FD. Only the color emoji fonts has. So VS15 + // is not supported. + EXPECT_FALSE(collection->hasVariationSelector(0x1F3FD, 0xFE0E)); + + // Text font doesn't have U+262F U+FE0E or even its base code point U+262F. + EXPECT_FALSE(collection->hasVariationSelector(0x262F, 0xFE0E)); + + // None of the fonts support U+2229. + EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0E)); + EXPECT_FALSE(collection->hasVariationSelector(0x2229, 0xFE0F)); } TEST(FontCollectionTest, newEmojiTest) { - std::shared_ptr collection(getFontCollection(kTestFontDir, kEmojiXmlFile)); - - // U+2695, U+2640, U+2642 are not in emoji catrgory in Unicode 9 but they are now in emoji - // category. Should return true even if U+FE0E was appended. - // These three emojis are only avalilable in TextEmoji.ttf but U+2695 is excluded here since it - // is used in other tests. - EXPECT_TRUE(collection->hasVariationSelector(0x2640, 0xFE0E)); - EXPECT_FALSE(collection->hasVariationSelector(0x2640, 0xFE0F)); - EXPECT_TRUE(collection->hasVariationSelector(0x2642, 0xFE0E)); - EXPECT_FALSE(collection->hasVariationSelector(0x2642, 0xFE0F)); + std::shared_ptr collection( + getFontCollection(kTestFontDir, kEmojiXmlFile)); + + // U+2695, U+2640, U+2642 are not in emoji catrgory in Unicode 9 but they are + // now in emoji category. Should return true even if U+FE0E was appended. + // These three emojis are only avalilable in TextEmoji.ttf but U+2695 is + // excluded here since it is used in other tests. 
+ EXPECT_TRUE(collection->hasVariationSelector(0x2640, 0xFE0E)); + EXPECT_FALSE(collection->hasVariationSelector(0x2640, 0xFE0F)); + EXPECT_TRUE(collection->hasVariationSelector(0x2642, 0xFE0E)); + EXPECT_FALSE(collection->hasVariationSelector(0x2642, 0xFE0F)); } TEST(FontCollectionTest, createWithVariations) { - // This font has 'wdth' and 'wght' axes. - const char kMultiAxisFont[] = kTestFontDir "/MultiAxis.ttf"; - const char kNoAxisFont[] = kTestFontDir "/Regular.ttf"; - - std::shared_ptr multiAxisFont(new MinikinFontForTest(kMultiAxisFont)); - std::shared_ptr multiAxisFamily(new FontFamily( - std::vector({ Font(multiAxisFont, FontStyle()) }))); - std::vector> multiAxisFamilies({multiAxisFamily}); - std::shared_ptr multiAxisFc(new FontCollection(multiAxisFamilies)); - - std::shared_ptr noAxisFont(new MinikinFontForTest(kNoAxisFont)); - std::shared_ptr noAxisFamily(new FontFamily( - std::vector({ Font(noAxisFont, FontStyle()) }))); - std::vector> noAxisFamilies({noAxisFamily}); - std::shared_ptr noAxisFc(new FontCollection(noAxisFamilies)); - - { - // Do not ceate new instance if none of variations are specified. - EXPECT_EQ(nullptr, - multiAxisFc->createCollectionWithVariation(std::vector())); - EXPECT_EQ(nullptr, - noAxisFc->createCollectionWithVariation(std::vector())); - } - { - // New instance should be used for supported variation. - std::vector variations = { - { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f } - }; - std::shared_ptr newFc( - multiAxisFc->createCollectionWithVariation(variations)); - EXPECT_NE(nullptr, newFc.get()); - EXPECT_NE(multiAxisFc.get(), newFc.get()); - - EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); - } - { - // New instance should be used for supported variation (multiple variations case). 
- std::vector variations = { - { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f }, - { MinikinFont::MakeTag('w', 'g', 'h', 't'), 1.0f } - }; - std::shared_ptr newFc( - multiAxisFc->createCollectionWithVariation(variations)); - EXPECT_NE(nullptr, newFc.get()); - EXPECT_NE(multiAxisFc.get(), newFc.get()); - - EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); - } - { - // Do not ceate new instance if none of variations are supported. - std::vector variations = { - { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f } - }; - EXPECT_EQ(nullptr, multiAxisFc->createCollectionWithVariation(variations)); - EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); - } - { - // At least one axis is supported, should create new instance. - std::vector variations = { - { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f }, - { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f } - }; - std::shared_ptr newFc( - multiAxisFc->createCollectionWithVariation(variations)); - EXPECT_NE(nullptr, newFc.get()); - EXPECT_NE(multiAxisFc.get(), newFc.get()); - - EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); - } + // This font has 'wdth' and 'wght' axes. + const char kMultiAxisFont[] = kTestFontDir "/MultiAxis.ttf"; + const char kNoAxisFont[] = kTestFontDir "/Regular.ttf"; + + std::shared_ptr multiAxisFont( + new MinikinFontForTest(kMultiAxisFont)); + std::shared_ptr multiAxisFamily( + new FontFamily(std::vector({Font(multiAxisFont, FontStyle())}))); + std::vector> multiAxisFamilies({multiAxisFamily}); + std::shared_ptr multiAxisFc( + new FontCollection(multiAxisFamilies)); + + std::shared_ptr noAxisFont(new MinikinFontForTest(kNoAxisFont)); + std::shared_ptr noAxisFamily( + new FontFamily(std::vector({Font(noAxisFont, FontStyle())}))); + std::vector> noAxisFamilies({noAxisFamily}); + std::shared_ptr noAxisFc(new FontCollection(noAxisFamilies)); + + { + // Do not ceate new instance if none of variations are specified. 
+ EXPECT_EQ(nullptr, multiAxisFc->createCollectionWithVariation( + std::vector())); + EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation( + std::vector())); + } + { + // New instance should be used for supported variation. + std::vector variations = { + {MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}}; + std::shared_ptr newFc( + multiAxisFc->createCollectionWithVariation(variations)); + EXPECT_NE(nullptr, newFc.get()); + EXPECT_NE(multiAxisFc.get(), newFc.get()); + + EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); + } + { + // New instance should be used for supported variation (multiple variations + // case). + std::vector variations = { + {MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}, + {MinikinFont::MakeTag('w', 'g', 'h', 't'), 1.0f}}; + std::shared_ptr newFc( + multiAxisFc->createCollectionWithVariation(variations)); + EXPECT_NE(nullptr, newFc.get()); + EXPECT_NE(multiAxisFc.get(), newFc.get()); + + EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); + } + { + // Do not ceate new instance if none of variations are supported. + std::vector variations = { + {MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f}}; + EXPECT_EQ(nullptr, multiAxisFc->createCollectionWithVariation(variations)); + EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); + } + { + // At least one axis is supported, should create new instance. 
+ std::vector variations = { + {MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}, + {MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f}}; + std::shared_ptr newFc( + multiAxisFc->createCollectionWithVariation(variations)); + EXPECT_NE(nullptr, newFc.get()); + EXPECT_NE(multiAxisFc.get(), newFc.get()); + + EXPECT_EQ(nullptr, noAxisFc->createCollectionWithVariation(variations)); + } } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/FontFamilyTest.cpp b/third_party/txt/tests/old/unittest/FontFamilyTest.cpp index 21810d78d362c002a61ce4fe2d48bbfd557b938d..68d6d51b413ece3307f150a42c903c5405df3491 100644 --- a/third_party/txt/tests/old/unittest/FontFamilyTest.cpp +++ b/third_party/txt/tests/old/unittest/FontFamilyTest.cpp @@ -16,12 +16,12 @@ #include -#include #include +#include -#include "lib/txt/libs/minikin/FontLanguageListCache.h" #include "ICUTestBase.h" #include "MinikinFontForTest.h" +#include "lib/txt/libs/minikin/FontLanguageListCache.h" #include "lib/txt/libs/minikin/MinikinInternal.h" namespace minikin { @@ -30,460 +30,495 @@ typedef ICUTestBase FontLanguagesTest; typedef ICUTestBase FontLanguageTest; static const FontLanguages& createFontLanguages(const std::string& input) { - std::lock_guard _l(gMinikinLock); - uint32_t langId = FontLanguageListCache::getId(input); - return FontLanguageListCache::getById(langId); + std::lock_guard _l(gMinikinLock); + uint32_t langId = FontLanguageListCache::getId(input); + return FontLanguageListCache::getById(langId); } static FontLanguage createFontLanguage(const std::string& input) { - std::lock_guard _l(gMinikinLock); - uint32_t langId = FontLanguageListCache::getId(input); - return FontLanguageListCache::getById(langId)[0]; + std::lock_guard _l(gMinikinLock); + uint32_t langId = FontLanguageListCache::getId(input); + return FontLanguageListCache::getById(langId)[0]; } -static FontLanguage createFontLanguageWithoutICUSanitization(const std::string& input) { - return FontLanguage(input.c_str(), 
input.size()); +static FontLanguage createFontLanguageWithoutICUSanitization( + const std::string& input) { + return FontLanguage(input.c_str(), input.size()); } std::shared_ptr makeFamily(const std::string& fontPath) { - std::shared_ptr font(new MinikinFontForTest(fontPath)); - return std::make_shared( - std::vector({Font(font, FontStyle())})); + std::shared_ptr font(new MinikinFontForTest(fontPath)); + return std::make_shared( + std::vector({Font(font, FontStyle())})); } TEST_F(FontLanguageTest, basicTests) { - FontLanguage defaultLang; - FontLanguage emptyLang("", 0); - FontLanguage english = createFontLanguage("en"); - FontLanguage french = createFontLanguage("fr"); - FontLanguage und = createFontLanguage("und"); - FontLanguage undZsye = createFontLanguage("und-Zsye"); - - EXPECT_EQ(english, english); - EXPECT_EQ(french, french); - - EXPECT_TRUE(defaultLang != defaultLang); - EXPECT_TRUE(emptyLang != emptyLang); - EXPECT_TRUE(defaultLang != emptyLang); - EXPECT_TRUE(defaultLang != und); - EXPECT_TRUE(emptyLang != und); - EXPECT_TRUE(english != defaultLang); - EXPECT_TRUE(english != emptyLang); - EXPECT_TRUE(english != french); - EXPECT_TRUE(english != undZsye); - EXPECT_TRUE(und != undZsye); - EXPECT_TRUE(english != und); - - EXPECT_TRUE(defaultLang.isUnsupported()); - EXPECT_TRUE(emptyLang.isUnsupported()); - - EXPECT_FALSE(english.isUnsupported()); - EXPECT_FALSE(french.isUnsupported()); - EXPECT_FALSE(und.isUnsupported()); - EXPECT_FALSE(undZsye.isUnsupported()); + FontLanguage defaultLang; + FontLanguage emptyLang("", 0); + FontLanguage english = createFontLanguage("en"); + FontLanguage french = createFontLanguage("fr"); + FontLanguage und = createFontLanguage("und"); + FontLanguage undZsye = createFontLanguage("und-Zsye"); + + EXPECT_EQ(english, english); + EXPECT_EQ(french, french); + + EXPECT_TRUE(defaultLang != defaultLang); + EXPECT_TRUE(emptyLang != emptyLang); + EXPECT_TRUE(defaultLang != emptyLang); + EXPECT_TRUE(defaultLang != und); + 
EXPECT_TRUE(emptyLang != und); + EXPECT_TRUE(english != defaultLang); + EXPECT_TRUE(english != emptyLang); + EXPECT_TRUE(english != french); + EXPECT_TRUE(english != undZsye); + EXPECT_TRUE(und != undZsye); + EXPECT_TRUE(english != und); + + EXPECT_TRUE(defaultLang.isUnsupported()); + EXPECT_TRUE(emptyLang.isUnsupported()); + + EXPECT_FALSE(english.isUnsupported()); + EXPECT_FALSE(french.isUnsupported()); + EXPECT_FALSE(und.isUnsupported()); + EXPECT_FALSE(undZsye.isUnsupported()); } TEST_F(FontLanguageTest, getStringTest) { - EXPECT_EQ("en-Latn-US", createFontLanguage("en").getString()); - EXPECT_EQ("en-Latn-US", createFontLanguage("en-Latn").getString()); - - // Capitalized language code or lowercased script should be normalized. - EXPECT_EQ("en-Latn-US", createFontLanguage("EN-LATN").getString()); - EXPECT_EQ("en-Latn-US", createFontLanguage("EN-latn").getString()); - EXPECT_EQ("en-Latn-US", createFontLanguage("en-latn").getString()); - - // Invalid script should be kept. - EXPECT_EQ("en-Xyzt-US", createFontLanguage("en-xyzt").getString()); - - EXPECT_EQ("en-Latn-US", createFontLanguage("en-Latn-US").getString()); - EXPECT_EQ("ja-Jpan-JP", createFontLanguage("ja").getString()); - EXPECT_EQ("zh-Hant-TW", createFontLanguage("zh-TW").getString()); - EXPECT_EQ("zh-Hant-HK", createFontLanguage("zh-HK").getString()); - EXPECT_EQ("zh-Hant-MO", createFontLanguage("zh-MO").getString()); - EXPECT_EQ("zh-Hans-CN", createFontLanguage("zh").getString()); - EXPECT_EQ("zh-Hans-CN", createFontLanguage("zh-CN").getString()); - EXPECT_EQ("zh-Hans-SG", createFontLanguage("zh-SG").getString()); - EXPECT_EQ("und", createFontLanguage("und").getString()); - EXPECT_EQ("und", createFontLanguage("UND").getString()); - EXPECT_EQ("und", createFontLanguage("Und").getString()); - EXPECT_EQ("und-Zsye", createFontLanguage("und-Zsye").getString()); - EXPECT_EQ("und-Zsye", createFontLanguage("Und-ZSYE").getString()); - EXPECT_EQ("und-Zsye", createFontLanguage("Und-zsye").getString()); - - 
EXPECT_EQ("de-Latn-DE", createFontLanguage("de-1901").getString()); - - EXPECT_EQ("es-Latn-419", createFontLanguage("es-Latn-419").getString()); - - // Emoji subtag is dropped from getString(). - EXPECT_EQ("es-Latn-419", createFontLanguage("es-419-u-em-emoji").getString()); - EXPECT_EQ("es-Latn-419", createFontLanguage("es-Latn-419-u-em-emoji").getString()); - - // This is not a necessary desired behavior, just known behavior. - EXPECT_EQ("en-Latn-US", createFontLanguage("und-Abcdefgh").getString()); + EXPECT_EQ("en-Latn-US", createFontLanguage("en").getString()); + EXPECT_EQ("en-Latn-US", createFontLanguage("en-Latn").getString()); + + // Capitalized language code or lowercased script should be normalized. + EXPECT_EQ("en-Latn-US", createFontLanguage("EN-LATN").getString()); + EXPECT_EQ("en-Latn-US", createFontLanguage("EN-latn").getString()); + EXPECT_EQ("en-Latn-US", createFontLanguage("en-latn").getString()); + + // Invalid script should be kept. + EXPECT_EQ("en-Xyzt-US", createFontLanguage("en-xyzt").getString()); + + EXPECT_EQ("en-Latn-US", createFontLanguage("en-Latn-US").getString()); + EXPECT_EQ("ja-Jpan-JP", createFontLanguage("ja").getString()); + EXPECT_EQ("zh-Hant-TW", createFontLanguage("zh-TW").getString()); + EXPECT_EQ("zh-Hant-HK", createFontLanguage("zh-HK").getString()); + EXPECT_EQ("zh-Hant-MO", createFontLanguage("zh-MO").getString()); + EXPECT_EQ("zh-Hans-CN", createFontLanguage("zh").getString()); + EXPECT_EQ("zh-Hans-CN", createFontLanguage("zh-CN").getString()); + EXPECT_EQ("zh-Hans-SG", createFontLanguage("zh-SG").getString()); + EXPECT_EQ("und", createFontLanguage("und").getString()); + EXPECT_EQ("und", createFontLanguage("UND").getString()); + EXPECT_EQ("und", createFontLanguage("Und").getString()); + EXPECT_EQ("und-Zsye", createFontLanguage("und-Zsye").getString()); + EXPECT_EQ("und-Zsye", createFontLanguage("Und-ZSYE").getString()); + EXPECT_EQ("und-Zsye", createFontLanguage("Und-zsye").getString()); + + EXPECT_EQ("de-Latn-DE", 
createFontLanguage("de-1901").getString()); + + EXPECT_EQ("es-Latn-419", createFontLanguage("es-Latn-419").getString()); + + // Emoji subtag is dropped from getString(). + EXPECT_EQ("es-Latn-419", createFontLanguage("es-419-u-em-emoji").getString()); + EXPECT_EQ("es-Latn-419", + createFontLanguage("es-Latn-419-u-em-emoji").getString()); + + // This is not a necessary desired behavior, just known behavior. + EXPECT_EQ("en-Latn-US", createFontLanguage("und-Abcdefgh").getString()); } TEST_F(FontLanguageTest, testReconstruction) { - EXPECT_EQ("en", createFontLanguageWithoutICUSanitization("en").getString()); - EXPECT_EQ("fil", createFontLanguageWithoutICUSanitization("fil").getString()); - EXPECT_EQ("und", createFontLanguageWithoutICUSanitization("und").getString()); - - EXPECT_EQ("en-Latn", createFontLanguageWithoutICUSanitization("en-Latn").getString()); - EXPECT_EQ("fil-Taga", createFontLanguageWithoutICUSanitization("fil-Taga").getString()); - EXPECT_EQ("und-Zsye", createFontLanguageWithoutICUSanitization("und-Zsye").getString()); - - EXPECT_EQ("en-US", createFontLanguageWithoutICUSanitization("en-US").getString()); - EXPECT_EQ("fil-PH", createFontLanguageWithoutICUSanitization("fil-PH").getString()); - EXPECT_EQ("es-419", createFontLanguageWithoutICUSanitization("es-419").getString()); - - EXPECT_EQ("en-Latn-US", createFontLanguageWithoutICUSanitization("en-Latn-US").getString()); - EXPECT_EQ("fil-Taga-PH", createFontLanguageWithoutICUSanitization("fil-Taga-PH").getString()); - EXPECT_EQ("es-Latn-419", createFontLanguageWithoutICUSanitization("es-Latn-419").getString()); - - // Possible minimum/maximum values. 
- EXPECT_EQ("aa", createFontLanguageWithoutICUSanitization("aa").getString()); - EXPECT_EQ("zz", createFontLanguageWithoutICUSanitization("zz").getString()); - EXPECT_EQ("aa-Aaaa", createFontLanguageWithoutICUSanitization("aa-Aaaa").getString()); - EXPECT_EQ("zz-Zzzz", createFontLanguageWithoutICUSanitization("zz-Zzzz").getString()); - EXPECT_EQ("aaa-Aaaa-AA", createFontLanguageWithoutICUSanitization("aaa-Aaaa-AA").getString()); - EXPECT_EQ("zzz-Zzzz-ZZ", createFontLanguageWithoutICUSanitization("zzz-Zzzz-ZZ").getString()); - EXPECT_EQ("aaa-Aaaa-000", createFontLanguageWithoutICUSanitization("aaa-Aaaa-000").getString()); - EXPECT_EQ("zzz-Zzzz-999", createFontLanguageWithoutICUSanitization("zzz-Zzzz-999").getString()); + EXPECT_EQ("en", createFontLanguageWithoutICUSanitization("en").getString()); + EXPECT_EQ("fil", createFontLanguageWithoutICUSanitization("fil").getString()); + EXPECT_EQ("und", createFontLanguageWithoutICUSanitization("und").getString()); + + EXPECT_EQ("en-Latn", + createFontLanguageWithoutICUSanitization("en-Latn").getString()); + EXPECT_EQ("fil-Taga", + createFontLanguageWithoutICUSanitization("fil-Taga").getString()); + EXPECT_EQ("und-Zsye", + createFontLanguageWithoutICUSanitization("und-Zsye").getString()); + + EXPECT_EQ("en-US", + createFontLanguageWithoutICUSanitization("en-US").getString()); + EXPECT_EQ("fil-PH", + createFontLanguageWithoutICUSanitization("fil-PH").getString()); + EXPECT_EQ("es-419", + createFontLanguageWithoutICUSanitization("es-419").getString()); + + EXPECT_EQ("en-Latn-US", + createFontLanguageWithoutICUSanitization("en-Latn-US").getString()); + EXPECT_EQ( + "fil-Taga-PH", + createFontLanguageWithoutICUSanitization("fil-Taga-PH").getString()); + EXPECT_EQ( + "es-Latn-419", + createFontLanguageWithoutICUSanitization("es-Latn-419").getString()); + + // Possible minimum/maximum values. 
+ EXPECT_EQ("aa", createFontLanguageWithoutICUSanitization("aa").getString()); + EXPECT_EQ("zz", createFontLanguageWithoutICUSanitization("zz").getString()); + EXPECT_EQ("aa-Aaaa", + createFontLanguageWithoutICUSanitization("aa-Aaaa").getString()); + EXPECT_EQ("zz-Zzzz", + createFontLanguageWithoutICUSanitization("zz-Zzzz").getString()); + EXPECT_EQ( + "aaa-Aaaa-AA", + createFontLanguageWithoutICUSanitization("aaa-Aaaa-AA").getString()); + EXPECT_EQ( + "zzz-Zzzz-ZZ", + createFontLanguageWithoutICUSanitization("zzz-Zzzz-ZZ").getString()); + EXPECT_EQ( + "aaa-Aaaa-000", + createFontLanguageWithoutICUSanitization("aaa-Aaaa-000").getString()); + EXPECT_EQ( + "zzz-Zzzz-999", + createFontLanguageWithoutICUSanitization("zzz-Zzzz-999").getString()); } TEST_F(FontLanguageTest, ScriptEqualTest) { - EXPECT_TRUE(createFontLanguage("en").isEqualScript(createFontLanguage("en"))); - EXPECT_TRUE(createFontLanguage("en-Latn").isEqualScript(createFontLanguage("en"))); - EXPECT_TRUE(createFontLanguage("jp-Latn").isEqualScript(createFontLanguage("en-Latn"))); - EXPECT_TRUE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Jpan"))); - - EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Hira"))); - EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript(createFontLanguage("en-Hani"))); + EXPECT_TRUE(createFontLanguage("en").isEqualScript(createFontLanguage("en"))); + EXPECT_TRUE( + createFontLanguage("en-Latn").isEqualScript(createFontLanguage("en"))); + EXPECT_TRUE(createFontLanguage("jp-Latn").isEqualScript( + createFontLanguage("en-Latn"))); + EXPECT_TRUE(createFontLanguage("en-Jpan").isEqualScript( + createFontLanguage("en-Jpan"))); + + EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript( + createFontLanguage("en-Hira"))); + EXPECT_FALSE(createFontLanguage("en-Jpan").isEqualScript( + createFontLanguage("en-Hani"))); } TEST_F(FontLanguageTest, ScriptMatchTest) { - const bool SUPPORTED = true; - const bool NOT_SUPPORTED = false; - - 
struct TestCase { - const std::string baseScript; - const std::string requestedScript; - bool isSupported; - } testCases[] = { - // Same scripts - { "en-Latn", "Latn", SUPPORTED }, - { "ja-Jpan", "Jpan", SUPPORTED }, - { "ja-Hira", "Hira", SUPPORTED }, - { "ja-Kana", "Kana", SUPPORTED }, - { "ja-Hrkt", "Hrkt", SUPPORTED }, - { "zh-Hans", "Hans", SUPPORTED }, - { "zh-Hant", "Hant", SUPPORTED }, - { "zh-Hani", "Hani", SUPPORTED }, - { "ko-Kore", "Kore", SUPPORTED }, - { "ko-Hang", "Hang", SUPPORTED }, - { "zh-Hanb", "Hanb", SUPPORTED }, - - // Japanese supports Hiragana, Katakanara, etc. - { "ja-Jpan", "Hira", SUPPORTED }, - { "ja-Jpan", "Kana", SUPPORTED }, - { "ja-Jpan", "Hrkt", SUPPORTED }, - { "ja-Hrkt", "Hira", SUPPORTED }, - { "ja-Hrkt", "Kana", SUPPORTED }, - - // Chinese supports Han. - { "zh-Hans", "Hani", SUPPORTED }, - { "zh-Hant", "Hani", SUPPORTED }, - { "zh-Hanb", "Hani", SUPPORTED }, - - // Hanb supports Bopomofo. - { "zh-Hanb", "Bopo", SUPPORTED }, - - // Korean supports Hangul. - { "ko-Kore", "Hang", SUPPORTED }, - - // Different scripts - { "ja-Jpan", "Latn", NOT_SUPPORTED }, - { "en-Latn", "Jpan", NOT_SUPPORTED }, - { "ja-Jpan", "Hant", NOT_SUPPORTED }, - { "zh-Hant", "Jpan", NOT_SUPPORTED }, - { "ja-Jpan", "Hans", NOT_SUPPORTED }, - { "zh-Hans", "Jpan", NOT_SUPPORTED }, - { "ja-Jpan", "Kore", NOT_SUPPORTED }, - { "ko-Kore", "Jpan", NOT_SUPPORTED }, - { "zh-Hans", "Hant", NOT_SUPPORTED }, - { "zh-Hant", "Hans", NOT_SUPPORTED }, - { "zh-Hans", "Kore", NOT_SUPPORTED }, - { "ko-Kore", "Hans", NOT_SUPPORTED }, - { "zh-Hant", "Kore", NOT_SUPPORTED }, - { "ko-Kore", "Hant", NOT_SUPPORTED }, - - // Hiragana doesn't support Japanese, etc. 
- { "ja-Hira", "Jpan", NOT_SUPPORTED }, - { "ja-Kana", "Jpan", NOT_SUPPORTED }, - { "ja-Hrkt", "Jpan", NOT_SUPPORTED }, - { "ja-Hani", "Jpan", NOT_SUPPORTED }, - { "ja-Hira", "Hrkt", NOT_SUPPORTED }, - { "ja-Kana", "Hrkt", NOT_SUPPORTED }, - { "ja-Hani", "Hrkt", NOT_SUPPORTED }, - { "ja-Hani", "Hira", NOT_SUPPORTED }, - { "ja-Hani", "Kana", NOT_SUPPORTED }, - - // Kanji doesn't support Chinese, etc. - { "zh-Hani", "Hant", NOT_SUPPORTED }, - { "zh-Hani", "Hans", NOT_SUPPORTED }, - { "zh-Hani", "Hanb", NOT_SUPPORTED }, - - // Hangul doesn't support Korean, etc. - { "ko-Hang", "Kore", NOT_SUPPORTED }, - { "ko-Hani", "Kore", NOT_SUPPORTED }, - { "ko-Hani", "Hang", NOT_SUPPORTED }, - { "ko-Hang", "Hani", NOT_SUPPORTED }, - - // Han with botomofo doesn't support simplified Chinese, etc. - { "zh-Hanb", "Hant", NOT_SUPPORTED }, - { "zh-Hanb", "Hans", NOT_SUPPORTED }, - { "zh-Hanb", "Jpan", NOT_SUPPORTED }, - { "zh-Hanb", "Kore", NOT_SUPPORTED }, - }; - - for (auto testCase : testCases) { - hb_script_t script = hb_script_from_iso15924_tag( - HB_TAG(testCase.requestedScript[0], testCase.requestedScript[1], - testCase.requestedScript[2], testCase.requestedScript[3])); - if (testCase.isSupported) { - EXPECT_TRUE( - createFontLanguage(testCase.baseScript).supportsHbScript(script)) - << testCase.baseScript << " should support " << testCase.requestedScript; - } else { - EXPECT_FALSE( - createFontLanguage(testCase.baseScript).supportsHbScript(script)) - << testCase.baseScript << " shouldn't support " << testCase.requestedScript; - } + const bool SUPPORTED = true; + const bool NOT_SUPPORTED = false; + + struct TestCase { + const std::string baseScript; + const std::string requestedScript; + bool isSupported; + } testCases[] = { + // Same scripts + {"en-Latn", "Latn", SUPPORTED}, + {"ja-Jpan", "Jpan", SUPPORTED}, + {"ja-Hira", "Hira", SUPPORTED}, + {"ja-Kana", "Kana", SUPPORTED}, + {"ja-Hrkt", "Hrkt", SUPPORTED}, + {"zh-Hans", "Hans", SUPPORTED}, + {"zh-Hant", "Hant", SUPPORTED}, + 
{"zh-Hani", "Hani", SUPPORTED}, + {"ko-Kore", "Kore", SUPPORTED}, + {"ko-Hang", "Hang", SUPPORTED}, + {"zh-Hanb", "Hanb", SUPPORTED}, + + // Japanese supports Hiragana, Katakanara, etc. + {"ja-Jpan", "Hira", SUPPORTED}, + {"ja-Jpan", "Kana", SUPPORTED}, + {"ja-Jpan", "Hrkt", SUPPORTED}, + {"ja-Hrkt", "Hira", SUPPORTED}, + {"ja-Hrkt", "Kana", SUPPORTED}, + + // Chinese supports Han. + {"zh-Hans", "Hani", SUPPORTED}, + {"zh-Hant", "Hani", SUPPORTED}, + {"zh-Hanb", "Hani", SUPPORTED}, + + // Hanb supports Bopomofo. + {"zh-Hanb", "Bopo", SUPPORTED}, + + // Korean supports Hangul. + {"ko-Kore", "Hang", SUPPORTED}, + + // Different scripts + {"ja-Jpan", "Latn", NOT_SUPPORTED}, + {"en-Latn", "Jpan", NOT_SUPPORTED}, + {"ja-Jpan", "Hant", NOT_SUPPORTED}, + {"zh-Hant", "Jpan", NOT_SUPPORTED}, + {"ja-Jpan", "Hans", NOT_SUPPORTED}, + {"zh-Hans", "Jpan", NOT_SUPPORTED}, + {"ja-Jpan", "Kore", NOT_SUPPORTED}, + {"ko-Kore", "Jpan", NOT_SUPPORTED}, + {"zh-Hans", "Hant", NOT_SUPPORTED}, + {"zh-Hant", "Hans", NOT_SUPPORTED}, + {"zh-Hans", "Kore", NOT_SUPPORTED}, + {"ko-Kore", "Hans", NOT_SUPPORTED}, + {"zh-Hant", "Kore", NOT_SUPPORTED}, + {"ko-Kore", "Hant", NOT_SUPPORTED}, + + // Hiragana doesn't support Japanese, etc. + {"ja-Hira", "Jpan", NOT_SUPPORTED}, + {"ja-Kana", "Jpan", NOT_SUPPORTED}, + {"ja-Hrkt", "Jpan", NOT_SUPPORTED}, + {"ja-Hani", "Jpan", NOT_SUPPORTED}, + {"ja-Hira", "Hrkt", NOT_SUPPORTED}, + {"ja-Kana", "Hrkt", NOT_SUPPORTED}, + {"ja-Hani", "Hrkt", NOT_SUPPORTED}, + {"ja-Hani", "Hira", NOT_SUPPORTED}, + {"ja-Hani", "Kana", NOT_SUPPORTED}, + + // Kanji doesn't support Chinese, etc. + {"zh-Hani", "Hant", NOT_SUPPORTED}, + {"zh-Hani", "Hans", NOT_SUPPORTED}, + {"zh-Hani", "Hanb", NOT_SUPPORTED}, + + // Hangul doesn't support Korean, etc. + {"ko-Hang", "Kore", NOT_SUPPORTED}, + {"ko-Hani", "Kore", NOT_SUPPORTED}, + {"ko-Hani", "Hang", NOT_SUPPORTED}, + {"ko-Hang", "Hani", NOT_SUPPORTED}, + + // Han with botomofo doesn't support simplified Chinese, etc. 
+ {"zh-Hanb", "Hant", NOT_SUPPORTED}, + {"zh-Hanb", "Hans", NOT_SUPPORTED}, + {"zh-Hanb", "Jpan", NOT_SUPPORTED}, + {"zh-Hanb", "Kore", NOT_SUPPORTED}, + }; + + for (auto testCase : testCases) { + hb_script_t script = hb_script_from_iso15924_tag( + HB_TAG(testCase.requestedScript[0], testCase.requestedScript[1], + testCase.requestedScript[2], testCase.requestedScript[3])); + if (testCase.isSupported) { + EXPECT_TRUE( + createFontLanguage(testCase.baseScript).supportsHbScript(script)) + << testCase.baseScript << " should support " + << testCase.requestedScript; + } else { + EXPECT_FALSE( + createFontLanguage(testCase.baseScript).supportsHbScript(script)) + << testCase.baseScript << " shouldn't support " + << testCase.requestedScript; } + } } TEST_F(FontLanguagesTest, basicTests) { - FontLanguages emptyLangs; - EXPECT_EQ(0u, emptyLangs.size()); - - FontLanguage english = createFontLanguage("en"); - const FontLanguages& singletonLangs = createFontLanguages("en"); - EXPECT_EQ(1u, singletonLangs.size()); - EXPECT_EQ(english, singletonLangs[0]); - - FontLanguage french = createFontLanguage("fr"); - const FontLanguages& twoLangs = createFontLanguages("en,fr"); - EXPECT_EQ(2u, twoLangs.size()); - EXPECT_EQ(english, twoLangs[0]); - EXPECT_EQ(french, twoLangs[1]); + FontLanguages emptyLangs; + EXPECT_EQ(0u, emptyLangs.size()); + + FontLanguage english = createFontLanguage("en"); + const FontLanguages& singletonLangs = createFontLanguages("en"); + EXPECT_EQ(1u, singletonLangs.size()); + EXPECT_EQ(english, singletonLangs[0]); + + FontLanguage french = createFontLanguage("fr"); + const FontLanguages& twoLangs = createFontLanguages("en,fr"); + EXPECT_EQ(2u, twoLangs.size()); + EXPECT_EQ(english, twoLangs[0]); + EXPECT_EQ(french, twoLangs[1]); } TEST_F(FontLanguagesTest, unsupportedLanguageTests) { - const FontLanguages& oneUnsupported = createFontLanguages("abcd-example"); - EXPECT_TRUE(oneUnsupported.empty()); - - const FontLanguages& twoUnsupporteds = 
createFontLanguages("abcd-example,abcd-example"); - EXPECT_TRUE(twoUnsupporteds.empty()); - - FontLanguage english = createFontLanguage("en"); - const FontLanguages& firstUnsupported = createFontLanguages("abcd-example,en"); - EXPECT_EQ(1u, firstUnsupported.size()); - EXPECT_EQ(english, firstUnsupported[0]); - - const FontLanguages& lastUnsupported = createFontLanguages("en,abcd-example"); - EXPECT_EQ(1u, lastUnsupported.size()); - EXPECT_EQ(english, lastUnsupported[0]); + const FontLanguages& oneUnsupported = createFontLanguages("abcd-example"); + EXPECT_TRUE(oneUnsupported.empty()); + + const FontLanguages& twoUnsupporteds = + createFontLanguages("abcd-example,abcd-example"); + EXPECT_TRUE(twoUnsupporteds.empty()); + + FontLanguage english = createFontLanguage("en"); + const FontLanguages& firstUnsupported = + createFontLanguages("abcd-example,en"); + EXPECT_EQ(1u, firstUnsupported.size()); + EXPECT_EQ(english, firstUnsupported[0]); + + const FontLanguages& lastUnsupported = createFontLanguages("en,abcd-example"); + EXPECT_EQ(1u, lastUnsupported.size()); + EXPECT_EQ(english, lastUnsupported[0]); } TEST_F(FontLanguagesTest, repeatedLanguageTests) { - FontLanguage english = createFontLanguage("en"); - FontLanguage french = createFontLanguage("fr"); - FontLanguage canadianFrench = createFontLanguage("fr-CA"); - FontLanguage englishInLatn = createFontLanguage("en-Latn"); - ASSERT_TRUE(english == englishInLatn); - - const FontLanguages& langs = createFontLanguages("en,en-Latn"); - EXPECT_EQ(1u, langs.size()); - EXPECT_EQ(english, langs[0]); - - const FontLanguages& fr = createFontLanguages("fr,fr-FR,fr-Latn-FR"); - EXPECT_EQ(1u, fr.size()); - EXPECT_EQ(french, fr[0]); - - // ICU appends FR to fr. The third language is dropped which is same as the first language. - const FontLanguages& fr2 = createFontLanguages("fr,fr-CA,fr-FR"); - EXPECT_EQ(2u, fr2.size()); - EXPECT_EQ(french, fr2[0]); - EXPECT_EQ(canadianFrench, fr2[1]); - - // The order should be kept. 
- const FontLanguages& langs2 = createFontLanguages("en,fr,en-Latn"); - EXPECT_EQ(2u, langs2.size()); - EXPECT_EQ(english, langs2[0]); - EXPECT_EQ(french, langs2[1]); + FontLanguage english = createFontLanguage("en"); + FontLanguage french = createFontLanguage("fr"); + FontLanguage canadianFrench = createFontLanguage("fr-CA"); + FontLanguage englishInLatn = createFontLanguage("en-Latn"); + ASSERT_TRUE(english == englishInLatn); + + const FontLanguages& langs = createFontLanguages("en,en-Latn"); + EXPECT_EQ(1u, langs.size()); + EXPECT_EQ(english, langs[0]); + + const FontLanguages& fr = createFontLanguages("fr,fr-FR,fr-Latn-FR"); + EXPECT_EQ(1u, fr.size()); + EXPECT_EQ(french, fr[0]); + + // ICU appends FR to fr. The third language is dropped which is same as the + // first language. + const FontLanguages& fr2 = createFontLanguages("fr,fr-CA,fr-FR"); + EXPECT_EQ(2u, fr2.size()); + EXPECT_EQ(french, fr2[0]); + EXPECT_EQ(canadianFrench, fr2[1]); + + // The order should be kept. + const FontLanguages& langs2 = createFontLanguages("en,fr,en-Latn"); + EXPECT_EQ(2u, langs2.size()); + EXPECT_EQ(english, langs2[0]); + EXPECT_EQ(french, langs2[1]); } TEST_F(FontLanguagesTest, identifierTest) { - EXPECT_EQ(createFontLanguage("en-Latn-US"), createFontLanguage("en-Latn-US")); - EXPECT_EQ(createFontLanguage("zh-Hans-CN"), createFontLanguage("zh-Hans-CN")); - EXPECT_EQ(createFontLanguage("en-Zsye-US"), createFontLanguage("en-Zsye-US")); - - EXPECT_NE(createFontLanguage("en-Latn-US"), createFontLanguage("en-Latn-GB")); - EXPECT_NE(createFontLanguage("en-Latn-US"), createFontLanguage("en-Zsye-US")); - EXPECT_NE(createFontLanguage("es-Latn-US"), createFontLanguage("en-Latn-US")); - EXPECT_NE(createFontLanguage("zh-Hant-HK"), createFontLanguage("zh-Hant-TW")); + EXPECT_EQ(createFontLanguage("en-Latn-US"), createFontLanguage("en-Latn-US")); + EXPECT_EQ(createFontLanguage("zh-Hans-CN"), createFontLanguage("zh-Hans-CN")); + EXPECT_EQ(createFontLanguage("en-Zsye-US"), 
createFontLanguage("en-Zsye-US")); + + EXPECT_NE(createFontLanguage("en-Latn-US"), createFontLanguage("en-Latn-GB")); + EXPECT_NE(createFontLanguage("en-Latn-US"), createFontLanguage("en-Zsye-US")); + EXPECT_NE(createFontLanguage("es-Latn-US"), createFontLanguage("en-Latn-US")); + EXPECT_NE(createFontLanguage("zh-Hant-HK"), createFontLanguage("zh-Hant-TW")); } TEST_F(FontLanguagesTest, undEmojiTests) { - FontLanguage emoji = createFontLanguage("und-Zsye"); - EXPECT_EQ(FontLanguage::EMSTYLE_EMOJI, emoji.getEmojiStyle()); + FontLanguage emoji = createFontLanguage("und-Zsye"); + EXPECT_EQ(FontLanguage::EMSTYLE_EMOJI, emoji.getEmojiStyle()); - FontLanguage und = createFontLanguage("und"); - EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, und.getEmojiStyle()); - EXPECT_FALSE(emoji == und); + FontLanguage und = createFontLanguage("und"); + EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, und.getEmojiStyle()); + EXPECT_FALSE(emoji == und); - FontLanguage undExample = createFontLanguage("und-example"); - EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, undExample.getEmojiStyle()); - EXPECT_FALSE(emoji == undExample); + FontLanguage undExample = createFontLanguage("und-example"); + EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, undExample.getEmojiStyle()); + EXPECT_FALSE(emoji == undExample); } TEST_F(FontLanguagesTest, subtagEmojiTest) { - std::string subtagEmojiStrings[] = { - // Duplicate subtag case. - "und-Latn-u-em-emoji-u-em-text", - - // Strings that contain language. - "und-u-em-emoji", - "en-u-em-emoji", - - // Strings that contain the script. - "und-Jpan-u-em-emoji", - "en-Latn-u-em-emoji", - "und-Zsym-u-em-emoji", - "und-Zsye-u-em-emoji", - "en-Zsym-u-em-emoji", - "en-Zsye-u-em-emoji", - - // Strings that contain the county. 
- "und-US-u-em-emoji", - "en-US-u-em-emoji", - "es-419-u-em-emoji", - "und-Latn-US-u-em-emoji", - "en-Zsym-US-u-em-emoji", - "en-Zsye-US-u-em-emoji", - "es-Zsye-419-u-em-emoji", - }; - - for (auto subtagEmojiString : subtagEmojiStrings) { - SCOPED_TRACE("Test for \"" + subtagEmojiString + "\""); - FontLanguage subtagEmoji = createFontLanguage(subtagEmojiString); - EXPECT_EQ(FontLanguage::EMSTYLE_EMOJI, subtagEmoji.getEmojiStyle()); - } + std::string subtagEmojiStrings[] = { + // Duplicate subtag case. + "und-Latn-u-em-emoji-u-em-text", + + // Strings that contain language. + "und-u-em-emoji", + "en-u-em-emoji", + + // Strings that contain the script. + "und-Jpan-u-em-emoji", + "en-Latn-u-em-emoji", + "und-Zsym-u-em-emoji", + "und-Zsye-u-em-emoji", + "en-Zsym-u-em-emoji", + "en-Zsye-u-em-emoji", + + // Strings that contain the county. + "und-US-u-em-emoji", + "en-US-u-em-emoji", + "es-419-u-em-emoji", + "und-Latn-US-u-em-emoji", + "en-Zsym-US-u-em-emoji", + "en-Zsye-US-u-em-emoji", + "es-Zsye-419-u-em-emoji", + }; + + for (auto subtagEmojiString : subtagEmojiStrings) { + SCOPED_TRACE("Test for \"" + subtagEmojiString + "\""); + FontLanguage subtagEmoji = createFontLanguage(subtagEmojiString); + EXPECT_EQ(FontLanguage::EMSTYLE_EMOJI, subtagEmoji.getEmojiStyle()); + } } TEST_F(FontLanguagesTest, subtagTextTest) { - std::string subtagTextStrings[] = { - // Duplicate subtag case. - "und-Latn-u-em-text-u-em-emoji", - - // Strings that contain language. - "und-u-em-text", - "en-u-em-text", - - // Strings that contain the script. - "und-Latn-u-em-text", - "en-Jpan-u-em-text", - "und-Zsym-u-em-text", - "und-Zsye-u-em-text", - "en-Zsym-u-em-text", - "en-Zsye-u-em-text", - - // Strings that contain the county. 
- "und-US-u-em-text", - "en-US-u-em-text", - "es-419-u-em-text", - "und-Latn-US-u-em-text", - "en-Zsym-US-u-em-text", - "en-Zsye-US-u-em-text", - "es-Zsye-419-u-em-text", - }; - - for (auto subtagTextString : subtagTextStrings) { - SCOPED_TRACE("Test for \"" + subtagTextString + "\""); - FontLanguage subtagText = createFontLanguage(subtagTextString); - EXPECT_EQ(FontLanguage::EMSTYLE_TEXT, subtagText.getEmojiStyle()); - } + std::string subtagTextStrings[] = { + // Duplicate subtag case. + "und-Latn-u-em-text-u-em-emoji", + + // Strings that contain language. + "und-u-em-text", + "en-u-em-text", + + // Strings that contain the script. + "und-Latn-u-em-text", + "en-Jpan-u-em-text", + "und-Zsym-u-em-text", + "und-Zsye-u-em-text", + "en-Zsym-u-em-text", + "en-Zsye-u-em-text", + + // Strings that contain the county. + "und-US-u-em-text", + "en-US-u-em-text", + "es-419-u-em-text", + "und-Latn-US-u-em-text", + "en-Zsym-US-u-em-text", + "en-Zsye-US-u-em-text", + "es-Zsye-419-u-em-text", + }; + + for (auto subtagTextString : subtagTextStrings) { + SCOPED_TRACE("Test for \"" + subtagTextString + "\""); + FontLanguage subtagText = createFontLanguage(subtagTextString); + EXPECT_EQ(FontLanguage::EMSTYLE_TEXT, subtagText.getEmojiStyle()); + } } // TODO: add more "und" language cases whose language and script are // unexpectedly translated to en-Latn by ICU. TEST_F(FontLanguagesTest, subtagDefaultTest) { - std::string subtagDefaultStrings[] = { - // Duplicate subtag case. - "en-Latn-u-em-default-u-em-emoji", - "en-Latn-u-em-default-u-em-text", - - // Strings that contain language. - "und-u-em-default", - "en-u-em-default", - - // Strings that contain the script. - "en-Latn-u-em-default", - "en-Zsym-u-em-default", - "en-Zsye-u-em-default", - - // Strings that contain the county. 
- "en-US-u-em-default", - "en-Latn-US-u-em-default", - "es-Latn-419-u-em-default", - "en-Zsym-US-u-em-default", - "en-Zsye-US-u-em-default", - "es-Zsye-419-u-em-default", - }; - - for (auto subtagDefaultString : subtagDefaultStrings) { - SCOPED_TRACE("Test for \"" + subtagDefaultString + "\""); - FontLanguage subtagDefault = createFontLanguage(subtagDefaultString); - EXPECT_EQ(FontLanguage::EMSTYLE_DEFAULT, subtagDefault.getEmojiStyle()); - } + std::string subtagDefaultStrings[] = { + // Duplicate subtag case. + "en-Latn-u-em-default-u-em-emoji", + "en-Latn-u-em-default-u-em-text", + + // Strings that contain language. + "und-u-em-default", + "en-u-em-default", + + // Strings that contain the script. + "en-Latn-u-em-default", + "en-Zsym-u-em-default", + "en-Zsye-u-em-default", + + // Strings that contain the county. + "en-US-u-em-default", + "en-Latn-US-u-em-default", + "es-Latn-419-u-em-default", + "en-Zsym-US-u-em-default", + "en-Zsye-US-u-em-default", + "es-Zsye-419-u-em-default", + }; + + for (auto subtagDefaultString : subtagDefaultStrings) { + SCOPED_TRACE("Test for \"" + subtagDefaultString + "\""); + FontLanguage subtagDefault = createFontLanguage(subtagDefaultString); + EXPECT_EQ(FontLanguage::EMSTYLE_DEFAULT, subtagDefault.getEmojiStyle()); + } } TEST_F(FontLanguagesTest, subtagEmptyTest) { - std::string subtagEmptyStrings[] = { - "und", - "jp", - "en-US", - "en-Latn", - "en-Latn-US", - "en-Latn-US-u-em", - "en-Latn-US-u-em-defaultemoji", - }; - - for (auto subtagEmptyString : subtagEmptyStrings) { - SCOPED_TRACE("Test for \"" + subtagEmptyString + "\""); - FontLanguage subtagEmpty = createFontLanguage(subtagEmptyString); - EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, subtagEmpty.getEmojiStyle()); - } + std::string subtagEmptyStrings[] = { + "und", + "jp", + "en-US", + "en-Latn", + "en-Latn-US", + "en-Latn-US-u-em", + "en-Latn-US-u-em-defaultemoji", + }; + + for (auto subtagEmptyString : subtagEmptyStrings) { + SCOPED_TRACE("Test for \"" + subtagEmptyString + 
"\""); + FontLanguage subtagEmpty = createFontLanguage(subtagEmptyString); + EXPECT_EQ(FontLanguage::EMSTYLE_EMPTY, subtagEmpty.getEmojiStyle()); + } } TEST_F(FontLanguagesTest, registerLanguageListTest) { - EXPECT_EQ(0UL, FontStyle::registerLanguageList("")); - EXPECT_NE(0UL, FontStyle::registerLanguageList("en")); - EXPECT_NE(0UL, FontStyle::registerLanguageList("jp")); - EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans")); - - EXPECT_EQ(FontStyle::registerLanguageList("en"), FontStyle::registerLanguageList("en")); - EXPECT_NE(FontStyle::registerLanguageList("en"), FontStyle::registerLanguageList("jp")); - - EXPECT_EQ(FontStyle::registerLanguageList("en,zh-Hans"), - FontStyle::registerLanguageList("en,zh-Hans")); - EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), - FontStyle::registerLanguageList("zh-Hans,en")); - EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), - FontStyle::registerLanguageList("jp")); - EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), - FontStyle::registerLanguageList("en")); - EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), - FontStyle::registerLanguageList("en,zh-Hant")); + EXPECT_EQ(0UL, FontStyle::registerLanguageList("")); + EXPECT_NE(0UL, FontStyle::registerLanguageList("en")); + EXPECT_NE(0UL, FontStyle::registerLanguageList("jp")); + EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans")); + + EXPECT_EQ(FontStyle::registerLanguageList("en"), + FontStyle::registerLanguageList("en")); + EXPECT_NE(FontStyle::registerLanguageList("en"), + FontStyle::registerLanguageList("jp")); + + EXPECT_EQ(FontStyle::registerLanguageList("en,zh-Hans"), + FontStyle::registerLanguageList("en,zh-Hans")); + EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), + FontStyle::registerLanguageList("zh-Hans,en")); + EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), + FontStyle::registerLanguageList("jp")); + EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), + FontStyle::registerLanguageList("en")); 
+ EXPECT_NE(FontStyle::registerLanguageList("en,zh-Hans"), + FontStyle::registerLanguageList("en,zh-Hant")); } // The test font has following glyphs. @@ -504,182 +539,188 @@ TEST_F(FontLanguagesTest, registerLanguageListTest) { const char kVsTestFont[] = kTestFontDir "VariationSelectorTest-Regular.ttf"; class FontFamilyTest : public ICUTestBase { -public: - virtual void SetUp() override { - ICUTestBase::SetUp(); - if (access(kVsTestFont, R_OK) != 0) { - FAIL() << "Unable to read " << kVsTestFont << ". " - << "Please prepare the test data directory. " - << "For more details, please see how_to_run.txt."; - } + public: + virtual void SetUp() override { + ICUTestBase::SetUp(); + if (access(kVsTestFont, R_OK) != 0) { + FAIL() << "Unable to read " << kVsTestFont << ". " + << "Please prepare the test data directory. " + << "For more details, please see how_to_run.txt."; } + } }; // Asserts that the font family has glyphs for and only for specified codepoint // and variationSelector pairs. -void expectVSGlyphs(FontFamily* family, uint32_t codepoint, const std::set& vs) { - for (uint32_t i = 0xFE00; i <= 0xE01EF; ++i) { - // Move to variation selectors supplements after variation selectors. - if (i == 0xFF00) { - i = 0xE0100; - } - if (vs.find(i) == vs.end()) { - EXPECT_FALSE(family->hasGlyph(codepoint, i)) - << "Glyph for U+" << std::hex << codepoint << " U+" << i; - } else { - EXPECT_TRUE(family->hasGlyph(codepoint, i)) - << "Glyph for U+" << std::hex << codepoint << " U+" << i; - } - +void expectVSGlyphs(FontFamily* family, + uint32_t codepoint, + const std::set& vs) { + for (uint32_t i = 0xFE00; i <= 0xE01EF; ++i) { + // Move to variation selectors supplements after variation selectors. 
+ if (i == 0xFF00) { + i = 0xE0100; + } + if (vs.find(i) == vs.end()) { + EXPECT_FALSE(family->hasGlyph(codepoint, i)) + << "Glyph for U+" << std::hex << codepoint << " U+" << i; + } else { + EXPECT_TRUE(family->hasGlyph(codepoint, i)) + << "Glyph for U+" << std::hex << codepoint << " U+" << i; } + } } TEST_F(FontFamilyTest, hasVariationSelectorTest) { - std::shared_ptr minikinFont(new MinikinFontForTest(kVsTestFont)); - std::shared_ptr family( - new FontFamily(std::vector{ Font(minikinFont, FontStyle()) })); - - std::lock_guard _l(gMinikinLock); - - const uint32_t kVS1 = 0xFE00; - const uint32_t kVS2 = 0xFE01; - const uint32_t kVS3 = 0xFE02; - const uint32_t kVS17 = 0xE0100; - const uint32_t kVS18 = 0xE0101; - const uint32_t kVS19 = 0xE0102; - const uint32_t kVS20 = 0xE0103; - - const uint32_t kSupportedChar1 = 0x82A6; - EXPECT_TRUE(family->getCoverage().get(kSupportedChar1)); - expectVSGlyphs(family.get(), kSupportedChar1, std::set({kVS1, kVS17, kVS18, kVS19})); - - const uint32_t kSupportedChar2 = 0x845B; - EXPECT_TRUE(family->getCoverage().get(kSupportedChar2)); - expectVSGlyphs(family.get(), kSupportedChar2, std::set({kVS2, kVS18, kVS19, kVS20})); - - const uint32_t kNoVsSupportedChar = 0x537F; - EXPECT_TRUE(family->getCoverage().get(kNoVsSupportedChar)); - expectVSGlyphs(family.get(), kNoVsSupportedChar, std::set()); - - const uint32_t kVsOnlySupportedChar = 0x717D; - EXPECT_FALSE(family->getCoverage().get(kVsOnlySupportedChar)); - expectVSGlyphs(family.get(), kVsOnlySupportedChar, std::set({kVS3, kVS19, kVS20})); - - const uint32_t kNotSupportedChar = 0x845C; - EXPECT_FALSE(family->getCoverage().get(kNotSupportedChar)); - expectVSGlyphs(family.get(), kNotSupportedChar, std::set()); + std::shared_ptr minikinFont(new MinikinFontForTest(kVsTestFont)); + std::shared_ptr family( + new FontFamily(std::vector{Font(minikinFont, FontStyle())})); + + std::lock_guard _l(gMinikinLock); + + const uint32_t kVS1 = 0xFE00; + const uint32_t kVS2 = 0xFE01; + const uint32_t 
kVS3 = 0xFE02; + const uint32_t kVS17 = 0xE0100; + const uint32_t kVS18 = 0xE0101; + const uint32_t kVS19 = 0xE0102; + const uint32_t kVS20 = 0xE0103; + + const uint32_t kSupportedChar1 = 0x82A6; + EXPECT_TRUE(family->getCoverage().get(kSupportedChar1)); + expectVSGlyphs(family.get(), kSupportedChar1, + std::set({kVS1, kVS17, kVS18, kVS19})); + + const uint32_t kSupportedChar2 = 0x845B; + EXPECT_TRUE(family->getCoverage().get(kSupportedChar2)); + expectVSGlyphs(family.get(), kSupportedChar2, + std::set({kVS2, kVS18, kVS19, kVS20})); + + const uint32_t kNoVsSupportedChar = 0x537F; + EXPECT_TRUE(family->getCoverage().get(kNoVsSupportedChar)); + expectVSGlyphs(family.get(), kNoVsSupportedChar, std::set()); + + const uint32_t kVsOnlySupportedChar = 0x717D; + EXPECT_FALSE(family->getCoverage().get(kVsOnlySupportedChar)); + expectVSGlyphs(family.get(), kVsOnlySupportedChar, + std::set({kVS3, kVS19, kVS20})); + + const uint32_t kNotSupportedChar = 0x845C; + EXPECT_FALSE(family->getCoverage().get(kNotSupportedChar)); + expectVSGlyphs(family.get(), kNotSupportedChar, std::set()); } TEST_F(FontFamilyTest, hasVSTableTest) { - struct TestCase { - const std::string fontPath; - bool hasVSTable; - } testCases[] = { - { kTestFontDir "Ja.ttf", true }, - { kTestFontDir "ZhHant.ttf", true }, - { kTestFontDir "ZhHans.ttf", true }, - { kTestFontDir "Italic.ttf", false }, - { kTestFontDir "Bold.ttf", false }, - { kTestFontDir "BoldItalic.ttf", false }, - }; - - for (auto testCase : testCases) { - SCOPED_TRACE(testCase.hasVSTable ? - "Font " + testCase.fontPath + " should have a variation sequence table." 
: - "Font " + testCase.fontPath + " shouldn't have a variation sequence table."); - - std::shared_ptr minikinFont( - new MinikinFontForTest(testCase.fontPath)); - std::shared_ptr family(new FontFamily( - std::vector{ Font(minikinFont, FontStyle()) })); - std::lock_guard _l(gMinikinLock); - EXPECT_EQ(testCase.hasVSTable, family->hasVSTable()); - } + struct TestCase { + const std::string fontPath; + bool hasVSTable; + } testCases[] = { + {kTestFontDir "Ja.ttf", true}, {kTestFontDir "ZhHant.ttf", true}, + {kTestFontDir "ZhHans.ttf", true}, {kTestFontDir "Italic.ttf", false}, + {kTestFontDir "Bold.ttf", false}, {kTestFontDir "BoldItalic.ttf", false}, + }; + + for (auto testCase : testCases) { + SCOPED_TRACE(testCase.hasVSTable + ? "Font " + testCase.fontPath + + " should have a variation sequence table." + : "Font " + testCase.fontPath + + " shouldn't have a variation sequence table."); + + std::shared_ptr minikinFont( + new MinikinFontForTest(testCase.fontPath)); + std::shared_ptr family( + new FontFamily(std::vector{Font(minikinFont, FontStyle())})); + std::lock_guard _l(gMinikinLock); + EXPECT_EQ(testCase.hasVSTable, family->hasVSTable()); + } } TEST_F(FontFamilyTest, createFamilyWithVariationTest) { - // This font has 'wdth' and 'wght' axes. - const char kMultiAxisFont[] = kTestFontDir "/MultiAxis.ttf"; - const char kNoAxisFont[] = kTestFontDir "/Regular.ttf"; - - std::shared_ptr multiAxisFamily = makeFamily(kMultiAxisFont); - std::shared_ptr noAxisFamily = makeFamily(kNoAxisFont); - - { - // Do not ceate new instance if none of variations are specified. - EXPECT_EQ(nullptr, - multiAxisFamily->createFamilyWithVariation(std::vector())); - EXPECT_EQ(nullptr, - noAxisFamily->createFamilyWithVariation(std::vector())); - } - { - // New instance should be used for supported variation. 
- std::vector variations = {{MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}}; - std::shared_ptr newFamily( - multiAxisFamily->createFamilyWithVariation(variations)); - EXPECT_NE(nullptr, newFamily.get()); - EXPECT_NE(multiAxisFamily.get(), newFamily.get()); - EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); - } - { - // New instance should be used for supported variation. (multiple variations case) - std::vector variations = { - { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f }, - { MinikinFont::MakeTag('w', 'g', 'h', 't'), 1.0f } - }; - std::shared_ptr newFamily( - multiAxisFamily->createFamilyWithVariation(variations)); - EXPECT_NE(nullptr, newFamily.get()); - EXPECT_NE(multiAxisFamily.get(), newFamily.get()); - EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); - } - { - // Do not ceate new instance if none of variations are supported. - std::vector variations = { - { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f } - }; - EXPECT_EQ(nullptr, multiAxisFamily->createFamilyWithVariation(variations)); - EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); - } - { - // At least one axis is supported, should create new instance. - std::vector variations = { - { MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f }, - { MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f } - }; - std::shared_ptr newFamily( - multiAxisFamily->createFamilyWithVariation(variations)); - EXPECT_NE(nullptr, newFamily.get()); - EXPECT_NE(multiAxisFamily.get(), newFamily.get()); - EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); - } + // This font has 'wdth' and 'wght' axes. + const char kMultiAxisFont[] = kTestFontDir "/MultiAxis.ttf"; + const char kNoAxisFont[] = kTestFontDir "/Regular.ttf"; + + std::shared_ptr multiAxisFamily = makeFamily(kMultiAxisFont); + std::shared_ptr noAxisFamily = makeFamily(kNoAxisFont); + + { + // Do not ceate new instance if none of variations are specified. 
+ EXPECT_EQ(nullptr, multiAxisFamily->createFamilyWithVariation( + std::vector())); + EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation( + std::vector())); + } + { + // New instance should be used for supported variation. + std::vector variations = { + {MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}}; + std::shared_ptr newFamily( + multiAxisFamily->createFamilyWithVariation(variations)); + EXPECT_NE(nullptr, newFamily.get()); + EXPECT_NE(multiAxisFamily.get(), newFamily.get()); + EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); + } + { + // New instance should be used for supported variation. (multiple variations + // case) + std::vector variations = { + {MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}, + {MinikinFont::MakeTag('w', 'g', 'h', 't'), 1.0f}}; + std::shared_ptr newFamily( + multiAxisFamily->createFamilyWithVariation(variations)); + EXPECT_NE(nullptr, newFamily.get()); + EXPECT_NE(multiAxisFamily.get(), newFamily.get()); + EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); + } + { + // Do not ceate new instance if none of variations are supported. + std::vector variations = { + {MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f}}; + EXPECT_EQ(nullptr, multiAxisFamily->createFamilyWithVariation(variations)); + EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); + } + { + // At least one axis is supported, should create new instance. + std::vector variations = { + {MinikinFont::MakeTag('w', 'd', 't', 'h'), 1.0f}, + {MinikinFont::MakeTag('Z', 'Z', 'Z', 'Z'), 1.0f}}; + std::shared_ptr newFamily( + multiAxisFamily->createFamilyWithVariation(variations)); + EXPECT_NE(nullptr, newFamily.get()); + EXPECT_NE(multiAxisFamily.get(), newFamily.get()); + EXPECT_EQ(nullptr, noAxisFamily->createFamilyWithVariation(variations)); + } } TEST_F(FontFamilyTest, coverageTableSelectionTest) { - // This font supports U+0061. 
The cmap subtable is format 4 and its platform ID is 0 and - // encoding ID is 1. - const char kUnicodeEncoding1Font[] = kTestFontDir "UnicodeBMPOnly.ttf"; - - // This font supports U+0061. The cmap subtable is format 4 and its platform ID is 0 and - // encoding ID is 3. - const char kUnicodeEncoding3Font[] = kTestFontDir "UnicodeBMPOnly2.ttf"; - - // This font has both cmap format 4 subtable which platform ID is 0 and encoding ID is 1 - // and cmap format 14 subtable which platform ID is 0 and encoding ID is 10. - // U+0061 is listed in both subtable but U+1F926 is only listed in latter. - const char kUnicodeEncoding4Font[] = kTestFontDir "UnicodeUCS4.ttf"; - - std::shared_ptr unicodeEnc1Font = makeFamily(kUnicodeEncoding1Font); - std::shared_ptr unicodeEnc3Font = makeFamily(kUnicodeEncoding3Font); - std::shared_ptr unicodeEnc4Font = makeFamily(kUnicodeEncoding4Font); - - std::lock_guard _l(gMinikinLock); - - EXPECT_TRUE(unicodeEnc1Font->hasGlyph(0x0061, 0)); - EXPECT_TRUE(unicodeEnc3Font->hasGlyph(0x0061, 0)); - EXPECT_TRUE(unicodeEnc4Font->hasGlyph(0x0061, 0)); - - EXPECT_TRUE(unicodeEnc4Font->hasGlyph(0x1F926, 0)); + // This font supports U+0061. The cmap subtable is format 4 and its platform + // ID is 0 and encoding ID is 1. + const char kUnicodeEncoding1Font[] = kTestFontDir "UnicodeBMPOnly.ttf"; + + // This font supports U+0061. The cmap subtable is format 4 and its platform + // ID is 0 and encoding ID is 3. + const char kUnicodeEncoding3Font[] = kTestFontDir "UnicodeBMPOnly2.ttf"; + + // This font has both cmap format 4 subtable which platform ID is 0 and + // encoding ID is 1 and cmap format 14 subtable which platform ID is 0 and + // encoding ID is 10. U+0061 is listed in both subtable but U+1F926 is only + // listed in latter. 
+ const char kUnicodeEncoding4Font[] = kTestFontDir "UnicodeUCS4.ttf"; + + std::shared_ptr unicodeEnc1Font = + makeFamily(kUnicodeEncoding1Font); + std::shared_ptr unicodeEnc3Font = + makeFamily(kUnicodeEncoding3Font); + std::shared_ptr unicodeEnc4Font = + makeFamily(kUnicodeEncoding4Font); + + std::lock_guard _l(gMinikinLock); + + EXPECT_TRUE(unicodeEnc1Font->hasGlyph(0x0061, 0)); + EXPECT_TRUE(unicodeEnc3Font->hasGlyph(0x0061, 0)); + EXPECT_TRUE(unicodeEnc4Font->hasGlyph(0x0061, 0)); + + EXPECT_TRUE(unicodeEnc4Font->hasGlyph(0x1F926, 0)); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/FontLanguageListCacheTest.cpp b/third_party/txt/tests/old/unittest/FontLanguageListCacheTest.cpp index 69b93143404eda0a3c933a879c89ca6e1dc5e4d3..5c20f0977f5c800a332cfb06e179fc4d15852859 100644 --- a/third_party/txt/tests/old/unittest/FontLanguageListCacheTest.cpp +++ b/third_party/txt/tests/old/unittest/FontLanguageListCacheTest.cpp @@ -18,8 +18,8 @@ #include -#include "lib/txt/libs/minikin/FontLanguageListCache.h" #include "ICUTestBase.h" +#include "lib/txt/libs/minikin/FontLanguageListCache.h" #include "lib/txt/libs/minikin/MinikinInternal.h" namespace minikin { @@ -27,47 +27,50 @@ namespace minikin { typedef ICUTestBase FontLanguageListCacheTest; TEST_F(FontLanguageListCacheTest, getId) { - EXPECT_NE(0UL, FontStyle::registerLanguageList("en")); - EXPECT_NE(0UL, FontStyle::registerLanguageList("jp")); - EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans")); + EXPECT_NE(0UL, FontStyle::registerLanguageList("en")); + EXPECT_NE(0UL, FontStyle::registerLanguageList("jp")); + EXPECT_NE(0UL, FontStyle::registerLanguageList("en,zh-Hans")); - std::lock_guard _l(gMinikinLock); - EXPECT_EQ(0UL, FontLanguageListCache::getId("")); + std::lock_guard _l(gMinikinLock); + EXPECT_EQ(0UL, FontLanguageListCache::getId("")); - EXPECT_EQ(FontLanguageListCache::getId("en"), FontLanguageListCache::getId("en")); - EXPECT_NE(FontLanguageListCache::getId("en"), 
FontLanguageListCache::getId("jp")); + EXPECT_EQ(FontLanguageListCache::getId("en"), + FontLanguageListCache::getId("en")); + EXPECT_NE(FontLanguageListCache::getId("en"), + FontLanguageListCache::getId("jp")); - EXPECT_EQ(FontLanguageListCache::getId("en,zh-Hans"), - FontLanguageListCache::getId("en,zh-Hans")); - EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), - FontLanguageListCache::getId("zh-Hans,en")); - EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), - FontLanguageListCache::getId("jp")); - EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), - FontLanguageListCache::getId("en")); - EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), - FontLanguageListCache::getId("en,zh-Hant")); + EXPECT_EQ(FontLanguageListCache::getId("en,zh-Hans"), + FontLanguageListCache::getId("en,zh-Hans")); + EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), + FontLanguageListCache::getId("zh-Hans,en")); + EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), + FontLanguageListCache::getId("jp")); + EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), + FontLanguageListCache::getId("en")); + EXPECT_NE(FontLanguageListCache::getId("en,zh-Hans"), + FontLanguageListCache::getId("en,zh-Hant")); } TEST_F(FontLanguageListCacheTest, getById) { - std::lock_guard _l(gMinikinLock); - uint32_t enLangId = FontLanguageListCache::getId("en"); - uint32_t jpLangId = FontLanguageListCache::getId("jp"); - FontLanguage english = FontLanguageListCache::getById(enLangId)[0]; - FontLanguage japanese = FontLanguageListCache::getById(jpLangId)[0]; + std::lock_guard _l(gMinikinLock); + uint32_t enLangId = FontLanguageListCache::getId("en"); + uint32_t jpLangId = FontLanguageListCache::getId("jp"); + FontLanguage english = FontLanguageListCache::getById(enLangId)[0]; + FontLanguage japanese = FontLanguageListCache::getById(jpLangId)[0]; - const FontLanguages& defLangs = FontLanguageListCache::getById(0); - EXPECT_TRUE(defLangs.empty()); + const FontLanguages& defLangs = 
FontLanguageListCache::getById(0); + EXPECT_TRUE(defLangs.empty()); - const FontLanguages& langs = FontLanguageListCache::getById(FontLanguageListCache::getId("en")); - ASSERT_EQ(1UL, langs.size()); - EXPECT_EQ(english, langs[0]); + const FontLanguages& langs = + FontLanguageListCache::getById(FontLanguageListCache::getId("en")); + ASSERT_EQ(1UL, langs.size()); + EXPECT_EQ(english, langs[0]); - const FontLanguages& langs2 = - FontLanguageListCache::getById(FontLanguageListCache::getId("en,jp")); - ASSERT_EQ(2UL, langs2.size()); - EXPECT_EQ(english, langs2[0]); - EXPECT_EQ(japanese, langs2[1]); + const FontLanguages& langs2 = + FontLanguageListCache::getById(FontLanguageListCache::getId("en,jp")); + ASSERT_EQ(2UL, langs2.size()); + EXPECT_EQ(english, langs2[0]); + EXPECT_EQ(japanese, langs2[1]); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/GraphemeBreakTests.cpp b/third_party/txt/tests/old/unittest/GraphemeBreakTests.cpp index 24030220661bc7a5a42326473281098eaee01be0..7350b5b0ef47629c924d3ba9c3ea6b34e48d9042 100644 --- a/third_party/txt/tests/old/unittest/GraphemeBreakTests.cpp +++ b/third_party/txt/tests/old/unittest/GraphemeBreakTests.cpp @@ -14,304 +14,358 @@ * limitations under the License. 
*/ -#include #include +#include #include namespace minikin { bool IsBreak(const char* src) { - const size_t BUF_SIZE = 256; - uint16_t buf[BUF_SIZE]; - size_t offset; - size_t size; - ParseUnicode(buf, BUF_SIZE, src, &size, &offset); - return GraphemeBreak::isGraphemeBreak(nullptr, buf, 0, size, offset); + const size_t BUF_SIZE = 256; + uint16_t buf[BUF_SIZE]; + size_t offset; + size_t size; + ParseUnicode(buf, BUF_SIZE, src, &size, &offset); + return GraphemeBreak::isGraphemeBreak(nullptr, buf, 0, size, offset); } bool IsBreakWithAdvances(const float* advances, const char* src) { - const size_t BUF_SIZE = 256; - uint16_t buf[BUF_SIZE]; - size_t offset; - size_t size; - ParseUnicode(buf, BUF_SIZE, src, &size, &offset); - return GraphemeBreak::isGraphemeBreak(advances, buf, 0, size, offset); + const size_t BUF_SIZE = 256; + uint16_t buf[BUF_SIZE]; + size_t offset; + size_t size; + ParseUnicode(buf, BUF_SIZE, src, &size, &offset); + return GraphemeBreak::isGraphemeBreak(advances, buf, 0, size, offset); } TEST(GraphemeBreak, utf16) { - EXPECT_FALSE(IsBreak("U+D83C | U+DC31")); // emoji, U+1F431 - - // tests for invalid UTF-16 - EXPECT_TRUE(IsBreak("U+D800 | U+D800")); // two leading surrogates - EXPECT_TRUE(IsBreak("U+DC00 | U+DC00")); // two trailing surrogates - EXPECT_TRUE(IsBreak("'a' | U+D800")); // lonely leading surrogate - EXPECT_TRUE(IsBreak("U+DC00 | 'a'")); // lonely trailing surrogate - EXPECT_TRUE(IsBreak("U+D800 | 'a'")); // leading surrogate followed by non-surrogate - EXPECT_TRUE(IsBreak("'a' | U+DC00")); // non-surrogate followed by trailing surrogate + EXPECT_FALSE(IsBreak("U+D83C | U+DC31")); // emoji, U+1F431 + + // tests for invalid UTF-16 + EXPECT_TRUE(IsBreak("U+D800 | U+D800")); // two leading surrogates + EXPECT_TRUE(IsBreak("U+DC00 | U+DC00")); // two trailing surrogates + EXPECT_TRUE(IsBreak("'a' | U+D800")); // lonely leading surrogate + EXPECT_TRUE(IsBreak("U+DC00 | 'a'")); // lonely trailing surrogate + EXPECT_TRUE( + IsBreak("U+D800 | 
'a'")); // leading surrogate followed by non-surrogate + EXPECT_TRUE( + IsBreak("'a' | U+DC00")); // non-surrogate followed by trailing surrogate } TEST(GraphemeBreak, rules) { - // Rule GB1, sot ÷; Rule GB2, ÷ eot - EXPECT_TRUE(IsBreak("| 'a'")); - EXPECT_TRUE(IsBreak("'a' |")); - - // Rule GB3, CR x LF - EXPECT_FALSE(IsBreak("U+000D | U+000A")); // CR x LF - - // Rule GB4, (Control | CR | LF) ÷ - EXPECT_TRUE(IsBreak("'a' | U+2028")); // Line separator - EXPECT_TRUE(IsBreak("'a' | U+000D")); // LF - EXPECT_TRUE(IsBreak("'a' | U+000A")); // CR - - // Rule GB5, ÷ (Control | CR | LF) - EXPECT_TRUE(IsBreak("U+2028 | 'a'")); // Line separator - EXPECT_TRUE(IsBreak("U+000D | 'a'")); // LF - EXPECT_TRUE(IsBreak("U+000A | 'a'")); // CR - - // Rule GB6, L x ( L | V | LV | LVT ) - EXPECT_FALSE(IsBreak("U+1100 | U+1100")); // L x L - EXPECT_FALSE(IsBreak("U+1100 | U+1161")); // L x V - EXPECT_FALSE(IsBreak("U+1100 | U+AC00")); // L x LV - EXPECT_FALSE(IsBreak("U+1100 | U+AC01")); // L x LVT - - // Rule GB7, ( LV | V ) x ( V | T ) - EXPECT_FALSE(IsBreak("U+AC00 | U+1161")); // LV x V - EXPECT_FALSE(IsBreak("U+1161 | U+1161")); // V x V - EXPECT_FALSE(IsBreak("U+AC00 | U+11A8")); // LV x T - EXPECT_FALSE(IsBreak("U+1161 | U+11A8")); // V x T - - // Rule GB8, ( LVT | T ) x T - EXPECT_FALSE(IsBreak("U+AC01 | U+11A8")); // LVT x T - EXPECT_FALSE(IsBreak("U+11A8 | U+11A8")); // T x T - - // Other hangul pairs not counted above _are_ breaks (GB10) - EXPECT_TRUE(IsBreak("U+AC00 | U+1100")); // LV x L - EXPECT_TRUE(IsBreak("U+AC01 | U+1100")); // LVT x L - EXPECT_TRUE(IsBreak("U+11A8 | U+1100")); // T x L - EXPECT_TRUE(IsBreak("U+11A8 | U+AC00")); // T x LV - EXPECT_TRUE(IsBreak("U+11A8 | U+AC01")); // T x LVT - - // Rule GB12 and Rule GB13, Regional_Indicator x Regional_Indicator - EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8")); - EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag) - EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // 
Regional indicator pair (flag) - EXPECT_FALSE(IsBreak("U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag) - - EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | U+1F1FA")); // Regional indicator pair (flag) - EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8 U+1F1FA")); // Regional indicator pair (flag) - // Same case as the two above, knowing that the first two characters ligate, which is what - // would typically happen. - const float firstPairLigated[] = {1.0, 0.0, 0.0, 0.0, 1.0, 0.0}; // Two entries per codepoint - EXPECT_TRUE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA")); - EXPECT_FALSE(IsBreakWithAdvances(firstPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA")); - // Repeat the tests, But now the font doesn't have a ligature for the first two characters, - // while it does have a ligature for the last two. This could happen for fonts that do not - // support some (potentially encoded later than they were developed) flags. - const float secondPairLigated[] = {1.0, 0.0, 1.0, 0.0, 0.0, 0.0}; - EXPECT_FALSE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA")); - EXPECT_TRUE(IsBreakWithAdvances(secondPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA")); - - EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA")); // Regional indicator pair (flag) - EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA")); // Regional indicator pair (flag) - - EXPECT_TRUE( - IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag) - EXPECT_FALSE( - IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag) - EXPECT_FALSE( - IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag) - - // Rule GB9, x (Extend | ZWJ) - EXPECT_FALSE(IsBreak("'a' | U+0301")); // combining accent - EXPECT_FALSE(IsBreak("'a' | U+200D")); // ZWJ - // Rule GB9a, x SpacingMark - EXPECT_FALSE(IsBreak("U+0915 | U+093E")); // KA, AA (spacing mark) - // Rule GB9b, Prepend x - // see tailoring test for prepend, as 
current ICU doesn't have any characters in the class - - // Rule GB999, Any ÷ Any - EXPECT_TRUE(IsBreak("'a' | 'b'")); - EXPECT_TRUE(IsBreak("'f' | 'i'")); // probable ligature - EXPECT_TRUE(IsBreak("U+0644 | U+0627")); // probable ligature, lam + alef - EXPECT_TRUE(IsBreak("U+4E00 | U+4E00")); // CJK ideographs - EXPECT_TRUE(IsBreak("'a' | U+1F1FA U+1F1F8")); // Regional indicator pair (flag) - EXPECT_TRUE(IsBreak("U+1F1FA U+1F1F8 | 'a'")); // Regional indicator pair (flag) - - // Extended rule for emoji tag sequence. - EXPECT_TRUE(IsBreak("'a' | U+1F3F4 'a'")); - EXPECT_TRUE(IsBreak("'a' U+1F3F4 | 'a'")); - - // Immediate tag_term after tag_base. - EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E007F 'a'")); - EXPECT_FALSE(IsBreak("U+1F3F4 | U+E007F")); - EXPECT_TRUE(IsBreak("'a' U+1F3F4 U+E007F | 'a'")); - - // Flag sequence - // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag sequence for the flag - // of Scotland. - // U+1F3F4 is WAVING BLACK FLAG. This can be a tag_base character. - // U+E0067 is TAG LATIN SMALL LETTER G. This can be a part of tag_spec. - // U+E0062 is TAG LATIN SMALL LETTER B. This can be a part of tag_spec. - // U+E0073 is TAG LATIN SMALL LETTER S. This can be a part of tag_spec. - // U+E0063 is TAG LATIN SMALL LETTER C. This can be a part of tag_spec. - // U+E0074 is TAG LATIN SMALL LETTER T. This can be a part of tag_spec. - // U+E007F is CANCEL TAG. This is a tag_term character. 
- EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F")); - EXPECT_FALSE(IsBreak("U+1F3F4 | U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F")); - EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 | U+E0062 U+E0073 U+E0063 U+E0074 U+E007F")); - EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 | U+E0073 U+E0063 U+E0074 U+E007F")); - EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 | U+E0063 U+E0074 U+E007F")); - EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 | U+E0074 U+E007F")); - EXPECT_FALSE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 | U+E007F")); - EXPECT_TRUE(IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F | 'a'")); + // Rule GB1, sot ÷; Rule GB2, ÷ eot + EXPECT_TRUE(IsBreak("| 'a'")); + EXPECT_TRUE(IsBreak("'a' |")); + + // Rule GB3, CR x LF + EXPECT_FALSE(IsBreak("U+000D | U+000A")); // CR x LF + + // Rule GB4, (Control | CR | LF) ÷ + EXPECT_TRUE(IsBreak("'a' | U+2028")); // Line separator + EXPECT_TRUE(IsBreak("'a' | U+000D")); // LF + EXPECT_TRUE(IsBreak("'a' | U+000A")); // CR + + // Rule GB5, ÷ (Control | CR | LF) + EXPECT_TRUE(IsBreak("U+2028 | 'a'")); // Line separator + EXPECT_TRUE(IsBreak("U+000D | 'a'")); // LF + EXPECT_TRUE(IsBreak("U+000A | 'a'")); // CR + + // Rule GB6, L x ( L | V | LV | LVT ) + EXPECT_FALSE(IsBreak("U+1100 | U+1100")); // L x L + EXPECT_FALSE(IsBreak("U+1100 | U+1161")); // L x V + EXPECT_FALSE(IsBreak("U+1100 | U+AC00")); // L x LV + EXPECT_FALSE(IsBreak("U+1100 | U+AC01")); // L x LVT + + // Rule GB7, ( LV | V ) x ( V | T ) + EXPECT_FALSE(IsBreak("U+AC00 | U+1161")); // LV x V + EXPECT_FALSE(IsBreak("U+1161 | U+1161")); // V x V + EXPECT_FALSE(IsBreak("U+AC00 | U+11A8")); // LV x T + EXPECT_FALSE(IsBreak("U+1161 | U+11A8")); // V x T + + // Rule GB8, ( LVT | T ) x T + EXPECT_FALSE(IsBreak("U+AC01 | U+11A8")); // LVT x T + EXPECT_FALSE(IsBreak("U+11A8 | U+11A8")); // T x T + + // Other hangul pairs not counted above _are_ breaks (GB10) + 
EXPECT_TRUE(IsBreak("U+AC00 | U+1100")); // LV x L + EXPECT_TRUE(IsBreak("U+AC01 | U+1100")); // LVT x L + EXPECT_TRUE(IsBreak("U+11A8 | U+1100")); // T x L + EXPECT_TRUE(IsBreak("U+11A8 | U+AC00")); // T x LV + EXPECT_TRUE(IsBreak("U+11A8 | U+AC01")); // T x LVT + + // Rule GB12 and Rule GB13, Regional_Indicator x Regional_Indicator + EXPECT_FALSE(IsBreak("U+1F1FA | U+1F1F8")); + EXPECT_TRUE(IsBreak( + "U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional indicator pair (flag) + EXPECT_FALSE(IsBreak( + "U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional indicator pair (flag) + EXPECT_FALSE(IsBreak( + "U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional indicator pair (flag) + + EXPECT_TRUE( + IsBreak("U+1F1FA U+1F1F8 | U+1F1FA")); // Regional indicator pair (flag) + EXPECT_FALSE( + IsBreak("U+1F1FA | U+1F1F8 U+1F1FA")); // Regional indicator pair (flag) + // Same case as the two above, knowing that the first two characters ligate, + // which is what would typically happen. + const float firstPairLigated[] = {1.0, 0.0, 0.0, 0.0, + 1.0, 0.0}; // Two entries per codepoint + EXPECT_TRUE( + IsBreakWithAdvances(firstPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA")); + EXPECT_FALSE( + IsBreakWithAdvances(firstPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA")); + // Repeat the tests, But now the font doesn't have a ligature for the first + // two characters, while it does have a ligature for the last two. This could + // happen for fonts that do not support some (potentially encoded later than + // they were developed) flags. 
+ const float secondPairLigated[] = {1.0, 0.0, 1.0, 0.0, 0.0, 0.0}; + EXPECT_FALSE( + IsBreakWithAdvances(secondPairLigated, "U+1F1FA U+1F1F8 | U+1F1FA")); + EXPECT_TRUE( + IsBreakWithAdvances(secondPairLigated, "U+1F1FA | U+1F1F8 U+1F1FA")); + + EXPECT_TRUE(IsBreak( + "'a' U+1F1FA U+1F1F8 | U+1F1FA")); // Regional indicator pair (flag) + EXPECT_FALSE(IsBreak( + "'a' U+1F1FA | U+1F1F8 U+1F1FA")); // Regional indicator pair (flag) + + EXPECT_TRUE(IsBreak("'a' U+1F1FA U+1F1F8 | U+1F1FA U+1F1F8")); // Regional + // indicator + // pair (flag) + EXPECT_FALSE(IsBreak("'a' U+1F1FA | U+1F1F8 U+1F1FA U+1F1F8")); // Regional + // indicator + // pair + // (flag) + EXPECT_FALSE(IsBreak("'a' U+1F1FA U+1F1F8 U+1F1FA | U+1F1F8")); // Regional + // indicator + // pair + // (flag) + + // Rule GB9, x (Extend | ZWJ) + EXPECT_FALSE(IsBreak("'a' | U+0301")); // combining accent + EXPECT_FALSE(IsBreak("'a' | U+200D")); // ZWJ + // Rule GB9a, x SpacingMark + EXPECT_FALSE(IsBreak("U+0915 | U+093E")); // KA, AA (spacing mark) + // Rule GB9b, Prepend x + // see tailoring test for prepend, as current ICU doesn't have any characters + // in the class + + // Rule GB999, Any ÷ Any + EXPECT_TRUE(IsBreak("'a' | 'b'")); + EXPECT_TRUE(IsBreak("'f' | 'i'")); // probable ligature + EXPECT_TRUE(IsBreak("U+0644 | U+0627")); // probable ligature, lam + alef + EXPECT_TRUE(IsBreak("U+4E00 | U+4E00")); // CJK ideographs + EXPECT_TRUE( + IsBreak("'a' | U+1F1FA U+1F1F8")); // Regional indicator pair (flag) + EXPECT_TRUE( + IsBreak("U+1F1FA U+1F1F8 | 'a'")); // Regional indicator pair (flag) + + // Extended rule for emoji tag sequence. + EXPECT_TRUE(IsBreak("'a' | U+1F3F4 'a'")); + EXPECT_TRUE(IsBreak("'a' U+1F3F4 | 'a'")); + + // Immediate tag_term after tag_base. 
+ EXPECT_TRUE(IsBreak("'a' | U+1F3F4 U+E007F 'a'")); + EXPECT_FALSE(IsBreak("U+1F3F4 | U+E007F")); + EXPECT_TRUE(IsBreak("'a' U+1F3F4 U+E007F | 'a'")); + + // Flag sequence + // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag + // sequence for the flag of Scotland. U+1F3F4 is WAVING BLACK FLAG. This can + // be a tag_base character. U+E0067 is TAG LATIN SMALL LETTER G. This can be a + // part of tag_spec. U+E0062 is TAG LATIN SMALL LETTER B. This can be a part + // of tag_spec. U+E0073 is TAG LATIN SMALL LETTER S. This can be a part of + // tag_spec. U+E0063 is TAG LATIN SMALL LETTER C. This can be a part of + // tag_spec. U+E0074 is TAG LATIN SMALL LETTER T. This can be a part of + // tag_spec. U+E007F is CANCEL TAG. This is a tag_term character. + EXPECT_TRUE( + IsBreak("'a' | U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F")); + EXPECT_FALSE( + IsBreak("U+1F3F4 | U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F")); + EXPECT_FALSE( + IsBreak("U+1F3F4 U+E0067 | U+E0062 U+E0073 U+E0063 U+E0074 U+E007F")); + EXPECT_FALSE( + IsBreak("U+1F3F4 U+E0067 U+E0062 | U+E0073 U+E0063 U+E0074 U+E007F")); + EXPECT_FALSE( + IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 | U+E0063 U+E0074 U+E007F")); + EXPECT_FALSE( + IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 | U+E0074 U+E007F")); + EXPECT_FALSE( + IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 | U+E007F")); + EXPECT_TRUE( + IsBreak("U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F | 'a'")); } TEST(GraphemeBreak, DISABLED_tailoring) { - // control characters that we interpret as "extend" - EXPECT_FALSE(IsBreak("'a' | U+00AD")); // soft hyphen - EXPECT_FALSE(IsBreak("'a' | U+200B")); // zwsp - EXPECT_FALSE(IsBreak("'a' | U+200E")); // lrm - EXPECT_FALSE(IsBreak("'a' | U+202A")); // lre - EXPECT_FALSE(IsBreak("'a' | U+E0041")); // tag character - - // UTC-approved characters for the Prepend class - EXPECT_FALSE(IsBreak("U+06DD | U+0661")); // arabic subtending mark + digit one - - 
EXPECT_TRUE(IsBreak("U+0E01 | U+0E33")); // Thai sara am - - // virama is not a grapheme break, but "pure killer" is - EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915")); // Devanagari ka+virama+ka - EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915")); // Devanagari ka+virama+ka - EXPECT_FALSE(IsBreak("U+0E01 | U+0E3A U+0E01")); // thai phinthu = pure killer - EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01")); // thai phinthu = pure killer - - // Repetition of above tests, but with a given advances array that implies everything - // became just one cluster. - const float conjoined[] = {1.0, 0.0, 0.0}; - EXPECT_FALSE(IsBreakWithAdvances(conjoined, - "U+0915 | U+094D U+0915")); // Devanagari ka+virama+ka - EXPECT_FALSE(IsBreakWithAdvances(conjoined, - "U+0915 U+094D | U+0915")); // Devanagari ka+virama+ka - EXPECT_FALSE(IsBreakWithAdvances(conjoined, - "U+0E01 | U+0E3A U+0E01")); // thai phinthu = pure killer - EXPECT_TRUE(IsBreakWithAdvances(conjoined, - "U+0E01 U+0E3A | U+0E01")); // thai phinthu = pure killer - - // Repetition of above tests, but with a given advances array that the virama did not - // form a cluster with the following consonant. The difference is that there is now - // a grapheme break after the virama in ka+virama+ka. 
- const float separate[] = {1.0, 0.0, 1.0}; - EXPECT_FALSE(IsBreakWithAdvances(separate, - "U+0915 | U+094D U+0915")); // Devanagari ka+virama+ka - EXPECT_TRUE(IsBreakWithAdvances(separate, - "U+0915 U+094D | U+0915")); // Devanagari ka+virama+ka - EXPECT_FALSE(IsBreakWithAdvances(separate, - "U+0E01 | U+0E3A U+0E01")); // thai phinthu = pure killer - EXPECT_TRUE(IsBreakWithAdvances(separate, - "U+0E01 U+0E3A | U+0E01")); // thai phinthu = pure killer - - // suppress grapheme breaks in zwj emoji sequences - EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468")); - EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466")); - EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466")); - EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8")); - - // Do not break before and after zwj with all kind of emoji characters. 
- EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464")); - EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464")); - - // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve the break - EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764")); + // control characters that we interpret as "extend" + EXPECT_FALSE(IsBreak("'a' | U+00AD")); // soft hyphen + EXPECT_FALSE(IsBreak("'a' | U+200B")); // zwsp + EXPECT_FALSE(IsBreak("'a' | U+200E")); // lrm + EXPECT_FALSE(IsBreak("'a' | U+202A")); // lre + EXPECT_FALSE(IsBreak("'a' | U+E0041")); // tag character + + // UTC-approved characters for the Prepend class + EXPECT_FALSE( + IsBreak("U+06DD | U+0661")); // arabic subtending mark + digit one + + EXPECT_TRUE(IsBreak("U+0E01 | U+0E33")); // Thai sara am + + // virama is not a grapheme break, but "pure killer" is + EXPECT_FALSE(IsBreak("U+0915 | U+094D U+0915")); // Devanagari ka+virama+ka + EXPECT_FALSE(IsBreak("U+0915 U+094D | U+0915")); // Devanagari ka+virama+ka + EXPECT_FALSE( + IsBreak("U+0E01 | U+0E3A U+0E01")); // thai phinthu = pure killer + EXPECT_TRUE(IsBreak("U+0E01 U+0E3A | U+0E01")); // thai phinthu = pure killer + + // Repetition of above tests, but with a given advances array that implies + // everything became just one cluster. + const float conjoined[] = {1.0, 0.0, 0.0}; + EXPECT_FALSE(IsBreakWithAdvances( + conjoined, + "U+0915 | U+094D U+0915")); // Devanagari ka+virama+ka + EXPECT_FALSE(IsBreakWithAdvances( + conjoined, + "U+0915 U+094D | U+0915")); // Devanagari ka+virama+ka + EXPECT_FALSE(IsBreakWithAdvances( + conjoined, + "U+0E01 | U+0E3A U+0E01")); // thai phinthu = pure killer + EXPECT_TRUE(IsBreakWithAdvances( + conjoined, + "U+0E01 U+0E3A | U+0E01")); // thai phinthu = pure killer + + // Repetition of above tests, but with a given advances array that the virama + // did not form a cluster with the following consonant. The difference is that + // there is now a grapheme break after the virama in ka+virama+ka. 
+ const float separate[] = {1.0, 0.0, 1.0}; + EXPECT_FALSE(IsBreakWithAdvances( + separate, + "U+0915 | U+094D U+0915")); // Devanagari ka+virama+ka + EXPECT_TRUE(IsBreakWithAdvances( + separate, + "U+0915 U+094D | U+0915")); // Devanagari ka+virama+ka + EXPECT_FALSE(IsBreakWithAdvances( + separate, + "U+0E01 | U+0E3A U+0E01")); // thai phinthu = pure killer + EXPECT_TRUE(IsBreakWithAdvances( + separate, + "U+0E01 U+0E3A | U+0E01")); // thai phinthu = pure killer + + // suppress grapheme breaks in zwj emoji sequences + EXPECT_FALSE( + IsBreak("U+1F469 U+200D | U+2764 U+FE0F U+200D U+1F48B U+200D U+1F468")); + EXPECT_FALSE( + IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D | U+1F48B U+200D U+1F468")); + EXPECT_FALSE( + IsBreak("U+1F469 U+200D U+2764 U+FE0F U+200D U+1F48B U+200D | U+1F468")); + EXPECT_FALSE(IsBreak("U+1F468 U+200D | U+1F469 U+200D U+1F466")); + EXPECT_FALSE(IsBreak("U+1F468 U+200D U+1F469 U+200D | U+1F466")); + EXPECT_FALSE( + IsBreak("U+1F469 U+200D | U+1F469 U+200D U+1F467 U+200D U+1F466")); + EXPECT_FALSE( + IsBreak("U+1F469 U+200D U+1F469 U+200D | U+1F467 U+200D U+1F466")); + EXPECT_FALSE( + IsBreak("U+1F469 U+200D U+1F469 U+200D U+1F467 U+200D | U+1F466")); + EXPECT_FALSE(IsBreak("U+1F441 U+200D | U+1F5E8")); + + // Do not break before and after zwj with all kind of emoji characters. 
+ EXPECT_FALSE(IsBreak("U+1F431 | U+200D U+1F464")); + EXPECT_FALSE(IsBreak("U+1F431 U+200D | U+1F464")); + + // ARABIC LETTER BEH + ZWJ + heart, not a zwj emoji sequence, so we preserve + // the break + EXPECT_TRUE(IsBreak("U+0628 U+200D | U+2764")); } TEST(GraphemeBreak, DISABLED_emojiModifiers) { - EXPECT_FALSE(IsBreak("U+261D | U+1F3FB")); // white up pointing index + modifier - EXPECT_FALSE(IsBreak("U+270C | U+1F3FB")); // victory hand + modifier - EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB")); // boy + modifier - EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC")); // boy + modifier - EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD")); // boy + modifier - EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE")); // boy + modifier - EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF")); // boy + modifier - EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF")); // sign of the horns + modifier - EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF")); // selfie (Unicode 9) + modifier - // Reptition of the tests above, with the knowledge that they are ligated. - const float ligated1_2[] = {1.0, 0.0, 0.0}; - const float ligated2_2[] = {1.0, 0.0, 0.0, 0.0}; - EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+261D | U+1F3FB")); - EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+270C | U+1F3FB")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FB")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FC")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FD")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FE")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FF")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F918 | U+1F3FF")); - EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F933 | U+1F3FF")); - // Reptition of the tests above, with the knowledge that they are not ligated. 
- const float unligated1_2[] = {1.0, 1.0, 0.0}; - const float unligated2_2[] = {1.0, 0.0, 1.0, 0.0}; - EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+261D | U+1F3FB")); - EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+270C | U+1F3FB")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FB")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FC")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FD")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FE")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FF")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F918 | U+1F3FF")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F933 | U+1F3FF")); - - // adding extend characters between emoji base and modifier doesn't affect grapheme cluster - EXPECT_FALSE(IsBreak("U+270C U+FE0E | U+1F3FB")); // victory hand + text style + modifier - EXPECT_FALSE(IsBreak("U+270C U+FE0F | U+1F3FB")); // heart + emoji style + modifier - // Reptition of the two tests above, with the knowledge that they are ligated. - const float ligated1_1_2[] = {1.0, 0.0, 0.0, 0.0}; - EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0E | U+1F3FB")); - EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0F | U+1F3FB")); - // Reptition of the first two tests, with the knowledge that they are not ligated. 
- const float unligated1_1_2[] = {1.0, 0.0, 1.0, 0.0}; - EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0E | U+1F3FB")); - EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0F | U+1F3FB")); - - // heart is not an emoji base - EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB")); // heart + modifier - EXPECT_TRUE(IsBreak("U+2764 U+FE0E | U+1F3FB")); // heart + emoji style + modifier - EXPECT_TRUE(IsBreak("U+2764 U+FE0F | U+1F3FB")); // heart + emoji style + modifier - EXPECT_TRUE(IsBreak("U+1F3FB | U+1F3FB")); // modifier + modifier - - // rat is not an emoji modifer - EXPECT_TRUE(IsBreak("U+1F466 | U+1F400")); // boy + rat + EXPECT_FALSE( + IsBreak("U+261D | U+1F3FB")); // white up pointing index + modifier + EXPECT_FALSE(IsBreak("U+270C | U+1F3FB")); // victory hand + modifier + EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FB")); // boy + modifier + EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FC")); // boy + modifier + EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FD")); // boy + modifier + EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FE")); // boy + modifier + EXPECT_FALSE(IsBreak("U+1F466 | U+1F3FF")); // boy + modifier + EXPECT_FALSE(IsBreak("U+1F918 | U+1F3FF")); // sign of the horns + modifier + EXPECT_FALSE(IsBreak("U+1F933 | U+1F3FF")); // selfie (Unicode 9) + modifier + // Reptition of the tests above, with the knowledge that they are ligated. 
+ const float ligated1_2[] = {1.0, 0.0, 0.0}; + const float ligated2_2[] = {1.0, 0.0, 0.0, 0.0}; + EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+261D | U+1F3FB")); + EXPECT_FALSE(IsBreakWithAdvances(ligated1_2, "U+270C | U+1F3FB")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FB")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FC")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FD")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FE")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F466 | U+1F3FF")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F918 | U+1F3FF")); + EXPECT_FALSE(IsBreakWithAdvances(ligated2_2, "U+1F933 | U+1F3FF")); + // Reptition of the tests above, with the knowledge that they are not ligated. + const float unligated1_2[] = {1.0, 1.0, 0.0}; + const float unligated2_2[] = {1.0, 0.0, 1.0, 0.0}; + EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+261D | U+1F3FB")); + EXPECT_TRUE(IsBreakWithAdvances(unligated1_2, "U+270C | U+1F3FB")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FB")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FC")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FD")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FE")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F466 | U+1F3FF")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F918 | U+1F3FF")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_2, "U+1F933 | U+1F3FF")); + + // adding extend characters between emoji base and modifier doesn't affect + // grapheme cluster + EXPECT_FALSE(IsBreak( + "U+270C U+FE0E | U+1F3FB")); // victory hand + text style + modifier + EXPECT_FALSE( + IsBreak("U+270C U+FE0F | U+1F3FB")); // heart + emoji style + modifier + // Reptition of the two tests above, with the knowledge that they are ligated. 
+ const float ligated1_1_2[] = {1.0, 0.0, 0.0, 0.0}; + EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0E | U+1F3FB")); + EXPECT_FALSE(IsBreakWithAdvances(ligated1_1_2, "U+270C U+FE0F | U+1F3FB")); + // Reptition of the first two tests, with the knowledge that they are not + // ligated. + const float unligated1_1_2[] = {1.0, 0.0, 1.0, 0.0}; + EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0E | U+1F3FB")); + EXPECT_TRUE(IsBreakWithAdvances(unligated1_1_2, "U+270C U+FE0F | U+1F3FB")); + + // heart is not an emoji base + EXPECT_TRUE(IsBreak("U+2764 | U+1F3FB")); // heart + modifier + EXPECT_TRUE( + IsBreak("U+2764 U+FE0E | U+1F3FB")); // heart + emoji style + modifier + EXPECT_TRUE( + IsBreak("U+2764 U+FE0F | U+1F3FB")); // heart + emoji style + modifier + EXPECT_TRUE(IsBreak("U+1F3FB | U+1F3FB")); // modifier + modifier + + // rat is not an emoji modifer + EXPECT_TRUE(IsBreak("U+1F466 | U+1F400")); // boy + rat } TEST(GraphemeBreak, DISABLED_genderBalancedEmoji) { - // U+1F469 is WOMAN, U+200D is ZWJ, U+1F4BC is BRIEFCASE. - EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+1F4BC")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F4BC")); - // The above two cases, when the ligature is not supported in the font. We now expect a break - // between them. - const float unligated2_1_2[] = {1.0, 0.0, 0.0, 1.0, 0.0}; - EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 | U+200D U+1F4BC")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 U+200D | U+1F4BC")); - - // U+2695 has now emoji property, so should be part of ZWJ sequence. - EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+2695")); - EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2695")); - // The above two cases, when the ligature is not supported in the font. We now expect a break - // between them. 
- const float unligated2_1_1[] = {1.0, 0.0, 0.0, 1.0}; - EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 | U+200D U+2695")); - EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 U+200D | U+2695")); + // U+1F469 is WOMAN, U+200D is ZWJ, U+1F4BC is BRIEFCASE. + EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+1F4BC")); + EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+1F4BC")); + // The above two cases, when the ligature is not supported in the font. We now + // expect a break between them. + const float unligated2_1_2[] = {1.0, 0.0, 0.0, 1.0, 0.0}; + EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 | U+200D U+1F4BC")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_2, "U+1F469 U+200D | U+1F4BC")); + + // U+2695 has now emoji property, so should be part of ZWJ sequence. + EXPECT_FALSE(IsBreak("U+1F469 | U+200D U+2695")); + EXPECT_FALSE(IsBreak("U+1F469 U+200D | U+2695")); + // The above two cases, when the ligature is not supported in the font. We now + // expect a break between them. 
+ const float unligated2_1_1[] = {1.0, 0.0, 0.0, 1.0}; + EXPECT_FALSE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 | U+200D U+2695")); + EXPECT_TRUE(IsBreakWithAdvances(unligated2_1_1, "U+1F469 U+200D | U+2695")); } TEST(GraphemeBreak, offsets) { - uint16_t string[] = { 0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301 }; - EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 2)); - EXPECT_FALSE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 3)); - EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 4)); - EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 5)); + uint16_t string[] = {0x0041, 0x06DD, 0x0045, 0x0301, 0x0049, 0x0301}; + EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 2)); + EXPECT_FALSE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 3)); + EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 4)); + EXPECT_TRUE(GraphemeBreak::isGraphemeBreak(nullptr, string, 2, 3, 5)); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/HbFontCacheTest.cpp b/third_party/txt/tests/old/unittest/HbFontCacheTest.cpp index 5b306aa2ba47122cacc8c2cdc08adf62c172a3dd..5edd016b45cfe1afcd33b1b9ba852d6eb8a7ae77 100644 --- a/third_party/txt/tests/old/unittest/HbFontCacheTest.cpp +++ b/third_party/txt/tests/old/unittest/HbFontCacheTest.cpp @@ -16,77 +16,78 @@ #include "lib/txt/libs/minikin/HbFontCache.h" -#include #include +#include -#include #include +#include #include -#include "lib/txt/libs/minikin/MinikinInternal.h" -#include "MinikinFontForTest.h" #include +#include "MinikinFontForTest.h" +#include "lib/txt/libs/minikin/MinikinInternal.h" namespace minikin { class HbFontCacheTest : public testing::Test { -public: - virtual void TearDown() { - std::lock_guard _l(gMinikinLock); - purgeHbFontCacheLocked(); - } + public: + virtual void TearDown() { + std::lock_guard _l(gMinikinLock); + purgeHbFontCacheLocked(); + } }; TEST_F(HbFontCacheTest, getHbFontLockedTest) { - 
std::shared_ptr fontA( - new MinikinFontForTest(kTestFontDir "Regular.ttf")); + std::shared_ptr fontA( + new MinikinFontForTest(kTestFontDir "Regular.ttf")); - std::shared_ptr fontB( - new MinikinFontForTest(kTestFontDir "Bold.ttf")); + std::shared_ptr fontB( + new MinikinFontForTest(kTestFontDir "Bold.ttf")); - std::shared_ptr fontC( - new MinikinFontForTest(kTestFontDir "BoldItalic.ttf")); + std::shared_ptr fontC( + new MinikinFontForTest(kTestFontDir "BoldItalic.ttf")); - std::lock_guard _l(gMinikinLock); - // Never return NULL. - EXPECT_NE(nullptr, getHbFontLocked(fontA.get())); - EXPECT_NE(nullptr, getHbFontLocked(fontB.get())); - EXPECT_NE(nullptr, getHbFontLocked(fontC.get())); + std::lock_guard _l(gMinikinLock); + // Never return NULL. + EXPECT_NE(nullptr, getHbFontLocked(fontA.get())); + EXPECT_NE(nullptr, getHbFontLocked(fontB.get())); + EXPECT_NE(nullptr, getHbFontLocked(fontC.get())); - EXPECT_NE(nullptr, getHbFontLocked(nullptr)); + EXPECT_NE(nullptr, getHbFontLocked(nullptr)); - // Must return same object if same font object is passed. - EXPECT_EQ(getHbFontLocked(fontA.get()), getHbFontLocked(fontA.get())); - EXPECT_EQ(getHbFontLocked(fontB.get()), getHbFontLocked(fontB.get())); - EXPECT_EQ(getHbFontLocked(fontC.get()), getHbFontLocked(fontC.get())); + // Must return same object if same font object is passed. + EXPECT_EQ(getHbFontLocked(fontA.get()), getHbFontLocked(fontA.get())); + EXPECT_EQ(getHbFontLocked(fontB.get()), getHbFontLocked(fontB.get())); + EXPECT_EQ(getHbFontLocked(fontC.get()), getHbFontLocked(fontC.get())); - // Different object must be returned if the passed minikinFont has different ID. - EXPECT_NE(getHbFontLocked(fontA.get()), getHbFontLocked(fontB.get())); - EXPECT_NE(getHbFontLocked(fontA.get()), getHbFontLocked(fontC.get())); + // Different object must be returned if the passed minikinFont has different + // ID. 
+ EXPECT_NE(getHbFontLocked(fontA.get()), getHbFontLocked(fontB.get())); + EXPECT_NE(getHbFontLocked(fontA.get()), getHbFontLocked(fontC.get())); } TEST_F(HbFontCacheTest, purgeCacheTest) { - std::shared_ptr minikinFont( - new MinikinFontForTest(kTestFontDir "Regular.ttf")); - - std::lock_guard _l(gMinikinLock); - hb_font_t* font = getHbFontLocked(minikinFont.get()); - ASSERT_NE(nullptr, font); - - // Set user data to identify the font object. - hb_user_data_key_t key; - void* data = (void*)0xdeadbeef; - hb_font_set_user_data(font, &key, data, NULL, false); - ASSERT_EQ(data, hb_font_get_user_data(font, &key)); - - purgeHbFontCacheLocked(); - - // By checking user data, confirm that the object after purge is different from previously - // created one. Do not compare the returned pointer here since memory allocator may assign - // same region for new object. - font = getHbFontLocked(minikinFont.get()); - EXPECT_EQ(nullptr, hb_font_get_user_data(font, &key)); + std::shared_ptr minikinFont( + new MinikinFontForTest(kTestFontDir "Regular.ttf")); + + std::lock_guard _l(gMinikinLock); + hb_font_t* font = getHbFontLocked(minikinFont.get()); + ASSERT_NE(nullptr, font); + + // Set user data to identify the font object. + hb_user_data_key_t key; + void* data = (void*)0xdeadbeef; + hb_font_set_user_data(font, &key, data, NULL, false); + ASSERT_EQ(data, hb_font_get_user_data(font, &key)); + + purgeHbFontCacheLocked(); + + // By checking user data, confirm that the object after purge is different + // from previously created one. Do not compare the returned pointer here since + // memory allocator may assign same region for new object. 
+ font = getHbFontLocked(minikinFont.get()); + EXPECT_EQ(nullptr, hb_font_get_user_data(font, &key)); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/HyphenatorTest.cpp b/third_party/txt/tests/old/unittest/HyphenatorTest.cpp index ecd58a2ea084bb2aef4a8c74e25ad5edfb683fad..f7daf414e3b180f72cb849f817d001eb42cddaef 100644 --- a/third_party/txt/tests/old/unittest/HyphenatorTest.cpp +++ b/third_party/txt/tests/old/unittest/HyphenatorTest.cpp @@ -16,9 +16,9 @@ #include -#include "ICUTestBase.h" -#include #include +#include +#include "ICUTestBase.h" #ifndef NELEM #define NELEM(x) ((sizeof(x) / sizeof((x)[0]))) @@ -49,286 +49,299 @@ const uint16_t UCAS_E = 0x1401; const uint16_t HYPHEN = 0x2010; const uint16_t EN_DASH = 0x2013; -// Simple test for US English. This tests "table", which happens to be the in the exceptions list. +// Simple test for US English. This tests "table", which happens to be the in +// the exceptions list. TEST_F(HyphenatorTest, usEnglishAutomaticHyphenation) { - Hyphenator* hyphenator = Hyphenator::loadBinary(readWholeFile(usHyph).data(), 2, 3); - const uint16_t word[] = {'t', 'a', 'b', 'l', 'e'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 5, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); + Hyphenator* hyphenator = + Hyphenator::loadBinary(readWholeFile(usHyph).data(), 2, 3); + const uint16_t word[] = {'t', 'a', 'b', 'l', 'e'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)5, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); + 
EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); } // Catalan l·l should break as l-/l TEST_F(HyphenatorTest, catalanMiddleDot) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'l', 'l', MIDDLE_DOT, 'l', 'l'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale); - EXPECT_EQ((size_t) 5, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); - EXPECT_EQ(HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN, result[3]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'l', 'l', MIDDLE_DOT, 'l', 'l'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale); + EXPECT_EQ((size_t)5, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); + EXPECT_EQ(HyphenationType::BREAK_AND_REPLACE_WITH_HYPHEN, result[3]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); } // Catalan l·l should not break if the word is too short. 
TEST_F(HyphenatorTest, catalanMiddleDotShortWord) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'l', MIDDLE_DOT, 'l'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'l', MIDDLE_DOT, 'l'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), catalanLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); } -// If we break on a hyphen in Polish, the hyphen should be repeated on the next line. +// If we break on a hyphen in Polish, the hyphen should be repeated on the next +// line. 
TEST_F(HyphenatorTest, polishHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'x', HYPHEN, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), polishLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'x', HYPHEN, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), polishLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AT_NEXT_LINE, result[2]); } -// If the language is Polish but the script is not Latin, don't use Polish rules for hyphenation. +// If the language is Polish but the script is not Latin, don't use Polish rules +// for hyphenation. 
TEST_F(HyphenatorTest, polishHyphenButNonLatinWord) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {GREEK_LOWER_ALPHA, HYPHEN, GREEK_LOWER_ALPHA}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), polishLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {GREEK_LOWER_ALPHA, HYPHEN, GREEK_LOWER_ALPHA}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), polishLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); } -// Polish en dash doesn't repeat on next line (as far as we know), but just provides a break -// opportunity. +// Polish en dash doesn't repeat on next line (as far as we know), but just +// provides a break opportunity. 
TEST_F(HyphenatorTest, polishEnDash) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'x', EN_DASH, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), polishLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'x', EN_DASH, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), polishLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); } -// In Latin script text, soft hyphens should insert a visible hyphen if broken at. +// In Latin script text, soft hyphens should insert a visible hyphen if broken +// at. TEST_F(HyphenatorTest, latinSoftHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'x', SOFT_HYPHEN, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'x', SOFT_HYPHEN, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); } // Soft hyphens at the beginning of a word are not useful in linebreaking. 
TEST_F(HyphenatorTest, latinSoftHyphenStartingTheWord) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {SOFT_HYPHEN, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 2, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {SOFT_HYPHEN, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)2, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); } -// In Malayalam script text, soft hyphens should not insert a visible hyphen if broken at. +// In Malayalam script text, soft hyphens should not insert a visible hyphen if +// broken at. TEST_F(HyphenatorTest, malayalamSoftHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {MALAYALAM_KA, SOFT_HYPHEN, MALAYALAM_KA}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {MALAYALAM_KA, SOFT_HYPHEN, MALAYALAM_KA}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); } -// In automatically hyphenated Malayalam script text, we should not insert a visible hyphen. 
+// In automatically hyphenated Malayalam script text, we should not insert a +// visible hyphen. TEST_F(HyphenatorTest, malayalamAutomaticHyphenation) { - Hyphenator* hyphenator = Hyphenator::loadBinary(readWholeFile(malayalamHyph).data(), 2, 2); - const uint16_t word[] = { - MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 5, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[3]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); + Hyphenator* hyphenator = + Hyphenator::loadBinary(readWholeFile(malayalamHyph).data(), 2, 2); + const uint16_t word[] = {MALAYALAM_KA, MALAYALAM_KA, MALAYALAM_KA, + MALAYALAM_KA, MALAYALAM_KA}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)5, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[3]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); } -// In Armenian script text, soft hyphens should insert an Armenian hyphen if broken at. +// In Armenian script text, soft hyphens should insert an Armenian hyphen if +// broken at. 
TEST_F(HyphenatorTest, aremenianSoftHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {ARMENIAN_AYB, SOFT_HYPHEN, ARMENIAN_AYB}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {ARMENIAN_AYB, SOFT_HYPHEN, ARMENIAN_AYB}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_ARMENIAN_HYPHEN, result[2]); } -// In Hebrew script text, soft hyphens should insert a normal hyphen if broken at, for now. -// We may need to change this to maqaf later. +// In Hebrew script text, soft hyphens should insert a normal hyphen if broken +// at, for now. We may need to change this to maqaf later. 
TEST_F(HyphenatorTest, hebrewSoftHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {HEBREW_ALEF, SOFT_HYPHEN, HEBREW_ALEF}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {HEBREW_ALEF, SOFT_HYPHEN, HEBREW_ALEF}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); } // Soft hyphen between two Arabic letters that join should keep the joining // behavior when broken across lines. 
TEST_F(HyphenatorTest, arabicSoftHyphenConnecting) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {ARABIC_BEH, SOFT_HYPHEN, ARABIC_BEH}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {ARABIC_BEH, SOFT_HYPHEN, ARABIC_BEH}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[2]); } // Arabic letters may be joining on one side, but if it's the wrong side, we // should use the normal hyphen. 
TEST_F(HyphenatorTest, arabicSoftHyphenNonConnecting) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {ARABIC_ALEF, SOFT_HYPHEN, ARABIC_BEH}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {ARABIC_ALEF, SOFT_HYPHEN, ARABIC_BEH}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); } // Skip transparent characters until you find a non-transparent one. TEST_F(HyphenatorTest, arabicSoftHyphenSkipTransparents) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 5, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[3]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, + ARABIC_ZWARAKAY, ARABIC_BEH}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)5, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + 
EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN_AND_ZWJ, result[3]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[4]); } -// Skip transparent characters until you find a non-transparent one. If we get to one end without -// finding anything, we are still non-joining. +// Skip transparent characters until you find a non-transparent one. If we get +// to one end without finding anything, we are still non-joining. TEST_F(HyphenatorTest, arabicSoftHyphenTransparentsAtEnd) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 4, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[3]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {ARABIC_BEH, ARABIC_ZWARAKAY, SOFT_HYPHEN, + ARABIC_ZWARAKAY}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)4, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[2]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[3]); } -// Skip transparent characters until you find a non-transparent one. If we get to one end without -// finding anything, we are still non-joining. +// Skip transparent characters until you find a non-transparent one. If we get +// to one end without finding anything, we are still non-joining. 
TEST_F(HyphenatorTest, arabicSoftHyphenTransparentsAtStart) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, ARABIC_BEH}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 4, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {ARABIC_ZWARAKAY, SOFT_HYPHEN, ARABIC_ZWARAKAY, + ARABIC_BEH}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)4, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_HYPHEN, result[2]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[3]); } -// In Unified Canadian Aboriginal script (UCAS) text, soft hyphens should insert a UCAS hyphen. +// In Unified Canadian Aboriginal script (UCAS) text, soft hyphens should insert +// a UCAS hyphen. 
TEST_F(HyphenatorTest, ucasSoftHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {UCAS_E, SOFT_HYPHEN, UCAS_E}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {UCAS_E, SOFT_HYPHEN, UCAS_E}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]); } -// Presently, soft hyphen looks at the character after it to determine hyphenation type. This is a -// little arbitrary, but let's test it anyway. +// Presently, soft hyphen looks at the character after it to determine +// hyphenation type. This is a little arbitrary, but let's test it anyway. 
TEST_F(HyphenatorTest, mixedScriptSoftHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'a', SOFT_HYPHEN, UCAS_E}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'a', SOFT_HYPHEN, UCAS_E}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_INSERT_UCAS_HYPHEN, result[2]); } // Hard hyphens provide a breaking opportunity with nothing extra inserted. TEST_F(HyphenatorTest, hardHyphen) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'x', HYPHEN, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'x', HYPHEN, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); } -// Hyphen-minuses also provide a breaking opportunity with nothing extra inserted. 
+// Hyphen-minuses also provide a breaking opportunity with nothing extra +// inserted. TEST_F(HyphenatorTest, hyphenMinus) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {'x', HYPHEN_MINUS, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 3, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); - EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {'x', HYPHEN_MINUS, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)3, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + EXPECT_EQ(HyphenationType::BREAK_AND_DONT_INSERT_HYPHEN, result[2]); } -// If the word starts with a hard hyphen or hyphen-minus, it doesn't make sense to break -// it at that point. +// If the word starts with a hard hyphen or hyphen-minus, it doesn't make sense +// to break it at that point. 
TEST_F(HyphenatorTest, startingHyphenMinus) { - Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); - const uint16_t word[] = {HYPHEN_MINUS, 'y'}; - std::vector result; - hyphenator->hyphenate(&result, word, NELEM(word), usLocale); - EXPECT_EQ((size_t) 2, result.size()); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); - EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); + Hyphenator* hyphenator = Hyphenator::loadBinary(nullptr, 2, 2); + const uint16_t word[] = {HYPHEN_MINUS, 'y'}; + std::vector result; + hyphenator->hyphenate(&result, word, NELEM(word), usLocale); + EXPECT_EQ((size_t)2, result.size()); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[0]); + EXPECT_EQ(HyphenationType::DONT_BREAK, result[1]); } } // namespace minikin - diff --git a/third_party/txt/tests/old/unittest/ICUTestBase.h b/third_party/txt/tests/old/unittest/ICUTestBase.h index f915cf80cec5d4a1c6d2ee63bf9ae75c453eaa02..c25b8f6daffcc0e94d350048c0f7b228d3e8bba0 100644 --- a/third_party/txt/tests/old/unittest/ICUTestBase.h +++ b/third_party/txt/tests/old/unittest/ICUTestBase.h @@ -23,31 +23,29 @@ // low level file access for mapping ICU data #include -#include #include +#include namespace minikin { class ICUTestBase : public testing::Test { -protected: - virtual void SetUp() override { - const char* fn = "/system/usr/icu/" U_ICUDATA_NAME ".dat"; - int fd = open(fn, O_RDONLY); - ASSERT_NE(-1, fd); - struct stat sb; - ASSERT_EQ(0, fstat(fd, &sb)); - void* data = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0); - - UErrorCode errorCode = U_ZERO_ERROR; - udata_setCommonData(data, &errorCode); - ASSERT_TRUE(U_SUCCESS(errorCode)); - u_init(&errorCode); - ASSERT_TRUE(U_SUCCESS(errorCode)); - } - - virtual void TearDown() override { - u_cleanup(); - } + protected: + virtual void SetUp() override { + const char* fn = "/system/usr/icu/" U_ICUDATA_NAME ".dat"; + int fd = open(fn, O_RDONLY); + ASSERT_NE(-1, fd); + struct stat sb; + ASSERT_EQ(0, fstat(fd, &sb)); + void* data = mmap(NULL, 
sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + + UErrorCode errorCode = U_ZERO_ERROR; + udata_setCommonData(data, &errorCode); + ASSERT_TRUE(U_SUCCESS(errorCode)); + u_init(&errorCode); + ASSERT_TRUE(U_SUCCESS(errorCode)); + } + + virtual void TearDown() override { u_cleanup(); } }; } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/LayoutTest.cpp b/third_party/txt/tests/old/unittest/LayoutTest.cpp index 1770d3ac5e24e3395443827f7b0db632ed2a52ea..e9ad2eb066a826057ca60215bb94d7d027bda3a6 100644 --- a/third_party/txt/tests/old/unittest/LayoutTest.cpp +++ b/third_party/txt/tests/old/unittest/LayoutTest.cpp @@ -16,11 +16,11 @@ #include +#include "../util/FontTestUtils.h" +#include "../util/UnicodeUtils.h" #include "ICUTestBase.h" #include "minikin/FontCollection.h" #include "minikin/Layout.h" -#include "../util/FontTestUtils.h" -#include "../util/UnicodeUtils.h" const char* SYSTEM_FONT_PATH = "/system/fonts/"; const char* SYSTEM_FONT_XML = "/system/etc/fonts.xml"; @@ -29,397 +29,401 @@ namespace minikin { const float UNTOUCHED_MARKER = 1e+38; -static void expectAdvances(std::vector expected, float* advances, size_t length) { - EXPECT_LE(expected.size(), length); - for (size_t i = 0; i < expected.size(); ++i) { - EXPECT_EQ(expected[i], advances[i]) - << i << "th element is different. Expected: " << expected[i] - << ", Actual: " << advances[i]; - } - EXPECT_EQ(UNTOUCHED_MARKER, advances[expected.size()]); +static void expectAdvances(std::vector expected, + float* advances, + size_t length) { + EXPECT_LE(expected.size(), length); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQ(expected[i], advances[i]) + << i << "th element is different. 
Expected: " << expected[i] + << ", Actual: " << advances[i]; + } + EXPECT_EQ(UNTOUCHED_MARKER, advances[expected.size()]); } static void resetAdvances(float* advances, size_t length) { - for (size_t i = 0; i < length; ++i) { - advances[i] = UNTOUCHED_MARKER; - } + for (size_t i = 0; i < length; ++i) { + advances[i] = UNTOUCHED_MARKER; + } } class LayoutTest : public ICUTestBase { -protected: - LayoutTest() : mCollection(nullptr) { - } + protected: + LayoutTest() : mCollection(nullptr) {} - virtual ~LayoutTest() {} + virtual ~LayoutTest() {} - virtual void SetUp() override { - mCollection = std::shared_ptr( - getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); - } + virtual void SetUp() override { + mCollection = std::shared_ptr( + getFontCollection(SYSTEM_FONT_PATH, SYSTEM_FONT_XML)); + } - virtual void TearDown() override { - } + virtual void TearDown() override {} - std::shared_ptr mCollection; + std::shared_ptr mCollection; }; TEST_F(LayoutTest, doLayoutTest) { - MinikinPaint paint; - MinikinRect rect; - const size_t kMaxAdvanceLength = 32; - float advances[kMaxAdvanceLength]; - std::vector expectedValues; - - Layout layout; - std::vector text; - - // The mock implementation returns 10.0f advance and 0,0-10x10 bounds for all glyph. 
- { - SCOPED_TRACE("one word"); - text = utf8ToUtf16("oneword"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(70.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(70.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + MinikinPaint paint; + MinikinRect rect; + const size_t kMaxAdvanceLength = 32; + float advances[kMaxAdvanceLength]; + std::vector expectedValues; + + Layout layout; + std::vector text; + + // The mock implementation returns 10.0f advance and 0,0-10x10 bounds for all + // glyph. + { + SCOPED_TRACE("one word"); + text = utf8ToUtf16("oneword"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(70.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(70.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("two words"); - text = utf8ToUtf16("two words"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(90.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(90.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < 
expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("two words"); + text = utf8ToUtf16("two words"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(90.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(90.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("three words"); - text = utf8ToUtf16("three words test"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(160.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(160.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("three words"); + text = utf8ToUtf16("three words test"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(160.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(160.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 
0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("two spaces"); - text = utf8ToUtf16("two spaces"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(110.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(110.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("two spaces"); + text = utf8ToUtf16("two spaces"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(110.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(110.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } } TEST_F(LayoutTest, doLayoutTest_wordSpacing) { - MinikinPaint paint; - MinikinRect rect; - const size_t kMaxAdvanceLength = 32; - float advances[kMaxAdvanceLength]; - std::vector expectedValues; - std::vector text; - - Layout layout; - - paint.wordSpacing = 5.0f; - - // The mock implementation returns 10.0f advance and 0,0-10x10 bounds for all glyph. 
- { - SCOPED_TRACE("one word"); - text = utf8ToUtf16("oneword"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(70.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(70.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + MinikinPaint paint; + MinikinRect rect; + const size_t kMaxAdvanceLength = 32; + float advances[kMaxAdvanceLength]; + std::vector expectedValues; + std::vector text; + + Layout layout; + + paint.wordSpacing = 5.0f; + + // The mock implementation returns 10.0f advance and 0,0-10x10 bounds for all + // glyph. + { + SCOPED_TRACE("one word"); + text = utf8ToUtf16("oneword"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(70.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(70.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("two words"); - text = utf8ToUtf16("two words"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(95.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(95.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - EXPECT_EQ(UNTOUCHED_MARKER, 
advances[text.size()]); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectedValues[3] = 15.0f; - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("two words"); + text = utf8ToUtf16("two words"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(95.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(95.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + EXPECT_EQ(UNTOUCHED_MARKER, advances[text.size()]); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("three words test"); - text = utf8ToUtf16("three words test"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(170.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(170.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectedValues[5] = 15.0f; - expectedValues[11] = 15.0f; - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectedValues[3] = 15.0f; + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("three words test"); + text = utf8ToUtf16("three words test"); + 
layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(170.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(170.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("two spaces"); - text = utf8ToUtf16("two spaces"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(120.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(120.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectedValues[3] = 15.0f; - expectedValues[4] = 15.0f; - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectedValues[5] = 15.0f; + expectedValues[11] = 15.0f; + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("two spaces"); + text = utf8ToUtf16("two spaces"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(120.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(120.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } + expectedValues[3] = 15.0f; + expectedValues[4] = 15.0f; + expectAdvances(expectedValues, 
advances, kMaxAdvanceLength); + } } TEST_F(LayoutTest, doLayoutTest_negativeWordSpacing) { - MinikinPaint paint; - MinikinRect rect; - const size_t kMaxAdvanceLength = 32; - float advances[kMaxAdvanceLength]; - std::vector expectedValues; - - Layout layout; - std::vector text; - - // Negative word spacing also should work. - paint.wordSpacing = -5.0f; - - { - SCOPED_TRACE("one word"); - text = utf8ToUtf16("oneword"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(70.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(70.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + MinikinPaint paint; + MinikinRect rect; + const size_t kMaxAdvanceLength = 32; + float advances[kMaxAdvanceLength]; + std::vector expectedValues; + + Layout layout; + std::vector text; + + // Negative word spacing also should work. 
+ paint.wordSpacing = -5.0f; + + { + SCOPED_TRACE("one word"); + text = utf8ToUtf16("oneword"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(70.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(70.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("two words"); - text = utf8ToUtf16("two words"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(85.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(85.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectedValues[3] = 5.0f; - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("two words"); + text = utf8ToUtf16("two words"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(85.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(85.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("three words"); - text = utf8ToUtf16("three word test"); - 
layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(140.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(140.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectedValues[5] = 5.0f; - expectedValues[10] = 5.0f; - expectAdvances(expectedValues, advances, kMaxAdvanceLength); + expectedValues[3] = 5.0f; + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("three words"); + text = utf8ToUtf16("three word test"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(140.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(140.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } - { - SCOPED_TRACE("two spaces"); - text = utf8ToUtf16("two spaces"); - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(100.0f, layout.getAdvance()); - layout.getBounds(&rect); - EXPECT_EQ(0.0f, rect.mLeft); - EXPECT_EQ(0.0f, rect.mTop); - EXPECT_EQ(100.0f, rect.mRight); - EXPECT_EQ(10.0f, rect.mBottom); - resetAdvances(advances, kMaxAdvanceLength); - layout.getAdvances(advances); - expectedValues.resize(text.size()); - for (size_t i = 0; i < expectedValues.size(); ++i) { - expectedValues[i] = 10.0f; - } - expectedValues[3] = 5.0f; - expectedValues[4] = 5.0f; - expectAdvances(expectedValues, advances, 
kMaxAdvanceLength); + expectedValues[5] = 5.0f; + expectedValues[10] = 5.0f; + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } + { + SCOPED_TRACE("two spaces"); + text = utf8ToUtf16("two spaces"); + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(100.0f, layout.getAdvance()); + layout.getBounds(&rect); + EXPECT_EQ(0.0f, rect.mLeft); + EXPECT_EQ(0.0f, rect.mTop); + EXPECT_EQ(100.0f, rect.mRight); + EXPECT_EQ(10.0f, rect.mBottom); + resetAdvances(advances, kMaxAdvanceLength); + layout.getAdvances(advances); + expectedValues.resize(text.size()); + for (size_t i = 0; i < expectedValues.size(); ++i) { + expectedValues[i] = 10.0f; } + expectedValues[3] = 5.0f; + expectedValues[4] = 5.0f; + expectAdvances(expectedValues, advances, kMaxAdvanceLength); + } } TEST_F(LayoutTest, doLayoutTest_rtlTest) { - MinikinPaint paint; + MinikinPaint paint; - std::vector text = parseUnicodeString("'a' 'b' U+3042 U+3043 'c' 'd'"); + std::vector text = + parseUnicodeString("'a' 'b' U+3042 U+3043 'c' 'd'"); - Layout ltrLayout; - ltrLayout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); + Layout ltrLayout; + ltrLayout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); - Layout rtlLayout; - rtlLayout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_RTL, FontStyle(), paint, - mCollection); + Layout rtlLayout; + rtlLayout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_RTL, + FontStyle(), paint, mCollection); - ASSERT_EQ(ltrLayout.nGlyphs(), rtlLayout.nGlyphs()); - ASSERT_EQ(6u, ltrLayout.nGlyphs()); + ASSERT_EQ(ltrLayout.nGlyphs(), rtlLayout.nGlyphs()); + ASSERT_EQ(6u, ltrLayout.nGlyphs()); - size_t nGlyphs = ltrLayout.nGlyphs(); - for (size_t i = 0; i < nGlyphs; ++i) { - EXPECT_EQ(ltrLayout.getFont(i), rtlLayout.getFont(nGlyphs - i - 1)); - EXPECT_EQ(ltrLayout.getGlyphId(i), 
rtlLayout.getGlyphId(nGlyphs - i - 1)); - } + size_t nGlyphs = ltrLayout.nGlyphs(); + for (size_t i = 0; i < nGlyphs; ++i) { + EXPECT_EQ(ltrLayout.getFont(i), rtlLayout.getFont(nGlyphs - i - 1)); + EXPECT_EQ(ltrLayout.getGlyphId(i), rtlLayout.getGlyphId(nGlyphs - i - 1)); + } } TEST_F(LayoutTest, hyphenationTest) { - Layout layout; - std::vector text; - - // The mock implementation returns 10.0f advance for all glyphs. - { - SCOPED_TRACE("one word with no hyphen edit"); - text = utf8ToUtf16("oneword"); - MinikinPaint paint; - paint.hyphenEdit = HyphenEdit::NO_EDIT; - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(70.0f, layout.getAdvance()); - } - { - SCOPED_TRACE("one word with hyphen insertion at the end"); - text = utf8ToUtf16("oneword"); - MinikinPaint paint; - paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_END; - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(80.0f, layout.getAdvance()); - } - { - SCOPED_TRACE("one word with hyphen replacement at the end"); - text = utf8ToUtf16("oneword"); - MinikinPaint paint; - paint.hyphenEdit = HyphenEdit::REPLACE_WITH_HYPHEN_AT_END; - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(70.0f, layout.getAdvance()); - } - { - SCOPED_TRACE("one word with hyphen insertion at the start"); - text = utf8ToUtf16("oneword"); - MinikinPaint paint; - paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_START; - layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(80.0f, layout.getAdvance()); - } - { - SCOPED_TRACE("one word with hyphen insertion at the both ends"); - text = utf8ToUtf16("oneword"); - MinikinPaint paint; - paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_START | HyphenEdit::INSERT_HYPHEN_AT_END; - layout.doLayout(text.data(), 0, text.size(), text.size(), 
kBidi_LTR, FontStyle(), paint, - mCollection); - EXPECT_EQ(90.0f, layout.getAdvance()); - } + Layout layout; + std::vector text; + + // The mock implementation returns 10.0f advance for all glyphs. + { + SCOPED_TRACE("one word with no hyphen edit"); + text = utf8ToUtf16("oneword"); + MinikinPaint paint; + paint.hyphenEdit = HyphenEdit::NO_EDIT; + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(70.0f, layout.getAdvance()); + } + { + SCOPED_TRACE("one word with hyphen insertion at the end"); + text = utf8ToUtf16("oneword"); + MinikinPaint paint; + paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_END; + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(80.0f, layout.getAdvance()); + } + { + SCOPED_TRACE("one word with hyphen replacement at the end"); + text = utf8ToUtf16("oneword"); + MinikinPaint paint; + paint.hyphenEdit = HyphenEdit::REPLACE_WITH_HYPHEN_AT_END; + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(70.0f, layout.getAdvance()); + } + { + SCOPED_TRACE("one word with hyphen insertion at the start"); + text = utf8ToUtf16("oneword"); + MinikinPaint paint; + paint.hyphenEdit = HyphenEdit::INSERT_HYPHEN_AT_START; + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(80.0f, layout.getAdvance()); + } + { + SCOPED_TRACE("one word with hyphen insertion at the both ends"); + text = utf8ToUtf16("oneword"); + MinikinPaint paint; + paint.hyphenEdit = + HyphenEdit::INSERT_HYPHEN_AT_START | HyphenEdit::INSERT_HYPHEN_AT_END; + layout.doLayout(text.data(), 0, text.size(), text.size(), kBidi_LTR, + FontStyle(), paint, mCollection); + EXPECT_EQ(90.0f, layout.getAdvance()); + } } // TODO: Add more test cases, e.g. measure text, letter spacing. 
diff --git a/third_party/txt/tests/old/unittest/LayoutUtilsTest.cpp b/third_party/txt/tests/old/unittest/LayoutUtilsTest.cpp index 8aa3a21ef952315ef064c725563c463e32cce660..b10610b9fa2bdb485dd013eafee3ec9dbc976a3b 100644 --- a/third_party/txt/tests/old/unittest/LayoutUtilsTest.cpp +++ b/third_party/txt/tests/old/unittest/LayoutUtilsTest.cpp @@ -14,497 +14,497 @@ * limitations under the License. */ -#include #include +#include #include "lib/txt/libs/minikin/LayoutUtils.h" namespace minikin { void ExpectNextWordBreakForCache(size_t offset_in, const char* query_str) { - const size_t BUF_SIZE = 256U; - uint16_t buf[BUF_SIZE]; - size_t expected_breakpoint = 0U; - size_t size = 0U; - - ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); - EXPECT_EQ(expected_breakpoint, - getNextWordBreakForCache(buf, offset_in, size)) - << "Expected position is [" << query_str << "] from offset " << offset_in; + const size_t BUF_SIZE = 256U; + uint16_t buf[BUF_SIZE]; + size_t expected_breakpoint = 0U; + size_t size = 0U; + + ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); + EXPECT_EQ(expected_breakpoint, getNextWordBreakForCache(buf, offset_in, size)) + << "Expected position is [" << query_str << "] from offset " << offset_in; } void ExpectPrevWordBreakForCache(size_t offset_in, const char* query_str) { - const size_t BUF_SIZE = 256U; - uint16_t buf[BUF_SIZE]; - size_t expected_breakpoint = 0U; - size_t size = 0U; - - ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); - EXPECT_EQ(expected_breakpoint, - getPrevWordBreakForCache(buf, offset_in, size)) - << "Expected position is [" << query_str << "] from offset " << offset_in; + const size_t BUF_SIZE = 256U; + uint16_t buf[BUF_SIZE]; + size_t expected_breakpoint = 0U; + size_t size = 0U; + + ParseUnicode(buf, BUF_SIZE, query_str, &size, &expected_breakpoint); + EXPECT_EQ(expected_breakpoint, getPrevWordBreakForCache(buf, offset_in, size)) + << "Expected position is [" << query_str << 
"] from offset " << offset_in; } TEST(WordBreakTest, goNextWordBreakTest) { - ExpectNextWordBreakForCache(0, "|"); - - // Continue for spaces. - ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |"); - ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |"); - ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |"); - ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |"); - ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |"); - ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |"); - - // Space makes word break. - ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'"); - ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'"); - ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'"); - ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |"); - ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |"); - ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |"); - ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |"); - - ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'"); - ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'"); - ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'"); - ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |"); - ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |"); - ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |"); - ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |"); - - ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'"); - ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'"); - ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'"); - ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); - ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |"); - ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |"); - ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |"); - ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |"); - - // CJK ideographs makes word break. 
- ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); - ExpectNextWordBreakForCache(1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); - ExpectNextWordBreakForCache(2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); - ExpectNextWordBreakForCache(3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); - ExpectNextWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); - ExpectNextWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); - ExpectNextWordBreakForCache(1000, - "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); - - ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); - ExpectNextWordBreakForCache(1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); - ExpectNextWordBreakForCache(2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); - ExpectNextWordBreakForCache(3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); - ExpectNextWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); - ExpectNextWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); - ExpectNextWordBreakForCache(1000, - "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); - - ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); - ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); - ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); - ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); - ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); - ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); - ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); - - // Continue if trailing characters is Unicode combining characters. - ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00"); - ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00"); - ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |"); - ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |"); - ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |"); - - // Surrogate pairs. 
- ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |"); - ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |"); - ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |"); - ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |"); - ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |"); - ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |"); - - // Broken surrogate pairs. - // U+D84D is leading surrogate but there is no trailing surrogate for it. - ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |"); - ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |"); - ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |"); - ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |"); - ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |"); - - ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |"); - ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |"); - ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |"); - ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |"); - ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |"); - - // U+DE0D is trailing surrogate but there is no leading surrogate for it. - ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |"); - ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |"); - ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |"); - ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |"); - ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |"); - - ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |"); - ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |"); - ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |"); - ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |"); - ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |"); - - // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag. - ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |"); - ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |"); - ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |"); - ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |"); - - // Tone marks. 
- // CJK ideographic char + Tone mark + CJK ideographic char - ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444"); - ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444"); - ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |"); - ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |"); - ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |"); - - // Variation Selectors. - // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char - ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B"); - ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B"); - ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |"); - ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |"); - ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |"); - - // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char - ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B"); - ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B"); - ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B"); - ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |"); - ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |"); - ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |"); - ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |"); - - // CJK ideographic char + Tone mark + Variation Character(VS1) - ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444"); - ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444"); - ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444"); - ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |"); - ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |"); - ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |"); - - // CJK ideographic char + Tone mark + Variation Character(VS17) - ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444"); - ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444"); - ExpectNextWordBreakForCache(2, "U+4444 
U+302D U+E0100 | U+4444"); - ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444"); - ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |"); - ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |"); - ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |"); - - // CJK ideographic char + Variation Character(VS1) + Tone mark - ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444"); - ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444"); - ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444"); - ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |"); - ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |"); - ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |"); - - // CJK ideographic char + Variation Character(VS17) + Tone mark - ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444"); - ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444"); - ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444"); - ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444"); - ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |"); - ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |"); - ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |"); - - // Following test cases are unusual usage of variation selectors and tone - // marks for caching up the further behavior changes, e.g. index of bounds - // or crashes. Please feel free to update the test expectations if the - // behavior change makes sense to you. 
- - // Isolated Tone marks and Variation Selectors - ExpectNextWordBreakForCache(0, "U+FE00 |"); - ExpectNextWordBreakForCache(1, "U+FE00 |"); - ExpectNextWordBreakForCache(1000, "U+FE00 |"); - ExpectNextWordBreakForCache(0, "U+E0100 |"); - ExpectNextWordBreakForCache(1000, "U+E0100 |"); - ExpectNextWordBreakForCache(0, "U+302D |"); - ExpectNextWordBreakForCache(1000, "U+302D |"); - - // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) - ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |"); - ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |"); - ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |"); - - // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) - ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |"); - ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |"); - ExpectNextWordBreakForCache(1000, - "U+845B U+E0100 U+E0100 U+845B |"); - - // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) - ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B"); - ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |"); - ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |"); - 
ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |"); - - // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) - ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B"); - ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |"); - ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |"); - ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |"); - - // Tone mark. + Tone mark - ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444"); - ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444"); - ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444"); - ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |"); - ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |"); - ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |"); + ExpectNextWordBreakForCache(0, "|"); + + // Continue for spaces. + ExpectNextWordBreakForCache(0, "'a' 'b' 'c' 'd' |"); + ExpectNextWordBreakForCache(1, "'a' 'b' 'c' 'd' |"); + ExpectNextWordBreakForCache(2, "'a' 'b' 'c' 'd' |"); + ExpectNextWordBreakForCache(3, "'a' 'b' 'c' 'd' |"); + ExpectNextWordBreakForCache(4, "'a' 'b' 'c' 'd' |"); + ExpectNextWordBreakForCache(1000, "'a' 'b' 'c' 'd' |"); + + // Space makes word break. 
+ ExpectNextWordBreakForCache(0, "'a' 'b' | U+0020 'c' 'd'"); + ExpectNextWordBreakForCache(1, "'a' 'b' | U+0020 'c' 'd'"); + ExpectNextWordBreakForCache(2, "'a' 'b' U+0020 | 'c' 'd'"); + ExpectNextWordBreakForCache(3, "'a' 'b' U+0020 'c' 'd' |"); + ExpectNextWordBreakForCache(4, "'a' 'b' U+0020 'c' 'd' |"); + ExpectNextWordBreakForCache(5, "'a' 'b' U+0020 'c' 'd' |"); + ExpectNextWordBreakForCache(1000, "'a' 'b' U+0020 'c' 'd' |"); + + ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 'c' 'd'"); + ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 'c' 'd'"); + ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | 'c' 'd'"); + ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 'c' 'd' |"); + ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 'c' 'd' |"); + ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 'c' 'd' |"); + ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 'c' 'd' |"); + + ExpectNextWordBreakForCache(0, "'a' 'b' | U+2000 U+2000 'c' 'd'"); + ExpectNextWordBreakForCache(1, "'a' 'b' | U+2000 U+2000 'c' 'd'"); + ExpectNextWordBreakForCache(2, "'a' 'b' U+2000 | U+2000 'c' 'd'"); + ExpectNextWordBreakForCache(3, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); + ExpectNextWordBreakForCache(4, "'a' 'b' U+2000 U+2000 'c' 'd' |"); + ExpectNextWordBreakForCache(5, "'a' 'b' U+2000 U+2000 'c' 'd' |"); + ExpectNextWordBreakForCache(6, "'a' 'b' U+2000 U+2000 'c' 'd' |"); + ExpectNextWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 'c' 'd' |"); + + // CJK ideographs makes word break. 
+ ExpectNextWordBreakForCache(0, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); + ExpectNextWordBreakForCache(1, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); + ExpectNextWordBreakForCache(2, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); + ExpectNextWordBreakForCache(3, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); + ExpectNextWordBreakForCache(4, + "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); + ExpectNextWordBreakForCache(5, + "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); + ExpectNextWordBreakForCache(1000, + "U+4E00 U+4E00 U+4E00 U+4E00 U+4E00 |"); + + ExpectNextWordBreakForCache(0, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); + ExpectNextWordBreakForCache(1, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); + ExpectNextWordBreakForCache(2, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); + ExpectNextWordBreakForCache(3, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); + ExpectNextWordBreakForCache(4, + "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); + ExpectNextWordBreakForCache(5, + "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); + ExpectNextWordBreakForCache(1000, + "U+4E00 U+4E8C U+4E09 U+56DB U+4E94 |"); + + ExpectNextWordBreakForCache(0, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); + ExpectNextWordBreakForCache(1, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); + ExpectNextWordBreakForCache(2, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); + ExpectNextWordBreakForCache(3, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); + ExpectNextWordBreakForCache(4, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); + ExpectNextWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); + ExpectNextWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' U+4E00 |"); + + // Continue if trailing characters is Unicode combining characters. + ExpectNextWordBreakForCache(0, "U+4E00 U+0332 | U+4E00"); + ExpectNextWordBreakForCache(1, "U+4E00 U+0332 | U+4E00"); + ExpectNextWordBreakForCache(2, "U+4E00 U+0332 U+4E00 |"); + ExpectNextWordBreakForCache(3, "U+4E00 U+0332 U+4E00 |"); + ExpectNextWordBreakForCache(1000, "U+4E00 U+0332 U+4E00 |"); + + // Surrogate pairs. 
+ ExpectNextWordBreakForCache(0, "U+1F60D U+1F618 |"); + ExpectNextWordBreakForCache(1, "U+1F60D U+1F618 |"); + ExpectNextWordBreakForCache(2, "U+1F60D U+1F618 |"); + ExpectNextWordBreakForCache(3, "U+1F60D U+1F618 |"); + ExpectNextWordBreakForCache(4, "U+1F60D U+1F618 |"); + ExpectNextWordBreakForCache(1000, "U+1F60D U+1F618 |"); + + // Broken surrogate pairs. + // U+D84D is leading surrogate but there is no trailing surrogate for it. + ExpectNextWordBreakForCache(0, "U+D84D U+1F618 |"); + ExpectNextWordBreakForCache(1, "U+D84D U+1F618 |"); + ExpectNextWordBreakForCache(2, "U+D84D U+1F618 |"); + ExpectNextWordBreakForCache(3, "U+D84D U+1F618 |"); + ExpectNextWordBreakForCache(1000, "U+D84D U+1F618 |"); + + ExpectNextWordBreakForCache(0, "U+1F618 U+D84D |"); + ExpectNextWordBreakForCache(1, "U+1F618 U+D84D |"); + ExpectNextWordBreakForCache(2, "U+1F618 U+D84D |"); + ExpectNextWordBreakForCache(3, "U+1F618 U+D84D |"); + ExpectNextWordBreakForCache(1000, "U+1F618 U+D84D |"); + + // U+DE0D is trailing surrogate but there is no leading surrogate for it. + ExpectNextWordBreakForCache(0, "U+DE0D U+1F618 |"); + ExpectNextWordBreakForCache(1, "U+DE0D U+1F618 |"); + ExpectNextWordBreakForCache(2, "U+DE0D U+1F618 |"); + ExpectNextWordBreakForCache(3, "U+DE0D U+1F618 |"); + ExpectNextWordBreakForCache(1000, "U+DE0D U+1F618 |"); + + ExpectNextWordBreakForCache(0, "U+1F618 U+DE0D |"); + ExpectNextWordBreakForCache(1, "U+1F618 U+DE0D |"); + ExpectNextWordBreakForCache(2, "U+1F618 U+DE0D |"); + ExpectNextWordBreakForCache(3, "U+1F618 U+DE0D |"); + ExpectNextWordBreakForCache(1000, "U+1F618 U+DE0D |"); + + // Regional indicator pair. U+1F1FA U+1F1F8 is US national flag. + ExpectNextWordBreakForCache(0, "U+1F1FA U+1F1F8 |"); + ExpectNextWordBreakForCache(1, "U+1F1FA U+1F1F8 |"); + ExpectNextWordBreakForCache(2, "U+1F1FA U+1F1F8 |"); + ExpectNextWordBreakForCache(1000, "U+1F1FA U+1F1F8 |"); + + // Tone marks. 
+ // CJK ideographic char + Tone mark + CJK ideographic char + ExpectNextWordBreakForCache(0, "U+4444 U+302D | U+4444"); + ExpectNextWordBreakForCache(1, "U+4444 U+302D | U+4444"); + ExpectNextWordBreakForCache(2, "U+4444 U+302D U+4444 |"); + ExpectNextWordBreakForCache(3, "U+4444 U+302D U+4444 |"); + ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+4444 |"); + + // Variation Selectors. + // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char + ExpectNextWordBreakForCache(0, "U+845B U+FE00 | U+845B"); + ExpectNextWordBreakForCache(1, "U+845B U+FE00 | U+845B"); + ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+845B |"); + ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+845B |"); + ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+845B |"); + + // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char + ExpectNextWordBreakForCache(0, "U+845B U+E0100 | U+845B"); + ExpectNextWordBreakForCache(1, "U+845B U+E0100 | U+845B"); + ExpectNextWordBreakForCache(2, "U+845B U+E0100 | U+845B"); + ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+845B |"); + ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+845B |"); + ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+845B |"); + ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+845B |"); + + // CJK ideographic char + Tone mark + Variation Character(VS1) + ExpectNextWordBreakForCache(0, "U+4444 U+302D U+FE00 | U+4444"); + ExpectNextWordBreakForCache(1, "U+4444 U+302D U+FE00 | U+4444"); + ExpectNextWordBreakForCache(2, "U+4444 U+302D U+FE00 | U+4444"); + ExpectNextWordBreakForCache(3, "U+4444 U+302D U+FE00 U+4444 |"); + ExpectNextWordBreakForCache(4, "U+4444 U+302D U+FE00 U+4444 |"); + ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+FE00 U+4444 |"); + + // CJK ideographic char + Tone mark + Variation Character(VS17) + ExpectNextWordBreakForCache(0, "U+4444 U+302D U+E0100 | U+4444"); + ExpectNextWordBreakForCache(1, "U+4444 U+302D U+E0100 | U+4444"); + ExpectNextWordBreakForCache(2, "U+4444 
U+302D U+E0100 | U+4444"); + ExpectNextWordBreakForCache(3, "U+4444 U+302D U+E0100 | U+4444"); + ExpectNextWordBreakForCache(4, "U+4444 U+302D U+E0100 U+4444 |"); + ExpectNextWordBreakForCache(5, "U+4444 U+302D U+E0100 U+4444 |"); + ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+E0100 U+4444 |"); + + // CJK ideographic char + Variation Character(VS1) + Tone mark + ExpectNextWordBreakForCache(0, "U+4444 U+FE00 U+302D | U+4444"); + ExpectNextWordBreakForCache(1, "U+4444 U+FE00 U+302D | U+4444"); + ExpectNextWordBreakForCache(2, "U+4444 U+FE00 U+302D | U+4444"); + ExpectNextWordBreakForCache(3, "U+4444 U+FE00 U+302D U+4444 |"); + ExpectNextWordBreakForCache(4, "U+4444 U+FE00 U+302D U+4444 |"); + ExpectNextWordBreakForCache(1000, "U+4444 U+FE00 U+302D U+4444 |"); + + // CJK ideographic char + Variation Character(VS17) + Tone mark + ExpectNextWordBreakForCache(0, "U+4444 U+E0100 U+302D | U+4444"); + ExpectNextWordBreakForCache(1, "U+4444 U+E0100 U+302D | U+4444"); + ExpectNextWordBreakForCache(2, "U+4444 U+E0100 U+302D | U+4444"); + ExpectNextWordBreakForCache(3, "U+4444 U+E0100 U+302D | U+4444"); + ExpectNextWordBreakForCache(4, "U+4444 U+E0100 U+302D U+4444 |"); + ExpectNextWordBreakForCache(5, "U+4444 U+E0100 U+302D U+4444 |"); + ExpectNextWordBreakForCache(1000, "U+4444 U+E0100 U+302D U+4444 |"); + + // Following test cases are unusual usage of variation selectors and tone + // marks for caching up the further behavior changes, e.g. index of bounds + // or crashes. Please feel free to update the test expectations if the + // behavior change makes sense to you. 
+ + // Isolated Tone marks and Variation Selectors + ExpectNextWordBreakForCache(0, "U+FE00 |"); + ExpectNextWordBreakForCache(1, "U+FE00 |"); + ExpectNextWordBreakForCache(1000, "U+FE00 |"); + ExpectNextWordBreakForCache(0, "U+E0100 |"); + ExpectNextWordBreakForCache(1000, "U+E0100 |"); + ExpectNextWordBreakForCache(0, "U+302D |"); + ExpectNextWordBreakForCache(1000, "U+302D |"); + + // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) + ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+FE00 U+845B |"); + ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+FE00 U+845B |"); + ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+FE00 U+845B |"); + + // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) + ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+E0100 U+845B |"); + ExpectNextWordBreakForCache(6, "U+845B U+E0100 U+E0100 U+845B |"); + ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+E0100 U+845B |"); + + // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) + ExpectNextWordBreakForCache(0, "U+845B U+FE00 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(1, "U+845B U+FE00 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(2, "U+845B U+FE00 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(3, "U+845B U+FE00 U+E0100 | U+845B"); + ExpectNextWordBreakForCache(4, "U+845B U+FE00 U+E0100 U+845B |"); + ExpectNextWordBreakForCache(5, "U+845B U+FE00 U+E0100 U+845B |"); + 
ExpectNextWordBreakForCache(1000, "U+845B U+FE00 U+E0100 U+845B |"); + + // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) + ExpectNextWordBreakForCache(0, "U+845B U+E0100 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(1, "U+845B U+E0100 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(2, "U+845B U+E0100 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(3, "U+845B U+E0100 U+FE00 | U+845B"); + ExpectNextWordBreakForCache(4, "U+845B U+E0100 U+FE00 U+845B |"); + ExpectNextWordBreakForCache(5, "U+845B U+E0100 U+FE00 U+845B |"); + ExpectNextWordBreakForCache(1000, "U+845B U+E0100 U+FE00 U+845B |"); + + // Tone mark. + Tone mark + ExpectNextWordBreakForCache(0, "U+4444 U+302D U+302D | U+4444"); + ExpectNextWordBreakForCache(1, "U+4444 U+302D U+302D | U+4444"); + ExpectNextWordBreakForCache(2, "U+4444 U+302D U+302D | U+4444"); + ExpectNextWordBreakForCache(3, "U+4444 U+302D U+302D U+4444 |"); + ExpectNextWordBreakForCache(4, "U+4444 U+302D U+302D U+4444 |"); + ExpectNextWordBreakForCache(1000, "U+4444 U+302D U+302D U+4444 |"); } TEST(WordBreakTest, goPrevWordBreakTest) { - ExpectPrevWordBreakForCache(0, "|"); - - // Continue for spaces. - ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'"); - ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'"); - ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'"); - ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'"); - ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'"); - ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'"); - - // Space makes word break. 
- ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'"); - ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'"); - ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'"); - ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'"); - ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'"); - ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'"); - ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'"); - - ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'"); - ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'"); - ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'"); - - ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'"); - ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); - ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); - ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); - - // CJK ideographs makes word break. 
- ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); - ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); - ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); - ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); - ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); - ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); - ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); - - ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); - ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); - ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); - ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); - ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); - ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); - ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); - - // Mixed case. - ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); - ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); - ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); - ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); - ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); - ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); - ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); - ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); - - // Continue if trailing characters is Unicode combining characters. 
- ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00"); - ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00"); - ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00"); - ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00"); - ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00"); - - // Surrogate pairs. - ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618"); - ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618"); - ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618"); - ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618"); - ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618"); - ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618"); - - // Broken surrogate pairs. - // U+D84D is leading surrogate but there is no trailing surrogate for it. - ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618"); - ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618"); - ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618"); - ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618"); - ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618"); - - ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D"); - ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D"); - ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D"); - ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D"); - ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D"); - - // U+DE0D is trailing surrogate but there is no leading surrogate for it. - ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618"); - ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618"); - ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618"); - ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618"); - ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618"); - - ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D"); - ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D"); - ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D"); - ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D"); - ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D"); - - // Regional indicator pair. 
U+1F1FA U+1F1F8 is US national flag. - ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8"); - ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8"); - ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8"); - ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8"); - - // Tone marks. - // CJK ideographic char + Tone mark + CJK ideographic char - ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444"); - ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444"); - ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444"); - ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444"); - ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444"); - - // Variation Selectors. - // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char - ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B"); - ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B"); - ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B"); - ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B"); - ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B"); - - // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char - ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B"); - ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B"); - ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B"); - ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B"); - ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B"); - ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B"); - ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B"); - - // CJK ideographic char + Tone mark + Variation Character(VS1) - ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444"); - ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444"); - ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444"); - ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444"); - ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444"); - ExpectPrevWordBreakForCache(1000, 
"U+4444 U+302D U+FE00 | U+4444"); - - // CJK ideographic char + Tone mark + Variation Character(VS17) - ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444"); - ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444"); - ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444"); - ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444"); - ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444"); - ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444"); - ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444"); - - // CJK ideographic char + Variation Character(VS1) + Tone mark - ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444"); - ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444"); - ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444"); - ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444"); - ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444"); - ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444"); - - // CJK ideographic char + Variation Character(VS17) + Tone mark - ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444"); - ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444"); - ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444"); - ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444"); - ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444"); - ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444"); - ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444"); - - // Following test cases are unusual usage of variation selectors and tone - // marks for caching up the further behavior changes, e.g. index of bounds - // or crashes. Please feel free to update the test expectations if the - // behavior change makes sense to you. 
- - // Isolated Tone marks and Variation Selectors - ExpectPrevWordBreakForCache(0, "| U+FE00"); - ExpectPrevWordBreakForCache(1, "| U+FE00"); - ExpectPrevWordBreakForCache(1000, "| U+FE00"); - ExpectPrevWordBreakForCache(0, "| U+E0100"); - ExpectPrevWordBreakForCache(1000, "| U+E0100"); - ExpectPrevWordBreakForCache(0, "| U+302D"); - ExpectPrevWordBreakForCache(1000, "| U+302D"); - - // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) - ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B"); - ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B"); - - // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) - ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B"); - ExpectPrevWordBreakForCache(1000, - "U+845B U+E0100 U+E0100 | U+845B"); - - // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) - ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B"); - ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B"); - 
ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B"); - - // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) - ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B"); - ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B"); - ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B"); - - // Tone mark. + Tone mark - ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444"); - ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444"); - ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444"); - ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444"); - ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444"); - ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444"); + ExpectPrevWordBreakForCache(0, "|"); + + // Continue for spaces. + ExpectPrevWordBreakForCache(0, "| 'a' 'b' 'c' 'd'"); + ExpectPrevWordBreakForCache(1, "| 'a' 'b' 'c' 'd'"); + ExpectPrevWordBreakForCache(2, "| 'a' 'b' 'c' 'd'"); + ExpectPrevWordBreakForCache(3, "| 'a' 'b' 'c' 'd'"); + ExpectPrevWordBreakForCache(4, "| 'a' 'b' 'c' 'd'"); + ExpectPrevWordBreakForCache(1000, "| 'a' 'b' 'c' 'd'"); + + // Space makes word break. 
+ ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+0020 'c' 'd'"); + ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+0020 'c' 'd'"); + ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+0020 'c' 'd'"); + ExpectPrevWordBreakForCache(3, "'a' 'b' | U+0020 'c' 'd'"); + ExpectPrevWordBreakForCache(4, "'a' 'b' U+0020 | 'c' 'd'"); + ExpectPrevWordBreakForCache(5, "'a' 'b' U+0020 | 'c' 'd'"); + ExpectPrevWordBreakForCache(1000, "'a' 'b' U+0020 | 'c' 'd'"); + + ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | 'c' 'd'"); + ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 | 'c' 'd'"); + ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 | 'c' 'd'"); + + ExpectPrevWordBreakForCache(0, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(1, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(2, "| 'a' 'b' U+2000 U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(3, "'a' 'b' | U+2000 U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(4, "'a' 'b' U+2000 | U+2000 'c' 'd'"); + ExpectPrevWordBreakForCache(5, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); + ExpectPrevWordBreakForCache(6, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); + ExpectPrevWordBreakForCache(1000, "'a' 'b' U+2000 U+2000 | 'c' 'd'"); + + // CJK ideographs makes word break. 
+ ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); + ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E00 U+4E00 U+4E00 U+4E00"); + ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E00 U+4E00 U+4E00 U+4E00"); + ExpectPrevWordBreakForCache(3, "U+4E00 U+4E00 | U+4E00 U+4E00 U+4E00"); + ExpectPrevWordBreakForCache(4, "U+4E00 U+4E00 U+4E00 | U+4E00 U+4E00"); + ExpectPrevWordBreakForCache(5, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); + ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E00 U+4E00 U+4E00 | U+4E00"); + + ExpectPrevWordBreakForCache(0, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); + ExpectPrevWordBreakForCache(1, "| U+4E00 U+4E8C U+4E09 U+56DB U+4E94"); + ExpectPrevWordBreakForCache(2, "U+4E00 | U+4E8C U+4E09 U+56DB U+4E94"); + ExpectPrevWordBreakForCache(3, "U+4E00 U+4E8C | U+4E09 U+56DB U+4E94"); + ExpectPrevWordBreakForCache(4, "U+4E00 U+4E8C U+4E09 | U+56DB U+4E94"); + ExpectPrevWordBreakForCache(5, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); + ExpectPrevWordBreakForCache(1000, "U+4E00 U+4E8C U+4E09 U+56DB | U+4E94"); + + // Mixed case. + ExpectPrevWordBreakForCache(0, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); + ExpectPrevWordBreakForCache(1, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); + ExpectPrevWordBreakForCache(2, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); + ExpectPrevWordBreakForCache(3, "| U+4E00 'a' 'b' U+2000 'c' U+4E00"); + ExpectPrevWordBreakForCache(4, "U+4E00 'a' 'b' | U+2000 'c' U+4E00"); + ExpectPrevWordBreakForCache(5, "U+4E00 'a' 'b' U+2000 | 'c' U+4E00"); + ExpectPrevWordBreakForCache(6, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); + ExpectPrevWordBreakForCache(1000, "U+4E00 'a' 'b' U+2000 'c' | U+4E00"); + + // Continue if trailing characters is Unicode combining characters. 
+ ExpectPrevWordBreakForCache(0, "| U+4E00 U+0332 U+4E00"); + ExpectPrevWordBreakForCache(1, "| U+4E00 U+0332 U+4E00"); + ExpectPrevWordBreakForCache(2, "| U+4E00 U+0332 U+4E00"); + ExpectPrevWordBreakForCache(3, "U+4E00 U+0332 | U+4E00"); + ExpectPrevWordBreakForCache(1000, "U+4E00 U+0332 | U+4E00"); + + // Surrogate pairs. + ExpectPrevWordBreakForCache(0, "| U+1F60D U+1F618"); + ExpectPrevWordBreakForCache(1, "| U+1F60D U+1F618"); + ExpectPrevWordBreakForCache(2, "| U+1F60D U+1F618"); + ExpectPrevWordBreakForCache(3, "| U+1F60D U+1F618"); + ExpectPrevWordBreakForCache(4, "| U+1F60D U+1F618"); + ExpectPrevWordBreakForCache(1000, "| U+1F60D U+1F618"); + + // Broken surrogate pairs. + // U+D84D is leading surrogate but there is no trailing surrogate for it. + ExpectPrevWordBreakForCache(0, "| U+D84D U+1F618"); + ExpectPrevWordBreakForCache(1, "| U+D84D U+1F618"); + ExpectPrevWordBreakForCache(2, "| U+D84D U+1F618"); + ExpectPrevWordBreakForCache(3, "| U+D84D U+1F618"); + ExpectPrevWordBreakForCache(1000, "| U+D84D U+1F618"); + + ExpectPrevWordBreakForCache(0, "| U+1F618 U+D84D"); + ExpectPrevWordBreakForCache(1, "| U+1F618 U+D84D"); + ExpectPrevWordBreakForCache(2, "| U+1F618 U+D84D"); + ExpectPrevWordBreakForCache(3, "| U+1F618 U+D84D"); + ExpectPrevWordBreakForCache(1000, "| U+1F618 U+D84D"); + + // U+DE0D is trailing surrogate but there is no leading surrogate for it. + ExpectPrevWordBreakForCache(0, "| U+DE0D U+1F618"); + ExpectPrevWordBreakForCache(1, "| U+DE0D U+1F618"); + ExpectPrevWordBreakForCache(2, "| U+DE0D U+1F618"); + ExpectPrevWordBreakForCache(3, "| U+DE0D U+1F618"); + ExpectPrevWordBreakForCache(1000, "| U+DE0D U+1F618"); + + ExpectPrevWordBreakForCache(0, "| U+1F618 U+DE0D"); + ExpectPrevWordBreakForCache(1, "| U+1F618 U+DE0D"); + ExpectPrevWordBreakForCache(2, "| U+1F618 U+DE0D"); + ExpectPrevWordBreakForCache(3, "| U+1F618 U+DE0D"); + ExpectPrevWordBreakForCache(1000, "| U+1F618 U+DE0D"); + + // Regional indicator pair. 
U+1F1FA U+1F1F8 is US national flag. + ExpectPrevWordBreakForCache(0, "| U+1F1FA U+1F1F8"); + ExpectPrevWordBreakForCache(1, "| U+1F1FA U+1F1F8"); + ExpectPrevWordBreakForCache(2, "| U+1F1FA U+1F1F8"); + ExpectPrevWordBreakForCache(1000, "| U+1F1FA U+1F1F8"); + + // Tone marks. + // CJK ideographic char + Tone mark + CJK ideographic char + ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+4444"); + ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+4444"); + ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+4444"); + ExpectPrevWordBreakForCache(3, "U+4444 U+302D | U+4444"); + ExpectPrevWordBreakForCache(1000, "U+4444 U+302D | U+4444"); + + // Variation Selectors. + // CJK Ideographic char + Variation Selector(VS1) + CJK Ideographic char + ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+845B"); + ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+845B"); + ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+845B"); + ExpectPrevWordBreakForCache(3, "U+845B U+FE00 | U+845B"); + ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 | U+845B"); + + // CJK Ideographic char + Variation Selector(VS17) + CJK Ideographic char + ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+845B"); + ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+845B"); + ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+845B"); + ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+845B"); + ExpectPrevWordBreakForCache(4, "U+845B U+E0100 | U+845B"); + ExpectPrevWordBreakForCache(5, "U+845B U+E0100 | U+845B"); + ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 | U+845B"); + + // CJK ideographic char + Tone mark + Variation Character(VS1) + ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+FE00 U+4444"); + ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+FE00 U+4444"); + ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+FE00 U+4444"); + ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+FE00 U+4444"); + ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+FE00 | U+4444"); + ExpectPrevWordBreakForCache(1000, 
"U+4444 U+302D U+FE00 | U+4444"); + + // CJK ideographic char + Tone mark + Variation Character(VS17) + ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+E0100 U+4444"); + ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+E0100 U+4444"); + ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+E0100 U+4444"); + ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+E0100 U+4444"); + ExpectPrevWordBreakForCache(4, "| U+4444 U+302D U+E0100 U+4444"); + ExpectPrevWordBreakForCache(5, "U+4444 U+302D U+E0100 | U+4444"); + ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+E0100 | U+4444"); + + // CJK ideographic char + Variation Character(VS1) + Tone mark + ExpectPrevWordBreakForCache(0, "| U+4444 U+FE00 U+302D U+4444"); + ExpectPrevWordBreakForCache(1, "| U+4444 U+FE00 U+302D U+4444"); + ExpectPrevWordBreakForCache(2, "| U+4444 U+FE00 U+302D U+4444"); + ExpectPrevWordBreakForCache(3, "| U+4444 U+FE00 U+302D U+4444"); + ExpectPrevWordBreakForCache(4, "U+4444 U+FE00 U+302D | U+4444"); + ExpectPrevWordBreakForCache(1000, "U+4444 U+FE00 U+302D | U+4444"); + + // CJK ideographic char + Variation Character(VS17) + Tone mark + ExpectPrevWordBreakForCache(0, "| U+4444 U+E0100 U+302D U+4444"); + ExpectPrevWordBreakForCache(1, "| U+4444 U+E0100 U+302D U+4444"); + ExpectPrevWordBreakForCache(2, "| U+4444 U+E0100 U+302D U+4444"); + ExpectPrevWordBreakForCache(3, "| U+4444 U+E0100 U+302D U+4444"); + ExpectPrevWordBreakForCache(4, "| U+4444 U+E0100 U+302D U+4444"); + ExpectPrevWordBreakForCache(5, "U+4444 U+E0100 U+302D | U+4444"); + ExpectPrevWordBreakForCache(1000, "U+4444 U+E0100 U+302D | U+4444"); + + // Following test cases are unusual usage of variation selectors and tone + // marks for caching up the further behavior changes, e.g. index of bounds + // or crashes. Please feel free to update the test expectations if the + // behavior change makes sense to you. 
+ + // Isolated Tone marks and Variation Selectors + ExpectPrevWordBreakForCache(0, "| U+FE00"); + ExpectPrevWordBreakForCache(1, "| U+FE00"); + ExpectPrevWordBreakForCache(1000, "| U+FE00"); + ExpectPrevWordBreakForCache(0, "| U+E0100"); + ExpectPrevWordBreakForCache(1000, "| U+E0100"); + ExpectPrevWordBreakForCache(0, "| U+302D"); + ExpectPrevWordBreakForCache(1000, "| U+302D"); + + // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS1) + ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(4, "U+845B U+FE00 U+FE00 | U+845B"); + ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+FE00 | U+845B"); + + // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS17) + ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(5, "| U+845B U+E0100 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(6, "U+845B U+E0100 U+E0100 | U+845B"); + ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+E0100 | U+845B"); + + // CJK Ideographic char + Variation Selector(VS1) + Variation Selector(VS17) + ExpectPrevWordBreakForCache(0, "| U+845B U+FE00 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(1, "| U+845B U+FE00 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(2, "| U+845B U+FE00 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(3, "| U+845B U+FE00 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(4, "| U+845B U+FE00 U+E0100 U+845B"); + ExpectPrevWordBreakForCache(5, "U+845B U+FE00 U+E0100 | U+845B"); + 
ExpectPrevWordBreakForCache(1000, "U+845B U+FE00 U+E0100 | U+845B"); + + // CJK Ideographic char + Variation Selector(VS17) + Variation Selector(VS1) + ExpectPrevWordBreakForCache(0, "| U+845B U+E0100 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(1, "| U+845B U+E0100 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(2, "| U+845B U+E0100 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(3, "| U+845B U+E0100 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(4, "| U+845B U+E0100 U+FE00 U+845B"); + ExpectPrevWordBreakForCache(5, "U+845B U+E0100 U+FE00 | U+845B"); + ExpectPrevWordBreakForCache(1000, "U+845B U+E0100 U+FE00 | U+845B"); + + // Tone mark. + Tone mark + ExpectPrevWordBreakForCache(0, "| U+4444 U+302D U+302D U+4444"); + ExpectPrevWordBreakForCache(1, "| U+4444 U+302D U+302D U+4444"); + ExpectPrevWordBreakForCache(2, "| U+4444 U+302D U+302D U+4444"); + ExpectPrevWordBreakForCache(3, "| U+4444 U+302D U+302D U+4444"); + ExpectPrevWordBreakForCache(4, "U+4444 U+302D U+302D | U+4444"); + ExpectPrevWordBreakForCache(1000, "U+4444 U+302D U+302D | U+4444"); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/MeasurementTests.cpp b/third_party/txt/tests/old/unittest/MeasurementTests.cpp index 7fedecb5ea21c4bedfa62cf294c8afba3ec8ecb0..d6daf42fa488bbf62cebd5c9d6663517d63a728f 100644 --- a/third_party/txt/tests/old/unittest/MeasurementTests.cpp +++ b/third_party/txt/tests/old/unittest/MeasurementTests.cpp @@ -14,47 +14,47 @@ * limitations under the License. 
*/ -#include #include +#include #include namespace minikin { float getAdvance(const float* advances, const char* src) { - const size_t BUF_SIZE = 256; - uint16_t buf[BUF_SIZE]; - size_t offset; - size_t size; - ParseUnicode(buf, BUF_SIZE, src, &size, &offset); - return getRunAdvance(advances, buf, 0, size, offset); + const size_t BUF_SIZE = 256; + uint16_t buf[BUF_SIZE]; + size_t offset; + size_t size; + ParseUnicode(buf, BUF_SIZE, src, &size, &offset); + return getRunAdvance(advances, buf, 0, size, offset); } // Latin fi TEST(Measurement, getRunAdvance_fi) { - const float unligated[] = {30.0, 20.0}; - EXPECT_EQ(0.0, getAdvance(unligated, "| 'f' 'i'")); - EXPECT_EQ(30.0, getAdvance(unligated, "'f' | 'i'")); - EXPECT_EQ(50.0, getAdvance(unligated, "'f' 'i' |")); - - const float ligated[] = {40.0, 0.0}; - EXPECT_EQ(0.0, getAdvance(ligated, "| 'f' 'i'")); - EXPECT_EQ(20.0, getAdvance(ligated, "'f' | 'i'")); - EXPECT_EQ(40.0, getAdvance(ligated, "'f' 'i' |")); + const float unligated[] = {30.0, 20.0}; + EXPECT_EQ(0.0, getAdvance(unligated, "| 'f' 'i'")); + EXPECT_EQ(30.0, getAdvance(unligated, "'f' | 'i'")); + EXPECT_EQ(50.0, getAdvance(unligated, "'f' 'i' |")); + + const float ligated[] = {40.0, 0.0}; + EXPECT_EQ(0.0, getAdvance(ligated, "| 'f' 'i'")); + EXPECT_EQ(20.0, getAdvance(ligated, "'f' | 'i'")); + EXPECT_EQ(40.0, getAdvance(ligated, "'f' 'i' |")); } // Devanagari ka+virama+ka TEST(Measurement, getRunAdvance_kka) { - const float unligated[] = {30.0, 0.0, 30.0}; - EXPECT_EQ(0.0, getAdvance(unligated, "| U+0915 U+094D U+0915")); - EXPECT_EQ(30.0, getAdvance(unligated, "U+0915 | U+094D U+0915")); - EXPECT_EQ(30.0, getAdvance(unligated, "U+0915 U+094D | U+0915")); - EXPECT_EQ(60.0, getAdvance(unligated, "U+0915 U+094D U+0915 |")); - - const float ligated[] = {30.0, 0.0, 0.0}; - EXPECT_EQ(0.0, getAdvance(ligated, "| U+0915 U+094D U+0915")); - EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 | U+094D U+0915")); - EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 U+094D | 
U+0915")); - EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 U+094D U+0915 |")); + const float unligated[] = {30.0, 0.0, 30.0}; + EXPECT_EQ(0.0, getAdvance(unligated, "| U+0915 U+094D U+0915")); + EXPECT_EQ(30.0, getAdvance(unligated, "U+0915 | U+094D U+0915")); + EXPECT_EQ(30.0, getAdvance(unligated, "U+0915 U+094D | U+0915")); + EXPECT_EQ(60.0, getAdvance(unligated, "U+0915 U+094D U+0915 |")); + + const float ligated[] = {30.0, 0.0, 0.0}; + EXPECT_EQ(0.0, getAdvance(ligated, "| U+0915 U+094D U+0915")); + EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 | U+094D U+0915")); + EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 U+094D | U+0915")); + EXPECT_EQ(30.0, getAdvance(ligated, "U+0915 U+094D U+0915 |")); } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/SparseBitSetTest.cpp b/third_party/txt/tests/old/unittest/SparseBitSetTest.cpp index 39c9e1b91234080aef10edf0f3d3d903729f5fe3..80856ee66e00d827c6d0fa15363451c7ab0b2b3d 100644 --- a/third_party/txt/tests/old/unittest/SparseBitSetTest.cpp +++ b/third_party/txt/tests/old/unittest/SparseBitSetTest.cpp @@ -22,33 +22,33 @@ namespace minikin { TEST(SparseBitSetTest, randomTest) { - const uint32_t kTestRangeNum = 4096; + const uint32_t kTestRangeNum = 4096; - std::mt19937 mt; // Fix seeds to be able to reproduce the result. - std::uniform_int_distribution distribution(1, 512); + std::mt19937 mt; // Fix seeds to be able to reproduce the result. 
+ std::uniform_int_distribution distribution(1, 512); - std::vector range { distribution(mt) }; - for (size_t i = 1; i < kTestRangeNum * 2; ++i) { - range.push_back((range.back() - 1) + distribution(mt)); - } + std::vector range{distribution(mt)}; + for (size_t i = 1; i < kTestRangeNum * 2; ++i) { + range.push_back((range.back() - 1) + distribution(mt)); + } - SparseBitSet bitset(range.data(), range.size() / 2); + SparseBitSet bitset(range.data(), range.size() / 2); - uint32_t ch = 0; - for (size_t i = 0; i < range.size() / 2; ++i) { - uint32_t start = range[i * 2]; - uint32_t end = range[i * 2 + 1]; + uint32_t ch = 0; + for (size_t i = 0; i < range.size() / 2; ++i) { + uint32_t start = range[i * 2]; + uint32_t end = range[i * 2 + 1]; - for (; ch < start; ch++) { - ASSERT_FALSE(bitset.get(ch)) << std::hex << ch; - } - for (; ch < end; ch++) { - ASSERT_TRUE(bitset.get(ch)) << std::hex << ch; - } + for (; ch < start; ch++) { + ASSERT_FALSE(bitset.get(ch)) << std::hex << ch; } - for (; ch < 0x1FFFFFF; ++ch) { - ASSERT_FALSE(bitset.get(ch)) << std::hex << ch; + for (; ch < end; ch++) { + ASSERT_TRUE(bitset.get(ch)) << std::hex << ch; } + } + for (; ch < 0x1FFFFFF; ++ch) { + ASSERT_FALSE(bitset.get(ch)) << std::hex << ch; + } } } // namespace minikin diff --git a/third_party/txt/tests/old/unittest/UnicodeUtilsTest.cpp b/third_party/txt/tests/old/unittest/UnicodeUtilsTest.cpp index 99327235c590e5f14da6bde1fc9fa919e5d6ec5d..1ab59718d64e2d57fadb5d3a4edd505beb48fc2b 100644 --- a/third_party/txt/tests/old/unittest/UnicodeUtilsTest.cpp +++ b/third_party/txt/tests/old/unittest/UnicodeUtilsTest.cpp @@ -21,17 +21,17 @@ namespace minikin { TEST(UnicodeUtils, parse) { - const size_t BUF_SIZE = 256; - uint16_t buf[BUF_SIZE]; - size_t offset; - size_t size; - ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset); - EXPECT_EQ(size, 4u); - EXPECT_EQ(offset, 3u); - EXPECT_EQ(buf[0], 0x000D); - EXPECT_EQ(buf[1], 0xD83D); - EXPECT_EQ(buf[2], 0xDC31); - EXPECT_EQ(buf[3], 
'a'); + const size_t BUF_SIZE = 256; + uint16_t buf[BUF_SIZE]; + size_t offset; + size_t size; + ParseUnicode(buf, BUF_SIZE, "U+000D U+1F431 | 'a'", &size, &offset); + EXPECT_EQ(size, 4u); + EXPECT_EQ(offset, 3u); + EXPECT_EQ(buf[0], 0x000D); + EXPECT_EQ(buf[1], 0xD83D); + EXPECT_EQ(buf[2], 0xDC31); + EXPECT_EQ(buf[3], 'a'); } -} // namespace minikin +} // namespace minikin diff --git a/third_party/txt/tests/old/unittest/WordBreakerTests.cpp b/third_party/txt/tests/old/unittest/WordBreakerTests.cpp index 0e6cea8db6634ae6f397492983f5fd2e6aff10e3..f4bb239fc09155c067d15a08e3aebce91e2a2513 100644 --- a/third_party/txt/tests/old/unittest/WordBreakerTests.cpp +++ b/third_party/txt/tests/old/unittest/WordBreakerTests.cpp @@ -16,15 +16,15 @@ #define LOG_TAG "Minikin" -#include #include +#include -#include "ICUTestBase.h" -#include "UnicodeUtils.h" #include #include #include #include +#include "ICUTestBase.h" +#include "UnicodeUtils.h" #ifndef NELEM #define NELEM(x) ((sizeof(x) / sizeof((x)[0]))) @@ -37,467 +37,494 @@ namespace minikin { typedef ICUTestBase WordBreakerTest; TEST_F(WordBreakerTest, basic) { - uint16_t buf[] = {'h', 'e', 'l', 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(6, breaker.next()); // after "hello " - EXPECT_EQ(0, breaker.wordStart()); // "hello" - EXPECT_EQ(5, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ(6, breaker.current()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(6, breaker.wordStart()); // "world" - EXPECT_EQ(11, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ(11, breaker.current()); + uint16_t buf[] = {'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(6, breaker.next()); 
// after "hello " + EXPECT_EQ(0, breaker.wordStart()); // "hello" + EXPECT_EQ(5, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ(6, breaker.current()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(6, breaker.wordStart()); // "world" + EXPECT_EQ(11, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ(11, breaker.current()); } TEST_F(WordBreakerTest, softHyphen) { - uint16_t buf[] = {'h', 'e', 'l', 0x00AD, 'l' ,'o', ' ', 'w', 'o', 'r', 'l', 'd'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(7, breaker.next()); // after "hel{SOFT HYPHEN}lo " - EXPECT_EQ(0, breaker.wordStart()); // "hel{SOFT HYPHEN}lo" - EXPECT_EQ(6, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(7, breaker.wordStart()); // "world" - EXPECT_EQ(12, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'h', 'e', 'l', 0x00AD, 'l', 'o', + ' ', 'w', 'o', 'r', 'l', 'd'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(7, breaker.next()); // after "hel{SOFT HYPHEN}lo " + EXPECT_EQ(0, breaker.wordStart()); // "hel{SOFT HYPHEN}lo" + EXPECT_EQ(6, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(7, breaker.wordStart()); // "world" + EXPECT_EQ(12, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, hardHyphen) { - // Hyphens should not allow breaks anymore. 
- uint16_t buf[] = {'s', 'u', 'g', 'a', 'r', '-', 'f', 'r', 'e', 'e'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + // Hyphens should not allow breaks anymore. + uint16_t buf[] = {'s', 'u', 'g', 'a', 'r', '-', 'f', 'r', 'e', 'e'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, postfixAndPrefix) { - uint16_t buf[] = {'U', 'S', 0x00A2, ' ', 'J', 'P', 0x00A5}; // US¢ JP¥ - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - - EXPECT_EQ(4, breaker.next()); // after CENT SIGN - EXPECT_EQ(0, breaker.wordStart()); // "US¢" - EXPECT_EQ(3, breaker.wordEnd()); - - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string - EXPECT_EQ(4, breaker.wordStart()); // "JP¥" - EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); + uint16_t buf[] = {'U', 'S', 0x00A2, ' ', 'J', 'P', 0x00A5}; // US¢ JP¥ + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + + EXPECT_EQ(4, breaker.next()); // after CENT SIGN + EXPECT_EQ(0, breaker.wordStart()); // "US¢" + EXPECT_EQ(3, breaker.wordEnd()); + + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string + EXPECT_EQ(4, breaker.wordStart()); // "JP¥" + EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); } TEST_F(WordBreakerTest, myanmarKinzi) { - uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, 0x102C}; // 
NGA, ASAT, VIRAMA, KA, UU - WordBreaker breaker; - icu::Locale burmese("my"); - breaker.setLocale(burmese); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); + uint16_t buf[] = {0x1004, 0x103A, 0x1039, 0x1000, + 0x102C}; // NGA, ASAT, VIRAMA, KA, UU + WordBreaker breaker; + icu::Locale burmese("my"); + breaker.setLocale(burmese); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end of string + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.wordEnd()); } TEST_F(WordBreakerTest, zwjEmojiSequences) { - uint16_t buf[] = { - // man + zwj + heart + zwj + man - UTF16(0x1F468), 0x200D, 0x2764, 0x200D, UTF16(0x1F468), - // woman + zwj + heart + zwj + kiss mark + zwj + woman - UTF16(0x1F469), 0x200D, 0x2764, 0x200D, UTF16(0x1F48B), 0x200D, UTF16(0x1F469), - // eye + zwj + left speech bubble - UTF16(0x1F441), 0x200D, UTF16(0x1F5E8), - // CAT FACE + zwj + BUST IN SILHOUETTE - UTF16(0x1F431), 0x200D, UTF16(0x1F464), - }; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(7, breaker.next()); // after man + zwj + heart + zwj + man - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ(7, breaker.wordEnd()); - EXPECT_EQ(17, breaker.next()); // after woman + zwj + heart + zwj + woman - EXPECT_EQ(7, breaker.wordStart()); - EXPECT_EQ(17, breaker.wordEnd()); - EXPECT_EQ(22, breaker.next()); // after eye + zwj + left speech bubble - EXPECT_EQ(17, breaker.wordStart()); - EXPECT_EQ(22, breaker.wordEnd()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(22, breaker.wordStart()); - EXPECT_EQ(27, breaker.wordEnd()); + uint16_t buf[] = { + // man + zwj + heart + zwj + man + UTF16(0x1F468), + 0x200D, 
+ 0x2764, + 0x200D, + UTF16(0x1F468), + // woman + zwj + heart + zwj + kiss mark + zwj + woman + UTF16(0x1F469), + 0x200D, + 0x2764, + 0x200D, + UTF16(0x1F48B), + 0x200D, + UTF16(0x1F469), + // eye + zwj + left speech bubble + UTF16(0x1F441), + 0x200D, + UTF16(0x1F5E8), + // CAT FACE + zwj + BUST IN SILHOUETTE + UTF16(0x1F431), + 0x200D, + UTF16(0x1F464), + }; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(7, breaker.next()); // after man + zwj + heart + zwj + man + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(7, breaker.wordEnd()); + EXPECT_EQ(17, breaker.next()); // after woman + zwj + heart + zwj + woman + EXPECT_EQ(7, breaker.wordStart()); + EXPECT_EQ(17, breaker.wordEnd()); + EXPECT_EQ(22, breaker.next()); // after eye + zwj + left speech bubble + EXPECT_EQ(17, breaker.wordStart()); + EXPECT_EQ(22, breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(22, breaker.wordStart()); + EXPECT_EQ(27, breaker.wordEnd()); } TEST_F(WordBreakerTest, emojiWithModifier) { - uint16_t buf[] = { - UTF16(0x1F466), UTF16(0x1F3FB), // boy + type 1-2 fitzpatrick modifier - 0x270C, 0xFE0F, UTF16(0x1F3FF) // victory hand + emoji style + type 6 fitzpatrick modifier - }; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(4, breaker.next()); // after boy + type 1-2 fitzpatrick modifier - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ(4, breaker.wordEnd()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(4, breaker.wordStart()); - EXPECT_EQ(8, breaker.wordEnd()); + uint16_t buf[] = { + UTF16(0x1F466), UTF16(0x1F3FB), // boy + type 1-2 fitzpatrick modifier + 0x270C, 0xFE0F, + UTF16( + 0x1F3FF) // victory hand + emoji style + type 6 fitzpatrick modifier + }; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + 
breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(4, breaker.next()); // after boy + type 1-2 fitzpatrick modifier + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(4, breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(4, breaker.wordStart()); + EXPECT_EQ(8, breaker.wordEnd()); } TEST_F(WordBreakerTest, unicode10Emoji) { - // Should break between emojis. - uint16_t buf[] = { - // SLED + SLED - UTF16(0x1F6F7), UTF16(0x1F6F7), - // SLED + VS15 + SLED - UTF16(0x1F6F7), 0xFE0E, UTF16(0x1F6F7), - // WHITE SMILING FACE + SLED - 0x263A, UTF16(0x1F6F7), - // WHITE SMILING FACE + VS16 + SLED - 0x263A, 0xFE0F, UTF16(0x1F6F7), - }; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getEnglish()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(2, breaker.next()); - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ(2, breaker.wordEnd()); - - EXPECT_EQ(4, breaker.next()); - EXPECT_EQ(2, breaker.wordStart()); - EXPECT_EQ(4, breaker.wordEnd()); - - EXPECT_EQ(7, breaker.next()); - EXPECT_EQ(4, breaker.wordStart()); - EXPECT_EQ(7, breaker.wordEnd()); - - EXPECT_EQ(9, breaker.next()); - EXPECT_EQ(7, breaker.wordStart()); - EXPECT_EQ(9, breaker.wordEnd()); - - EXPECT_EQ(10, breaker.next()); - EXPECT_EQ(9, breaker.wordStart()); - EXPECT_EQ(10, breaker.wordEnd()); - - EXPECT_EQ(12, breaker.next()); - EXPECT_EQ(10, breaker.wordStart()); - EXPECT_EQ(12, breaker.wordEnd()); - - EXPECT_EQ(14, breaker.next()); - EXPECT_EQ(12, breaker.wordStart()); - EXPECT_EQ(14, breaker.wordEnd()); - - EXPECT_EQ(16, breaker.next()); - EXPECT_EQ(14, breaker.wordStart()); - EXPECT_EQ(16, breaker.wordEnd()); + // Should break between emojis. 
+ uint16_t buf[] = { + // SLED + SLED + UTF16(0x1F6F7), + UTF16(0x1F6F7), + // SLED + VS15 + SLED + UTF16(0x1F6F7), + 0xFE0E, + UTF16(0x1F6F7), + // WHITE SMILING FACE + SLED + 0x263A, + UTF16(0x1F6F7), + // WHITE SMILING FACE + VS16 + SLED + 0x263A, + 0xFE0F, + UTF16(0x1F6F7), + }; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getEnglish()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(2, breaker.next()); + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(2, breaker.wordEnd()); + + EXPECT_EQ(4, breaker.next()); + EXPECT_EQ(2, breaker.wordStart()); + EXPECT_EQ(4, breaker.wordEnd()); + + EXPECT_EQ(7, breaker.next()); + EXPECT_EQ(4, breaker.wordStart()); + EXPECT_EQ(7, breaker.wordEnd()); + + EXPECT_EQ(9, breaker.next()); + EXPECT_EQ(7, breaker.wordStart()); + EXPECT_EQ(9, breaker.wordEnd()); + + EXPECT_EQ(10, breaker.next()); + EXPECT_EQ(9, breaker.wordStart()); + EXPECT_EQ(10, breaker.wordEnd()); + + EXPECT_EQ(12, breaker.next()); + EXPECT_EQ(10, breaker.wordStart()); + EXPECT_EQ(12, breaker.wordEnd()); + + EXPECT_EQ(14, breaker.next()); + EXPECT_EQ(12, breaker.wordStart()); + EXPECT_EQ(14, breaker.wordEnd()); + + EXPECT_EQ(16, breaker.next()); + EXPECT_EQ(14, breaker.wordStart()); + EXPECT_EQ(16, breaker.wordEnd()); } TEST_F(WordBreakerTest, flagsSequenceSingleFlag) { - const std::string kFlag = "U+1F3F4"; - const std::string flags = kFlag + " " + kFlag; - - const int kFlagLength = 2; - const size_t BUF_SIZE = kFlagLength * 2; - - uint16_t buf[BUF_SIZE]; - size_t size; - ParseUnicode(buf, BUF_SIZE, flags.c_str(), &size, nullptr); - - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, size); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(kFlagLength, breaker.next()); // end of the first flag - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ(kFlagLength, breaker.wordEnd()); - EXPECT_EQ(static_cast(size), breaker.next()); - EXPECT_EQ(kFlagLength, breaker.wordStart()); - 
EXPECT_EQ(kFlagLength * 2, breaker.wordEnd()); + const std::string kFlag = "U+1F3F4"; + const std::string flags = kFlag + " " + kFlag; + + const int kFlagLength = 2; + const size_t BUF_SIZE = kFlagLength * 2; + + uint16_t buf[BUF_SIZE]; + size_t size; + ParseUnicode(buf, BUF_SIZE, flags.c_str(), &size, nullptr); + + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, size); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(kFlagLength, breaker.next()); // end of the first flag + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(kFlagLength, breaker.wordEnd()); + EXPECT_EQ(static_cast(size), breaker.next()); + EXPECT_EQ(kFlagLength, breaker.wordStart()); + EXPECT_EQ(kFlagLength * 2, breaker.wordEnd()); } TEST_F(WordBreakerTest, flagsSequence) { - // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag sequence for the flag - // of Scotland. - const std::string kFlagSequence = "U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"; - const std::string flagSequence = kFlagSequence + " " + kFlagSequence; - - const int kFlagLength = 14; - const size_t BUF_SIZE = kFlagLength * 2; - - uint16_t buf[BUF_SIZE]; - size_t size; - ParseUnicode(buf, BUF_SIZE, flagSequence.c_str(), &size, nullptr); - - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, size); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(kFlagLength, breaker.next()); // end of the first flag sequence - EXPECT_EQ(0, breaker.wordStart()); - EXPECT_EQ(kFlagLength, breaker.wordEnd()); - EXPECT_EQ(static_cast(size), breaker.next()); - EXPECT_EQ(kFlagLength, breaker.wordStart()); - EXPECT_EQ(kFlagLength * 2, breaker.wordEnd()); + // U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F is emoji tag + // sequence for the flag of Scotland. 
+ const std::string kFlagSequence = + "U+1F3F4 U+E0067 U+E0062 U+E0073 U+E0063 U+E0074 U+E007F"; + const std::string flagSequence = kFlagSequence + " " + kFlagSequence; + + const int kFlagLength = 14; + const size_t BUF_SIZE = kFlagLength * 2; + + uint16_t buf[BUF_SIZE]; + size_t size; + ParseUnicode(buf, BUF_SIZE, flagSequence.c_str(), &size, nullptr); + + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, size); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(kFlagLength, breaker.next()); // end of the first flag sequence + EXPECT_EQ(0, breaker.wordStart()); + EXPECT_EQ(kFlagLength, breaker.wordEnd()); + EXPECT_EQ(static_cast(size), breaker.next()); + EXPECT_EQ(kFlagLength, breaker.wordStart()); + EXPECT_EQ(kFlagLength * 2, breaker.wordEnd()); } TEST_F(WordBreakerTest, punct) { - uint16_t buf[] = {0x00A1, 0x00A1, 'h', 'e', 'l', 'l' ,'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', - '!', '!'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(9, breaker.next()); // after "¡¡hello, " - EXPECT_EQ(2, breaker.wordStart()); // "hello" - EXPECT_EQ(7, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(9, breaker.wordStart()); // "world" - EXPECT_EQ(14, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {0x00A1, 0x00A1, 'h', 'e', 'l', 'l', 'o', ',', + ' ', 'w', 'o', 'r', 'l', 'd', '!', '!'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(9, breaker.next()); // after "¡¡hello, " + EXPECT_EQ(2, breaker.wordStart()); // "hello" + EXPECT_EQ(7, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(9, breaker.wordStart()); // "world" + EXPECT_EQ(14, breaker.wordEnd()); + 
EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, email) { - uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', - ' ', 'x'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(11, breaker.next()); // after "foo@example" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(16, breaker.next()); // after ".com " - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(16, breaker.wordStart()); // "x" - EXPECT_EQ(17, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', + 'l', 'e', '.', 'c', 'o', 'm', ' ', 'x'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(11, breaker.next()); // after "foo@example" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(16, breaker.next()); // after ".com " + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(16, breaker.wordStart()); // "x" + EXPECT_EQ(17, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, mailto) { - uint16_t buf[] = {'m', 'a', 'i', 'l', 't', 'o', ':', 'f', 'o', 'o', '@', - 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', ' ', 'x'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(7, breaker.next()); // after "mailto:" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(18, 
breaker.next()); // after "foo@example" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(23, breaker.next()); // after ".com " - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(23, breaker.wordStart()); // "x" - EXPECT_EQ(24, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'m', 'a', 'i', 'l', 't', 'o', ':', 'f', 'o', 'o', '@', 'e', + 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', ' ', 'x'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(7, breaker.next()); // after "mailto:" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(18, breaker.next()); // after "foo@example" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(23, breaker.next()); // after ".com " + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(23, breaker.wordStart()); // "x" + EXPECT_EQ(24, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } -// The current logic always places a line break after a detected email address or URL -// and an immediately following non-ASCII character. +// The current logic always places a line break after a detected email address +// or URL and an immediately following non-ASCII character. 
TEST_F(WordBreakerTest, emailNonAscii) { - uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', - 0x4E00}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(11, breaker.next()); // after "foo@example" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(15, breaker.next()); // after ".com" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(15, breaker.wordStart()); // "一" - EXPECT_EQ(16, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', + 'p', 'l', 'e', '.', 'c', 'o', 'm', 0x4E00}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(11, breaker.next()); // after "foo@example" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(15, breaker.next()); // after ".com" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(15, breaker.wordStart()); // "一" + EXPECT_EQ(16, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, emailCombining) { - uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', - 0x0303, ' ', 'x'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(11, breaker.next()); // after "foo@example" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(17, breaker.next()); // after ".com̃ " - 
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(17, breaker.wordStart()); // "x" - EXPECT_EQ(18, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'f', 'o', 'o', '@', 'e', 'x', 'a', 'm', 'p', + 'l', 'e', '.', 'c', 'o', 'm', 0x0303, ' ', 'x'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(11, breaker.next()); // after "foo@example" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(17, breaker.next()); // after ".com̃ " + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(17, breaker.wordStart()); // "x" + EXPECT_EQ(18, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, lonelyAt) { - uint16_t buf[] = {'a', ' ', '@', ' ', 'b'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(2, breaker.next()); // after "a " - EXPECT_EQ(0, breaker.wordStart()); // "a" - EXPECT_EQ(1, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ(4, breaker.next()); // after "@ " - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(4, breaker.wordStart()); // "b" - EXPECT_EQ(5, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'a', ' ', '@', ' ', 'b'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(2, breaker.next()); // after "a " + EXPECT_EQ(0, breaker.wordStart()); // "a" + EXPECT_EQ(1, 
breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ(4, breaker.next()); // after "@ " + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(4, breaker.wordStart()); // "b" + EXPECT_EQ(5, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, url) { - uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'e', 'x', 'a', 'm', 'p', 'l', 'e', - '.', 'c', 'o', 'm', ' ', 'x'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(5, breaker.next()); // after "http:" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(7, breaker.next()); // after "//" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(14, breaker.next()); // after "example" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(19, breaker.next()); // after ".com " - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_EQ(19, breaker.wordStart()); // "x" - EXPECT_EQ(20, breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'e', 'x', 'a', + 'm', 'p', 'l', 'e', '.', 'c', 'o', 'm', ' ', 'x'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(5, breaker.next()); // after "http:" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(7, breaker.next()); // after "//" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(14, 
breaker.next()); // after "example" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(19, breaker.next()); // after ".com " + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_EQ(19, breaker.wordStart()); // "x" + EXPECT_EQ(20, breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } -// Breaks according to section 14.12 of Chicago Manual of Style, *URLs or DOIs and line breaks* +// Breaks according to section 14.12 of Chicago Manual of Style, *URLs or DOIs +// and line breaks* TEST_F(WordBreakerTest, urlBreakChars) { - uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '.', 'b', '/', '~', 'c', ',', 'd', - '-', 'e', '?', 'f', '=', 'g', '&', 'h', '#', 'i', '%', 'j', '_', 'k', '/', 'l'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(5, breaker.next()); // after "http:" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(7, breaker.next()); // after "//" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(8, breaker.next()); // after "a" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(10, breaker.next()); // after ".b" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(11, breaker.next()); // after "/" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(13, breaker.next()); // after "~c" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(15, breaker.next()); // after ",d" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, 
breaker.breakBadness()); - EXPECT_EQ(17, breaker.next()); // after "-e" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(19, breaker.next()); // after "?f" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(20, breaker.next()); // after "=" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(21, breaker.next()); // after "g" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(22, breaker.next()); // after "&" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(23, breaker.next()); // after "h" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(25, breaker.next()); // after "#i" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(27, breaker.next()); // after "%j" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ(29, breaker.next()); // after "_k" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(1, breaker.breakBadness()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(0, breaker.breakBadness()); + uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '.', 'b', '/', + '~', 'c', ',', 'd', '-', 'e', '?', 'f', '=', 'g', '&', + 'h', '#', 'i', '%', 'j', '_', 'k', '/', 'l'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(5, breaker.next()); // after "http:" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(7, breaker.next()); // after "//" + 
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(8, breaker.next()); // after "a" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(10, breaker.next()); // after ".b" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(11, breaker.next()); // after "/" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(13, breaker.next()); // after "~c" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(15, breaker.next()); // after ",d" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(17, breaker.next()); // after "-e" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(19, breaker.next()); // after "?f" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(20, breaker.next()); // after "=" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(21, breaker.next()); // after "g" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(22, breaker.next()); // after "&" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(23, breaker.next()); // after "h" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(25, breaker.next()); // after "#i" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(27, breaker.next()); // after "%j" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ(29, breaker.next()); // 
after "_k" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(1, breaker.breakBadness()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(0, breaker.breakBadness()); } TEST_F(WordBreakerTest, urlNoHyphenBreak) { - uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '-', '/', 'b'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(5, breaker.next()); // after "http:" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(7, breaker.next()); // after "//" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(8, breaker.next()); // after "a" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '-', '/', 'b'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(5, breaker.next()); // after "http:" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(7, breaker.next()); // after "//" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(8, breaker.next()); // after "a" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); } TEST_F(WordBreakerTest, urlEndsWithSlash) { - uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '/'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ(5, breaker.next()); // after "http:" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(7, breaker.next()); // after "//" - 
EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ(8, breaker.next()); // after "a" - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + uint16_t buf[] = {'h', 't', 't', 'p', ':', '/', '/', 'a', '/'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ(5, breaker.next()); // after "http:" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(7, breaker.next()); // after "//" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ(8, breaker.next()); // after "a" + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); } TEST_F(WordBreakerTest, emailStartsWithSlash) { - uint16_t buf[] = {'/', 'a', '@', 'b'}; - WordBreaker breaker; - breaker.setLocale(icu::Locale::getUS()); - breaker.setText(buf, NELEM(buf)); - EXPECT_EQ(0, breaker.current()); - EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end - EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); + uint16_t buf[] = {'/', 'a', '@', 'b'}; + WordBreaker breaker; + breaker.setLocale(icu::Locale::getUS()); + breaker.setText(buf, NELEM(buf)); + EXPECT_EQ(0, breaker.current()); + EXPECT_EQ((ssize_t)NELEM(buf), breaker.next()); // end + EXPECT_TRUE(breaker.wordStart() >= breaker.wordEnd()); } } // namespace minikin diff --git a/third_party/txt/tests/old/util/FontTestUtils.cpp b/third_party/txt/tests/old/util/FontTestUtils.cpp index e754a5f615f26ba0e2e71682b8bbd1ed4dc2416a..acb4fee50334784780e63684f9756afa8b5f0e92 100644 --- a/third_party/txt/tests/old/util/FontTestUtils.cpp +++ b/third_party/txt/tests/old/util/FontTestUtils.cpp @@ -21,80 +21,88 @@ #include -#include "lib/txt/libs/minikin/FontLanguage.h" -#include "MinikinFontForTest.h" #include 
#include +#include "MinikinFontForTest.h" +#include "lib/txt/libs/minikin/FontLanguage.h" namespace minikin { -std::vector> getFontFamilies(const char* fontDir, const char* fontXml) { - xmlDoc* doc = xmlReadFile(fontXml, NULL, 0); - xmlNode* familySet = xmlDocGetRootElement(doc); +std::vector> getFontFamilies(const char* fontDir, + const char* fontXml) { + xmlDoc* doc = xmlReadFile(fontXml, NULL, 0); + xmlNode* familySet = xmlDocGetRootElement(doc); - std::vector> families; - for (xmlNode* familyNode = familySet->children; familyNode; familyNode = familyNode->next) { - if (xmlStrcmp(familyNode->name, (const xmlChar*)"family") != 0) { - continue; - } + std::vector> families; + for (xmlNode* familyNode = familySet->children; familyNode; + familyNode = familyNode->next) { + if (xmlStrcmp(familyNode->name, (const xmlChar*)"family") != 0) { + continue; + } - xmlChar* variantXmlch = xmlGetProp(familyNode, (const xmlChar*)"variant"); - int variant = VARIANT_DEFAULT; - if (variantXmlch) { - if (xmlStrcmp(variantXmlch, (const xmlChar*)"elegant") == 0) { - variant = VARIANT_ELEGANT; - } else if (xmlStrcmp(variantXmlch, (const xmlChar*)"compact") == 0) { - variant = VARIANT_COMPACT; - } - } + xmlChar* variantXmlch = xmlGetProp(familyNode, (const xmlChar*)"variant"); + int variant = VARIANT_DEFAULT; + if (variantXmlch) { + if (xmlStrcmp(variantXmlch, (const xmlChar*)"elegant") == 0) { + variant = VARIANT_ELEGANT; + } else if (xmlStrcmp(variantXmlch, (const xmlChar*)"compact") == 0) { + variant = VARIANT_COMPACT; + } + } - std::vector fonts; - for (xmlNode* fontNode = familyNode->children; fontNode; fontNode = fontNode->next) { - if (xmlStrcmp(fontNode->name, (const xmlChar*)"font") != 0) { - continue; - } + std::vector fonts; + for (xmlNode* fontNode = familyNode->children; fontNode; + fontNode = fontNode->next) { + if (xmlStrcmp(fontNode->name, (const xmlChar*)"font") != 0) { + continue; + } - int weight = atoi((const char*)(xmlGetProp(fontNode, (const xmlChar*)"weight"))) / 
100; - bool italic = xmlStrcmp( - xmlGetProp(fontNode, (const xmlChar*)"style"), (const xmlChar*)"italic") == 0; - xmlChar* index = xmlGetProp(familyNode, (const xmlChar*)"index"); + int weight = + atoi((const char*)(xmlGetProp(fontNode, (const xmlChar*)"weight"))) / + 100; + bool italic = xmlStrcmp(xmlGetProp(fontNode, (const xmlChar*)"style"), + (const xmlChar*)"italic") == 0; + xmlChar* index = xmlGetProp(familyNode, (const xmlChar*)"index"); - xmlChar* fontFileName = xmlNodeListGetString(doc, fontNode->xmlChildrenNode, 1); - std::string fontPath = fontDir + std::string((const char*)fontFileName); - xmlFree(fontFileName); + xmlChar* fontFileName = + xmlNodeListGetString(doc, fontNode->xmlChildrenNode, 1); + std::string fontPath = fontDir + std::string((const char*)fontFileName); + xmlFree(fontFileName); - if (access(fontPath.c_str(), R_OK) != 0) { - ALOGW("%s is not found.", fontPath.c_str()); - continue; - } + if (access(fontPath.c_str(), R_OK) != 0) { + ALOGW("%s is not found.", fontPath.c_str()); + continue; + } - if (index == nullptr) { - std::shared_ptr minikinFont = - std::make_shared(fontPath); - fonts.push_back(Font(minikinFont, FontStyle(weight, italic))); - } else { - std::shared_ptr minikinFont = - std::make_shared(fontPath, atoi((const char*)index)); - fonts.push_back(Font(minikinFont, FontStyle(weight, italic))); - } - } + if (index == nullptr) { + std::shared_ptr minikinFont = + std::make_shared(fontPath); + fonts.push_back(Font(minikinFont, FontStyle(weight, italic))); + } else { + std::shared_ptr minikinFont = + std::make_shared(fontPath, + atoi((const char*)index)); + fonts.push_back(Font(minikinFont, FontStyle(weight, italic))); + } + } - xmlChar* lang = xmlGetProp(familyNode, (const xmlChar*)"lang"); - std::shared_ptr family; - if (lang == nullptr) { - family = std::make_shared(variant, std::move(fonts)); - } else { - uint32_t langId = FontStyle::registerLanguageList( - std::string((const char*)lang, xmlStrlen(lang))); - family = 
std::make_shared(langId, variant, std::move(fonts)); - } - families.push_back(family); + xmlChar* lang = xmlGetProp(familyNode, (const xmlChar*)"lang"); + std::shared_ptr family; + if (lang == nullptr) { + family = std::make_shared(variant, std::move(fonts)); + } else { + uint32_t langId = FontStyle::registerLanguageList( + std::string((const char*)lang, xmlStrlen(lang))); + family = std::make_shared(langId, variant, std::move(fonts)); } - xmlFreeDoc(doc); - return families; + families.push_back(family); + } + xmlFreeDoc(doc); + return families; } -std::shared_ptr getFontCollection(const char* fontDir, const char* fontXml) { - return std::make_shared(getFontFamilies(fontDir, fontXml)); +std::shared_ptr getFontCollection(const char* fontDir, + const char* fontXml) { + return std::make_shared(getFontFamilies(fontDir, fontXml)); } } // namespace minikin diff --git a/third_party/txt/tests/old/util/FontTestUtils.h b/third_party/txt/tests/old/util/FontTestUtils.h index dd5e5860e4b6a50406aae8702c5604803a2aa868..e059301f03bdf54ec42d84773b4cbc1bb408054d 100644 --- a/third_party/txt/tests/old/util/FontTestUtils.h +++ b/third_party/txt/tests/old/util/FontTestUtils.h @@ -30,17 +30,19 @@ namespace minikin { * * Caller must unref the returned pointer. */ -std::vector> getFontFamilies(const char* fontDir, const char* fontXml); +std::vector> getFontFamilies(const char* fontDir, + const char* fontXml); /** * Returns FontCollection from installed fonts. * - * This function reads an XML file and makes font families and collections of them. - * MinikinFontForTest is used for FontFamily creation. + * This function reads an XML file and makes font families and collections of + * them. MinikinFontForTest is used for FontFamily creation. * * Caller must unref the returned pointer. 
*/ -std::shared_ptr getFontCollection(const char* fontDir, const char* fontXml); +std::shared_ptr getFontCollection(const char* fontDir, + const char* fontXml); } // namespace minikin #endif // MINIKIN_FONT_TEST_UTILS_H diff --git a/third_party/txt/tests/old/util/MinikinFontForTest.cpp b/third_party/txt/tests/old/util/MinikinFontForTest.cpp index 723e86ac2d20439a20b31e1b7ab4c622b8d13449..88f1a63c313bc9249980bc4834a8ee78bab511de 100644 --- a/third_party/txt/tests/old/util/MinikinFontForTest.cpp +++ b/third_party/txt/tests/old/util/MinikinFontForTest.cpp @@ -34,44 +34,49 @@ namespace minikin { static int uniqueId = 0; // TODO: make thread safe if necessary. -MinikinFontForTest::MinikinFontForTest(const std::string& font_path, int index, - const std::vector& variations) : - MinikinFont(uniqueId++), - mFontPath(font_path), - mVariations(variations), - mFontIndex(index) { - int fd = open(font_path.c_str(), O_RDONLY); - LOG_ALWAYS_FATAL_IF(fd == -1); - struct stat st = {}; - LOG_ALWAYS_FATAL_IF(fstat(fd, &st) != 0); - mFontSize = st.st_size; - mFontData = mmap(NULL, mFontSize, PROT_READ, MAP_SHARED, fd, 0); - LOG_ALWAYS_FATAL_IF(mFontData == nullptr); - close(fd); +MinikinFontForTest::MinikinFontForTest( + const std::string& font_path, + int index, + const std::vector& variations) + : MinikinFont(uniqueId++), + mFontPath(font_path), + mVariations(variations), + mFontIndex(index) { + int fd = open(font_path.c_str(), O_RDONLY); + LOG_ALWAYS_FATAL_IF(fd == -1); + struct stat st = {}; + LOG_ALWAYS_FATAL_IF(fstat(fd, &st) != 0); + mFontSize = st.st_size; + mFontData = mmap(NULL, mFontSize, PROT_READ, MAP_SHARED, fd, 0); + LOG_ALWAYS_FATAL_IF(mFontData == nullptr); + close(fd); } MinikinFontForTest::~MinikinFontForTest() { - munmap(mFontData, mFontSize); + munmap(mFontData, mFontSize); } -float MinikinFontForTest::GetHorizontalAdvance(uint32_t /* glyph_id */, - const MinikinPaint& /* paint */) const { - // TODO: Make mock value configurable if necessary. 
- return 10.0f; +float MinikinFontForTest::GetHorizontalAdvance( + uint32_t /* glyph_id */, + const MinikinPaint& /* paint */) const { + // TODO: Make mock value configurable if necessary. + return 10.0f; } -void MinikinFontForTest::GetBounds(MinikinRect* bounds, uint32_t /* glyph_id */, - const MinikinPaint& /* paint */) const { - // TODO: Make mock values configurable if necessary. - bounds->mLeft = 0.0f; - bounds->mTop = 0.0f; - bounds->mRight = 10.0f; - bounds->mBottom = 10.0f; +void MinikinFontForTest::GetBounds(MinikinRect* bounds, + uint32_t /* glyph_id */, + const MinikinPaint& /* paint */) const { + // TODO: Make mock values configurable if necessary. + bounds->mLeft = 0.0f; + bounds->mTop = 0.0f; + bounds->mRight = 10.0f; + bounds->mBottom = 10.0f; } std::shared_ptr MinikinFontForTest::createFontWithVariation( - const std::vector& variations) const { - return std::shared_ptr(new MinikinFontForTest(mFontPath, mFontIndex, variations)); + const std::vector& variations) const { + return std::shared_ptr( + new MinikinFontForTest(mFontPath, mFontIndex, variations)); } } // namespace minikin diff --git a/third_party/txt/tests/old/util/MinikinFontForTest.h b/third_party/txt/tests/old/util/MinikinFontForTest.h index 6e230e1d31914d15107ce8bed04f3644d5d39dcd..1da817b9c50e042d8a24bd16c4d7d3afaab4797d 100644 --- a/third_party/txt/tests/old/util/MinikinFontForTest.h +++ b/third_party/txt/tests/old/util/MinikinFontForTest.h @@ -24,37 +24,44 @@ class SkTypeface; namespace minikin { class MinikinFontForTest : public MinikinFont { -public: - MinikinFontForTest(const std::string& font_path, int index, - const std::vector& variations); - MinikinFontForTest(const std::string& font_path, int index) - : MinikinFontForTest(font_path, index, std::vector()) {} - MinikinFontForTest(const std::string& font_path) : MinikinFontForTest(font_path, 0) {} - virtual ~MinikinFontForTest(); - - // MinikinFont overrides. 
- float GetHorizontalAdvance(uint32_t glyph_id, const MinikinPaint &paint) const; - void GetBounds(MinikinRect* bounds, uint32_t glyph_id, - const MinikinPaint& paint) const; - - const std::string& fontPath() const { return mFontPath; } - - const void* GetFontData() const { return mFontData; } - size_t GetFontSize() const { return mFontSize; } - int GetFontIndex() const { return mFontIndex; } - const std::vector& GetAxes() const { return mVariations; } - std::shared_ptr createFontWithVariation( - const std::vector& variations) const; -private: - MinikinFontForTest() = delete; - MinikinFontForTest(const MinikinFontForTest&) = delete; - MinikinFontForTest& operator=(MinikinFontForTest&) = delete; - - const std::string mFontPath; - const std::vector mVariations; - const int mFontIndex; - void* mFontData; - size_t mFontSize; + public: + MinikinFontForTest(const std::string& font_path, + int index, + const std::vector& variations); + MinikinFontForTest(const std::string& font_path, int index) + : MinikinFontForTest(font_path, index, std::vector()) {} + MinikinFontForTest(const std::string& font_path) + : MinikinFontForTest(font_path, 0) {} + virtual ~MinikinFontForTest(); + + // MinikinFont overrides. 
+ float GetHorizontalAdvance(uint32_t glyph_id, + const MinikinPaint& paint) const; + void GetBounds(MinikinRect* bounds, + uint32_t glyph_id, + const MinikinPaint& paint) const; + + const std::string& fontPath() const { return mFontPath; } + + const void* GetFontData() const { return mFontData; } + size_t GetFontSize() const { return mFontSize; } + int GetFontIndex() const { return mFontIndex; } + const std::vector& GetAxes() const { + return mVariations; + } + std::shared_ptr createFontWithVariation( + const std::vector& variations) const; + + private: + MinikinFontForTest() = delete; + MinikinFontForTest(const MinikinFontForTest&) = delete; + MinikinFontForTest& operator=(MinikinFontForTest&) = delete; + + const std::string mFontPath; + const std::vector mVariations; + const int mFontIndex; + void* mFontData; + size_t mFontSize; }; } // namespace minikin diff --git a/third_party/txt/tests/old/util/UnicodeUtils.cpp b/third_party/txt/tests/old/util/UnicodeUtils.cpp index a6595cee7b76323b86f14d08e452fd6227064397..7df5fe326074a333714605dc86c14f377635257d 100644 --- a/third_party/txt/tests/old/util/UnicodeUtils.cpp +++ b/third_party/txt/tests/old/util/UnicodeUtils.cpp @@ -14,106 +14,111 @@ * limitations under the License. */ +#include #include #include #include -#include -#include #include +#include namespace minikin { -// src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null. -// Size is returned in an out parameter because gtest needs a void return for ASSERT to work. -void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size, - size_t* offset) { - size_t input_ix = 0; - size_t output_ix = 0; - bool seen_offset = false; +// src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset +// if non-null. Size is returned in an out parameter because gtest needs a void +// return for ASSERT to work. 
+void ParseUnicode(uint16_t* buf, + size_t buf_size, + const char* src, + size_t* result_size, + size_t* offset) { + size_t input_ix = 0; + size_t output_ix = 0; + bool seen_offset = false; - while (src[input_ix] != 0) { - switch (src[input_ix]) { - case '\'': - // single ASCII char - LOG_ALWAYS_FATAL_IF(static_cast(src[input_ix]) >= 0x80); - input_ix++; - LOG_ALWAYS_FATAL_IF(src[input_ix] == 0); - LOG_ALWAYS_FATAL_IF(output_ix >= buf_size); - buf[output_ix++] = (uint16_t)src[input_ix++]; - LOG_ALWAYS_FATAL_IF(src[input_ix] != '\''); - input_ix++; - break; - case 'u': - case 'U': { - // Unicode codepoint in hex syntax - input_ix++; - LOG_ALWAYS_FATAL_IF(src[input_ix] != '+'); - input_ix++; - char* endptr = (char*)src + input_ix; - unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16); - size_t num_hex_digits = endptr - (src + input_ix); + while (src[input_ix] != 0) { + switch (src[input_ix]) { + case '\'': + // single ASCII char + LOG_ALWAYS_FATAL_IF(static_cast(src[input_ix]) >= 0x80); + input_ix++; + LOG_ALWAYS_FATAL_IF(src[input_ix] == 0); + LOG_ALWAYS_FATAL_IF(output_ix >= buf_size); + buf[output_ix++] = (uint16_t)src[input_ix++]; + LOG_ALWAYS_FATAL_IF(src[input_ix] != '\''); + input_ix++; + break; + case 'u': + case 'U': { + // Unicode codepoint in hex syntax + input_ix++; + LOG_ALWAYS_FATAL_IF(src[input_ix] != '+'); + input_ix++; + char* endptr = (char*)src + input_ix; + unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16); + size_t num_hex_digits = endptr - (src + input_ix); - // also triggers on invalid number syntax, digits = 0 - LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u); - LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u); - LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu); - input_ix += num_hex_digits; - if (U16_LENGTH(codepoint) == 1) { - LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size); - buf[output_ix++] = codepoint; - } else { - // UTF-16 encoding - LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size); - buf[output_ix++] = U16_LEAD(codepoint); - 
buf[output_ix++] = U16_TRAIL(codepoint); - } - break; - } - case ' ': - input_ix++; - break; - case '|': - LOG_ALWAYS_FATAL_IF(seen_offset); - LOG_ALWAYS_FATAL_IF(offset == nullptr); - *offset = output_ix; - seen_offset = true; - input_ix++; - break; - default: - LOG_ALWAYS_FATAL("Unexpected Character"); + // also triggers on invalid number syntax, digits = 0 + LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u); + LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u); + LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu); + input_ix += num_hex_digits; + if (U16_LENGTH(codepoint) == 1) { + LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size); + buf[output_ix++] = codepoint; + } else { + // UTF-16 encoding + LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size); + buf[output_ix++] = U16_LEAD(codepoint); + buf[output_ix++] = U16_TRAIL(codepoint); } + break; + } + case ' ': + input_ix++; + break; + case '|': + LOG_ALWAYS_FATAL_IF(seen_offset); + LOG_ALWAYS_FATAL_IF(offset == nullptr); + *offset = output_ix; + seen_offset = true; + input_ix++; + break; + default: + LOG_ALWAYS_FATAL("Unexpected Character"); } - LOG_ALWAYS_FATAL_IF(result_size == nullptr); - *result_size = output_ix; - LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr); + } + LOG_ALWAYS_FATAL_IF(result_size == nullptr); + *result_size = output_ix; + LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr); } -std::vector parseUnicodeStringWithOffset(const std::string& in, size_t* offset) { - std::unique_ptr buffer(new uint16_t[in.size()]); - size_t result_size = 0; - ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset); - return std::vector(buffer.get(), buffer.get() + result_size); +std::vector parseUnicodeStringWithOffset(const std::string& in, + size_t* offset) { + std::unique_ptr buffer(new uint16_t[in.size()]); + size_t result_size = 0; + ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset); + return std::vector(buffer.get(), buffer.get() + result_size); } std::vector parseUnicodeString(const std::string& 
in) { - return parseUnicodeStringWithOffset(in, nullptr); + return parseUnicodeStringWithOffset(in, nullptr); } std::vector utf8ToUtf16(const std::string& text) { - std::vector result; - int32_t i = 0; - const int32_t textLength = static_cast(text.size()); - uint32_t c = 0; - while (i < textLength) { - U8_NEXT(text.c_str(), i, textLength, c); - if (U16_LENGTH(c) == 1) { - result.push_back(c); - } else { - result.push_back(U16_LEAD(c)); - result.push_back(U16_TRAIL(c)); - } + std::vector result; + int32_t i = 0; + const int32_t textLength = static_cast(text.size()); + uint32_t c = 0; + while (i < textLength) { + U8_NEXT(text.c_str(), i, textLength, c); + if (U16_LENGTH(c) == 1) { + result.push_back(c); + } else { + result.push_back(U16_LEAD(c)); + result.push_back(U16_TRAIL(c)); } - return result; + } + return result; } } // namespace minikin diff --git a/third_party/txt/tests/old/util/UnicodeUtils.h b/third_party/txt/tests/old/util/UnicodeUtils.h index 6ce2fcbd90ecbb47041377e704e5aa5d8964df87..564ffccdde98ae460a8ff8f0d62f8db216db01a6 100644 --- a/third_party/txt/tests/old/util/UnicodeUtils.h +++ b/third_party/txt/tests/old/util/UnicodeUtils.h @@ -16,10 +16,14 @@ namespace minikin { -void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size, - size_t* offset); +void ParseUnicode(uint16_t* buf, + size_t buf_size, + const char* src, + size_t* result_size, + size_t* offset); -std::vector parseUnicodeStringWithOffset(const std::string& in, size_t* offset); +std::vector parseUnicodeStringWithOffset(const std::string& in, + size_t* offset); std::vector parseUnicodeString(const std::string& in); // Converts UTF-8 to UTF-16. 
diff --git a/third_party/txt/tests/txt_run_all_unittests.cc b/third_party/txt/tests/txt_run_all_unittests.cc index 583a95c767fa9f5196b2ea02059724e2fa856741..fca56df01357649f795b00b255e150a2d1333ef1 100644 --- a/third_party/txt/tests/txt_run_all_unittests.cc +++ b/third_party/txt/tests/txt_run_all_unittests.cc @@ -18,8 +18,8 @@ #include "gtest/gtest.h" #include "lib/ftl/command_line.h" #include "lib/ftl/logging.h" -#include "utils.h" #include "third_party/skia/include/core/SkGraphics.h" +#include "utils.h" #include