From 754913bd274b32ad66c1dfcf3bd592bb6c8c01bd Mon Sep 17 00:00:00 2001 From: Roozbeh Pournader Date: Fri, 17 Mar 2017 14:29:09 -0700 Subject: [PATCH] Update Minikin to use ICU's emoji data Certain differences are still needed, since ICU appears to support Emoji 4.0 only, while we need Emoji 5.0. But the bulk of the data is now carried by ICU. We no longer need the script that generates the tables, so that's also removed. Test: Comprehensive unit tests added. Bug: 27365282 Bug: 30874706 Change-Id: I011443fbca9bb202deff7fffb40043f89e1f1fb1 --- libs/minikin/Android.mk | 12 --- libs/minikin/MinikinInternal.cpp | 63 +++++---------- libs/minikin/unicode_emoji_h_gen.py | 105 ------------------------- tests/unittest/MinikinInternalTest.cpp | 46 +++++++++++ 4 files changed, 66 insertions(+), 160 deletions(-) delete mode 100644 libs/minikin/unicode_emoji_h_gen.py diff --git a/libs/minikin/Android.mk b/libs/minikin/Android.mk index 603638e754..be5301218a 100644 --- a/libs/minikin/Android.mk +++ b/libs/minikin/Android.mk @@ -15,18 +15,6 @@ LOCAL_PATH := $(call my-dir) include $(CLEAR_VARS) -# Generate unicode emoji data from UCD. 
-UNICODE_EMOJI_H_GEN_PY := $(LOCAL_PATH)/unicode_emoji_h_gen.py -UNICODE_EMOJI_DATA := $(TOP)/external/unicode/emoji-data.txt - -UNICODE_EMOJI_H := $(intermediates)/generated/UnicodeData.h -$(UNICODE_EMOJI_H): $(UNICODE_EMOJI_H_GEN_PY) $(UNICODE_EMOJI_DATA) -$(LOCAL_PATH)/MinikinInternal.cpp: $(UNICODE_EMOJI_H) -$(UNICODE_EMOJI_H): PRIVATE_CUSTOM_TOOL := python $(UNICODE_EMOJI_H_GEN_PY) \ - -i $(UNICODE_EMOJI_DATA) \ - -o $(UNICODE_EMOJI_H) -$(UNICODE_EMOJI_H): - $(transform-generated-source) include $(CLEAR_VARS) minikin_src_files := \ diff --git a/libs/minikin/MinikinInternal.cpp b/libs/minikin/MinikinInternal.cpp index 60fa9636f8..212ee26c0f 100644 --- a/libs/minikin/MinikinInternal.cpp +++ b/libs/minikin/MinikinInternal.cpp @@ -19,8 +19,8 @@ #include "MinikinInternal.h" #include "HbFontCache.h" -#include "generated/UnicodeData.h" +#include #include namespace minikin { @@ -36,7 +36,7 @@ void assertMinikinLocked() { bool isEmoji(uint32_t c) { // Emoji characters new in Unicode emoji 5.0. // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt - // TODO: Remove once emoji-data.text 5.0 is in the tree. + // TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0. if ((0x1F6F7 <= c && c <= 0x1F6F8) || c == 0x1F91F || (0x1F928 <= c && c <= 0x1F92F) @@ -47,54 +47,31 @@ bool isEmoji(uint32_t c) { || (0x1F9D0 <= c && c <= 0x1F9E6)) { return true; } - - const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]); - return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c); + return u_hasBinaryProperty(c, UCHAR_EMOJI); } -// Based on Emoji_Modifier from http://www.unicode.org/Public/emoji/5.0/emoji-data.txt bool isEmojiModifier(uint32_t c) { - return (0x1F3FB <= c && c <= 0x1F3FF); + // Emoji modifiers are not expected to change, so there's a small chance we need to customize + // this. 
+ return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER); } -// Based on Emoji_Modifier_Base from -// http://www.unicode.org/Public/emoji/5.0/emoji-data.txt bool isEmojiBase(uint32_t c) { - if (0x261D <= c && c <= 0x270D) { - return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D)); - } else if (0x1F385 <= c && c <= 0x1F93E) { - return (c == 0x1F385 - || (0x1F3C2 <= c && c <= 0x1F3C4) - || c == 0x1F3C7 - || (0x1F3CA <= c && c <= 0x1F3CC) - || (0x1F442 <= c && c <= 0x1F443) - || (0x1F446 <= c && c <= 0x1F450) - || (0x1F466 <= c && c <= 0x1F469) - || c == 0x1F46E - || (0x1F470 <= c && c <= 0x1F478) - || c == 0x1F47C - || (0x1F481 <= c && c <= 0x1F483) - || (0x1F485 <= c && c <= 0x1F487) - || c == 0x1F4AA - || (0x1F574 <= c && c <= 0x1F575) - || c == 0x1F57A - || c == 0x1F590 - || (0x1F595 <= c && c <= 0x1F596) - || (0x1F645 <= c && c <= 0x1F647) - || (0x1F64B <= c && c <= 0x1F64F) - || c == 0x1F6A3 - || (0x1F6B4 <= c && c <= 0x1F6B6) - || c == 0x1F6C0 - || c == 0x1F6CC - || (0x1F918 <= c && c <= 0x1F91C) - || (0x1F91E <= c && c <= 0x1F91F) - || c == 0x1F926 - || (0x1F930 <= c && c <= 0x1F939) - || (0x1F93D <= c && c <= 0x1F93E) - || (0x1F9D1 <= c && c <= 0x1F9DD)); - } else { - return false; + // These two characters were removed from Emoji_Modifier_Base in Emoji 4.0, but we need to keep + // them as emoji modifier bases since there are fonts and user-generated text out there that + // treat these as potential emoji bases. + if (c == 0x1F91D || c == 0x1F93C) { + return true; + } + // Emoji Modifier Base characters new in Unicode emoji 5.0. + // From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt + // TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0. 
+ if (c == 0x1F91F + || (0x1F931 <= c && c <= 0x1F932) + || (0x1F9D1 <= c && c <= 0x1F9DD)) { + return true; } + return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER_BASE); } hb_blob_t* getFontTable(const MinikinFont* minikinFont, uint32_t tag) { diff --git a/libs/minikin/unicode_emoji_h_gen.py b/libs/minikin/unicode_emoji_h_gen.py deleted file mode 100644 index 5186455188..0000000000 --- a/libs/minikin/unicode_emoji_h_gen.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2016 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -"""Generate header file for unicode data.""" - -import optparse -import sys - - -UNICODE_EMOJI_TEMPLATE=""" -/* file generated by frameworks/minikin/lib/minikin/Android.mk */ -#ifndef MINIKIN_UNICODE_EMOJI_H -#define MINIKIN_UNICODE_EMOJI_H - -#include - -namespace minikin { -namespace generated { - -int32_t EMOJI_LIST[] = { -@@@EMOJI_DATA@@@ -}; - -} // namespace generated -} // namespace minikin - -#endif // MINIKIN_UNICODE_EMOJI_H -""" - - -def _create_opt_parser(): - parser = optparse.OptionParser() - parser.add_option('-i', '--input', type='str', action='store', - help='path to input emoji-data.txt') - parser.add_option('-o', '--output', type='str', action='store', - help='path to output UnicodeEmoji.h') - return parser - - -def _read_emoji_data(emoji_data_file_path): - result = [] - with open(emoji_data_file_path) as emoji_data_file: - for line in emoji_data_file: - if '#' in line: - line = line[:line.index('#')] # Drop comments. - if not line.strip(): - continue # Skip empty line. - - code_points, prop = line.split(';') - code_points = code_points.strip() - prop = prop.strip() - if prop != 'Emoji': - break # Only collect Emoji property code points - - if '..' 
in code_points: # code point range - cp_start, cp_end = code_points.split('..') - result.extend(xrange(int(cp_start, 16), int(cp_end, 16) + 1)) - else: - code_point = int(code_points, 16) - result.append(code_point) - return result - - -def _generate_header_contents(emoji_list): - INDENT = ' ' * 4 - JOINER = ', ' - - hex_list = ['0x%04X' % x for x in emoji_list] - lines = [] - tmp_line = '%s%s' % (INDENT, hex_list[0]) - for hex_str in hex_list[1:]: - if len(tmp_line) + len(JOINER) + len(hex_str) >= 100: - lines.append(tmp_line + ',') - tmp_line = '%s%s' % (INDENT, hex_str) - else: - tmp_line = '%s%s%s' % (tmp_line, JOINER, hex_str) - lines.append(tmp_line) - - template = UNICODE_EMOJI_TEMPLATE - template = template.replace('@@@EMOJI_DATA@@@', '\n'.join(lines)) - return template - - -if __name__ == '__main__': - opt_parser = _create_opt_parser() - opts, _ = opt_parser.parse_args() - - emoji_list = _read_emoji_data(opts.input) - header = _generate_header_contents(emoji_list) - with open(opts.output, 'w') as header_file: - header_file.write(header) - diff --git a/tests/unittest/MinikinInternalTest.cpp b/tests/unittest/MinikinInternalTest.cpp index e314dd1be5..1d3ecd76b8 100644 --- a/tests/unittest/MinikinInternalTest.cpp +++ b/tests/unittest/MinikinInternalTest.cpp @@ -16,6 +16,8 @@ #include +#include + #include "MinikinInternal.h" namespace minikin { @@ -23,12 +25,56 @@ namespace minikin { TEST(MinikinInternalTest, isEmojiTest) { EXPECT_TRUE(isEmoji(0x0023)); // NUMBER SIGN EXPECT_TRUE(isEmoji(0x0035)); // DIGIT FIVE + EXPECT_TRUE(isEmoji(0x2640)); // FEMALE SIGN + EXPECT_TRUE(isEmoji(0x2642)); // MALE SIGN + EXPECT_TRUE(isEmoji(0x2695)); // STAFF OF AESCULAPIUS EXPECT_TRUE(isEmoji(0x1F0CF)); // PLAYING CARD BLACK JOKER EXPECT_TRUE(isEmoji(0x1F1E9)); // REGIONAL INDICATOR SYMBOL LETTER D + EXPECT_TRUE(isEmoji(0x1F6F7)); // SLED + EXPECT_TRUE(isEmoji(0x1F9E6)); // SOCKS EXPECT_FALSE(isEmoji(0x0000)); // EXPECT_FALSE(isEmoji(0x0061)); // LATIN SMALL LETTER A + 
EXPECT_FALSE(isEmoji(0x1F93B)); // MODERN PENTATHLON + EXPECT_FALSE(isEmoji(0x1F946)); // RIFLE EXPECT_FALSE(isEmoji(0x29E3D)); // A han character. } +TEST(MinikinInternalTest, isEmojiModifierTest) { + EXPECT_TRUE(isEmojiModifier(0x1F3FB)); // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + EXPECT_TRUE(isEmojiModifier(0x1F3FC)); // EMOJI MODIFIER FITZPATRICK TYPE-3 + EXPECT_TRUE(isEmojiModifier(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4 + EXPECT_TRUE(isEmojiModifier(0x1F3FE)); // EMOJI MODIFIER FITZPATRICK TYPE-5 + EXPECT_TRUE(isEmojiModifier(0x1F3FF)); // EMOJI MODIFIER FITZPATRICK TYPE-6 + + EXPECT_FALSE(isEmojiModifier(0x0000)); // + EXPECT_FALSE(isEmojiModifier(0x1F3FA)); // AMPHORA + EXPECT_FALSE(isEmojiModifier(0x1F400)); // RAT + EXPECT_FALSE(isEmojiModifier(0x29E3D)); // A han character. +} + +TEST(MinikinInternalTest, isEmojiBaseTest) { + EXPECT_TRUE(isEmojiBase(0x261D)); // WHITE UP POINTING INDEX + EXPECT_TRUE(isEmojiBase(0x270D)); // WRITING HAND + EXPECT_TRUE(isEmojiBase(0x1F385)); // FATHER CHRISTMAS + EXPECT_TRUE(isEmojiBase(0x1F3C2)); // SNOWBOARDER + EXPECT_TRUE(isEmojiBase(0x1F3C7)); // HORSE RACING + EXPECT_TRUE(isEmojiBase(0x1F3CC)); // GOLFER + EXPECT_TRUE(isEmojiBase(0x1F574)); // MAN IN BUSINESS SUIT LEVITATING + EXPECT_TRUE(isEmojiBase(0x1F6CC)); // SLEEPING ACCOMMODATION + EXPECT_TRUE(isEmojiBase(0x1F91D)); // HANDSHAKE (removed from Emoji 4.0, but we need it) + EXPECT_TRUE(isEmojiBase(0x1F91F)); // I LOVE YOU HAND SIGN + EXPECT_TRUE(isEmojiBase(0x1F931)); // BREAST-FEEDING + EXPECT_TRUE(isEmojiBase(0x1F932)); // PALMS UP TOGETHER + EXPECT_TRUE(isEmojiBase(0x1F93C)); // WRESTLERS (removed from Emoji 4.0, but we need it) + EXPECT_TRUE(isEmojiBase(0x1F9D1)); // ADULT + EXPECT_TRUE(isEmojiBase(0x1F9DD)); // ELF + + EXPECT_FALSE(isEmojiBase(0x0000)); // + EXPECT_FALSE(isEmojiBase(0x261C)); // WHITE LEFT POINTING INDEX + EXPECT_FALSE(isEmojiBase(0x1F384)); // CHRISTMAS TREE + EXPECT_FALSE(isEmojiBase(0x1F9DE)); // GENIE + 
EXPECT_FALSE(isEmojiBase(0x29E3D)); // A han character. +} + } // namespace minikin -- GitLab