提交 754913bd 编写于 作者: R Roozbeh Pournader

Update Minikin to use ICU's emoji data

Certain differences are still needed, since ICU appears to support
Emoji 4.0 only, while we need Emoji 5.0. But the bulk of the data is
now carried by ICU.

We no longer need the script that generates the tables, so that's
also removed.

Test: Comprehensive unit tests added.
Bug: 27365282
Bug: 30874706
Change-Id: I011443fbca9bb202deff7fffb40043f89e1f1fb1
上级 03adc8ba
......@@ -15,18 +15,6 @@
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
# Generate unicode emoji data from UCD.
UNICODE_EMOJI_H_GEN_PY := $(LOCAL_PATH)/unicode_emoji_h_gen.py
UNICODE_EMOJI_DATA := $(TOP)/external/unicode/emoji-data.txt
UNICODE_EMOJI_H := $(intermediates)/generated/UnicodeData.h
$(UNICODE_EMOJI_H): $(UNICODE_EMOJI_H_GEN_PY) $(UNICODE_EMOJI_DATA)
$(LOCAL_PATH)/MinikinInternal.cpp: $(UNICODE_EMOJI_H)
$(UNICODE_EMOJI_H): PRIVATE_CUSTOM_TOOL := python $(UNICODE_EMOJI_H_GEN_PY) \
-i $(UNICODE_EMOJI_DATA) \
-o $(UNICODE_EMOJI_H)
$(UNICODE_EMOJI_H):
$(transform-generated-source)
include $(CLEAR_VARS)
minikin_src_files := \
......
......@@ -19,8 +19,8 @@
#include "MinikinInternal.h"
#include "HbFontCache.h"
#include "generated/UnicodeData.h"
#include <unicode/uchar.h>
#include <log/log.h>
namespace minikin {
......@@ -36,7 +36,7 @@ void assertMinikinLocked() {
bool isEmoji(uint32_t c) {
// Emoji characters new in Unicode emoji 5.0.
// From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
// TODO: Remove once emoji-data.txt 5.0 is in the tree.
// TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0.
if ((0x1F6F7 <= c && c <= 0x1F6F8)
|| c == 0x1F91F
|| (0x1F928 <= c && c <= 0x1F92F)
......@@ -47,54 +47,31 @@ bool isEmoji(uint32_t c) {
|| (0x1F9D0 <= c && c <= 0x1F9E6)) {
return true;
}
const size_t length = sizeof(generated::EMOJI_LIST) / sizeof(generated::EMOJI_LIST[0]);
return std::binary_search(generated::EMOJI_LIST, generated::EMOJI_LIST + length, c);
return u_hasBinaryProperty(c, UCHAR_EMOJI);
}
// Based on Emoji_Modifier from http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
bool isEmojiModifier(uint32_t c) {
return (0x1F3FB <= c && c <= 0x1F3FF);
// Emoji modifiers are not expected to change, so there's only a small chance we will need to
// customize this.
return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER);
}
// Based on Emoji_Modifier_Base from
// http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
bool isEmojiBase(uint32_t c) {
if (0x261D <= c && c <= 0x270D) {
return (c == 0x261D || c == 0x26F9 || (0x270A <= c && c <= 0x270D));
} else if (0x1F385 <= c && c <= 0x1F93E) {
return (c == 0x1F385
|| (0x1F3C2 <= c && c <= 0x1F3C4)
|| c == 0x1F3C7
|| (0x1F3CA <= c && c <= 0x1F3CC)
|| (0x1F442 <= c && c <= 0x1F443)
|| (0x1F446 <= c && c <= 0x1F450)
|| (0x1F466 <= c && c <= 0x1F469)
|| c == 0x1F46E
|| (0x1F470 <= c && c <= 0x1F478)
|| c == 0x1F47C
|| (0x1F481 <= c && c <= 0x1F483)
|| (0x1F485 <= c && c <= 0x1F487)
|| c == 0x1F4AA
|| (0x1F574 <= c && c <= 0x1F575)
|| c == 0x1F57A
|| c == 0x1F590
|| (0x1F595 <= c && c <= 0x1F596)
|| (0x1F645 <= c && c <= 0x1F647)
|| (0x1F64B <= c && c <= 0x1F64F)
|| c == 0x1F6A3
|| (0x1F6B4 <= c && c <= 0x1F6B6)
|| c == 0x1F6C0
|| c == 0x1F6CC
|| (0x1F918 <= c && c <= 0x1F91C)
|| (0x1F91E <= c && c <= 0x1F91F)
|| c == 0x1F926
|| (0x1F930 <= c && c <= 0x1F939)
|| (0x1F93D <= c && c <= 0x1F93E)
|| (0x1F9D1 <= c && c <= 0x1F9DD));
} else {
return false;
// These two characters were removed from Emoji_Modifier_Base in Emoji 4.0, but we need to keep
// them as emoji modifier bases since there are fonts and user-generated text out there that
// treats these as potential emoji bases.
if (c == 0x1F91D || c == 0x1F93C) {
return true;
}
// Emoji Modifier Base characters new in Unicode emoji 5.0.
// From http://www.unicode.org/Public/emoji/5.0/emoji-data.txt
// TODO: Remove once emoji-data.txt 5.0 is in ICU or update to 6.0.
if (c == 0x1F91F
|| (0x1F931 <= c && c <= 0x1F932)
|| (0x1F9D1 <= c && c <= 0x1F9DD)) {
return true;
}
return u_hasBinaryProperty(c, UCHAR_EMOJI_MODIFIER_BASE);
}
hb_blob_t* getFontTable(const MinikinFont* minikinFont, uint32_t tag) {
......
#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Generate header file for unicode data."""
import optparse
import sys
# Skeleton of the generated C++ header. The @@@EMOJI_DATA@@@ placeholder is
# replaced with the comma-separated list of emoji code points.
# The array is emitted as `static const` so that the header can be included
# from more than one translation unit without multiple-definition errors and
# so that the table cannot be modified at run time.
UNICODE_EMOJI_TEMPLATE="""
/* file generated by frameworks/minikin/lib/minikin/Android.mk */
#ifndef MINIKIN_UNICODE_EMOJI_H
#define MINIKIN_UNICODE_EMOJI_H
#include <stdint.h>
namespace minikin {
namespace generated {
static const int32_t EMOJI_LIST[] = {
@@@EMOJI_DATA@@@
};
} // namespace generated
} // namespace minikin
#endif // MINIKIN_UNICODE_EMOJI_H
"""
def _create_opt_parser():
parser = optparse.OptionParser()
parser.add_option('-i', '--input', type='str', action='store',
help='path to input emoji-data.txt')
parser.add_option('-o', '--output', type='str', action='store',
help='path to output UnicodeEmoji.h')
return parser
def _read_emoji_data(emoji_data_file_path):
result = []
with open(emoji_data_file_path) as emoji_data_file:
for line in emoji_data_file:
if '#' in line:
line = line[:line.index('#')] # Drop comments.
if not line.strip():
continue # Skip empty line.
code_points, prop = line.split(';')
code_points = code_points.strip()
prop = prop.strip()
if prop != 'Emoji':
break # Only collect Emoji property code points
if '..' in code_points: # code point range
cp_start, cp_end = code_points.split('..')
result.extend(xrange(int(cp_start, 16), int(cp_end, 16) + 1))
else:
code_point = int(code_points, 16)
result.append(code_point)
return result
def _generate_header_contents(emoji_list):
INDENT = ' ' * 4
JOINER = ', '
hex_list = ['0x%04X' % x for x in emoji_list]
lines = []
tmp_line = '%s%s' % (INDENT, hex_list[0])
for hex_str in hex_list[1:]:
if len(tmp_line) + len(JOINER) + len(hex_str) >= 100:
lines.append(tmp_line + ',')
tmp_line = '%s%s' % (INDENT, hex_str)
else:
tmp_line = '%s%s%s' % (tmp_line, JOINER, hex_str)
lines.append(tmp_line)
template = UNICODE_EMOJI_TEMPLATE
template = template.replace('@@@EMOJI_DATA@@@', '\n'.join(lines))
return template
if __name__ == '__main__':
    # Script entry point: parse -i/-o, read the emoji code points from the
    # input data file, render the header text and write it to the output path.
    opt_parser = _create_opt_parser()
    opts, _ = opt_parser.parse_args()
    # Both paths are mandatory. Report a usage error instead of letting
    # open(None) fail later with an opaque TypeError.
    if not opts.input or not opts.output:
        opt_parser.error('both --input and --output are required')
    emoji_list = _read_emoji_data(opts.input)
    header = _generate_header_contents(emoji_list)
    with open(opts.output, 'w') as header_file:
        header_file.write(header)
......@@ -16,6 +16,8 @@
#include <gtest/gtest.h>
#include <unicode/uchar.h>
#include "MinikinInternal.h"
namespace minikin {
......@@ -23,12 +25,56 @@ namespace minikin {
// Verifies isEmoji() on a table of code points that must be reported as emoji
// and a table of code points that must not.
TEST(MinikinInternalTest, isEmojiTest) {
    struct Case {
        uint32_t codePoint;
        const char* name;
    };

    // Code points isEmoji() is expected to accept.
    static const Case kEmoji[] = {
        {0x0023, "NUMBER SIGN"},
        {0x0035, "DIGIT FIVE"},
        {0x2640, "FEMALE SIGN"},
        {0x2642, "MALE SIGN"},
        {0x2695, "STAFF OF AESCULAPIUS"},
        {0x1F0CF, "PLAYING CARD BLACK JOKER"},
        {0x1F1E9, "REGIONAL INDICATOR SYMBOL LETTER D"},
        {0x1F6F7, "SLED"},
        {0x1F9E6, "SOCKS"},
    };
    for (const Case& tc : kEmoji) {
        EXPECT_TRUE(isEmoji(tc.codePoint)) << tc.name;
    }

    // Code points isEmoji() is expected to reject.
    static const Case kNotEmoji[] = {
        {0x0000, "<control>"},
        {0x0061, "LATIN SMALL LETTER A"},
        {0x1F93B, "MODERN PENTATHLON"},
        {0x1F946, "RIFLE"},
        {0x29E3D, "A han character."},
    };
    for (const Case& tc : kNotEmoji) {
        EXPECT_FALSE(isEmoji(tc.codePoint)) << tc.name;
    }
}
// Verifies isEmojiModifier() on the five Fitzpatrick modifiers and on code
// points immediately around that range plus a few unrelated ones.
TEST(MinikinInternalTest, isEmojiModifierTest) {
    struct Case {
        uint32_t codePoint;
        const char* name;
    };

    // Code points isEmojiModifier() is expected to accept.
    static const Case kModifiers[] = {
        {0x1F3FB, "EMOJI MODIFIER FITZPATRICK TYPE-1-2"},
        {0x1F3FC, "EMOJI MODIFIER FITZPATRICK TYPE-3"},
        {0x1F3FD, "EMOJI MODIFIER FITZPATRICK TYPE-4"},
        {0x1F3FE, "EMOJI MODIFIER FITZPATRICK TYPE-5"},
        {0x1F3FF, "EMOJI MODIFIER FITZPATRICK TYPE-6"},
    };
    for (const Case& tc : kModifiers) {
        EXPECT_TRUE(isEmojiModifier(tc.codePoint)) << tc.name;
    }

    // Code points isEmojiModifier() is expected to reject.
    static const Case kNotModifiers[] = {
        {0x0000, "<control>"},
        {0x1F3FA, "AMPHORA"},
        {0x1F400, "RAT"},
        {0x29E3D, "A han character."},
    };
    for (const Case& tc : kNotModifiers) {
        EXPECT_FALSE(isEmojiModifier(tc.codePoint)) << tc.name;
    }
}
// Verifies isEmojiBase() on a table of code points that must be treated as
// emoji modifier bases and a table of code points that must not.
TEST(MinikinInternalTest, isEmojiBaseTest) {
    struct Case {
        uint32_t codePoint;
        const char* name;
    };

    // Code points isEmojiBase() is expected to accept.
    static const Case kBases[] = {
        {0x261D, "WHITE UP POINTING INDEX"},
        {0x270D, "WRITING HAND"},
        {0x1F385, "FATHER CHRISTMAS"},
        {0x1F3C2, "SNOWBOARDER"},
        {0x1F3C7, "HORSE RACING"},
        {0x1F3CC, "GOLFER"},
        {0x1F574, "MAN IN BUSINESS SUIT LEVITATING"},
        {0x1F6CC, "SLEEPING ACCOMMODATION"},
        {0x1F91D, "HANDSHAKE (removed from Emoji 4.0, but we need it)"},
        {0x1F91F, "I LOVE YOU HAND SIGN"},
        {0x1F931, "BREAST-FEEDING"},
        {0x1F932, "PALMS UP TOGETHER"},
        {0x1F93C, "WRESTLERS (removed from Emoji 4.0, but we need it)"},
        {0x1F9D1, "ADULT"},
        {0x1F9DD, "ELF"},
    };
    for (const Case& tc : kBases) {
        EXPECT_TRUE(isEmojiBase(tc.codePoint)) << tc.name;
    }

    // Code points isEmojiBase() is expected to reject.
    static const Case kNotBases[] = {
        {0x0000, "<control>"},
        {0x261C, "WHITE LEFT POINTING INDEX"},
        {0x1F384, "CHRISTMAS TREE"},
        {0x1F9DE, "GENIE"},
        {0x29E3D, "A han character."},
    };
    for (const Case& tc : kNotBases) {
        EXPECT_FALSE(isEmojiBase(tc.codePoint)) << tc.name;
    }
}
} // namespace minikin
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册