提交 01830719 编写于 作者: N nobu

fix for emoji-data.txt

* common.mk: download emoji-data.txt.  As emoji data files are
  located in a separate directory on the Unicode.org site, rearranged
  the Unicode data file directories to match the site's layout.

* tool/enc-unicode.rb (get_file): search emoji data files in the
  second argument path.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60977 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
上级 8b180dd7
......@@ -16,12 +16,14 @@ gnumake_recursive =
enable_shared = $(ENABLE_SHARED:no=)
UNICODE_VERSION = 10.0.0
UNICODE_EMOJI_VERSION = 5.0
### set the following environment variable or uncomment the line if
### the Unicode data files should be updated completely on every update ('make up',...).
# ALWAYS_UPDATE_UNICODE = yes
UNICODE_DATA_DIR = enc/unicode/data/$(UNICODE_VERSION)
UNICODE_DATA_DIR = enc/unicode/data/$(UNICODE_VERSION)/ucd
UNICODE_SRC_DATA_DIR = $(srcdir)/$(UNICODE_DATA_DIR)
UNICODE_SRC_EMOJI_DATA_DIR = $(srcdir)/enc/unicode/data/emoji/$(UNICODE_EMOJI_VERSION)
UNICODE_HDR_DIR = $(srcdir)/enc/unicode/$(UNICODE_VERSION)
UNICODE_DATA_HEADERS = \
$(UNICODE_HDR_DIR)/casefold.h \
......@@ -1207,21 +1209,34 @@ UNICODE_PROPERTY_FILES = \
$(UNICODE_SRC_DATA_DIR)/auxiliary/GraphemeBreakProperty.txt \
$(empty)
UNICODE_EMOJI_FILES = \
$(UNICODE_SRC_EMOJI_DATA_DIR)/emoji-data.txt \
$(empty)
update-unicode: $(UNICODE_FILES)
CACHE_DIR = $(srcdir)/.downloaded-cache
UNICODE_DOWNLOAD = \
$(BASERUBY) $(srcdir)/tool/downloader.rb \
--cache-dir=$(CACHE_DIR) \
-d $(srcdir)/$(UNICODE_DATA_DIR) \
-d $(UNICODE_SRC_DATA_DIR) \
-p $(UNICODE_VERSION)/ucd \
-e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode
UNICODE_EMOJI_DOWNLOAD = \
$(BASERUBY) $(srcdir)/tool/downloader.rb \
--cache-dir=$(CACHE_DIR) \
-d $(UNICODE_SRC_EMOJI_DATA_DIR) \
-p emoji/$(UNICODE_EMOJI_VERSION) \
-e $(ALWAYS_UPDATE_UNICODE:yes=-a) unicode
$(UNICODE_PROPERTY_FILES): update-unicode-property-files
# Fetch the Unicode property files into $(UNICODE_SRC_DATA_DIR) and the
# emoji data files into $(UNICODE_SRC_EMOJI_DATA_DIR).
# The emoji files are versioned separately from the main Unicode data
# (UNICODE_EMOJI_VERSION vs. UNICODE_VERSION), so the emoji progress
# message reports the emoji version that is actually downloaded.
update-unicode-property-files:
	$(ECHO) Downloading Unicode $(UNICODE_VERSION) property files...
	$(Q) $(MAKEDIRS) "$(UNICODE_SRC_DATA_DIR)/auxiliary"
	$(Q) $(UNICODE_DOWNLOAD) $(UNICODE_PROPERTY_FILES)
	$(ECHO) Downloading Unicode emoji $(UNICODE_EMOJI_VERSION) files...
	$(Q) $(MAKEDIRS) "$(UNICODE_SRC_EMOJI_DATA_DIR)"
	$(Q) $(UNICODE_EMOJI_DOWNLOAD) $(UNICODE_EMOJI_FILES)
$(UNICODE_FILES): update-unicode-files
update-unicode-files:
......@@ -1259,7 +1274,9 @@ $(UNICODE_HDR_DIR)/$(ALWAYS_UPDATE_UNICODE:yes=name2ctype.h): \
$(UNICODE_HDR_DIR)/name2ctype.h:
$(MAKEDIRS) $(@D)
$(BOOTSTRAPRUBY) $(srcdir)/tool/enc-unicode.rb --header $(UNICODE_SRC_DATA_DIR) > $@
$(BOOTSTRAPRUBY) $(srcdir)/tool/enc-unicode.rb --header \
$(UNICODE_SRC_DATA_DIR) $(UNICODE_SRC_EMOJI_DATA_DIR) > $@.new
$(MV) $@.new $@
# the next non-comment line was:
# $(UNICODE_HDR_DIR)/casefold.h: $(srcdir)/enc/unicode/case-folding.rb \
......
......@@ -1419,7 +1419,7 @@ static const CaseFold_11_Type CaseFold_11_Table[] = {
{0x0130, {2|F|D, {0x0069, 0x0307}}},
};
/* C code produced by gperf version 3.0.4 */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseFold_11_hash -N onigenc_unicode_CaseFold_11_lookup -n */
/* maximum key range = 3623, duplicates = 0 */
......@@ -1462,12 +1462,6 @@ onigenc_unicode_CaseFold_11_hash(const OnigCodePoint code)
return asso_values[bits_of(code, 2)+81] + asso_values[bits_of(code, 1)+2] + asso_values[bits_of(code, 0)];
}
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList3 *
onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code)
{
......@@ -3583,9 +3577,9 @@ onigenc_unicode_CaseFold_11_lookup(const OnigCodePoint code)
if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE)
{
register int key = onigenc_unicode_CaseFold_11_hash(code);
register unsigned int key = onigenc_unicode_CaseFold_11_hash(code);
if (key <= MAX_HASH_VALUE && key >= 0)
if (key <= MAX_HASH_VALUE)
{
register short s = wordlist[key];
......@@ -4868,7 +4862,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = {
{0x0069, {1|U, {0x0049}}},
};
/* C code produced by gperf version 3.0.4 */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_11_hash -N onigenc_unicode_CaseUnfold_11_lookup -n */
/* maximum key range = 2216, duplicates = 0 */
......@@ -4910,12 +4904,6 @@ onigenc_unicode_CaseUnfold_11_hash(const OnigCodePoint code)
return asso_values[bits_of(code, 2)+66] + asso_values[bits_of(code, 1)+4] + asso_values[bits_of(code, 0)];
}
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList3 *
onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code)
{
......@@ -6602,9 +6590,9 @@ onigenc_unicode_CaseUnfold_11_lookup(const OnigCodePoint code)
if (code <= MAX_CODE_VALUE && code >= MIN_CODE_VALUE)
{
register int key = onigenc_unicode_CaseUnfold_11_hash(code);
register unsigned int key = onigenc_unicode_CaseUnfold_11_hash(code);
if (key <= MAX_HASH_VALUE && key >= 0)
if (key <= MAX_HASH_VALUE)
{
register short s = wordlist[key];
......@@ -6679,7 +6667,7 @@ static const CaseUnfold_12_Type CaseUnfold_12_Table[] = {
{{0x0069, 0x0307}, {1, {0x0130}}},
};
/* C code produced by gperf version 3.0.4 */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3,4,5,6 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_12_hash -N onigenc_unicode_CaseUnfold_12_lookup -n */
/* maximum key range = 71, duplicates = 0 */
......@@ -6714,12 +6702,6 @@ onigenc_unicode_CaseUnfold_12_hash(const OnigCodePoint *codes)
return asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
}
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList2 *
onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes)
{
......@@ -6804,9 +6786,9 @@ onigenc_unicode_CaseUnfold_12_lookup(const OnigCodePoint *codes)
if (codes[0] <= MAX_CODE_VALUE && codes[0] >= MIN_CODE_VALUE &&
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE)
{
register int key = onigenc_unicode_CaseUnfold_12_hash(codes);
register unsigned int key = onigenc_unicode_CaseUnfold_12_hash(codes);
if (key <= MAX_HASH_VALUE && key >= 0)
if (key <= MAX_HASH_VALUE)
{
register short s = wordlist[key];
......@@ -6835,7 +6817,7 @@ static const CaseUnfold_13_Type CaseUnfold_13_Table[] = {
{{0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7}}},
};
/* C code produced by gperf version 3.0.4 */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -7 -k1,2,3,4,5,6,7,8,9 -F,-1 -c -j1 -i1 -t -T -E -C -H onigenc_unicode_CaseUnfold_13_hash -N onigenc_unicode_CaseUnfold_13_lookup -n */
/* maximum key range = 20, duplicates = 0 */
......@@ -6870,12 +6852,6 @@ onigenc_unicode_CaseUnfold_13_hash(const OnigCodePoint *codes)
return asso_values[bits_at(codes, 8)] + asso_values[bits_at(codes, 7)] + asso_values[bits_at(codes, 6)] + asso_values[bits_at(codes, 5)] + asso_values[bits_at(codes, 4)] + asso_values[bits_at(codes, 3)] + asso_values[bits_at(codes, 2)] + asso_values[bits_at(codes, 1)] + asso_values[bits_at(codes, 0)];
}
#ifdef __GNUC__
__inline
#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
static const CodePointList2 *
onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
{
......@@ -6918,9 +6894,9 @@ onigenc_unicode_CaseUnfold_13_lookup(const OnigCodePoint *codes)
codes[1] <= MAX_CODE_VALUE && codes[1] >= MIN_CODE_VALUE &&
codes[2] <= MAX_CODE_VALUE && codes[2] >= MIN_CODE_VALUE)
{
register int key = onigenc_unicode_CaseUnfold_13_hash(codes);
register unsigned int key = onigenc_unicode_CaseUnfold_13_hash(codes);
if (key <= MAX_HASH_VALUE && key >= 0)
if (key <= MAX_HASH_VALUE)
{
register short s = wordlist[key];
......
此差异已折叠。
......@@ -7,7 +7,7 @@
# Constants for input and output directory
InputDataDir = ARGV[0] || 'enc/unicode/data'
unicode_version = InputDataDir[/[\d.]+\z/]
unicode_version = InputDataDir[/.*\/(\d+\.\d+\.\d+)(?=\/|\z)/, 1]
# convenience methods
class Integer
......
......@@ -14,8 +14,8 @@
header = true
ARGV.shift
end
unless ARGV.size == 1
abort "Usage: #{$0} data_directory"
unless ARGV.size == 2
abort "Usage: #{$0} data_directory emoji_data_directory"
end
$unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/]
......@@ -302,7 +302,7 @@ def constantize_blockname(name)
end
def get_file(name)
File.join(ARGV[0], name)
File.join(ARGV[name.start_with?("emoji-") ? 1 : 0], name)
end
def data_foreach(name, &block)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册