Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
int
Rust
提交
1cd132ee
R
Rust
项目概览
int
/
Rust
大约 1 年 前同步成功
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
Rust
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1cd132ee
编写于
12月 29, 2011
作者:
G
Graydon Hoare
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Teach unicode script to emit canonical and compat decomp mappings. Annoyingly large encoding.
上级
36c55b20
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
71 addition
and
46 deletion
+71
-46
src/etc/unicode.py
src/etc/unicode.py
+71
-46
未找到文件。
src/etc/unicode.py
浏览文件 @
1cd132ee
...
...
@@ -22,9 +22,11 @@ def fetch(f):
exit
(
1
)
def
load_
general_categories
(
f
):
def
load_
unicode_data
(
f
):
fetch
(
f
)
gencats
=
{}
canon_decomp
=
{}
compat_decomp
=
{}
curr_cat
=
""
c_lo
=
0
c_hi
=
0
...
...
@@ -38,6 +40,18 @@ def load_general_categories(f):
code
=
int
(
code
,
16
)
if
decomp
!=
""
:
if
decomp
.
startswith
(
'<'
):
seq
=
[]
for
i
in
decomp
.
split
()[
1
:]:
seq
.
append
(
int
(
i
,
16
))
compat_decomp
[
code
]
=
seq
else
:
seq
=
[]
for
i
in
decomp
.
split
():
seq
.
append
(
int
(
i
,
16
))
canon_decomp
[
code
]
=
seq
if
curr_cat
==
""
:
curr_cat
=
gencat
c_lo
=
code
...
...
@@ -53,7 +67,8 @@ def load_general_categories(f):
curr_cat
=
gencat
c_lo
=
code
c_hi
=
code
return
gencats
return
(
canon_decomp
,
compat_decomp
,
gencats
)
def
load_derived_core_properties
(
f
):
...
...
@@ -96,7 +111,7 @@ def escape_char(c):
return
"'
\\
u%4.4x'"
%
c
return
"'
\\
U%8.8x'"
%
c
def
emit_
rust
_module
(
f
,
mod
,
tbl
):
def
emit_
property
_module
(
f
,
mod
,
tbl
):
f
.
write
(
"mod %s {
\n
"
%
mod
)
keys
=
tbl
.
keys
()
keys
.
sort
()
...
...
@@ -120,53 +135,63 @@ def emit_rust_module(f, mod, tbl):
f
.
write
(
" }
\n\n
"
)
f
.
write
(
"}
\n
"
)
def
emit_cpp_module
(
f
,
mod
,
tbl
):
keys
=
tbl
.
keys
()
keys
.
sort
()
for
cat
in
keys
:
singles
=
[]
ranges
=
[]
for
pair
in
tbl
[
cat
]:
if
pair
[
0
]
==
pair
[
1
]:
singles
.
append
(
pair
[
0
])
else
:
ranges
.
append
(
pair
)
f
.
write
(
"bool %s_%s(unsigned c) {
\n
"
%
(
mod
,
cat
))
for
pair
in
ranges
:
f
.
write
(
" if (0x%x <= c && c <= 0x%x) { return true; }
\n
"
%
pair
)
if
len
(
singles
)
>
0
:
f
.
write
(
" switch (c) {
\n
"
);
for
single
in
singles
:
f
.
write
(
" case 0x%x:
\n
"
%
single
)
f
.
write
(
" return true;
\n
"
);
f
.
write
(
" default:
\n
"
);
f
.
write
(
" return false;
\n
"
);
f
.
write
(
" }
\n
"
)
f
.
write
(
"return false;
\n
"
)
f
.
write
(
"}
\n\n
"
)
def
emit_module
(
rf
,
cf
,
mod
,
tbl
):
emit_rust_module
(
rf
,
mod
,
tbl
)
emit_cpp_module
(
cf
,
mod
,
tbl
)
def
emit_decomp_module
(
f
,
canon
,
compat
):
canon_keys
=
canon
.
keys
()
canon_keys
.
sort
()
compat_keys
=
compat
.
keys
()
compat_keys
.
sort
()
f
.
write
(
"mod decompose {
\n\n
"
);
f
.
write
(
" export canonical, compatibility;
\n\n
"
)
f
.
write
(
" fn canonical(c: char, i: block(char)) { d(c, i, false); }
\n\n
"
)
f
.
write
(
" fn compatibility(c: char, i: block(char)) { d(c, i, true); }
\n\n
"
)
f
.
write
(
" fn d(c: char, i: block(char), k: bool) {
\n
"
)
f
.
write
(
" if c <= '
\\
x7f' { i(c); ret; }
\n
"
)
# First check the canonical decompositions
f
.
write
(
" // Canonical decomposition
\n
"
)
f
.
write
(
" alt c {
\n
"
)
for
char
in
canon_keys
:
f
.
write
(
" %s {
\n
"
%
escape_char
(
char
))
for
d
in
canon
[
char
]:
f
.
write
(
" d(%s, i, k);
\n
"
%
escape_char
(
d
))
f
.
write
(
" }
\n
"
)
f
.
write
(
" _ { }
\n
"
)
f
.
write
(
" }
\n\n
"
)
# Bottom out if we're not doing compat.
f
.
write
(
" if !k { i(c); ret; }
\n\n
"
)
# Then check the compatibility decompositions
f
.
write
(
" // Compatibility decomposition
\n
"
)
f
.
write
(
" alt c {
\n
"
)
for
char
in
compat_keys
:
f
.
write
(
" %s {
\n
"
%
escape_char
(
char
))
for
d
in
compat
[
char
]:
f
.
write
(
" d(%s, i, k);
\n
"
%
escape_char
(
d
))
f
.
write
(
" }
\n
"
)
f
.
write
(
" _ { }
\n
"
)
f
.
write
(
" }
\n\n
"
)
# Finally bottom out.
f
.
write
(
" i(c);
\n
"
)
f
.
write
(
" }
\n
"
)
f
.
write
(
"}
\n\n
"
)
r
=
"unicode.rs"
c
=
"unicode.cpp"
for
i
in
[
r
,
c
]:
for
i
in
[
r
]:
if
os
.
path
.
exists
(
i
):
os
.
remove
(
i
);
rf
=
open
(
r
,
"w"
)
cf
=
open
(
c
,
"w"
)
emit_module
(
rf
,
cf
,
"general_category"
,
load_general_categories
(
"UnicodeData.txt"
))
(
canon_decomp
,
compat_decomp
,
gencats
)
=
load_unicode_data
(
"UnicodeData.txt"
)
emit_decomp_module
(
rf
,
canon_decomp
,
compat_decomp
)
emit_property_module
(
rf
,
"general_category"
,
gencats
)
emit_
module
(
rf
,
c
f
,
"derived_property"
,
load_derived_core_properties
(
"DerivedCoreProperties.txt"
))
emit_
property_module
(
r
f
,
"derived_property"
,
load_derived_core_properties
(
"DerivedCoreProperties.txt"
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录