Commit 3f3442b9 authored by: H Hui Zhang

remove useless third lib

Parent: aba37810
# G2P

* WS: jieba
* G2P: pypinyin
* Tone sandhi: simple

We recommend using the [Parakeet](https://github.com/PaddlePaddle/Parakeet) [TextFrontEnd](https://github.com/PaddlePaddle/Parakeet/blob/develop/parakeet/frontend/__init__.py) to do G2P.
The phoneme set should be changed; you can refer to `examples/thchs30/a0/data/dict/syllable.lexicon`.
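A minimal sketch of the jieba + pypinyin pipeline used here (the style flags mirror the scripts in `local/`; the sample sentence is the first Baker label with the prosody marks removed):
```
import jieba
from pypinyin import lazy_pinyin
from pypinyin import Style

text = "卡尔普陪外孙玩滑梯"
words = jieba.lcut(text)  # WS with jieba
# TONE3 appends the tone digit to each syllable (e.g. "ka3");
# neutral_tone_with_five marks the neutral tone as tone 5.
syllables = lazy_pinyin(
    words, style=Style.TONE3, neutral_tone_with_five=True, errors='ignore')
print(' '.join(syllables))  # ka3 er3 pu3 pei2 wai4 sun1 wan2 hua2 ti1
```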
## Download Baker dataset
The [Baker](https://test.data-baker.com/#/data/index/source) dataset has to be downloaded manually and moved into `./data`,
because you have to pass a CAPTCHA in a browser to download the dataset.
## RUN
```
. path.sh
./run.sh
```
## Result
```
exp/
|-- 000001-010000.txt
|-- ref.pinyin
|-- trans.jieba.pinyin
`-- trans.pinyin
0 directories, 4 files
```
```
4f5a368441eb16aaf43dc1972f8b63dd exp/000001-010000.txt
01707896391c2de9b6fc4a39654be942 exp/ref.pinyin
43380ef160f65a23a3a0544700aa49b8 exp/trans.jieba.pinyin
8e6ff1fc22d8e8584082e804e8bcdeb7 exp/trans.pinyin
```
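To check that your run reproduces these results, you can hash the outputs; a small sketch with `hashlib` (paths and checksums as above):
```
import hashlib

def md5sum(path):
    # stream the file in chunks to avoid loading it all at once
    h = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    return h.hexdigest()

print(md5sum('exp/ref.pinyin'))  # expect 01707896391c2de9b6fc4a39654be942
```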
```
==> exp/000001-010000.txt <==
000001 卡尔普#2陪外孙#1玩滑梯#4。
ka2 er2 pu3 pei2 wai4 sun1 wan2 hua2 ti1
000002 假语村言#2别再#1拥抱我#4。
jia2 yu3 cun1 yan2 bie2 zai4 yong1 bao4 wo3
000003 宝马#1配挂#1跛骡鞍#3,貂蝉#1怨枕#2董翁榻#4。
bao2 ma3 pei4 gua4 bo3 luo2 an1 diao1 chan2 yuan4 zhen3 dong3 weng1 ta4
000004 邓小平#2与#1撒切尔#2会晤#4。
deng4 xiao3 ping2 yu3 sa4 qie4 er3 hui4 wu4
000005 老虎#1幼崽#2与#1宠物犬#1玩耍#4。
lao2 hu3 you4 zai3 yu2 chong3 wu4 quan3 wan2 shua3
==> exp/ref.pinyin <==
000001 ka2 er2 pu3 pei2 wai4 sun1 wan2 hua2 ti1
000002 jia2 yu3 cun1 yan2 bie2 zai4 yong1 bao4 wo3
000003 bao2 ma3 pei4 gua4 bo3 luo2 an1 diao1 chan2 yuan4 zhen3 dong3 weng1 ta4
000004 deng4 xiao3 ping2 yu3 sa4 qie4 er3 hui4 wu4
000005 lao2 hu3 you4 zai3 yu2 chong3 wu4 quan3 wan2 shua3
000006 shen1 chang2 yue1 wu2 chi3 er4 cun4 wu3 fen1 huo4 yi3 shang4
000007 zhao4 di2 yue1 cao2 yun2 teng2 qu4 gui3 wu1
000008 zhan2 pin3 sui1 you3 zhan3 yuan2 que4 tui2
000009 yi2 san3 ju1 er2 tong2 he2 you4 tuo1 er2 tong2 wei2 zhu3
000010 ke1 te4 ni1 shen1 chuan1 bao4 wen2 da4 yi1
==> exp/trans.jieba.pinyin <==
000001 ka3 er3 pu3 pei2 wai4 sun1 wan2 hua2 ti1
000002 jia3 yu3 cun1 yan2 bie2 zai4 yong1 bao4 wo3
000003 bao3 ma3 pei4 gua4 bo3 luo2 an1 diao1 chan2 yuan4 zhen3 dong3 weng1 ta4
000004 deng4 xiao3 ping2 yu3 sa1 qie4 er3 hui4 wu4
000005 lao3 hu3 you4 zai3 yu3 chong3 wu4 quan3 wan2 shua3
000006 shen1 chang2 yue1 wu3 chi3 er4 cun4 wu3 fen1 huo4 yi3 shang4
000007 zhao4 di2 yue1 cao2 yun2 teng2 qu4 gui3 wu1
000008 zhan3 pin3 sui1 you3 zhan3 yuan2 que4 tui2
000009 yi3 san3 ju1 er2 tong2 he2 you4 tuo1 er2 tong2 wei2 zhu3
000010 ke1 te4 ni1 shen1 chuan1 bao4 wen2 da4 yi1
==> exp/trans.pinyin <==
000001 ka3 er3 pu3 pei2 wai4 sun1 wan2 hua2 ti1
000002 jia3 yu3 cun1 yan2 bie2 zai4 yong1 bao4 wo3
000003 bao3 ma3 pei4 gua4 bo3 luo2 an1 diao1 chan2 yuan4 zhen3 dong3 weng1 ta4
000004 deng4 xiao3 ping2 yu3 sa1 qie4 er3 hui4 wu4
000005 lao3 hu3 you4 zai3 yu3 chong3 wu4 quan3 wan2 shua3
000006 shen1 chang2 yue1 wu3 chi3 er4 cun4 wu3 fen1 huo4 yi3 shang4
000007 zhao4 di2 yue1 cao2 yun2 teng2 qu4 gui3 wu1
000008 zhan3 pin3 sui1 you3 zhan3 yuan2 que4 tui2
000009 yi3 san3 ju1 er2 tong2 he2 you4 tuo1 er2 tong2 wei2 zhu3
000010 ke1 te4 ni1 shen1 chuan1 bao4 wen2 da4 yi1
```
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import re

import jieba
from pypinyin import lazy_pinyin
from pypinyin import Style


def extract_pinyin(source, target, use_jieba=False):
    with open(source, 'rt', encoding='utf-8') as fin:
        with open(target, 'wt', encoding='utf-8') as fout:
            for i, line in enumerate(fin):
                if i % 2 == 0:
                    # even lines: "<sentence_id> <text with #N prosody marks>"
                    sentence_id, raw_text = line.strip().split()
                    raw_text = re.sub(r'#\d', '', raw_text)
                    if use_jieba:
                        raw_text = jieba.lcut(raw_text)
                    syllables = lazy_pinyin(
                        raw_text,
                        errors='ignore',
                        style=Style.TONE3,
                        neutral_tone_with_five=True)
                    transcription = ' '.join(syllables)
                    fout.write(f'{sentence_id} {transcription}\n')
                else:
                    # odd lines: the human pinyin annotation; skip it here
                    continue


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="extract baker pinyin labels")
    parser.add_argument(
        "input", type=str, help="Baker's prosody label file")
    parser.add_argument(
        "output", type=str, help="target file to write pinyin labels")
    parser.add_argument(
        "--use-jieba",
        action='store_true',
        help="use jieba for word segmentation.")
    args = parser.parse_args()
    extract_pinyin(args.input, args.output, use_jieba=args.use_jieba)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse


def extract_pinyin_labels(source, target):
    """Extract pinyin labels from Baker's prosody labeling."""
    with open(source, 'rt', encoding='utf-8') as fin:
        with open(target, 'wt', encoding='utf-8') as fout:
            for i, line in enumerate(fin):
                if i % 2 == 0:
                    # even lines: "<sentence_id> <text>"; keep the id
                    sentence_id, raw_text = line.strip().split()
                    fout.write(f'{sentence_id} ')
                else:
                    # odd lines: the pinyin annotation of the sentence above
                    transcription = line.strip()
                    fout.write(f'{transcription}\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="extract baker pinyin labels")
    parser.add_argument(
        "input", type=str, help="Baker's prosody label file")
    parser.add_argument(
        "output", type=str, help="target file to write pinyin labels")
    args = parser.parse_args()
    extract_pinyin_labels(args.input, args.output)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path
from typing import List
from typing import Union


def erized(syllable: str) -> bool:
    """Whether the syllable contains the erhua effect.

    Example
    --------
    huar -> True
    guanr -> True
    er -> False
    """
    # note: for pinyin, len(syllable) >= 2 is always true
    # if not, there is something wrong in the data
    assert len(syllable) >= 2, f"invalid syllable {syllable}"
    return syllable[:2] != "er" and syllable[-2] == 'r'


def ignore_sandhi(reference: List[str], generated: List[str]) -> List[str]:
    """
    Given a sequence of syllables from human annotation (reference),
    which makes sandhi explicit, and a sequence of syllables from some
    simple g2p program (generated), which does not consider sandhi,
    return the reference sequence while ignoring sandhi.

    Example
    --------
    ['lao2', 'hu3'], ['lao3', 'hu3'] -> ['lao3', 'hu3']
    """
    i = 0
    j = 0
    # sandhi is ignored in the result while other mismatches are kept
    result = []
    while i < len(reference):
        if erized(reference[i]):
            # erhua: a simple g2p splits "huar" into two syllables
            # ("hua" + "er"), so the generated sequence advances by two
            result.append(reference[i])
            i += 1
            j += 2
        elif reference[i][:-1] == generated[j][:-1] and reference[i][
                -1] == '2' and generated[j][-1] == '3':
            # third-tone sandhi: annotated tone 2 vs generated tone 3;
            # take the generated syllable
            result.append(generated[j])
            i += 1
            j += 1
        else:
            result.append(reference[i])
            i += 1
            j += 1
    assert j == len(generated), (
        "length of transcriptions mismatch; there may be some characters "
        "that are ignored in the generated transcription.")
    return result


def convert_transcriptions(reference: Union[str, Path],
                           generated: Union[str, Path],
                           output: Union[str, Path]):
    with open(reference, 'rt') as f_ref:
        with open(generated, 'rt') as f_gen:
            with open(output, 'wt') as f_out:
                for ref, gen in zip(f_ref, f_gen):
                    sentence_id, ref_transcription = ref.strip().split(' ', 1)
                    _, gen_transcription = gen.strip().split(' ', 1)
                    try:
                        result = ignore_sandhi(ref_transcription.split(),
                                               gen_transcription.split())
                        result = ' '.join(result)
                    except Exception:
                        print(f"sentence_id: {sentence_id} has some "
                              "annotation error in the reference or generated "
                              "transcription; using the reference.")
                        result = ref_transcription
                    f_out.write(f"{sentence_id} {result}\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="reference transcription but ignore sandhi.")
    parser.add_argument(
        "--reference",
        type=str,
        help="path to the reference transcription of baker dataset.")
    parser.add_argument(
        "--generated", type=str, help="path to the generated transcription.")
    parser.add_argument("--output", type=str, help="path to save result.")
    args = parser.parse_args()
    convert_transcriptions(args.reference, args.generated, args.output)
#!/bin/bash

exp_dir="exp"
data_dir="data"

source ${MAIN_ROOT}/utils/parse_options.sh || exit -1

archive="${data_dir}/BZNSYP.rar"
if [ ! -f "${archive}" ]; then
    echo "Baker Dataset not found! Download it first to the data_dir."
    exit -1
fi

MD5='c4350563bf7dc298f7dd364b2607be83'
md5_result=$(md5sum "${archive}" | awk '{print $1}')
if [ "${md5_result}" != "${MD5}" ]; then
    echo "MD5 mismatch! The archive has been changed."
    exit -1
fi

label_file='ProsodyLabeling/000001-010000.txt'
filename='000001-010000.txt'
unrar e "${archive}" ${label_file}
cp ${filename} "${exp_dir}"
rm -f ${filename}

if [ ! -f "${exp_dir}/${filename}" ]; then
    echo "File extraction failed!"
    exit 1
fi

exit 0
export MAIN_ROOT=`realpath ${PWD}/../../../../`
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
#!/usr/bin/env bash
source path.sh
stage=-1
stop_stage=100
exp_dir=exp
data=data
source ${MAIN_ROOT}/utils/parse_options.sh || exit -1
mkdir -p ${exp_dir}
if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
    mkdir -p ${data}
    test -e ${data}/BZNSYP.rar || wget -c https://weixinxcxdb.oss-cn-beijing.aliyuncs.com/gwYinPinKu/BZNSYP.rar -P ${data}
fi

if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
    echo "stage 0: Extracting Prosody Labeling"
    bash local/prepare_dataset.sh --exp-dir ${exp_dir} --data-dir ${data}
fi

# convert transcriptions in Chinese into pinyin with pypinyin or jieba+pypinyin
filename="000001-010000.txt"

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "stage 1: Processing transcriptions..."
    python3 local/extract_pinyin_label.py ${exp_dir}/${filename} ${exp_dir}/ref.pinyin
    python3 local/convert_transcription.py ${exp_dir}/${filename} ${exp_dir}/trans.pinyin
    python3 local/convert_transcription.py --use-jieba ${exp_dir}/${filename} ${exp_dir}/trans.jieba.pinyin
fi
echo "done"
exit 0
# Regular expression based text normalization for Chinese
For simplicity and ease of implementation, text normalization is basically done by rules and dictionaries. Here's an example.
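For illustration only, a single rule of this kind might look like the sketch below (hypothetical; the real rules live in `text_processing.normalization`):
```
import re

CN_DIGITS = "零一二三四五六七八九"

def read_number_0_99(n):
    # read an integer in [0, 99] the way it is spoken in Chinese
    if n < 10:
        return CN_DIGITS[n]
    tens, ones = divmod(n, 10)
    word = ("" if tens == 1 else CN_DIGITS[tens]) + "十"
    return word + (CN_DIGITS[ones] if ones else "")

def normalize_percent(sentence):
    # one rule: "62%" -> "百分之六十二"
    return re.sub(r'(\d{1,2})%',
                  lambda m: "百分之" + read_number_0_99(int(m.group(1))),
                  sentence)

print(normalize_percent("明天有62%的概率降雨"))  # 明天有百分之六十二的概率降雨
```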
## Run
```
. path.sh
bash run.sh
```
## Results
```
exp/
`-- normalized.txt
0 directories, 1 file
```
```
aff31f8aa08e2a7360228c9ce5886b98 exp/normalized.txt
```
```
今天的最低气温达到零下十度.
只要有四分之三十三的人同意,就可以通过决议。
一九四五年五月二日,苏联士兵在德国国会大厦上升起了胜利旗,象征着攻占柏林并战胜了纳粹德国。
四月十六日,清晨的战斗以炮击揭幕,数以千计的大炮和喀秋莎火箭炮开始炮轰德军阵地,炮击持续了数天之久。
如果剩下的百分之三十点六是过去,那么还有百分之六十九点四.
事情发生在二零二零年三月三十一日的上午八点.
警方正在找一支点二二口径的手枪。
欢迎致电中国联通,北京二零二二年冬奥会官方合作伙伴为您服务
充值缴费请按一,查询话费及余量请按二,跳过本次提醒请按井号键。
快速解除流量封顶请按星号键,腾讯王卡产品介绍、使用说明、特权及活动请按九,查询话费、套餐余量、积分及活动返款请按一,手机上网流量开通及取消请按二,查询本机号码及本号所使用套餐请按四,密码修改及重置请按五,紧急开机请按六,挂失请按七,查询充值记录请按八,其它自助服务及人工服务请按零
```
今天的最低气温达到-10°C.
只要有33/4的人同意,就可以通过决议。
1945年5月2日,苏联士兵在德国国会大厦上升起了胜利旗,象征着攻占柏林并战胜了纳粹德国。
4月16日,清晨的战斗以炮击揭幕,数以千计的大炮和喀秋莎火箭炮开始炮轰德军阵地,炮击持续了数天之久。
如果剩下的30.6%是过去,那么还有69.4%.
事情发生在2020/03/31的上午8:00.
警方正在找一支.22口径的手枪。
欢迎致电中国联通,北京2022年冬奥会官方合作伙伴为您服务
充值缴费请按1,查询话费及余量请按2,跳过本次提醒请按井号键。
快速解除流量封顶请按星号键,腾讯王卡产品介绍、使用说明、特权及活动请按9,查询话费、套餐余量、积分及活动返款请按1,手机上网流量开通及取消请按2,查询本机号码及本号所使用套餐请按4,密码修改及重置请按5,紧急开机请按6,挂失请按7,查询充值记录请按8,其它自助服务及人工服务请按0
智能客服助理快速查话费、查流量请按9,了解北京联通业务请按1,宽带IPTV新装、查询请按2,障碍报修请按3,充值缴费请按4,投诉建议请按5,政企业务请按7,人工服务请按0,for english severice press star key
您的帐户当前可用余额为63.89元,本月消费为2.17元。您的消费、套餐余量和其它信息将以短信形式下发,请您注意查收。谢谢使用,再见!。
您的帐户当前可用余额为负15.5元,本月消费为59.6元。您的消费、套餐余量和其它信息将以短信形式下发,请您注意查收。谢谢使用,再见!。
尊敬的客户,您目前的话费余额为负14.60元,已低于10元,为保证您的通信畅通,请及时缴纳费用。
您的流量已用完,为避免您产生额外费用,建议您根据需求开通一个流量包以作补充。
您可以直接说,查询话费及余量、开通流量包、缴费,您也可以说出其它需求,请问有什么可以帮您?
您的账户当前可用余额为负36.00元,本月消费36.00元。
请问你是电话13985608526的机主吗?
如您对处理结果不满意,可拨打中国联通集团投诉电话10015进行投诉,按本地通话费收费,返回自助服务请按井号键
“26314”号VIP客服代表为您服务。
尊敬的5G用户,欢迎您致电中国联通
首先是应用了M1芯片的iPad Pro,新款的iPad Pro支持5G,这也是苹果的第二款5G产品线。
除此之外,摄像头方面再次升级,增加了前摄全新超广角摄像头,支持人物居中功能,搭配超广角可实现视频中始终让人物居中效果。
屏幕方面,iPad Pro 12.9版本支持XDR体验的Mini-LEDS显示屏,支持HDR10、杜比视界,还支持杜比全景声。
iPad Pro的秒控键盘这次也推出白色版本。
售价方面,11英寸版本售价799美元起,12.9英寸售价1099美元起。
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from text_processing import normalization

parser = argparse.ArgumentParser(
    description="Normalize text in Chinese with some rules.")
parser.add_argument("input", type=str, help="the input sentences")
parser.add_argument("output", type=str, help="path to save the output file.")
args = parser.parse_args()

with open(args.input, 'rt') as fin:
    with open(args.output, 'wt') as fout:
        for sent in fin:
            sent = normalization.normalize_sentence(sent.strip())
            fout.write(sent)
            fout.write('\n')
export MAIN_ROOT=`realpath ${PWD}/../../../`
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${MAIN_ROOT}/third_party:${PYTHONPATH}
#!/usr/bin/env bash
source path.sh
stage=-1
stop_stage=100
exp_dir=exp
data_dir=data
filename="sentences.txt"
source ${MAIN_ROOT}/utils/parse_options.sh || exit -1
mkdir -p ${exp_dir}
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "stage 1: Processing"
    python3 local/test_normalization.py ${data_dir}/${filename} ${exp_dir}/normalized.txt
    if [ -f "${exp_dir}/normalized.txt" ]; then
        echo "Normalized text saved at ${exp_dir}/normalized.txt"
    fi
    # TODO(chenfeiyu): compute edit distance against ground-truth
fi
echo "done"
exit 0
* [phkit](https://github.com/KuangDD/phkit.git)
commit: b2100293c1e36da531d7f30bd52c9b955a649522
licence: None
* [nnAudio](https://github.com/KinWaiCheuk/nnAudio.git)
licence: MIT
MIT License
Copyright (c) 2020 SpeechIO
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# Chinese Text Normalization for Speech Processing
## Problem
Search for "Text Normalization"(TN) on Google and Github, you can hardly find open-source projects that are "read-to-use" for text normalization tasks. Instead, you find a bunch of NLP toolkits or frameworks that *supports* TN functionality. There is quite some work between "support text normalization" and "do text normalization".
## Reason
* TN is language-dependent.
  Some TN processing methods are shared across languages, but a good TN module always involves language-specific knowledge and treatments.
* TN is task-specific.
  Even for the same language, different applications require quite different TN. For example, an ASR pipeline expands "2021" into words (TN), while an ITN pipeline does the reverse.
* TN is "dirty".
  Constructing and maintaining a set of TN rewrite rules is painful, whichever toolkit or framework you choose. Subtle and intrinsic complexities hide inside the TN task itself, not in the tools or frameworks.
* A mature TN module is an asset.
  Since constructing and maintaining TN is hard, it is actually an asset for commercial companies, hence you are unlikely to find a product-level TN module in the open-source community (correct me if you find any).
* TN is a less important topic for both academia and industry.
## Goal
This project sets up a ready-to-use TN module for **Chinese**. Since my background is **speech processing**, this project aims to handle the most common TN tasks in **Chinese ASR** text processing pipelines.
## Normalizers
1. supported NSW (Non-Standard-Word) normalization
|NSW type|raw|normalized|
|-|-|-|
|cardinal|这块黄金重达324.75克|这块黄金重达三百二十四点七五克|
|date|她出生于86年8月18日,她弟弟出生于1995年3月1日|她出生于八六年八月十八日 她弟弟出生于一九九五年三月一日|
|digit|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九|
|fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票|
|money|随便来几个价格12块5,34.5元,20.1万|随便来几个价格十二块五 三十四点五元 二十点一万|
|percentage|明天有62%的概率降雨|明天有百分之六十二的概率降雨|
|telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|
acknowledgement: the NSW normalization code is based on [Zhiyang Zhou's work here](https://github.com/Joee1995/chn_text_norm.git)
1. punctuation removal
For Chinese, it removes the punctuation collected in the [Zhon](https://github.com/tsroten/zhon) project, containing
* non-stop puncs
```
'"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏'
```
* stop puncs
```
'!?。。'
```
For English, it removes Python's `string.punctuation` (see the sketch after this list).
1. multilingual English word upper/lower case conversion
Since ASR/TTS lexicons usually unify English entries to uppercase or lowercase, the TN module should adapt to the lexicon accordingly.
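To make two of these normalizers concrete, here is a toy sketch (my illustration, not this project's actual implementation) of digit-by-digit NSW verbalization and English punctuation removal:
```
import string

CN_DIGITS = "零一二三四五六七八九"

def verbalize_digits(s):
    # digit NSW: read one digit at a time, e.g. "27149" -> "二七一四九"
    return ''.join(CN_DIGITS[int(c)] for c in s)

def remove_en_punct(s):
    # English side of punctuation removal: strip string.punctuation
    return s.translate(str.maketrans('', '', string.punctuation))

print(verbalize_digits("27149"))             # 二七一四九
print(remove_en_punct("ready-to-use, ok?"))  # readytouse ok
```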
## Supported text format
1. plain text, preferably one sentence per line (the most common case in ASR processing).
```
今天早饭吃了没
没吃回家吃去吧
...
```
Plain text is the default format.
2. Kaldi's transcription format
```
KALDI_KEY_UTT001 今天早饭吃了没
KALDI_KEY_UTT002 没吃回家吃去吧
...
```
TN will skip the first-column key and normalize the transcription text that follows (see the sketch below);
pass the `--has_key` option to switch to Kaldi format.
_note: All input text should be UTF-8 encoded._
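A sketch of the key handling, assuming whitespace-separated keys as in the example above:
```
def split_kaldi_line(line):
    # keep the first-column key, normalize only the transcription
    key, text = line.strip().split(maxsplit=1)
    return key, text

key, text = split_kaldi_line("KALDI_KEY_UTT001 今天早饭吃了没")
# ...normalize `text`, then write back f"{key} {normalized}"
```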
## Run examples
* TN (python)
Make sure you have **python3**; Python 2.X won't work correctly.
Run `sh run.sh` in the `TN` dir, and compare the raw text with the normalized text.
* ITN (thrax)
Make sure you have **thrax** installed, and your PATH can find the thrax binaries.
Run `sh run.sh` in the `ITN` dir. Check the Makefile for grammar dependencies.
## possible future work
Since TN is a typical "done is better than perfect" module in the context of ASR, and the current state is sufficient for my purposes, I probably won't update this repo frequently.
There are indeed some things that need improvement:
* For TN, the NSW normalizers in the TN dir are based on regular expressions. I've found some unintended matches, and those regexps need to be refined for more precise TN coverage.
* For ITN, extend the thrax rewriting grammars to cover more scenarios.
* Furthermore, nowadays commercial systems have started to introduce RNN-like models into TN, and a mix of rule-based and model-based systems is the state of the art. For more on this, look for Richard Sproat and Kyle Gorman's work at Google.
UTT000 这块黄金重达324.75克
UTT001 她出生于86年8月18日,她弟弟出生于1995年3月1日
UTT002 电影中梁朝伟扮演的陈永仁的编号27149
UTT003 现场有7/12的观众投出了赞成票
UTT004 随便来几个价格12块5,34.5元,20.1万
UTT005 明天有62%的概率降雨
UTT006 这是固话0421-33441122或这是手机+86 18544139121
这块黄金重达324.75克
她出生于86年8月18日,她弟弟出生于1995年3月1日
电影中梁朝伟扮演的陈永仁的编号27149
现场有7/12的观众投出了赞成票
随便来几个价格12块5,34.5元,20.1万
明天有62%的概率降雨
这是固话0421-33441122或这是手机+86 18544139121
# for plain text
python3 cn_tn.py example_plain.txt output_plain.txt
diff example_plain.txt output_plain.txt
# for Kaldi's trans format
python3 cn_tn.py --has_key example_kaldi.txt output_kaldi.txt
diff example_kaldi.txt output_kaldi.txt
0. Place install_thrax.sh into $KALDI_ROOT/tools/extras/
1. Recompile OpenFst with the "--enable-grm" option required by thrax:
 * cd $KALDI_ROOT/tools
 * make clean
 * edit $KALDI_ROOT/tools/Makefile, appending the "--enable-grm" option to OPENFST_CONFIGURE:
OPENFST_CONFIGURE ?= --enable-static --enable-shared --enable-far --enable-ngram-fsts --enable-lookahead-fsts --with-pic --enable-grm
* make -j 10
2. install thrax
cd $KALDI_ROOT/tools
sh extras/install_thrax.sh
3. add thrax binary path into $KALDI_ROOT/tools/env.sh:
export PATH=/path/to/your/kaldi_root/tools/thrax-1.2.9/src/bin:${PATH}
usage:
before you run anything related to thrax, use:
. $KALDI_ROOT/tools/env.sh
to enable binary finding, like what we always do in kaldi.
sample usage:
sh run_en.sh
sh run_cn.sh
#!/bin/bash
## This script should be placed under $KALDI_ROOT/tools/extras/, and see INSTALL.txt for installation guide
if [ ! -f thrax-1.2.9.tar.gz ]; then
    wget http://www.openfst.org/twiki/pub/GRM/ThraxDownload/thrax-1.2.9.tar.gz
    tar -zxf thrax-1.2.9.tar.gz
fi
cd thrax-1.2.9
OPENFSTPREFIX=`pwd`/../openfst
LDFLAGS="-L${OPENFSTPREFIX}/lib" CXXFLAGS="-I${OPENFSTPREFIX}/include" ./configure --prefix ${OPENFSTPREFIX}
make -j 10; make install
cd ..
cd src/cn
thraxmakedep itn.grm
make
#thraxrewrite-tester --far=itn.far --rules=ITN
cat ../../testcase_cn.txt | thraxrewrite-tester --far=itn.far --rules=ITN
cd -
cd src
thraxmakedep en/verbalizer/podspeech.grm
make
cat ../testcase_en.txt
cat ../testcase_en.txt | thraxrewrite-tester --far=en/verbalizer/podspeech.far --rules=POD_SPEECH_TN
cd -
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
en/verbalizer/podspeech.far: en/verbalizer/podspeech.grm util/util.far util/case.far en/verbalizer/extra_numbers.far en/verbalizer/float.far en/verbalizer/math.far en/verbalizer/miscellaneous.far en/verbalizer/money.far en/verbalizer/numbers.far en/verbalizer/numbers_plus.far en/verbalizer/spelled.far en/verbalizer/spoken_punct.far en/verbalizer/time.far en/verbalizer/urls.far
	thraxcompiler --input_grammar=$< --output_far=$@

util/util.far: util/util.grm util/byte.far util/case.far
	thraxcompiler --input_grammar=$< --output_far=$@

util/byte.far: util/byte.grm
	thraxcompiler --input_grammar=$< --output_far=$@

util/case.far: util/case.grm util/byte.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/extra_numbers.far: en/verbalizer/extra_numbers.grm util/byte.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/numbers.far: en/verbalizer/numbers.grm en/verbalizer/number_names.far util/byte.far universal/thousands_punct.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/number_names.far: en/verbalizer/number_names.grm util/arithmetic.far en/verbalizer/g.fst en/verbalizer/cardinals.tsv en/verbalizer/ordinals.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

util/arithmetic.far: util/arithmetic.grm util/byte.far util/germanic.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

universal/thousands_punct.far: universal/thousands_punct.grm util/byte.far util/util.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/float.far: en/verbalizer/float.grm en/verbalizer/factorization.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/factorization.far: en/verbalizer/factorization.grm util/byte.far util/util.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/lexical_map.far: en/verbalizer/lexical_map.grm util/byte.far en/verbalizer/lexical_map.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/math.far: en/verbalizer/math.grm en/verbalizer/float.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/miscellaneous.far: en/verbalizer/miscellaneous.grm util/byte.far ru/classifier/cyrillic.far en/verbalizer/extra_numbers.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far en/verbalizer/spelled.far
	thraxcompiler --input_grammar=$< --output_far=$@

ru/classifier/cyrillic.far: ru/classifier/cyrillic.grm
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/spelled.far: en/verbalizer/spelled.grm util/byte.far ru/classifier/cyrillic.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/money.far: en/verbalizer/money.grm util/byte.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far en/verbalizer/money.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/numbers_plus.far: en/verbalizer/numbers_plus.grm en/verbalizer/factorization.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/spoken_punct.far: en/verbalizer/spoken_punct.grm en/verbalizer/lexical_map.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/time.far: en/verbalizer/time.grm util/byte.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/urls.far: en/verbalizer/urls.grm util/byte.far en/verbalizer/lexical_map.far
	thraxcompiler --input_grammar=$< --output_far=$@

clean:
	rm -f util/util.far util/case.far en/verbalizer/extra_numbers.far en/verbalizer/float.far en/verbalizer/math.far en/verbalizer/miscellaneous.far en/verbalizer/money.far en/verbalizer/numbers.far en/verbalizer/numbers_plus.far en/verbalizer/spelled.far en/verbalizer/spoken_punct.far en/verbalizer/time.far en/verbalizer/urls.far util/byte.far en/verbalizer/number_names.far universal/thousands_punct.far util/arithmetic.far en/verbalizer/factorization.far en/verbalizer/lexical_map.far ru/classifier/cyrillic.far
# Text normalization covering grammars
This repository provides covering grammars for English and Russian text normalization as
documented in:
Gorman, K., and Sproat, R. 2016. Minimally supervised number normalization.
_Transactions of the Association for Computational Linguistics_ 4: 507-519.
Ng, A. H., Gorman, K., and Sproat, R. 2017. Minimally supervised
written-to-spoken text normalization. In _ASRU_, pages 665-670.
If you use these grammars in a publication, we would appreciate it if you cited these works.
## Building
The grammars are written in [Thrax](http://thrax.opengrm.org) and compile into [OpenFst](http://www.openfst.org) FAR (FST archive) files. To compile, simply run `make` in the `src/` directory.
## License
See `LICENSE`.
## Mandatory disclaimer
This is not an official Google product.
itn.far: itn.grm byte.far number.far hotfix.far percentage.far date.far amount.far
	thraxcompiler --input_grammar=$< --output_far=$@

byte.far: byte.grm
	thraxcompiler --input_grammar=$< --output_far=$@

number.far: number.grm byte.far
	thraxcompiler --input_grammar=$< --output_far=$@

hotfix.far: hotfix.grm byte.far hotfix.list
	thraxcompiler --input_grammar=$< --output_far=$@

percentage.far: percentage.grm byte.far number.far
	thraxcompiler --input_grammar=$< --output_far=$@

date.far: date.grm byte.far number.far
	thraxcompiler --input_grammar=$< --output_far=$@

amount.far: amount.grm byte.far number.far
	thraxcompiler --input_grammar=$< --output_far=$@

clean:
	rm -f byte.far number.far hotfix.far percentage.far date.far amount.far
import 'byte.grm' as b;
import 'number.grm' as n;
unit = (
"匹"|"张"|"座"|"回"|"场"|"尾"|"条"|"个"|"首"|"阙"|"阵"|"网"|"炮"|
"顶"|"丘"|"棵"|"只"|"支"|"袭"|"辆"|"挑"|"担"|"颗"|"壳"|"窠"|"曲"|
"墙"|"群"|"腔"|"砣"|"座"|"客"|"贯"|"扎"|"捆"|"刀"|"令"|"打"|"手"|
"罗"|"坡"|"山"|"岭"|"江"|"溪"|"钟"|"队"|"单"|"双"|"对"|"出"|"口"|
"头"|"脚"|"板"|"跳"|"枝"|"件"|"贴"|"针"|"线"|"管"|"名"|"位"|"身"|
"堂"|"课"|"本"|"页"|"家"|"户"|"层"|"丝"|"毫"|"厘"|"分"|"钱"|"两"|
"斤"|"担"|"铢"|"石"|"钧"|"锱"|"忽"|"毫"|"厘"|"分"|"寸"|"尺"|"丈"|
"里"|"寻"|"常"|"铺"|"程"|"撮"|"勺"|"合"|"升"|"斗"|"石"|"盘"|"碗"|
"碟"|"叠"|"桶"|"笼"|"盆"|"盒"|"杯"|"钟"|"斛"|"锅"|"簋"|"篮"|"盘"|
"桶"|"罐"|"瓶"|"壶"|"卮"|"盏"|"箩"|"箱"|"煲"|"啖"|"袋"|"钵"|"年"|
"月"|"日"|"季"|"刻"|"时"|"周"|"天"|"秒"|"分"|"旬"|"纪"|"岁"|"世"|
"更"|"夜"|"春"|"夏"|"秋"|"冬"|"代"|"伏"|"辈"|"丸"|"泡"|"粒"|"颗"|
"幢"|"堆"|"条"|"根"|"支"|"道"|"面"|"片"|"张"|"颗"|"块"|
(("千克":"kg")|("毫克":"mg")|("微克":"µg"))|
(("千米":"km")|("厘米":"cm")|("毫米":"mm")|("微米":"µm")|("纳米":"nm"))
);
amount = n.number unit;
export AMOUNT = CDRewrite[amount, "", "", b.kBytes*];
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright 2005-2011 Google, Inc.
# Author: ttai@google.com (Terry Tai)
# Standard constants for ASCII (byte) based strings. This mirrors the
# functions provided by C/C++'s ctype.h library.
# Note that [0] is missing. Matching the string-termination character is kinda weird.
export kBytes = Optimize[
"[1]" | "[2]" | "[3]" | "[4]" | "[5]" | "[6]" | "[7]" | "[8]" | "[9]" | "[10]" |
"[11]" | "[12]" | "[13]" | "[14]" | "[15]" | "[16]" | "[17]" | "[18]" | "[19]" | "[20]" |
"[21]" | "[22]" | "[23]" | "[24]" | "[25]" | "[26]" | "[27]" | "[28]" | "[29]" | "[30]" |
"[31]" | "[32]" | "[33]" | "[34]" | "[35]" | "[36]" | "[37]" | "[38]" | "[39]" | "[40]" |
"[41]" | "[42]" | "[43]" | "[44]" | "[45]" | "[46]" | "[47]" | "[48]" | "[49]" | "[50]" |
"[51]" | "[52]" | "[53]" | "[54]" | "[55]" | "[56]" | "[57]" | "[58]" | "[59]" | "[60]" |
"[61]" | "[62]" | "[63]" | "[64]" | "[65]" | "[66]" | "[67]" | "[68]" | "[69]" | "[70]" |
"[71]" | "[72]" | "[73]" | "[74]" | "[75]" | "[76]" | "[77]" | "[78]" | "[79]" | "[80]" |
"[81]" | "[82]" | "[83]" | "[84]" | "[85]" | "[86]" | "[87]" | "[88]" | "[89]" | "[90]" |
"[91]" | "[92]" | "[93]" | "[94]" | "[95]" | "[96]" | "[97]" | "[98]" | "[99]" | "[100]" |
"[101]" | "[102]" | "[103]" | "[104]" | "[105]" | "[106]" | "[107]" | "[108]" | "[109]" | "[110]" |
"[111]" | "[112]" | "[113]" | "[114]" | "[115]" | "[116]" | "[117]" | "[118]" | "[119]" | "[120]" |
"[121]" | "[122]" | "[123]" | "[124]" | "[125]" | "[126]" | "[127]" | "[128]" | "[129]" | "[130]" |
"[131]" | "[132]" | "[133]" | "[134]" | "[135]" | "[136]" | "[137]" | "[138]" | "[139]" | "[140]" |
"[141]" | "[142]" | "[143]" | "[144]" | "[145]" | "[146]" | "[147]" | "[148]" | "[149]" | "[150]" |
"[151]" | "[152]" | "[153]" | "[154]" | "[155]" | "[156]" | "[157]" | "[158]" | "[159]" | "[160]" |
"[161]" | "[162]" | "[163]" | "[164]" | "[165]" | "[166]" | "[167]" | "[168]" | "[169]" | "[170]" |
"[171]" | "[172]" | "[173]" | "[174]" | "[175]" | "[176]" | "[177]" | "[178]" | "[179]" | "[180]" |
"[181]" | "[182]" | "[183]" | "[184]" | "[185]" | "[186]" | "[187]" | "[188]" | "[189]" | "[190]" |
"[191]" | "[192]" | "[193]" | "[194]" | "[195]" | "[196]" | "[197]" | "[198]" | "[199]" | "[200]" |
"[201]" | "[202]" | "[203]" | "[204]" | "[205]" | "[206]" | "[207]" | "[208]" | "[209]" | "[210]" |
"[211]" | "[212]" | "[213]" | "[214]" | "[215]" | "[216]" | "[217]" | "[218]" | "[219]" | "[220]" |
"[221]" | "[222]" | "[223]" | "[224]" | "[225]" | "[226]" | "[227]" | "[228]" | "[229]" | "[230]" |
"[231]" | "[232]" | "[233]" | "[234]" | "[235]" | "[236]" | "[237]" | "[238]" | "[239]" | "[240]" |
"[241]" | "[242]" | "[243]" | "[244]" | "[245]" | "[246]" | "[247]" | "[248]" | "[249]" | "[250]" |
"[251]" | "[252]" | "[253]" | "[254]" | "[255]"
];
export kDigit = Optimize[
"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
];
export kLower = Optimize[
"a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" |
"n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
];
export kUpper = Optimize[
"A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" |
"N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
];
export kAlpha = Optimize[kLower | kUpper];
export kAlnum = Optimize[kDigit | kAlpha];
export kSpace = Optimize[
" " | "\t" | "\n" | "\r"
];
export kNotSpace = Optimize[kBytes - kSpace];
export kPunct = Optimize[
"!" | "\"" | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*" | "+" | "," |
"-" | "." | "/" | ":" | ";" | "<" | "=" | ">" | "?" | "@" | "\[" | "\\" |
"\]" | "^" | "_" | "`" | "{" | "|" | "}" | "~"
];
export kGraph = Optimize[kAlnum | kPunct];
import 'byte.grm' as b;
import 'number.grm' as n;
date_day = n.number_1_to_99 ("日"|"号");
date_month_day = n.number_1_to_99 "月" date_day;
date_year_month_day = ((n.number_0_to_9){2,4} | n.number) "年" date_month_day;
date = date_year_month_day | date_month_day | date_day;
export DATE = CDRewrite[date, "", "", b.kBytes*];
import 'byte.grm' as b;
hotfix = StringFile['hotfix.list'];
export HOTFIX = CDRewrite[hotfix, "", "", b.kBytes*];
0头 零头
10字 十字
东4环 东4环 -1.0
东4 东四 -0.5
4惠 四惠
3元桥 三元桥
4平市 四平市
5台山 五台山
西2旗 西二旗
西3旗 西三旗
4道口 四道口 -1.0
5道口 五道口 -1.0
6道口 六道口 -1.0
6里桥 六里桥
7里庄 七里庄
8宝山 八宝山
9颗松 九棵松
10里堡 十里堡
import 'byte.grm' as b;
import 'number.grm' as number;
import 'hotfix.grm' as hotfix;
import 'percentage.grm' as percentage;
import 'date.grm' as date;
import 'amount.grm' as amount; # seems not useful for now
export ITN = Optimize[percentage.PERCENTAGE @ (date.DATE <-1>) @ number.NUMBER @ hotfix.HOTFIX];
import 'byte.grm' as b;
number_1_to_9 = (
("一":"1") | ("幺":"1") |
("二":"2") | ("两":"2") |
("三":"3") |
("四":"4") |
("五":"5") |
("六":"6") |
("七":"7") |
("八":"8") |
("九":"9")
);
export number_0_to_9 = (("零":"0") | number_1_to_9);
number_10_to_19 = (
("十":"10") |
("十一":"11") |
("十二":"12") |
("十三":"13") |
("十四":"14") |
("十五":"15") |
("十六":"16") |
("十七":"17") |
("十八":"18") |
("十九":"19")
);
number_10s = (number_1_to_9 ("十":""));
number_100s = (number_1_to_9 ("百":""));
number_1000s = (number_1_to_9 ("千":""));
number_10000s = (number_1_to_9 ("万":""));
number_10_to_99 = (
((number_10s number_1_to_9)<-0.3>) |
((number_10s ("":"0"))<-0.2>) |
(number_10_to_19 <-0.1>)
);
export number_1_to_99 = (number_1_to_9 | number_10_to_99);
number_100_to_999 = (
((number_100s ("零":"0") number_1_to_9)<0.0>)|
((number_100s number_10_to_99)<0.0>) |
((number_100s number_1_to_9 ("":"0"))<0.0>) |
((number_100s ("":"00"))<0.1>)
);
number_1000_to_9999 = (
((number_1000s number_100_to_999)<0.0>) |
((number_1000s ("零":"0") number_10_to_99)<0.0>)|
((number_1000s ("零":"00") number_1_to_9)<0.0>)|
((number_1000s ("":"000"))<1>) |
((number_1000s number_1_to_9 ("":"00"))<0.0>)
);
export number = number_1_to_99 | (number_100_to_999 <-1>) | (number_1000_to_9999 <-2>);
export NUMBER = CDRewrite[number, "", "", b.kBytes*];
import 'byte.grm' as b;
import 'number.grm' as n;
percentage = (
("百分之":"") n.number_1_to_99 ("":"%")
);
export PERCENTAGE = CDRewrite[percentage, "", "", b.kBytes*];
# English covering grammar definitions
This directory defines an English text normalization covering grammar. The
primary entry-point is the FST `VERBALIZER`, defined in
`verbalizer/verbalizer.grm` and compiled in the FST archive
`verbalizer/verbalizer.far`.
verbalizer.far: verbalizer.grm util/util.far en/verbalizer/extra_numbers.far en/verbalizer/float.far en/verbalizer/math.far en/verbalizer/miscellaneous.far en/verbalizer/money.far en/verbalizer/numbers.far en/verbalizer/numbers_plus.far en/verbalizer/spelled.far en/verbalizer/spoken_punct.far en/verbalizer/time.far en/verbalizer/urls.far
	thraxcompiler --input_grammar=$< --output_far=$@
0 zero
1 one
2 two
3 three
4 four
5 five
6 six
7 seven
8 eight
9 nine
10 ten
11 eleven
12 twelve
13 thirteen
14 fourteen
15 fifteen
16 sixteen
17 seventeen
18 eighteen
19 nineteen
20 twenty
30 thirty
40 forty
50 fifty
60 sixty
70 seventy
80 eighty
90 ninety
100 hundred
1000 thousand
1000000 million
1000000000 billion
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/byte.grm' as b;
import 'en/verbalizer/numbers.grm' as n;
digit = b.kDigit @ n.CARDINAL_NUMBERS | ("0" : "@@OTHER_ZERO_VERBALIZATIONS@@");
export DIGITS = digit (n.I[" "] digit)*;
# Various common factorizations
two_digits = b.kDigit{2} @ n.CARDINAL_NUMBERS;
three_digits = b.kDigit{3} @ n.CARDINAL_NUMBERS;
mixed =
(digit n.I[" "] two_digits)
| (two_digits n.I[" "] two_digits)
| (two_digits n.I[" "] three_digits)
| (two_digits n.I[" "] two_digits n.I[" "] two_digits)
;
export MIXED_NUMBERS = Optimize[mixed];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/byte.grm' as b;
import 'util/util.grm' as u;
import 'en/verbalizer/numbers.grm' as n;
func ToNumberName[expr] {
  number_name_seq = n.CARDINAL_NUMBERS (" " n.CARDINAL_NUMBERS)*;
  return Optimize[expr @ number_name_seq];
}
d = b.kDigit;
leading_zero = CDRewrite[n.I[" "], ("[BOS]" | " ") "0", "", b.kBytes*];
by_ones = d n.I[" "];
by_twos = (d{2} @ leading_zero) n.I[" "];
by_threes = (d{3} @ leading_zero) n.I[" "];
groupings = by_twos* (by_threes | by_twos | by_ones);
export FRACTIONAL_PART_UNGROUPED =
Optimize[ToNumberName[by_ones+ @ u.CLEAN_SPACES]]
;
export FRACTIONAL_PART_GROUPED =
Optimize[ToNumberName[groupings @ u.CLEAN_SPACES]]
;
export FRACTIONAL_PART_UNPARSED = Optimize[ToNumberName[d*]];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'en/verbalizer/factorization.grm' as f;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
fractional_part_ungrouped = f.FRACTIONAL_PART_UNGROUPED;
fractional_part_grouped = f.FRACTIONAL_PART_GROUPED;
fractional_part_unparsed = f.FRACTIONAL_PART_UNPARSED;
__fractional_part__ = fractional_part_ungrouped | fractional_part_unparsed;
__decimal_marker__ = ".";
export FLOAT = Optimize[
(n.CARDINAL_NUMBERS
(__decimal_marker__ : " @@DECIMAL_DOT_EXPRESSION@@ ")
__fractional_part__) @ l.LEXICAL_MAP]
;
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/byte.grm' as b;
lexical_map = StringFile['en/verbalizer/lexical_map.tsv'];
sigma_star = b.kBytes*;
del_null = CDRewrite["__NULL__" : "", "", "", sigma_star];
export LEXICAL_MAP = Optimize[
CDRewrite[lexical_map, "", "", sigma_star] @ del_null]
;
@@CONNECTOR_RANGE@@ to
@@CONNECTOR_RATIO@@ to
@@CONNECTOR_BY@@ by
@@CONNECTOR_CONSECUTIVE_YEAR@@ to
@@JANUARY@@ january
@@FEBRUARY@@ february
@@MARCH@@ march
@@APRIL@@ april
@@MAY@@ may
@@JUNE@@ june
@@JULY@@ july
@@AUGUST@@ august
@@SEPTEMBER@@ september
@@OCTOBER@@ october
@@NOVEMBER@@ november
@@DECEMBER@@ december
@@MINUS@@ minus
@@DECIMAL_DOT_EXPRESSION@@ point
@@URL_DOT_EXPRESSION@@ dot
@@DECIMAL_EXPONENT@@ to the
@@DECIMAL_EXPONENT@@ to the power of
@@COLON@@ colon
@@SLASH@@ slash
@@SLASH@@ forward slash
@@DASH@@ dash
@@PASSWORD@@ password
@@AT@@ at
@@PORT@@ port
@@QUESTION_MARK@@ question mark
@@HASH@@ hash
@@HASH@@ hash tag
@@FRACTION_OVER@@ over
@@MONEY_AND@@ and
@@AND@@ and
@@PHONE_PLUS@@ plus
@@PHONE_EXTENSION@@ extension
@@TIME_AM@@ a m
@@TIME_PM@@ p m
@@HOUR@@ o'clock
@@MINUTE@@ minute
@@MINUTE@@ minutes
@@TIME_AFTER@@ after
@@TIME_AFTER@@ past
@@TIME_BEFORE@@ to
@@TIME_BEFORE@@ till
@@TIME_QUARTER@@ quarter
@@TIME_HALF@@ half
@@TIME_ZERO@@ oh
@@TIME_THREE_QUARTER@@ three quarters
@@ARITHMETIC_PLUS@@ plus
@@ARITHMETIC_TIMES@@ times
@@ARITHMETIC_TIMES@@ multiplied by
@@ARITHMETIC_MINUS@@ minus
@@ARITHMETIC_DIVISION@@ divided by
@@ARITHMETIC_DIVISION@@ over
@@ARITHMETIC_EQUALS@@ equals
@@PERCENT@@ percent
@@DEGREE@@ degree
@@DEGREE@@ degrees
@@SQUARE_ROOT@@ square root of
@@SQUARE_ROOT@@ the square root of
@@STAR@@ star
@@HYPHEN@@ hyphen
@@AT@@ at
@@PER@@ per
@@PERIOD@@ period
@@PERIOD@@ full stop
@@PERIOD@@ dot
@@EXCLAMATION_MARK@@ exclamation mark
@@EXCLAMATION_MARK@@ exclamation point
@@COMMA@@ comma
@@POSITIVE@@ positive
@@NEGATIVE@@ negative
@@OTHER_ZERO_VERBALIZATIONS@@ oh
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'en/verbalizer/float.grm' as f;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
float = f.FLOAT;
card = n.CARDINAL_NUMBERS;
number = card | float;
plus = "+" : " @@ARITHMETIC_PLUS@@ ";
times = "*" : " @@ARITHMETIC_TIMES@@ ";
minus = "-" : " @@ARITHMETIC_MINUS@@ ";
division = "/" : " @@ARITHMETIC_DIVISION@@ ";
operator = plus | times | minus | division;
percent = "%" : " @@PERCENT@@";
export ARITHMETIC =
Optimize[((number operator number) | (number percent)) @ l.LEXICAL_MAP]
;
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/byte.grm' as b;
import 'ru/classifier/cyrillic.grm' as c;
import 'en/verbalizer/extra_numbers.grm' as e;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
import 'en/verbalizer/spelled.grm' as s;
letter = b.kAlpha | c.kCyrillicAlpha;
dash = "-";
word = letter+;
possibly_split_word = word (((dash | ".") : " ") word)* n.D["."]?;
post_word_symbol =
("+" : ("@@ARITHMETIC_PLUS@@" | "@@POSITIVE@@")) |
("-" : ("@@ARITHMETIC_MINUS@@" | "@@NEGATIVE@@")) |
("*" : "@@STAR@@")
;
pre_word_symbol =
("@" : "@@AT@@") |
("/" : "@@SLASH@@") |
("#" : "@@HASH@@")
;
post_word = possibly_split_word n.I[" "] post_word_symbol;
pre_word = pre_word_symbol n.I[" "] possibly_split_word;
## Number/digit sequence combos, maybe with a dash
spelled_word = word @ s.SPELLED_NO_LETTER;
word_number =
(word | spelled_word)
(n.I[" "] | (dash : " "))
(e.DIGITS | n.CARDINAL_NUMBERS | e.MIXED_NUMBERS)
;
number_word =
(e.DIGITS | n.CARDINAL_NUMBERS | e.MIXED_NUMBERS)
(n.I[" "] | (dash : " "))
(word | spelled_word)
;
## Two-digit year.
# Note that in this case to be fair we really have to allow ordinals too since
# in some languages that's what you would have.
two_digit_year = n.D["'"] (b.kDigit{2} @ (n.CARDINAL_NUMBERS | e.DIGITS));
dot_com = ("." : "@@URL_DOT_EXPRESSION@@") n.I[" "] "com";
miscellaneous = Optimize[
possibly_split_word
| post_word
| pre_word
| word_number
| number_word
| two_digit_year
| dot_com
];
export MISCELLANEOUS = Optimize[miscellaneous @ l.LEXICAL_MAP];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/byte.grm' as b;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
card = n.CARDINAL_NUMBERS;
__currency__ = StringFile['en/verbalizer/money.tsv'];
d = b.kDigit;
D = d - "0";
cents = ((n.D["0"] | D) d) @ card;
# Only dollars are covered here, for the English verbalizer tests. Other
# currencies will need to be added.
usd_maj = Project["usd_maj" @ __currency__, 'output'];
usd_min = Project["usd_min" @ __currency__, 'output'];
and = " @@MONEY_AND@@ " | " ";
dollar1 =
n.D["$"] card n.I[" " usd_maj] n.I[and] n.D["."] cents n.I[" " usd_min]
;
dollar2 = n.D["$"] card n.I[" " usd_maj] n.D["."] n.D["00"];
dollar3 = n.D["$"] card n.I[" " usd_maj];
dollar = Optimize[dollar1 | dollar2 | dollar3];
export MONEY = Optimize[dollar @ l.LEXICAL_MAP];
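# E.g., assuming money.tsv verbalizes usd_maj as "dollars" and usd_min as
# "cents", "$2.50" should come out as "two dollars @@MONEY_AND@@ fifty cents",
# i.e. "two dollars and fifty cents" after the lexical map.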
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# English minimally supervised number grammar.
#
# Supports both cardinals and ordinals without overt marking.
#
# The language-specific acceptor G was compiled with digit, teen, and decade
# preterminals. The lexicon transducer L is unambiguous so no LM is used.
import 'util/arithmetic.grm' as a;
# Intersects the universal factorization transducer (F) with the
# language-specific acceptor (G).
d = a.DELTA_STAR;
f = a.IARITHMETIC_RESTRICTED;
g = LoadFst['en/verbalizer/g.fst'];
fg = Optimize[d @ Optimize[f @ Optimize[f @ Optimize[f @ g]]]];
test1 = AssertEqual["230" @ fg, "(+ (* 2 100 *) 30 +)"];
# Compiles lexicon transducer (L).
cardinal_name = StringFile['en/verbalizer/cardinals.tsv'];
cardinal_l = Optimize[(cardinal_name " ")* cardinal_name];
test2 = AssertEqual["2 100 30" @ cardinal_l, "two hundred thirty"];
ordinal_name = StringFile['en/verbalizer/ordinals.tsv'];
# In English, ordinals have the same syntax as cardinals and all but the final
# element is verbalized using a cardinal number word; e.g., "two hundred
# thirtieth".
ordinal_l = Optimize[(cardinal_name " ")* ordinal_name];
test3 = AssertEqual["2 100 30" @ ordinal_l, "two hundred thirtieth"];
# Composes L with the leaf transducer (P), then composes that with FG.
p = a.LEAVES;
export CARDINAL_NUMBER_NAME = Optimize[fg @ (p @ cardinal_l)];
test4 = AssertEqual["230" @ CARDINAL_NUMBER_NAME, "two hundred thirty"];
export ORDINAL_NUMBER_NAME = Optimize[fg @ (p @ ordinal_l)];
test5 = AssertEqual["230" @ ORDINAL_NUMBER_NAME, "two hundred thirtieth"];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'en/verbalizer/number_names.grm' as n;
import 'util/byte.grm' as bytelib;
import 'universal/thousands_punct.grm' as t;
cardinal = n.CARDINAL_NUMBER_NAME;
ordinal = n.ORDINAL_NUMBER_NAME;
# Putting these here since this grammar gets incorporated by all the others.
func I[expr] {
return "" : expr;
}
func D[expr] {
return expr : "";
}
separators = t.comma_thousands | t.no_delimiter;
# Language specific endings for ordinals.
d = bytelib.kDigit;
endings = "st" | "nd" | "rd" | "th";
st = (d* "1") - (d* "11");
nd = (d* "2") - (d* "12");
rd = (d* "3") - (d* "13");
th = Optimize[d* - st - nd - rd];
first = st ("st" : "");
second = nd ("nd" : "");
third = rd ("rd" : "");
other = th ("th" : "");
marked_ordinal = Optimize[first | second | third | other];
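# E.g., "21st" matches the st case (it ends in 1 but not 11), so the suffix is
# deleted and "21" is passed on to the ordinal name grammar below, while
# "11th", "12th" and "13th" correctly fall through to the generic th case.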
# The separator is a no-op here but will be needed once we replace
# the above targets.
export CARDINAL_NUMBERS = Optimize[separators @ cardinal];
export ORDINAL_NUMBERS =
Optimize[(separators endings) @ marked_ordinal @ ordinal]
;
export ORDINAL_NUMBERS_UNMARKED = Optimize[separators @ ordinal];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Grammar for things built mostly on numbers.
import 'en/verbalizer/factorization.grm' as f;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
num = n.CARDINAL_NUMBERS;
ord = n.ORDINAL_NUMBERS_UNMARKED;
digits = f.FRACTIONAL_PART_UNGROUPED;
# Various symbols.
plus = "+" : "@@ARITHMETIC_PLUS@@";
minus = "-" : "@@ARITHMETIC_MINUS@@";
slash = "/" : "@@SLASH@@";
dot = "." : "@@URL_DOT_EXPRESSION@@";
dash = "-" : "@@DASH@@";
equals = "=" : "@@ARITHMETIC_EQUALS@@";
degree = "°" : "@@DEGREE@@";
division = ("/" | "÷") : "@@ARITHMETIC_DIVISION@@";
times = ("x" | "*") : "@@ARITHMETIC_TIMES@@";
power = "^" : "@@DECIMAL_EXPONENT@@";
square_root = "√" : "@@SQUARE_ROOT@@";
percent = "%" : "@@PERCENT@@";
# Safe Roman numerals.
# NB: Do not change the formatting here. NO_EDIT must be on the same
# line as the path.
rfile =
'universal/roman_numerals.tsv' # NO_EDIT
;
roman = StringFile[rfile];
## Main categories.
cat_dot_number =
num
n.I[" "] dot n.I[" "] num
(n.I[" "] dot n.I[" "] num)+
;
cat_slash_number =
num
n.I[" "] slash n.I[" "] num
(n.I[" "] slash n.I[" "] num)*
;
cat_dash_number =
num
n.I[" "] dash n.I[" "] num
(n.I[" "] dash n.I[" "] num)*
;
cat_signed_number = ((plus | minus) n.I[" "])? num;
cat_degree = cat_signed_number n.I[" "] degree;
cat_country_code = plus n.I[" "] (num | digits);
cat_math_operations =
plus
| minus
| division
| times
| equals
| percent
| power
| square_root
;
# Roman numerals are often either cardinals or ordinals in various languages.
cat_roman = roman @ (num | ord);
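# E.g., assuming universal/roman_numerals.tsv maps "IV" to "4", "IV" can be
# read as either "four" or "fourth".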
# Allow
#
# number:number
# number-number
#
# to just be
#
# number number.
cat_number_number =
num ((":" | "-") : " ") num
;
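# E.g., "3:2" and "3-2" both come out as "three two".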
# Some additional readings for these symbols.
cat_additional_readings =
("/" : "@@PER@@") |
("+" : "@@AND@@") |
("-" : ("@@HYPHEN@@" | "@@CONNECTOR_TO@@")) |
("*" : "@@STAR@@") |
("x" : ("x" | "@@CONNECTOR_BY@@")) |
("@" : "@@AT@@")
;
numbers_plus = Optimize[
cat_dot_number
| cat_slash_number
| cat_dash_number
| cat_signed_number
| cat_degree
| cat_country_code
| cat_math_operations
| cat_roman
| cat_number_number
| cat_additional_readings
];
export NUMBERS_PLUS = Optimize[numbers_plus @ l.LEXICAL_MAP];
0 zeroth
1 first
2 second
3 third
4 fourth
5 fifth
6 sixth
7 seventh
8 eighth
9 ninth
10 tenth
11 eleventh
12 twelfth
13 thirteenth
14 fourteenth
15 fifteenth
16 sixteenth
17 seventeenth
18 eighteenth
19 nineteenth
20 twentieth
30 thirtieth
40 fortieth
50 fiftieth
60 sixtieth
70 seventieth
80 eightieth
90 ninetieth
100 hundredth
1000 thousandth
1000000 millionth
1000000000 billionth
float.grm __fractional_part__ = fractional_part_ungrouped | fractional_part_unparsed;
telephone.grm __grouping__ = f.UNGROUPED;
measure.grm __measure__ = StringFile['en/verbalizer/measures.tsv'];
money.grm __currency__ = StringFile['en/verbalizer/money.tsv'];
time.grm __sep__ = ":";
time.grm __am__ = "a.m." | "am" | "AM";
time.grm __pm__ = "p.m." | "pm" | "PM";
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/util.grm' as util;
import 'util/case.grm' as case;
import 'en/verbalizer/extra_numbers.grm' as e;
import 'en/verbalizer/float.grm' as f;
import 'en/verbalizer/math.grm' as ma;
import 'en/verbalizer/miscellaneous.grm' as mi;
import 'en/verbalizer/money.grm' as mo;
import 'en/verbalizer/numbers.grm' as n;
import 'en/verbalizer/numbers_plus.grm' as np;
import 'en/verbalizer/spelled.grm' as s;
import 'en/verbalizer/spoken_punct.grm' as sp;
import 'en/verbalizer/time.grm' as t;
import 'en/verbalizer/urls.grm' as u;
export POD_SPEECH_TN = Optimize[RmWeight[
(u.URL
| e.MIXED_NUMBERS
| e.DIGITS
| f.FLOAT
| ma.ARITHMETIC
| mo.MONEY
| n.CARDINAL_NUMBERS
| n.ORDINAL_NUMBERS
| np.NUMBERS_PLUS
| s.SPELLED
| sp.SPOKEN_PUNCT
| t.TIME
| u.EMAILS) @ util.CLEAN_SPACES @ case.TOUPPER
]];
#export POD_SPEECH_TN = Optimize[RmWeight[(mi.MISCELLANEOUS) @ util.CLEAN_SPACES @ case.TOUPPER]];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This verbalizer is used whenever there is an LM symbol that consists of
# letters immediately followed by "{spelled}". This strips the "{spelled}"
# suffix.
import 'util/byte.grm' as b;
import 'ru/classifier/cyrillic.grm' as c;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
digit = b.kDigit @ n.CARDINAL_NUMBERS;
char_set = (("a" | "A") : "letter-a")
| (("b" | "B") : "letter-b")
| (("c" | "C") : "letter-c")
| (("d" | "D") : "letter-d")
| (("e" | "E") : "letter-e")
| (("f" | "F") : "letter-f")
| (("g" | "G") : "letter-g")
| (("h" | "H") : "letter-h")
| (("i" | "I") : "letter-i")
| (("j" | "J") : "letter-j")
| (("k" | "K") : "letter-k")
| (("l" | "L") : "letter-l")
| (("m" | "M") : "letter-m")
| (("n" | "N") : "letter-n")
| (("o" | "O") : "letter-o")
| (("p" | "P") : "letter-p")
| (("q" | "Q") : "letter-q")
| (("r" | "R") : "letter-r")
| (("s" | "S") : "letter-s")
| (("t" | "T") : "letter-t")
| (("u" | "U") : "letter-u")
| (("v" | "V") : "letter-v")
| (("w" | "W") : "letter-w")
| (("x" | "X") : "letter-x")
| (("y" | "Y") : "letter-y")
| (("z" | "Z") : "letter-z")
| (digit)
| ("&" : "@@AND@@")
| ("." : "")
| ("-" : "")
| ("_" : "")
| ("/" : "")
| (n.I["letter-"] c.kCyrillicAlpha)
;
ins_space = "" : " ";
suffix = "{spelled}" : "";
spelled = Optimize[char_set (ins_space char_set)* suffix];
export SPELLED = Optimize[spelled @ l.LEXICAL_MAP];
sigma_star = b.kBytes*;
# Gets rid of the letter- prefix since in some cases we don't want it.
del_letter = CDRewrite[n.D["letter-"], "", "", sigma_star];
spelled_no_tag = Optimize[char_set (ins_space char_set)*];
export SPELLED_NO_LETTER = Optimize[spelled_no_tag @ del_letter];
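# E.g., "abc{spelled}" @ SPELLED yields "letter-a letter-b letter-c" (modulo
# the lexical map), while "abc" @ SPELLED_NO_LETTER yields just "a b c".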
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'en/verbalizer/lexical_map.grm' as l;
punct =
("." : "@@PERIOD@@")
| ("," : "@@COMMA@@")
| ("!" : "@@EXCLAMATION_MARK@@")
| ("?" : "@@QUESTION_MARK@@")
;
export SPOKEN_PUNCT = Optimize[punct @ l.LEXICAL_MAP];
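# E.g., "?" comes out as "@@QUESTION_MARK@@", which the lexical map presumably
# renders as "question mark".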
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/byte.grm' as b;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
# Only handles 24-hour time with quarter-to, half-past and quarter-past.
increment_hour =
("0" : "1")
| ("1" : "2")
| ("2" : "3")
| ("3" : "4")
| ("4" : "5")
| ("5" : "6")
| ("6" : "7")
| ("7" : "8")
| ("8" : "9")
| ("9" : "10")
| ("10" : "11")
| ("11" : "12")
| ("12" : "1") # If someone uses 12, we assume 12-hour by default.
| ("13" : "14")
| ("14" : "15")
| ("15" : "16")
| ("16" : "17")
| ("17" : "18")
| ("18" : "19")
| ("19" : "20")
| ("20" : "21")
| ("21" : "22")
| ("22" : "23")
| ("23" : "12")
;
hours = Project[increment_hour, 'input'];
d = b.kDigit;
D = d - "0";
minutes09 = "0" D;
minutes = ("1" | "2" | "3" | "4" | "5") d;
__sep__ = ":";
sep_space = __sep__ : " ";
verbalize_hours = hours @ n.CARDINAL_NUMBERS;
verbalize_minutes =
("00" : "@@HOUR@@")
| (minutes09 @ (("0" : "@@TIME_ZERO@@") n.I[" "] n.CARDINAL_NUMBERS))
| (minutes @ n.CARDINAL_NUMBERS)
;
time_basic = Optimize[verbalize_hours sep_space verbalize_minutes];
# Special cases we handle right now.
# TODO: Need to allow for cases like
#
# half twelve (in the UK English sense)
# half twaalf (in the Dutch sense)
time_quarter_past =
n.I["@@TIME_QUARTER@@ @@TIME_AFTER@@ "]
verbalize_hours
n.D[__sep__ "15"];
time_half_past =
n.I["@@TIME_HALF@@ @@TIME_AFTER@@ "]
verbalize_hours
n.D[__sep__ "30"];
time_quarter_to =
n.I["@@TIME_QUARTER@@ @@TIME_BEFORE@@ "]
(increment_hour @ verbalize_hours)
n.D[__sep__ "45"];
time_extra = Optimize[
time_quarter_past | time_half_past | time_quarter_to]
;
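# E.g., "3:45" should come out as "@@TIME_QUARTER@@ @@TIME_BEFORE@@ four"
# (quarter to four: note the incremented hour) and "3:30" as
# "@@TIME_HALF@@ @@TIME_AFTER@@ three".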
# Basic time periods which most languages can be expected to have.
__am__ = "a.m." | "am" | "AM";
__pm__ = "p.m." | "pm" | "PM";
period = (__am__ : "@@TIME_AM@@") | (__pm__ : "@@TIME_PM@@");
time_variants = time_basic | time_extra;
time = Optimize[
(period (" " | n.I[" "]))? time_variants
| time_variants ((" " | n.I[" "]) period)?]
;
export TIME = Optimize[time @ l.LEXICAL_MAP];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Rules for URLs and email addresses.
import 'util/byte.grm' as bytelib;
import 'en/verbalizer/lexical_map.grm' as l;
ins_space = "" : " ";
dot = "." : "@@URL_DOT_EXPRESSION@@";
at = "@" : "@@AT@@";
url_suffix =
(".com" : dot ins_space "com") |
(".gov" : dot ins_space "gov") |
(".edu" : dot ins_space "e d u") |
(".org" : dot ins_space "org") |
(".net" : dot ins_space "net")
;
letter_string = (bytelib.kAlnum)* bytelib.kAlnum;
letter_string_dot =
((letter_string ins_space dot ins_space)* letter_string)
;
# Rules for URLs.
export URL = Optimize[
((letter_string_dot) (ins_space)
(url_suffix)) @ l.LEXICAL_MAP
];
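# E.g., "google.com" should map to "google @@URL_DOT_EXPRESSION@@ com", which
# the lexical map presumably renders as "google dot com".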
# Rules for email addresses.
letter_by_letter = ((bytelib.kAlnum ins_space)* bytelib.kAlnum);
letter_by_letter_dot =
((letter_by_letter ins_space dot ins_space)*
letter_by_letter)
;
export EMAIL1 = Optimize[
((letter_by_letter) (ins_space)
(at) (ins_space)
(letter_by_letter_dot) (ins_space)
(url_suffix)) @ l.LEXICAL_MAP
];
export EMAIL2 = Optimize[
((letter_by_letter) (ins_space)
(at) (ins_space)
(letter_string_dot) (ins_space)
(url_suffix)) @ l.LEXICAL_MAP
];
export EMAILS = Optimize[
EMAIL1 | EMAIL2
];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import 'util/util.grm' as util;
import 'en/verbalizer/extra_numbers.grm' as e;
import 'en/verbalizer/float.grm' as f;
import 'en/verbalizer/math.grm' as ma;
import 'en/verbalizer/miscellaneous.grm' as mi;
import 'en/verbalizer/money.grm' as mo;
import 'en/verbalizer/numbers.grm' as n;
import 'en/verbalizer/numbers_plus.grm' as np;
import 'en/verbalizer/spelled.grm' as s;
import 'en/verbalizer/spoken_punct.grm' as sp;
import 'en/verbalizer/time.grm' as t;
import 'en/verbalizer/urls.grm' as u;
export VERBALIZER = Optimize[RmWeight[
( e.MIXED_NUMBERS
| e.DIGITS
| f.FLOAT
| ma.ARITHMETIC
| mi.MISCELLANEOUS
| mo.MONEY
| n.CARDINAL_NUMBERS
| n.ORDINAL_NUMBERS
| np.NUMBERS_PLUS
| s.SPELLED
| sp.SPOKEN_PUNCT
| t.TIME
| u.URL) @ util.CLEAN_SPACES
]];
This directory contains data used in:
Gorman, K., and Sproat, R. 2016. Minimally supervised number normalization.
Transactions of the Association for Computational Linguistics 4: 507-519.
* `minimal.txt`: A list of 30 curated numbers used as the "minimal" training
set.
* `random-trn.txt`: A list of 9000 randomly-generated numbers used as the
"medium" training set.
* `random-tst.txt`: A list of 1000 randomly-generated numbers used as the test
set.
Note that `random-trn.txt` and `random-tst.txt` are totally disjoint, but a
small number of examples occur in both `minimal.txt` and `random-tst.txt`.
For information about the sampling procedure used to generate the random data
sets, see appendix A of the aforementioned paper.
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
220
221
230
300
400
500
600
700
800
900
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1020
1021
1030
1200
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2020
2021
2030
2100
2200
5001
10000
12000
20000
21000
50001
100000
120000
200000
210000
500001
1000000
1001000
1200000
2000000
2100000
5000001
10000000
10001000
12000000
20000000
50000001
100000000
100001000
120000000
200000000
500000001
1000000000
1000001000
1200000000
2000000000
5000000001
10000000000
10000001000
12000000000
20000000000
50000000001
100000000000
100000001000
120000000000
200000000000
500000000001
209
220
250
254
263
266
276
303
310
317
322
364
386
405
414
424
429
489
505
520
523
525
554
624
627
640
665
680
704
715
723
741
742
775
776
845
847
851
868
898
921
927
972
973
984
986
994
1038
1055
1077
1079
1083
1090
1123
1137
1161
1184
1186
1235
1257
1258
1285
1302
1307
1311
1358
1369
1372
1383
1391
1418
1441
1442
1447
1476
1478
1509
1535
1548
1550
1571
1581
1593
1615
1623
1639
1660
1686
1688
1717
1735
1782
1813
1815
1824
1831
1875
1881
1924
1931
1949
1951
1966
1970
1984
1990
1992
2012
2013
2024
2040
2058
2062
2064
2067
2075
2116
2130
2135
2171
2197
2200
2215
2220
2226
2246
2259
2277
2294
2303
2318
2342
2347
2349
2355
2364
2413
2419
2420
2433
2441
2445
2451
2468
2488
2498
2499
2500
2502
2514
2523
2524
2557
2568
2598
2609
2612
2629
2685
2697
2718
2724
2734
2739
2760
2763
2779
2796
2797
2809
2818
2828
2839
2842
2850
2857
2864
2916
2923
2984
2987
2991
2994
3021
3025
3026
3054
3070
3080
3086
3098
3114
3121
3130
3136
3137
3157
3175
3182
3200
3233
3245
3250
3270
3298
3303
3330
3341
3347
3368
3392
3394
3398
3400
3427
3435
3441
3449
3474
3477
3497
3501
3525
3526
3551
3570
3576
3597
3612
3630
3636
3639
3649
3651
3675
3692
3719
3742
3773
3785
3790
3850
3870
3873
3875
3885
3910
3926
3927
3928
3941
3943
3945
3950
3961
3971
3990
3992
3996
4010
4013
4018
4024
4032
4047
4065
4069
4079
4089
4097
4114
4125
4127
4148
4155
4173
4180
4206
4249
4256
4284
4298
4303
4305
4345
4354
4409
4417
4433
4437
4470
4474
4486
4494
4527
4538
4544
4572
4629
4630
4634
4647
4652
4654
4658
4680
4699
4747
4748
4773
4791
4852
4863
4884
4907
4927
4943
4953
5027
5032
5037
5080
5095
5108
5134
5163
5168
5186
5210
5236
5237
5265
5273
5283
5330
5351
5362
5396
5438
5446
5465
5495
5511
5526
5534
5556
5567
5611
5639
5642
5725
5738
5751
5774
5777
5786
5813
5837
5864
5879
5885
5889
5898
5921
5924
5946
5955
5959
5968
5976
5981
6021
6047
6049
6080
6158
6162
6170
6176
6206
6214
6220
6243
6253
6261
6284
6307
6322
6330
6338
6367
6413
6430
6434
6437
6470
6492
6499
6504
6512
6660
6670
6680
6699
6710
6737
6741
6751
6776
6779
6802
6819
6890
6892
6969
6970
7040
7045
7052
7063
7065
7088
7128
7129
7133
7155
7164
7166
7181
7210
7219
7234
7236
7256
7266
7270
7303
7364
7370
7378
7499
7593
7629
7633
7640
7675
7709
7753
7791
7792
7812
7838
7860
7890
7972
8014
8025
8096
8106
8123
8154
8159
8200
8228
8343
8381
8429
8490
8515
8526
8560
8568
8579
8658
8668
8672
8688
8710
8731
8739
8752
8771
8790
8833
8900
8917
8929
9002
9035
9043
9067
9078
9122
9138
9144
9183
9199
9211
9235
9240
9257
9330
9385
9390
9450
9512
9523
9530
9535
9564
9596
9601
9602
9603
9626
9655
9691
9695
9772
9780
9808
9849
9881
9911
9923
9946
9970
9986
10009
10019
10168
10178
10180
10190
10290
10348
10470
10520
10525
10535
10545
10627
10675
10715
10757
10772
10786
10896
10940
10970
11000
11101
11120
11132
11192
11201
11209
11265
11337
11392
11549
11557
11567
11736
11767
11807
11814
11866
11881
11913
12073
12098
12111
12137
12291
12370
12376
12397
12435
12439
12443
12511
12520
12567
12575
12615
12700
12710
12726
12729
12814
12822
12883
12890
12910
12915
12980
13069
13075
13127
13193
13209
13386
13390
13393
13511
13586
13607
13625
13630
13647
13656
13763
13810
13910
13979
13991
14073
14096
14111
14170
14210
14259
14306
14350
14351
14360
14479
14587
14613
14736
14745
14797
14810
14822
14824
14830
15020
15068
15118
15197
15230
15270
15310
15404
15510
15603
15680
15700
15721
15820
15928
15990
16012
16018
16030
16073
16123
16243
16275
16501
16690
16710
16765
16870
16958
17014
17030
17138
17190
17272
17409
17424
17430
17477
17678
17684
17687
17820
17840
17898
18097
18219
18284
18349
18525
18634
18680
19042
19070
19084
19120
19151
19250
19389
19679
19932
20080
20100
20133
20321
20440
20801
20819
20969
21190
21300
21340
21350
21360
21490
21531
21640
21728
21796
21831
21860
22040
22208
22282
22410
22566
22850
23060
23196
23380
24190
24350
24360
24380
24475
24480
24491
24521
24644
24695
24747
24760
24945
25000
25510
25754
25870
26200
26300
26410
26447
26472
26510
27000
27017
27400
27430
27531
27600
27740
27870
28200
28544
28570
28618
28629
28716
28753
28850
29027
29040
29045
29129
29190
29404
29600
29970
30030
30050
30190
30375
30500
30700
30778
30790
30838
31310
31379
31480
31547
31698
31986
32600
32991
33417
33603
34751
34900
34980
35059
35101
35190
35496
35500
35707
35761
36320
36496
36893
37200
37520
37780
38370
38500
38600
39200
39575
39580
40324
40560
41222
41300
41485
41973
43110
43229
44097
44550
44666
45078
45085
45090
45600
46170
46772
47060
48280
48500
48518
49400
49430
50100
50167
50359
50800
51386
51390
51531
51800
52092
52100
52590
52663
52670
52738
52990
53025
53450
53600
53620
54070
54505
56160
56165
57100
57730
58825
58900
60151
60500
61306
61710
62250
62270
62400
63310
63960
64235
64760
65200
65654
66240
66400
66600
68670
68920
71000
71400
72630
72700
72860
73700
75841
76108
77122
79220
79400
79670
81110
83574
84100
84500
86090
87078
87300
87860
88340
88880
89154
89950
92600
96220
96870
97503
99600
101000
104000
105100
105570
106900
108290
108400
110840
110975
113773
115000
116500
119200
124720
127000
127780
128200
128966
138900
140900
141000
141228
144000
145000
145061
147245
147562
148450
152218
154990
158775
159940
161000
161300
163500
165500
170559
176000
178000
184000
188800
196100
204400
204880
210900
216616
220930
238000
239740
257226
265000
271590
273200
285810
309620
315612
320959
321500
341400
348697
350260
359030
360000
360600
376500
378265
383070
394740
410000
446000
471750
497384
510600
560000
590000
608400
696900
704000
1448374
2256800
3275000
3980000
4500000
5066940
5166299
7113500
9842447
13020696
70477170
# Russian covering grammar definitions
This directory defines a Russian text normalization covering grammar. The
primary entry point is the FST `VERBALIZER`, defined in
`verbalizer/verbalizer.grm` and compiled into the FST archive
`verbalizer/verbalizer.far`.
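
For reference, a minimal sketch of applying the compiled FST from Python with
Pynini is shown below. The FAR path and the `VERBALIZER` key follow the
description above; the fallback behavior for unmatched inputs is an
illustrative assumption.

```
# Minimal sketch: apply the compiled Russian verbalizer to one input string.
# Assumes verbalizer/verbalizer.far was compiled from verbalizer.grm (e.g.
# with thraxmakedep and make) and that Pynini is installed.
import pynini

far = pynini.Far("verbalizer/verbalizer.far", mode="r")
assert far.find("VERBALIZER")      # position the archive reader at VERBALIZER
verbalizer = far.get_fst()

def verbalize(text: str) -> str:
    """Returns the best verbalization of text, or text unchanged if none."""
    lattice = pynini.compose(text, verbalizer)
    if lattice.num_states() == 0:  # empty lattice: no rewrite applies
        return text
    return pynini.shortestpath(lattice).string()

print(verbalize("123"))
```

When several readings are licensed (as in the number lexicons, which list all
case forms), `shortestpath` simply picks one of them.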
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
export kRussianLowerAlpha = Optimize[
"а" | "б" | "в" | "г" | "д" | "е" | "ё" | "ж" | "з" | "и" | "й" |
"к" | "л" | "м" | "н" | "о" | "п" | "р" | "с" | "т" | "у" | "ф" |
"х" | "ц" | "ч" | "ш" | "щ" | "ъ" | "ы" | "ь" | "э" | "ю" | "я" ];
export kRussianUpperAlpha = Optimize[
"А" | "Б" | "В" | "Г" | "Д" | "Е" | "Ё" | "Ж" | "З" | "И" | "Й" |
"К" | "Л" | "М" | "Н" | "О" | "П" | "Р" | "С" | "Т" | "У" | "Ф" |
"Х" | "Ц" | "Ч" | "Ш" | "Щ" | "Ъ" | "Ы" | "Ь" | "Э" | "Ю" | "Я" ];
export kRussianLowerAlphaStressed = Optimize[
"а́" | "е́" | "ё́" | "и́" | "о́" | "у́" | "ы́" | "э́" | "ю́" | "я́" ];
export kRussianUpperAlphaStressed = Optimize[
"А́" | "Е́" | "Ё́" | "И́" | "О́" | "У́" | "Ы́" | "Э́" | "Ю́" | "Я́" ];
export kRussianRewriteStress = Optimize[
("А́" : "А'") | ("Е́" : "Е'") | ("Ё́" : "Ё'") | ("И́" : "И'") |
("О́" : "О'") | ("У́" : "У'") | ("Ы́" : "Ы'") | ("Э́" : "Э'") |
("Ю́" : "Ю'") | ("Я́" : "Я'") |
("а́" : "а'") | ("е́" : "е'") | ("ё́" : "ё'") | ("и́" : "и'") |
("о́" : "о'") | ("у́" : "у'") | ("ы́" : "ы'") | ("э́" : "э'") |
("ю́" : "ю'") | ("я́" : "я'")
];
export kRussianRemoveStress = Optimize[
("А́" : "А") | ("Е́" : "Е") | ("Ё́" : "Ё") | ("И́" : "И") | ("О́" : "О") |
("У́" : "У") | ("Ы́" : "Ы") | ("Э́" : "Э") | ("Ю́" : "Ю") | ("Я́" : "Я") |
("а́" : "а") | ("е́" : "е") | ("ё́" : "ё") | ("и́" : "и") | ("о́" : "о") |
("у́" : "у") | ("ы́" : "ы") | ("э́" : "э") | ("ю́" : "ю") | ("я́" : "я")
];
# Pre-reform characters, just in case.
export kRussianPreReform = Optimize[
"ѣ" | "Ѣ" # http://en.wikipedia.org/wiki/Yat
];
export kCyrillicAlphaStressed = Optimize[
kRussianLowerAlphaStressed | kRussianUpperAlphaStressed
];
export kCyrillicAlpha = Optimize[
kRussianLowerAlpha | kRussianUpperAlpha | kRussianPreReform
];
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# AUTOMATICALLY GENERATED: DO NOT EDIT.
import 'util/byte.grm' as b;
# Utilities for insertion and deletion.
func I[expr] {
return "" : expr;
}
func D[expr] {
return expr : "";
}
# Powers of base 10.
export POWERS =
"[E15]"
| "[E14]"
| "[E13]"
| "[E12]"
| "[E11]"
| "[E10]"
| "[E9]"
| "[E8]"
| "[E7]"
| "[E6]"
| "[E5]"
| "[E4]"
| "[E3]"
| "[E2]"
| "[E1]"
;
export SIGMA = b.kBytes | POWERS;
export SIGMA_STAR = SIGMA*;
export SIGMA_PLUS = SIGMA+;
################################################################################
# BEGIN LANGUAGE SPECIFIC DATA
revaluations =
("[E4]" : "[E1]")
| ("[E5]" : "[E2]")
| ("[E7]" : "[E1]")
| ("[E8]" : "[E2]")
;
Ms = "[E3]" | "[E6]" | "[E9]";
func Zero[expr] {
return expr : ("");
}
space = " ";
lexset3 = Optimize[
("1[E1]+1" : "одиннадцати")
| ("1[E1]+1" : "одиннадцать")
| ("1[E1]+1" : "одиннадцатью")
| ("1[E1]+2" : "двенадцати")
| ("1[E1]+2" : "двенадцать")
| ("1[E1]+2" : "двенадцатью")
| ("1[E1]+3" : "тринадцати")
| ("1[E1]+3" : "тринадцать")
| ("1[E1]+3" : "тринадцатью")
| ("1[E1]+4" : "четырнадцати")
| ("1[E1]+4" : "четырнадцать")
| ("1[E1]+4" : "четырнадцатью")
| ("1[E1]+5" : "пятнадцати")
| ("1[E1]+5" : "пятнадцать")
| ("1[E1]+5" : "пятнадцатью")
| ("1[E1]+6" : "шестнадцати")
| ("1[E1]+6" : "шестнадцать")
| ("1[E1]+6" : "шестнадцатью")
| ("1[E1]+7" : "семнадцати")
| ("1[E1]+7" : "семнадцать")
| ("1[E1]+7" : "семнадцатью")
| ("1[E1]+8" : "восемнадцати")
| ("1[E1]+8" : "восемнадцать")
| ("1[E1]+8" : "восемнадцатью")
| ("1[E1]+9" : "девятнадцати")
| ("1[E1]+9" : "девятнадцать")
| ("1[E1]+9" : "девятнадцатью")]
;
lex3 = CDRewrite[lexset3 I[space], "", "", SIGMA_STAR];
lexset2 = Optimize[
("1[E1]" : "десяти")
| ("1[E1]" : "десять")
| ("1[E1]" : "десятью")
| ("1[E2]" : "ста")
| ("1[E2]" : "сто")
| ("2[E1]" : "двадцати")
| ("2[E1]" : "двадцать")
| ("2[E1]" : "двадцатью")
| ("2[E2]" : "двести")
| ("2[E2]" : "двумстам")
| ("2[E2]" : "двумястами")
| ("2[E2]" : "двухсот")
| ("2[E2]" : "двухстах")
| ("3[E1]" : "тридцати")
| ("3[E1]" : "тридцать")
| ("3[E1]" : "тридцатью")
| ("3[E2]" : "тремстам")
| ("3[E2]" : "тремястами")
| ("3[E2]" : "трехсот")
| ("3[E2]" : "трехстах")
| ("3[E2]" : "триста")
| ("4[E1]" : "сорок")
| ("4[E1]" : "сорока")
| ("4[E2]" : "четыремстам")
| ("4[E2]" : "четыреста")
| ("4[E2]" : "четырехсот")
| ("4[E2]" : "четырехстах")
| ("4[E2]" : "четырьмястами")
| ("5[E1]" : "пятидесяти")
| ("5[E1]" : "пятьдесят")
| ("5[E1]" : "пятьюдесятью")
| ("5[E2]" : "пятисот")
| ("5[E2]" : "пятистам")
| ("5[E2]" : "пятистах")
| ("5[E2]" : "пятьсот")
| ("5[E2]" : "пятьюстами")
| ("6[E1]" : "шестидесяти")
| ("6[E1]" : "шестьдесят")
| ("6[E1]" : "шестьюдесятью")
| ("6[E2]" : "шестисот")
| ("6[E2]" : "шестистам")
| ("6[E2]" : "шестистах")
| ("6[E2]" : "шестьсот")
| ("6[E2]" : "шестьюстами")
| ("7[E1]" : "семидесяти")
| ("7[E1]" : "семьдесят")
| ("7[E1]" : "семьюдесятью")
| ("7[E2]" : "семисот")
| ("7[E2]" : "семистам")
| ("7[E2]" : "семистах")
| ("7[E2]" : "семьсот")
| ("7[E2]" : "семьюстами")
| ("8[E1]" : "восемьдесят")
| ("8[E1]" : "восьмидесяти")
| ("8[E1]" : "восьмьюдесятью")
| ("8[E2]" : "восемьсот")
| ("8[E2]" : "восемьюстами")
| ("8[E2]" : "восьмисот")
| ("8[E2]" : "восьмистам")
| ("8[E2]" : "восьмистах")
| ("8[E2]" : "восьмьюстами")
| ("9[E1]" : "девяноста")
| ("9[E1]" : "девяносто")
| ("9[E2]" : "девятисот")
| ("9[E2]" : "девятистам")
| ("9[E2]" : "девятистах")
| ("9[E2]" : "девятьсот")
| ("9[E2]" : "девятьюстами")]
;
lex2 = CDRewrite[lexset2 I[space], "", "", SIGMA_STAR];
lexset1 = Optimize[
("+" : "")
| ("1" : "один")
| ("1" : "одна")
| ("1" : "одни")
| ("1" : "одним")
| ("1" : "одними")
| ("1" : "одних")
| ("1" : "одно")
| ("1" : "одного")
| ("1" : "одной")
| ("1" : "одном")
| ("1" : "одному")
| ("1" : "одною")
| ("1" : "одну")
| ("2" : "два")
| ("2" : "две")
| ("2" : "двум")
| ("2" : "двумя")
| ("2" : "двух")
| ("3" : "трем")
| ("3" : "тремя")
| ("3" : "трех")
| ("3" : "три")
| ("4" : "четыре")
| ("4" : "четырем")
| ("4" : "четырех")
| ("4" : "четырьмя")
| ("5" : "пяти")
| ("5" : "пять")
| ("5" : "пятью")
| ("6" : "шести")
| ("6" : "шесть")
| ("6" : "шестью")
| ("7" : "семи")
| ("7" : "семь")
| ("7" : "семью")
| ("8" : "восемь")
| ("8" : "восьми")
| ("8" : "восьмью")
| ("9" : "девяти")
| ("9" : "девять")
| ("9" : "девятью")
| ("[E3]" : "тысяч")
| ("[E3]" : "тысяча")
| ("[E3]" : "тысячам")
| ("[E3]" : "тысячами")
| ("[E3]" : "тысячах")
| ("[E3]" : "тысяче")
| ("[E3]" : "тысячей")
| ("[E3]" : "тысячи")
| ("[E3]" : "тысячу")
| ("[E3]" : "тысячью")
| ("[E6]" : "миллион")
| ("[E6]" : "миллиона")
| ("[E6]" : "миллионам")
| ("[E6]" : "миллионами")
| ("[E6]" : "миллионах")
| ("[E6]" : "миллионе")
| ("[E6]" : "миллионов")
| ("[E6]" : "миллионом")
| ("[E6]" : "миллиону")
| ("[E6]" : "миллионы")
| ("[E9]" : "миллиард")
| ("[E9]" : "миллиарда")
| ("[E9]" : "миллиардам")
| ("[E9]" : "миллиардами")
| ("[E9]" : "миллиардах")
| ("[E9]" : "миллиарде")
| ("[E9]" : "миллиардов")
| ("[E9]" : "миллиардом")
| ("[E9]" : "миллиарду")
| ("[E9]" : "миллиарды")
| ("|0|" : "ноле")
| ("|0|" : "нолем")
| ("|0|" : "ноль")
| ("|0|" : "нолю")
| ("|0|" : "ноля")
| ("|0|" : "нуле")
| ("|0|" : "нулем")
| ("|0|" : "нуль")
| ("|0|" : "нулю")
| ("|0|" : "нуля")]
;
lex1 = CDRewrite[lexset1 I[space], "", "", SIGMA_STAR];
export LEX = Optimize[lex3 @ lex2 @ lex1];
export INDEPENDENT_EXPONENTS = "[E3]" | "[E6]" | "[E9]";
# END LANGUAGE SPECIFIC DATA
################################################################################
# Inserts a marker after the Ms.
export INSERT_BOUNDARY = CDRewrite["" : "%", Ms, "", SIGMA_STAR];
# Deletes all powers and "+".
export DELETE_POWERS = CDRewrite[D[POWERS | "+"], "", "", SIGMA_STAR];
# Deletes leading zeros at the beginning of a number, so that "0003" does not
# get treated as an ordinary number.
export DELETE_INITIAL_ZEROS =
CDRewrite[("0" POWERS "+") : "", "[BOS]", "", SIGMA_STAR]
;
NonMs = Optimize[POWERS - Ms];
# Deletes (usually) zeros before a non-M. E.g., +0[E1] should be deleted.
export DELETE_INTERMEDIATE_ZEROS1 =
CDRewrite[Zero["+0" NonMs], "", "", SIGMA_STAR]
;
# Deletes (usually) zeros before an M, if there is no non-zero element between
# that and the previous boundary. Thus, if after the result of the rule above we
# end up with "%+0[E3]", then that gets deleted. Also (really) deletes a final
# zero.
export DELETE_INTERMEDIATE_ZEROS2 = Optimize[
CDRewrite[Zero["%+0" Ms], "", "", SIGMA_STAR]
@ CDRewrite[D["+0"], "", "[EOS]", SIGMA_STAR]]
;
# Final clean up of stray zeros.
export DELETE_REMAINING_ZEROS = Optimize[
CDRewrite[Zero["+0"], "", "", SIGMA_STAR]
@ CDRewrite[Zero["0"], "", "", SIGMA_STAR]]
;
# Applies the revaluation map. For example, this changes [E4] to [E1] when it
# acts as a modifier of [E3].
export REVALUE = CDRewrite[revaluations, "", "", SIGMA_STAR];
# Deletes the various marks and powers in the input and output.
export DELETE_MARKS = CDRewrite[D["%" | "+" | POWERS], "", "", SIGMA_STAR];
export CLEAN_SPACES = Optimize[
CDRewrite[" "+ : " ", b.kNotSpace, b.kNotSpace, SIGMA_STAR]
@ CDRewrite[" "* : "", "[BOS]", "", SIGMA_STAR]
@ CDRewrite[" "* : "", "", "[EOS]", SIGMA_STAR]]
;
d = b.kDigit;
# Germanic inversion rule.
germanic =
(I["1+"] d "[E1]" D["+1"])
| (I["2+"] d "[E1]" D["+2"])
| (I["3+"] d "[E1]" D["+3"])
| (I["4+"] d "[E1]" D["+4"])
| (I["5+"] d "[E1]" D["+5"])
| (I["6+"] d "[E1]" D["+6"])
| (I["7+"] d "[E1]" D["+7"])
| (I["8+"] d "[E1]" D["+8"])
| (I["9+"] d "[E1]" D["+9"])
;
germanic_inversion =
CDRewrite[germanic, "", "", SIGMA_STAR, 'ltr', 'opt']
;
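# Russian has no Germanic decade inversion (cf. German "vierundzwanzig"), so
# germanic_inversion is unused here and the exports below are presumably kept
# as identity transducers only to satisfy the common interface.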
export GERMANIC_INVERSION = SIGMA_STAR;
export ORDINAL_RESTRICTION = SIGMA_STAR;
nondigits = b.kBytes - b.kDigit;
export ORDINAL_SUFFIX = D[nondigits*];
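# D[nondigits*] deletes any trailing non-digit material, e.g. presumably the
# case ending of an ordinal such as "5-го", leaving just "5" for the number
# grammar.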
0 ноле
0 ноль
0 нолю
0 ноля
0 нолём
0 нуле
0 нуль
0 нулю
0 нуля
0 нулём
1 один
1 одна
1 одни
1 одним
1 одними
1 одних
1 одно
1 одного
1 одной
1 одном
1 одному
1 одною
1 раз
1 одну
2 два
2 две
2 двум
2 двумя
2 двух
3 тремя
3 три
3 трём
3 трёх
4 четыре
4 четырьмя
4 четырём
4 четырёх
5 пяти
5 пять
5 пятью
6 шести
6 шесть
6 шестью
7 семи
7 семь
7 семью
8 восемь
8 восьми
8 восьмью
9 девяти
9 девять
9 девятью
10 десяти
10 десять
10 десятью
11 одиннадцати
11 одиннадцать
11 одиннадцатью
12 двенадцати
12 двенадцать
12 двенадцатью
13 тринадцати
13 тринадцать
13 тринадцатью
14 четырнадцати
14 четырнадцать
14 четырнадцатью
15 пятнадцати
15 пятнадцать
15 пятнадцатью
16 шестнадцати
16 шестнадцать
16 шестнадцатью
17 семнадцати
17 семнадцать
17 семнадцатью
18 восемнадцати
18 восемнадцать
18 восемнадцатью
19 девятнадцати
19 девятнадцать
19 девятнадцатью
20 двадцати
20 двадцать
20 двадцатью
30 тридцати
30 тридцать
30 тридцатью
40 сорок
40 сорока
50 пятидесяти
50 пятьдесят
50 пятьюдесятью
60 шестидесяти
60 шестьдесят
60 шестьюдесятью
70 семидесяти
70 семьдесят
70 семьюдесятью
80 восемьдесят
80 восьмидесяти
80 восьмьюдесятью
90 девяноста
90 девяносто
100 ста
100 сто
200 двести
200 двумстам
200 двумястами
200 двухсот
200 двухстах
300 тремястами
300 трехсот
300 триста
300 трёмстам
300 трёхстах
400 четыреста
400 четырьмястами
400 четырёмстам
400 четырёхсот
400 четырёхстах
500 пятисот
500 пятистам
500 пятистах
500 пятьсот
500 пятьюстами
600 шестисот
600 шестистам
600 шестистах
600 шестьсот
600 шестьюстами
700 семисот
700 семистам
700 семистах
700 семьсот
700 семьюстами
800 восемьсот
800 восемьюстами
800 восьмисот
800 восьмистам
800 восьмистах
800 восьмьюстами
900 девятисот
900 девятистам
900 девятистах
900 девятьсот
900 девятьюстами
1000 тысяч
1000 тысяча
1000 тысячам
1000 тысячами
1000 тысячах
1000 тысяче
1000 тысячей
1000 тысячи
1000 тысячу
1000 тысячью
1000000 миллион
1000000 миллиона
1000000 миллионам
1000000 миллионами
1000000 миллионах
1000000 миллионе
1000000 миллионов
1000000 миллионом
1000000 миллиону
1000000 миллионы
1000000000 миллиард
1000000000 миллиарда
1000000000 миллиардам
1000000000 миллиардами
1000000000 миллиардах
1000000000 миллиарде
1000000000 миллиардов
1000000000 миллиардом
1000000000 миллиарду
1000000000 миллиарды
# Language-universal grammar definitions
This directory contains various language-universal grammar definitions.
# Utility grammar definitions
This directory contains various utility grammar definitions.