From 5c0e596940a2434635f40e7f5b6d21fb2d6a6842 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Mon, 10 May 2021 02:56:48 +0000
Subject: [PATCH] fix ds2 configs

---
 docs/src/text_front_end.md                    | 15 +++++++++++++++
 examples/aishell/s0/.gitignore                |  3 +++
 examples/aishell/s0/conf/deepspeech2.yaml     |  2 +-
 examples/librispeech/s0/conf/deepspeech2.yaml |  2 +-
 4 files changed, 20 insertions(+), 2 deletions(-)
 create mode 100644 docs/src/text_front_end.md
 create mode 100644 examples/aishell/s0/.gitignore

diff --git a/docs/src/text_front_end.md b/docs/src/text_front_end.md
new file mode 100644
index 00000000..a8cde175
--- /dev/null
+++ b/docs/src/text_front_end.md
@@ -0,0 +1,15 @@
+# Text Front End
+
+## MMSEG
+* [MMSEG: A Word Identification System for Mandarin Chinese Text Based on Two Variants of the Maximum Matching Algorithm](http://technology.chtsai.org/mmseg/)
+* [`中文分词`简单高效的MMSeg](https://www.cnblogs.com/en-heng/p/5872308.html)
+* [mmseg分词算法及实现](https://blog.csdn.net/daniel_ustc/article/details/50488040)
+* [Mmseg算法](https://www.jianshu.com/p/e4ae8d194487)
+* [浅谈中文分词](http://www.isnowfy.com/introduction-to-chinese-segmentation/)
+
+* [ustcdane/mmseg](https://github.com/ustcdane/mmseg)
+* [jkom-cloud/mmseg](https://github.com/jkom-cloud/mmseg)
+
+
+## CScanner
+* [CScanner - A Chinese Lexical Scanner](http://technology.chtsai.org/cscanner/)
diff --git a/examples/aishell/s0/.gitignore b/examples/aishell/s0/.gitignore
new file mode 100644
index 00000000..b7fa0dd7
--- /dev/null
+++ b/examples/aishell/s0/.gitignore
@@ -0,0 +1,3 @@
+exp
+data
+*log
diff --git a/examples/aishell/s0/conf/deepspeech2.yaml b/examples/aishell/s0/conf/deepspeech2.yaml
index 835cf58b..02c68df9 100644
--- a/examples/aishell/s0/conf/deepspeech2.yaml
+++ b/examples/aishell/s0/conf/deepspeech2.yaml
@@ -3,7 +3,7 @@ data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev
   test_manifest: data/manifest.test
-  mean_std_filepath: data/mean_std.npz
+  mean_std_filepath: data/mean_std.json
   vocab_filepath: data/vocab.txt
   augmentation_config: conf/augmentation.json
   batch_size: 64 # one gpu
diff --git a/examples/librispeech/s0/conf/deepspeech2.yaml b/examples/librispeech/s0/conf/deepspeech2.yaml
index 32496428..688f0cba 100644
--- a/examples/librispeech/s0/conf/deepspeech2.yaml
+++ b/examples/librispeech/s0/conf/deepspeech2.yaml
@@ -3,7 +3,7 @@ data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev-clean
   test_manifest: data/manifest.test-clean
-  mean_std_filepath: data/mean_std.npz
+  mean_std_filepath: data/mean_std.json
   vocab_filepath: data/vocab.txt
   augmentation_config: conf/augmentation.json
   batch_size: 20
-- 
GitLab