提交 c9dc388c 编写于 作者: H huangyuxin

Add CER calculation to the CLI test script

上级 7be1b1b6
...@@ -55,6 +55,7 @@ args = parser.parse_args() ...@@ -55,6 +55,7 @@ args = parser.parse_args()
def create_manifest(data_dir, manifest_path_prefix): def create_manifest(data_dir, manifest_path_prefix):
print("Creating manifest %s ..." % manifest_path_prefix) print("Creating manifest %s ..." % manifest_path_prefix)
json_lines = [] json_lines = []
reference_lines = []
transcript_path = os.path.join(data_dir, 'transcript', transcript_path = os.path.join(data_dir, 'transcript',
'aishell_transcript_v0.8.txt') 'aishell_transcript_v0.8.txt')
transcript_dict = {} transcript_dict = {}
...@@ -88,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -88,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix):
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
text = transcript_dict[audio_id] text = transcript_dict[audio_id]
json_lines.append(audio_path) json_lines.append(audio_path)
reference_lines.append(str(total_num+1) + "\t" + text)
total_sec += duration total_sec += duration
total_text += len(text) total_text += len(text)
...@@ -98,6 +100,10 @@ def create_manifest(data_dir, manifest_path_prefix): ...@@ -98,6 +100,10 @@ def create_manifest(data_dir, manifest_path_prefix):
for line in json_lines: for line in json_lines:
fout.write(line + '\n') fout.write(line + '\n')
with codecs.open(manifest_path + ".text", 'w', 'utf-8') as fout:
for line in reference_lines:
fout.write(line + '\n')
manifest_dir = os.path.dirname(manifest_path_prefix) manifest_dir = os.path.dirname(manifest_path_prefix)
def prepare_dataset(url, md5sum, target_dir, manifest_path=None): def prepare_dataset(url, md5sum, target_dir, manifest_path=None):
......
...@@ -3,6 +3,10 @@ ...@@ -3,6 +3,10 @@
source path.sh source path.sh
stage=-1 stage=-1
stop_stage=100 stop_stage=100
model_name=conformer_online_aishell
gpus=5
log_file=res.log
res_file=res.rsl
MAIN_ROOT=../../.. MAIN_ROOT=../../..
. ${MAIN_ROOT}/utils/parse_options.sh || exit -1; . ${MAIN_ROOT}/utils/parse_options.sh || exit -1;
...@@ -20,9 +24,16 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then ...@@ -20,9 +24,16 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "Prepare Aishell failed. Terminated." echo "Prepare Aishell failed. Terminated."
exit 1 exit 1
fi fi
fi fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cat data/manifest.test | paddlespeech asr --model conformer_online_aishell --device gpu --decode_method ctc_prefix_beam_search --rtf -v export CUDA_VISIBLE_DEVICES=${gpus}
cat data/manifest.test | paddlespeech asr --model ${model_name} --device gpu --decode_method attention_rescoring --rtf -v &> ${log_file}
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cat ${log_file} | grep "^[0-9]" > ${res_file}
python utils/compute-wer.py --char=1 --v=1 \
data/manifest.test.text ${res_file} > ${res_file}.error
fi fi
../../../utils
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册