Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
3ed24474
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3ed24474
编写于
10月 08, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
wenetspeech asr1 quant
上级
bdf876ea
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
279 addition
and
0 deletion
+279
-0
examples/wenetspeech/asr1/local/quant.sh
examples/wenetspeech/asr1/local/quant.sh
+59
-0
paddlespeech/s2t/exps/u2/bin/quant.py
paddlespeech/s2t/exps/u2/bin/quant.py
+220
-0
未找到文件。
examples/wenetspeech/asr1/local/quant.sh
0 → 100755
浏览文件 @
3ed24474
#!/bin/bash
# Run post-training quantization for a U2 ASR checkpoint via ${BIN_DIR}/quant.py.
#
# Usage:
#   quant.sh config_path decode_config_path ckpt_path_prefix audio_file
#
# Environment:
#   BIN_DIR               directory that contains quant.py (required)
#   CUDA_VISIBLE_DEVICES  comma-separated GPU ids; the number of entries is
#                         passed to quant.py as --ngpu (0 when empty/unset)

if [ $# -ne 4 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    # Exit statuses are limited to 0-255; use 1 like every other failure path
    # in this script instead of the out-of-range -1.
    exit 1
fi

# quant.py is resolved relative to BIN_DIR; fail early with a clear message
# instead of trying to execute "/quant.py".
if [ -z "${BIN_DIR}" ]; then
    echo "BIN_DIR is not set, can not locate quant.py"
    exit 1
fi

# awk prints the number of comma-separated fields, i.e. the GPU count.
ngpu=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Fetch the demo utterance into data/ (-nc: do not re-download an existing file).
mkdir -p data
if ! wget -nc https://paddlespeech.bj.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/; then
    exit 1
fi

if [ ! -f "${audio_file}" ]; then
    echo "Please input the right audio_file path"
    exit 1
fi

# NOTE(review): chunk_mode is computed here but not consumed anywhere in this
# script (and it is not exported to quant.py) - confirm whether it is needed.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]]; then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for decode_method in attention_rescoring; do
    echo "decoding ${decode_method}"
    batch_size=1
    # Results are written next to the checkpoint, under the checkpoint prefix.
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    if ! python3 -u "${BIN_DIR}/quant.py" \
        --ngpu "${ngpu}" \
        --config "${config_path}" \
        --decode_cfg "${decode_config_path}" \
        --result_file "${output_dir}/${decode_method}.rsl" \
        --checkpoint_path "${ckpt_prefix}" \
        --opts decode.decoding_method "${decode_method}" \
        --opts decode.decode_batch_size "${batch_size}" \
        --audio_file "${audio_file}"; then
        echo "Failed in evaluation!"
        exit 1
    fi
done

exit 0
paddlespeech/s2t/exps/u2/bin/quant.py
0 → 100644
浏览文件 @
3ed24474
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Post-training quantization (PTQ) and export for U2 model."""
import
os
import
sys
from
pathlib
import
Path
import
paddle
import
soundfile
from
yacs.config
import
CfgNode
from
paddlespeech.audio.transform.transformation
import
Transformation
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.models.u2
import
U2Model
from
paddlespeech.s2t.training.cli
import
default_argument_parser
from
paddlespeech.s2t.utils.log
import
Log
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddleslim
import
PTQ
# Module-level logger, obtained from the project's Log helper and named after
# this module; shared by U2Infer and check() below.
logger = Log(__name__).getlog()
class U2Infer:
    """Decode one audio file with a PTQ-wrapped U2 model, then export it.

    The model is built from ``config``, wrapped by ``paddleslim.PTQ`` and
    loaded from ``<args.checkpoint_path>.pdparams``. ``run()`` decodes
    ``args.audio_file`` once and then saves the quantized model to
    ``args.export_path``.
    """

    def __init__(self, config, args):
        """Build the feature pipeline, the quantized model and load weights.

        Args:
            config: configuration node; this constructor reads
                ``preprocess_config``, ``model_conf``, ``unit_type``,
                ``vocab_filepath``, ``spm_model_prefix`` and ``feat_dim``,
                and writes ``input_dim`` / ``output_dim`` into it.
            args: parsed command-line arguments; ``audio_file``, ``ngpu``
                and ``checkpoint_path`` are read here, ``export_path`` in
                ``run()``.
        """
        self.args = args
        self.config = config
        self.audio_file = args.audio_file

        # Feature extraction is always run in non-training mode.
        self.preprocess_conf = config.preprocess_config
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)
        # Falls back to 0.0 when the model config defines no reverse_weight.
        self.reverse_weight = getattr(config.model_conf, 'reverse_weight', 0.0)
        self.text_feature = TextFeaturizer(
            unit_type=config.unit_type,
            vocab=config.vocab_filepath,
            spm_model_prefix=config.spm_model_prefix)

        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        # model
        model_conf = config
        with UpdateConfig(model_conf):
            model_conf.input_dim = config.feat_dim
            model_conf.output_dim = self.text_feature.vocab_size
        model = U2Model.from_config(model_conf)
        self.model = model
        self.model.eval()

        # Wrap the model for post-training quantization *before* loading the
        # weights; the state dict is applied to the wrapped model below.
        self.ptq = PTQ()
        self.model = self.ptq.quantize(model)

        # load model
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)
        logger.info(f"model_dict: {model_dict.keys()}")

    def run(self):
        """Validate the audio file, decode it once and export the model."""
        # Use the instance's own path rather than a module-level ``args``
        # global, which only exists when this file is executed as a script.
        check(self.audio_file)

        with paddle.no_grad():
            self._transcribe()
            self._export_quantized()

    def _transcribe(self):
        """Decode ``self.audio_file`` with the quantized model and log the hypothesis."""
        # read; always_2d keeps a channel axis so the first channel can be taken
        audio, _ = soundfile.read(
            self.audio_file, dtype="int16", always_2d=True)
        audio = audio[:, 0]
        logger.info(f"audio shape: {audio.shape}")

        # fbank
        feat = self.preprocessing(audio, **self.preprocess_args)
        logger.info(f"feat shape: {feat.shape}")

        ilen = paddle.to_tensor(feat.shape[0])
        # Add a leading batch axis of size 1.
        xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)

        decode_config = self.config.decode
        logger.info(f"decode cfg: {decode_config}")
        result_transcripts = self.model.decode(
            xs,
            ilen,
            text_feature=self.text_feature,
            decoding_method=decode_config.decoding_method,
            beam_size=decode_config.beam_size,
            ctc_weight=decode_config.ctc_weight,
            decoding_chunk_size=decode_config.decoding_chunk_size,
            num_decoding_left_chunks=decode_config.num_decoding_left_chunks,
            simulate_streaming=decode_config.simulate_streaming,
            reverse_weight=self.reverse_weight)

        utt = Path(self.audio_file).name
        logger.info(f"hyp: {utt} {result_transcripts[0][0]}")

    def _export_quantized(self):
        """Convert the model's entry points to static graph and save the quantized model."""
        logger.info("-------------start export ----------------------")
        # NOTE(review): these export shapes are hard-coded and are not read
        # from the config (e.g. config.feat_dim) - confirm they match the
        # checkpoint being exported.
        batch_size = 1
        feat_dim = 80
        model_size = 512
        num_left_chunks = -1
        logger.info(
            f"U2 Export Model Params: batch_size {batch_size}, feat_dim {feat_dim}, model_size {model_size}, num_left_chunks {num_left_chunks}"
        )

        ######################### self.model.forward_encoder_chunk ############
        input_spec = [
            # xs, (B, T, D)
            paddle.static.InputSpec(
                shape=[batch_size, None, feat_dim], dtype='float32'),
            # offset, int, but need be tensor
            paddle.static.InputSpec(shape=[1], dtype='int32'),
            # required_cache_size, int
            num_left_chunks,
            # att_cache
            paddle.static.InputSpec(
                shape=[None, None, None, None], dtype='float32'),
            # cnn_cache
            paddle.static.InputSpec(
                shape=[None, None, None, None], dtype='float32')
        ]
        self.model.forward_encoder_chunk = paddle.jit.to_static(
            self.model.forward_encoder_chunk, input_spec=input_spec)

        ######################### self.model.ctc_activation ########################
        input_spec = [
            # encoder_out, (B,T,D)
            paddle.static.InputSpec(
                shape=[batch_size, None, model_size], dtype='float32')
        ]
        self.model.ctc_activation = paddle.jit.to_static(
            self.model.ctc_activation, input_spec=input_spec)

        ######################### self.model.forward_attention_decoder ########################
        # NOTE(review): the exported decoder uses a fixed reverse_weight of
        # 0.3, independent of self.reverse_weight used for decoding above -
        # confirm this is intended.
        reverse_weight = 0.3
        input_spec = [
            # hyps, (B, U)
            paddle.static.InputSpec(shape=[None, None], dtype='int64'),
            # hyps_lens, (B,)
            paddle.static.InputSpec(shape=[None], dtype='int64'),
            # encoder_out, (B,T,D)
            paddle.static.InputSpec(
                shape=[batch_size, None, model_size], dtype='float32'),
            reverse_weight
        ]
        self.model.forward_attention_decoder = paddle.jit.to_static(
            self.model.forward_attention_decoder, input_spec=input_spec)

        ################################################################################
        # jit save
        logger.info(f"export save: {self.args.export_path}")
        self.ptq.save_quantized_model(self.model, self.args.export_path)
def check(audio_file):
    """Validate that ``audio_file`` exists, is readable and sampled at 16 kHz.

    Every validation failure terminates the process with exit status -1
    after reporting the reason.

    Args:
        audio_file: path of the audio file to validate; ``None`` or an
            empty string is treated like a missing file.
    """
    # ``os.path.isfile(None)`` raises TypeError, so reject a missing
    # argument explicitly before touching the filesystem.
    if not audio_file or not os.path.isfile(audio_file):
        print("Please input the right audio file path")
        sys.exit(-1)

    logger.info("checking the audio file format......")
    try:
        _, sample_rate = soundfile.read(audio_file)
    except Exception as e:
        logger.error(str(e))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)
    logger.info("The sample rate is %d" % sample_rate)
    # Explicit check instead of ``assert``: asserts are stripped under
    # ``python -O`` and would let a wrong sample rate through silently.
    if sample_rate != 16000:
        logger.error("The sample rate must be 16000, but got %d" % sample_rate)
        sys.exit(-1)
    logger.info("The audio file format is right")
def main(config, args):
    """Build a ``U2Infer`` from ``config`` and ``args`` and run it."""
    inferencer = U2Infer(config, args)
    inferencer.run()
if __name__ == "__main__":
    # Script entry point: parse CLI arguments, assemble the config and run.
    parser = default_argument_parser()
    # save asr result to
    parser.add_argument(
        "--result_file", type=str, help="path of save the asr result")
    parser.add_argument(
        "--audio_file", type=str, help="path of the input audio file")
    parser.add_argument(
        "--export_path",
        type=str,
        default='export',
        help="path to save the exported quantized model")
    args = parser.parse_args()

    # Config is layered: base config file, then the decode config mounted
    # under ``config.decode``, then command-line ``--opts`` overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    main(config, args)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录