提交 d89c3a48 编写于 作者: L lfchener

unify api to 1.6 version and fix some problems

上级 d74f4ff3
......@@ -25,7 +25,7 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin
### Prerequisites
- Only Python 2.7 is supported
- PaddlePaddle the latest version (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/1.5/beginners_guide/install/index_en.html))
- PaddlePaddle 1.6 version (Coming soon ...)
### Setup
- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost` and `swig`, e.g. installing them via `apt-get`:
......@@ -183,7 +183,7 @@ python tools/build_vocab.py --help
```
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python train.py \
--init_from_pretrain_model CHECKPOINT_PATH_TO_RESUME_FROM
--init_from_pretrained_model CHECKPOINT_PATH_TO_RESUME_FROM
```
For more help on arguments:
......
此差异已折叠。
......@@ -57,7 +57,7 @@ class DataGenerator(object):
converting to index sequence.
:type keep_transcription_text: bool
:param place: The place to run the program.
:type place: CPU or GPU
:type place: CPUPlace or CUDAPlace
:param is_training: If set to True, generate text data for training,
otherwise, generate text data for infer.
:type is_training: bool
......
......@@ -162,7 +162,7 @@ def start_server():
num_rnn_layers=args.num_rnn_layers,
rnn_layer_size=args.rnn_layer_size,
use_gru=args.use_gru,
init_from_pretrain_model=args.model_path,
init_from_pretrained_model=args.model_path,
place=place,
share_rnn_weights=args.share_rnn_weights)
......
......@@ -3,7 +3,7 @@
cd ../.. > /dev/null
# train model
# if you wish to resume from an exists model, uncomment --init_from_pretrain_model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
python -u train.py \
......
......@@ -3,7 +3,7 @@
cd ../.. > /dev/null
# train model
# if you wish to resume from an exists model, uncomment --init_from_pretrain_model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
......
......@@ -3,7 +3,7 @@
cd ../.. > /dev/null
# train model
# if you wish to resume from an exists model, uncomment --init_from_pretrain_model
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
CUDA_VISIBLE_DEVICES=0,1,2,3 \
python -u train.py \
......
......@@ -12,6 +12,7 @@ import functools
import paddle.fluid as fluid
from data_utils.data import DataGenerator
from model_utils.model import DeepSpeech2Model
from model_utils.model_check import check_cuda, check_version
from utils.error_rate import wer, cer
from utils.utility import add_arguments, print_arguments
......@@ -66,6 +67,12 @@ args = parser.parse_args()
def infer():
"""Inference for DeepSpeech2."""
# check if set use_gpu=True in paddlepaddle cpu version
check_cuda(args.use_gpu)
# check if paddlepaddle version is satisfied
check_version()
if args.use_gpu:
place = fluid.CUDAPlace(0)
else:
......@@ -94,7 +101,7 @@ def infer():
use_gru=args.use_gru,
share_rnn_weights=args.share_rnn_weights,
place=place,
init_from_pretrain_model=args.model_path)
init_from_pretrained_model=args.model_path)
# decoders only accept string encoded in utf-8
vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]
......
......@@ -44,7 +44,7 @@ class DeepSpeech2Model(object):
for GRU, weight sharing is not supported.
:type share_rnn_weights: bool
:param place: Program running place.
:type place: CPU or GPU
:type place: CPUPlace or CUDAPlace
:param init_from_pretrained_model: Pretrained model path. If None, will train
from scratch.
:type init_from_pretrained_model: string|None
......@@ -60,7 +60,7 @@ class DeepSpeech2Model(object):
use_gru=False,
share_rnn_weights=True,
place=fluid.CPUPlace(),
init_from_pretrain_model=None,
init_from_pretrained_model=None,
output_model_dir=None):
self._vocab_size = vocab_size
self._num_conv_layers = num_conv_layers
......@@ -69,7 +69,7 @@ class DeepSpeech2Model(object):
self._use_gru = use_gru
self._share_rnn_weights = share_rnn_weights
self._place = place
self._init_from_pretrain_model = init_from_pretrain_model
self._init_from_pretrained_model = init_from_pretrained_model
self._output_model_dir = output_model_dir
self._ext_scorer = None
self.logger = logging.getLogger("")
......@@ -90,13 +90,14 @@ class DeepSpeech2Model(object):
if not is_infer:
input_fields = {
'names': ['audio_data', 'text_data', 'seq_len_data', 'masks'],
'shapes': [[-1, 161, 161], [-1, 1], [-1, 1], [-1, 32, 81, 1]],
'shapes':
[[None, 161, None], [None, 1], [None, 1], [None, 32, 81, None]],
'dtypes': ['float32', 'int32', 'int64', 'float32'],
'lod_levels': [0, 1, 0, 0]
}
inputs = [
fluid.layers.data(
fluid.data(
name=input_fields['names'][i],
shape=input_fields['shapes'][i],
dtype=input_fields['dtypes'][i],
......@@ -104,7 +105,7 @@ class DeepSpeech2Model(object):
for i in range(len(input_fields['names']))
]
reader = fluid.io.PyReader(
reader = fluid.io.DataLoader.from_generator(
feed_list=inputs,
capacity=64,
iterable=False,
......@@ -112,16 +113,19 @@ class DeepSpeech2Model(object):
(audio_data, text_data, seq_len_data, masks) = inputs
else:
audio_data = fluid.layers.data(
audio_data = fluid.data(
name='audio_data',
shape=[-1, 161, 161],
shape=[None, 161, None],
dtype='float32',
lod_level=0)
seq_len_data = fluid.layers.data(
name='seq_len_data', shape=[-1, 1], dtype='int64', lod_level=0)
masks = fluid.layers.data(
seq_len_data = fluid.data(
name='seq_len_data',
shape=[None, 1],
dtype='int64',
lod_level=0)
masks = fluid.data(
name='masks',
shape=[-1, 32, 81, 1],
shape=[None, 32, 81, None],
dtype='float32',
lod_level=0)
text_data = None
......@@ -141,26 +145,26 @@ class DeepSpeech2Model(object):
share_rnn_weights=self._share_rnn_weights)
return reader, log_probs, loss
def init_from_pretrain_model(self, exe, program):
def init_from_pretrained_model(self, exe, program):
'''Init params from pretrain model. '''
assert isinstance(self._init_from_pretrain_model, str)
assert isinstance(self._init_from_pretrained_model, str)
if not os.path.exists(self._init_from_pretrain_model):
print(self._init_from_pretrain_model)
if not os.path.exists(self._init_from_pretrained_model):
print(self._init_from_pretrained_model)
raise Warning("The pretrained params do not exist.")
return False
fluid.io.load_params(
exe,
self._init_from_pretrain_model,
self._init_from_pretrained_model,
main_program=program,
filename="params.pdparams")
print("finish initing model from pretrained params from %s" %
(self._init_from_pretrain_model))
(self._init_from_pretrained_model))
pre_epoch = 0
dir_name = self._init_from_pretrain_model.split('_')
dir_name = self._init_from_pretrained_model.split('_')
if len(dir_name) >= 2 and dir_name[-2].endswith('epoch') and dir_name[
-1].isdigit():
pre_epoch = int(dir_name[-1])
......@@ -186,7 +190,7 @@ class DeepSpeech2Model(object):
return True
def test(self, exe, dev_batch_reader, test_program, test_pyreader,
def test(self, exe, dev_batch_reader, test_program, test_reader,
fetch_list):
'''Test the model.
......@@ -196,14 +200,14 @@ class DeepSpeech2Model(object):
:type dev_batch_reader: read generator
:param test_program: The program of test.
:type test_program: Program
:param test_pyreader: Pyreader of test.
:type test_pyreader: Pyreader
:param test_reader: Reader of test.
:type test_reader: Reader
:param fetch_list: Fetch list.
:type fetch_list: list
:return: An output unnormalized log probability.
:rtype: array
'''
test_pyreader.start()
test_reader.start()
epoch_loss = []
while True:
try:
......@@ -214,7 +218,7 @@ class DeepSpeech2Model(object):
epoch_loss.extend(np.array(each_loss[0]))
except fluid.core.EOFException:
test_pyreader.reset()
test_reader.reset()
break
return np.mean(np.array(epoch_loss))
......@@ -274,7 +278,7 @@ class DeepSpeech2Model(object):
startup_prog = fluid.Program()
with fluid.program_guard(train_program, startup_prog):
with fluid.unique_name.guard():
train_pyreader, log_probs, ctc_loss = self.create_network()
train_reader, log_probs, ctc_loss = self.create_network()
# prepare optimizer
optimizer = fluid.optimizer.AdamOptimizer(
learning_rate=fluid.layers.exponential_decay(
......@@ -290,7 +294,7 @@ class DeepSpeech2Model(object):
test_prog = fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
with fluid.unique_name.guard():
test_pyreader, _, ctc_loss = self.create_network()
test_reader, _, ctc_loss = self.create_network()
test_prog = test_prog.clone(for_test=True)
......@@ -299,8 +303,8 @@ class DeepSpeech2Model(object):
# init from some pretrain models, to better solve the current task
pre_epoch = 0
if self._init_from_pretrain_model:
pre_epoch = self.init_from_pretrain_model(exe, train_program)
if self._init_from_pretrained_model:
pre_epoch = self.init_from_pretrained_model(exe, train_program)
build_strategy = compiler.BuildStrategy()
exec_strategy = fluid.ExecutionStrategy()
......@@ -312,12 +316,12 @@ class DeepSpeech2Model(object):
build_strategy=build_strategy,
exec_strategy=exec_strategy)
train_pyreader.decorate_batch_generator(train_batch_reader)
test_pyreader.decorate_batch_generator(dev_batch_reader)
train_reader.set_batch_generator(train_batch_reader)
test_reader.set_batch_generator(dev_batch_reader)
# run train
for epoch_id in range(num_epoch):
train_pyreader.start()
train_reader.start()
epoch_loss = []
time_begin = time.time()
batch_id = 0
......@@ -346,7 +350,7 @@ class DeepSpeech2Model(object):
batch_id = batch_id + 1
except fluid.core.EOFException:
train_pyreader.reset()
train_reader.reset()
break
time_end = time.time()
used_time = time_end - time_begin
......@@ -359,7 +363,7 @@ class DeepSpeech2Model(object):
exe,
dev_batch_reader=dev_batch_reader,
test_program=test_prog,
test_pyreader=test_pyreader,
test_reader=test_reader,
fetch_list=[ctc_loss])
print(
"--------Time: %f sec, epoch: %d, train loss: %f, test loss: %f"
......@@ -402,10 +406,10 @@ class DeepSpeech2Model(object):
exe = fluid.Executor(self._place)
exe.run(startup_prog)
# init param from pretrain_model
if not self._init_from_pretrain_model:
# init param from pretrained_model
if not self._init_from_pretrained_model:
exit("No pretrain model file path!")
self.init_from_pretrain_model(exe, infer_program)
self.init_from_pretrained_model(exe, infer_program)
infer_results = []
time_begin = time.time()
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
import paddle.fluid as fluid
def check_cuda(use_cuda, err=(
        "\nYou can not set use_cuda = True in the model because you are "
        "using paddlepaddle-cpu.\n "
        "Please: 1. Install paddlepaddle-gpu to run your models on GPU or "
        "2. Set use_cuda = False to run models on CPU.\n")):
    """Exit when GPU execution is requested on a CPU-only PaddlePaddle.

    If ``use_cuda`` is truthy and ``fluid.is_compiled_with_cuda()`` reports
    that the installed PaddlePaddle was built without CUDA, ``err`` is
    printed and the process exits with status 1. In every other case the
    function returns ``None``.

    :param use_cuda: Whether the caller intends to run on GPU.
    :type use_cuda: bool
    :param err: Message printed before exiting when the check fails.
    :type err: str
    """
    # The default message is assembled with implicit string concatenation
    # rather than a backslash inside the literal, so the indentation of the
    # source lines can never leak into the text shown to the user.
    if not use_cuda:
        # Nothing to verify for CPU runs; `fluid` is not touched at all.
        return

    try:
        compiled_with_cuda = fluid.is_compiled_with_cuda()
    except Exception as e:
        # Deliberately best-effort: a failure to query the build must not
        # abort the program (presumably this guards Paddle builds that lack
        # this API -- TODO confirm). Report it instead of hiding it.
        # sys.stderr.write is used so the code also runs on Python 2.7.
        sys.stderr.write(
            "Warning: could not determine whether PaddlePaddle is compiled "
            "with CUDA (%s); skipping the check.\n" % e)
        return

    if not compiled_with_cuda:
        print(err)
        sys.exit(1)
def check_version():
    """Exit when the installed PaddlePaddle does not satisfy the requirement.

    Calls ``fluid.require_version('1.6.0')``. If that call raises for any
    reason, an explanatory message is printed and the process exits with
    status 1; otherwise the function returns ``None``.
    """
    # Parenthesised concatenation replaces the backslash continuations: the
    # original ended the last fragment with a dangling backslash, which glues
    # the assignment to whatever line follows it.
    err = ("PaddlePaddle version 1.6 or higher is required, "
           "or a suitable develop version is satisfied as well. \n"
           "Please make sure the version is good with your code.")

    try:
        fluid.require_version('1.6.0')
    except Exception:
        # Any failure of the requirement check is treated as "version not
        # satisfied": tell the user what is needed and stop.
        print(err)
        sys.exit(1)
......@@ -61,17 +61,17 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride,
def simple_rnn(input, size, param_attr=None, bias_attr=None, is_reverse=False):
'''A simple rnn layer.
:param input:input layer.
:type input:Variable
:param size:Number of RNN cells.
:type size:int
:param param_attr:Parameter properties of hidden layer weights that
:param input: input layer.
:type input: Variable
:param size: Dimension of RNN cells.
:type size: int
:param param_attr: Parameter properties of hidden layer weights that
can be learned
:type param_attr:ParamAttr
:param bias_attr:Bias properties of hidden layer weights that can be learned
:type bias_attr:ParamAttr
:param is_reverse:Whether to calculate the inverse RNN
:type is_reverse:bool
:type param_attr: ParamAttr
:param bias_attr: Bias properties of hidden layer weights that can be learned
:type bias_attr: ParamAttr
:param is_reverse: Whether to calculate the inverse RNN
:type is_reverse: bool
:return: A simple RNN layer.
:rtype: Variable
'''
......@@ -112,7 +112,7 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
:type name: string
:param input: Input layer.
:type input: Variable
:param size: Number of RNN cells.
:param size: Dimension of RNN cells.
:type size: int
:param share_weights: Whether to share input-hidden weights between
forward and backward directional RNNs.
......@@ -206,7 +206,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
:type name: string
:param input: Input layer.
:type input: Variable
:param size: Number of GRU cells.
:param size: Dimension of GRU cells.
:type size: int
:param act: Activation type.
:type act: string
......@@ -317,7 +317,7 @@ def rnn_group(input, size, num_stacks, num_conv_layers, use_gru,
"""RNN group with stacked bidirectional simple RNN or GRU layers.
:param input: Input layer.
:type input: Variable
:param size: Number of RNN cells in each layer.
:param size: Dimension of RNN cells in each layer.
:type size: int
:param num_stacks: Number of stacked rnn layers.
:type num_stacks: int
......@@ -373,7 +373,7 @@ def deep_speech_v2_network(audio_data,
:type num_conv_layers: int
:param num_rnn_layers: Number of stacking RNN layers.
:type num_rnn_layers: int
:param rnn_size: RNN layer size (number of RNN cells).
:param rnn_size: RNN layer size (dimension of RNN cells).
:type rnn_size: int
:param use_gru: Use gru if set True. Use simple rnn if set False.
:type use_gru: bool
......
......@@ -8,6 +8,7 @@ import functools
import paddle.fluid as fluid
from data_utils.data import DataGenerator
from model_utils.model import DeepSpeech2Model
from model_utils.model_check import check_cuda, check_version
from utils.error_rate import char_errors, word_errors
from utils.utility import add_arguments, print_arguments
......@@ -62,6 +63,12 @@ args = parser.parse_args()
def evaluate():
"""Evaluate on whole test data for DeepSpeech2."""
# check if set use_gpu=True in paddlepaddle cpu version
check_cuda(args.use_gpu)
# check if paddlepaddle version is satisfied
check_version()
if args.use_gpu:
place = fluid.CUDAPlace(0)
else:
......@@ -89,7 +96,7 @@ def evaluate():
use_gru=args.use_gru,
share_rnn_weights=args.share_rnn_weights,
place=place,
init_from_pretrain_model=args.model_path)
init_from_pretrained_model=args.model_path)
# decoders only accept string encoded in utf-8
vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]
......
......@@ -103,7 +103,7 @@ def tune():
rnn_layer_size=args.rnn_layer_size,
use_gru=args.use_gru,
place=place,
init_from_pretrain_model=args.model_path,
init_from_pretrained_model=args.model_path,
share_rnn_weights=args.share_rnn_weights)
# decoders only accept string encoded in utf-8
......
......@@ -7,6 +7,7 @@ import argparse
import functools
import io
from model_utils.model import DeepSpeech2Model
from model_utils.model_check import check_cuda, check_version
from data_utils.data import DataGenerator
from utils.utility import add_arguments, print_arguments
......@@ -34,7 +35,7 @@ add_arg('use_gru', bool, False, "Use GRUs instead of simple RNNs.")
add_arg('is_local', bool, True, "Use pserver or not.")
add_arg('share_rnn_weights',bool, True, "Share input-hidden weights across "
"bi-directional RNNs. Not for GRU.")
add_arg('init_from_pretrain_model',str,
add_arg('init_from_pretrained_model',str,
None,
"If None, the training starts from scratch, "
"otherwise, it resumes from the pre-trained model.")
......@@ -71,6 +72,12 @@ args = parser.parse_args()
def train():
"""DeepSpeech2 training."""
# check if set use_gpu=True in paddlepaddle cpu version
check_cuda(args.use_gpu)
# check if paddlepaddle version is satisfied
check_version()
if args.use_gpu:
place = fluid.CUDAPlace(0)
else:
......@@ -93,7 +100,7 @@ def train():
train_batch_reader = train_generator.batch_reader_creator(
manifest_path=args.train_manifest,
batch_size=args.batch_size,
sortagrad=args.use_sortagrad if args.init_from_pretrain_model is None else False,
sortagrad=args.use_sortagrad if args.init_from_pretrained_model is None else False,
shuffle_method=args.shuffle_method)
dev_batch_reader = dev_generator.batch_reader_creator(
manifest_path=args.dev_manifest,
......@@ -109,7 +116,7 @@ def train():
use_gru=args.use_gru,
share_rnn_weights=args.share_rnn_weights,
place=place,
init_from_pretrain_model=args.init_from_pretrain_model,
init_from_pretrained_model=args.init_from_pretrained_model,
output_model_dir=args.output_model_dir)
ds2_model.train(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册