unify api to 1.6 version and fix some problems

d89c3a48 · lfchener · d74f4ff3 · d89c3a48 · d89c3a48 · d89c3a48
14 changed files
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin

 ### Prerequisites
 - Python 2.7 only supported
- PaddlePaddle the latest version (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/1.5/beginners_guide/install/index_en.html))
+- PaddlePaddle 1.6 version (Coming soon ...)

 ### Setup
 - Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost` and `swig`, e.g. installing them via `apt-get`:
@@ -183,7 +183,7 @@ python tools/build_vocab.py --help
    ```
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
    python train.py \
-    --init_from_pretrain_model CHECKPOINT_PATH_TO_RESUME_FROM
+    --init_from_pretrained_model CHECKPOINT_PATH_TO_RESUME_FROM
    ```

 For more help on arguments:

--- a/README_cn.md
+++ b/README_cn.md
--- a/data_utils/data.py
+++ b/data_utils/data.py
@@ -57,7 +57,7 @@ class DataGenerator(object):
                                    converting to index sequence.
    :type keep_transcription_text: bool
    :param place: The place to run the program.
-    :type place: CPU or GPU
+    :type place: CPUPlace or CUDAPlace
    :param is_training: If set to True, generate text data for training, 
                        otherwise,  generate text data for infer.
    :type is_training: bool 

--- a/deploy/demo_server.py
+++ b/deploy/demo_server.py
@@ -162,7 +162,7 @@ def start_server():
        num_rnn_layers=args.num_rnn_layers,
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
-        init_from_pretrain_model=args.model_path,
+        init_from_pretrained_model=args.model_path,
        place=place,
        share_rnn_weights=args.share_rnn_weights)


--- a/examples/aishell/run_train.sh
+++ b/examples/aishell/run_train.sh
@@ -3,7 +3,7 @@
 cd ../.. > /dev/null

 # train model
-# if you wish to resume from an exists model, uncomment --init_from_pretrain_model
+# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
 python -u train.py \

--- a/examples/librispeech/run_train.sh
+++ b/examples/librispeech/run_train.sh
@@ -3,7 +3,7 @@
 cd ../.. > /dev/null

 # train model
-# if you wish to resume from an exists model, uncomment --init_from_pretrain_model
+# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0

 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \

--- a/examples/tiny/run_train.sh
+++ b/examples/tiny/run_train.sh
@@ -3,7 +3,7 @@
 cd ../.. > /dev/null

 # train model
-# if you wish to resume from an exists model, uncomment --init_from_pretrain_model
+# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
 python -u train.py \

--- a/infer.py
+++ b/infer.py
@@ -12,6 +12,7 @@ import functools
 import paddle.fluid as fluid
 from data_utils.data import DataGenerator
 from model_utils.model import DeepSpeech2Model
+from model_utils.model_check import check_cuda, check_version
 from utils.error_rate import wer, cer
 from utils.utility import add_arguments, print_arguments

@@ -66,6 +67,12 @@ args = parser.parse_args()

 def infer():
    """Inference for DeepSpeech2."""
+
+    # check whether use_gpu=True is set with a CPU-only PaddlePaddle build
+    check_cuda(args.use_gpu)
+    # check if paddlepaddle version is satisfied
+    check_version()
+
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
@@ -94,7 +101,7 @@ def infer():
        use_gru=args.use_gru,
        share_rnn_weights=args.share_rnn_weights,
        place=place,
-        init_from_pretrain_model=args.model_path)
+        init_from_pretrained_model=args.model_path)

    # decoders only accept string encoded in utf-8
    vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]

--- a/model_utils/model.py
+++ b/model_utils/model.py
@@ -44,7 +44,7 @@ class DeepSpeech2Model(object):
                              for GRU, weight sharing is not supported.
    :type share_rnn_weights: bool
    :param place: Program running place.
-    :type place: CPU or GPU
+    :type place: CPUPlace or CUDAPlace
    :param init_from_pretrained_model: Pretrained model path. If None, will train
                                  from stratch.
    :type init_from_pretrained_model: string|None
@@ -60,7 +60,7 @@ class DeepSpeech2Model(object):
                 use_gru=False,
                 share_rnn_weights=True,
                 place=fluid.CPUPlace(),
-                 init_from_pretrain_model=None,
+                 init_from_pretrained_model=None,
                 output_model_dir=None):
        self._vocab_size = vocab_size
        self._num_conv_layers = num_conv_layers
@@ -69,7 +69,7 @@ class DeepSpeech2Model(object):
        self._use_gru = use_gru
        self._share_rnn_weights = share_rnn_weights
        self._place = place
-        self._init_from_pretrain_model = init_from_pretrain_model
+        self._init_from_pretrained_model = init_from_pretrained_model
        self._output_model_dir = output_model_dir
        self._ext_scorer = None
        self.logger = logging.getLogger("")
@@ -90,13 +90,14 @@ class DeepSpeech2Model(object):
        if not is_infer:
            input_fields = {
                'names': ['audio_data', 'text_data', 'seq_len_data', 'masks'],
-                'shapes': [[-1, 161, 161], [-1, 1], [-1, 1], [-1, 32, 81, 1]],
+                'shapes':
+                [[None, 161, None], [None, 1], [None, 1], [None, 32, 81, None]],
                'dtypes': ['float32', 'int32', 'int64', 'float32'],
                'lod_levels': [0, 1, 0, 0]
            }

            inputs = [
-                fluid.layers.data(
+                fluid.data(
                    name=input_fields['names'][i],
                    shape=input_fields['shapes'][i],
                    dtype=input_fields['dtypes'][i],
@@ -104,7 +105,7 @@ class DeepSpeech2Model(object):
                for i in range(len(input_fields['names']))
            ]

-            reader = fluid.io.PyReader(
+            reader = fluid.io.DataLoader.from_generator(
                feed_list=inputs,
                capacity=64,
                iterable=False,
@@ -112,16 +113,19 @@ class DeepSpeech2Model(object):

            (audio_data, text_data, seq_len_data, masks) = inputs
        else:
-            audio_data = fluid.layers.data(
+            audio_data = fluid.data(
                name='audio_data',
-                shape=[-1, 161, 161],
+                shape=[None, 161, None],
                dtype='float32',
                lod_level=0)
-            seq_len_data = fluid.layers.data(
-                name='seq_len_data', shape=[-1, 1], dtype='int64', lod_level=0)
-            masks = fluid.layers.data(
+            seq_len_data = fluid.data(
+                name='seq_len_data',
+                shape=[None, 1],
+                dtype='int64',
+                lod_level=0)
+            masks = fluid.data(
                name='masks',
-                shape=[-1, 32, 81, 1],
+                shape=[None, 32, 81, None],
                dtype='float32',
                lod_level=0)
            text_data = None
@@ -141,26 +145,26 @@ class DeepSpeech2Model(object):
            share_rnn_weights=self._share_rnn_weights)
        return reader, log_probs, loss

-    def init_from_pretrain_model(self, exe, program):
+    def init_from_pretrained_model(self, exe, program):
        '''Init params from pretrain model. '''

-        assert isinstance(self._init_from_pretrain_model, str)
+        assert isinstance(self._init_from_pretrained_model, str)

-        if not os.path.exists(self._init_from_pretrain_model):
-            print(self._init_from_pretrain_model)
+        if not os.path.exists(self._init_from_pretrained_model):
+            print(self._init_from_pretrained_model)
            raise Warning("The pretrained params do not exist.")
            return False
        fluid.io.load_params(
            exe,
-            self._init_from_pretrain_model,
+            self._init_from_pretrained_model,
            main_program=program,
            filename="params.pdparams")

        print("finish initing model from pretrained params from %s" %
-              (self._init_from_pretrain_model))
+              (self._init_from_pretrained_model))

        pre_epoch = 0
-        dir_name = self._init_from_pretrain_model.split('_')
+        dir_name = self._init_from_pretrained_model.split('_')
        if len(dir_name) >= 2 and dir_name[-2].endswith('epoch') and dir_name[
                -1].isdigit():
            pre_epoch = int(dir_name[-1])
@@ -186,7 +190,7 @@ class DeepSpeech2Model(object):

        return True

-    def test(self, exe, dev_batch_reader, test_program, test_pyreader,
+    def test(self, exe, dev_batch_reader, test_program, test_reader,
             fetch_list):
        '''Test the model.

@@ -196,14 +200,14 @@ class DeepSpeech2Model(object):
        :type dev_batch_reader: read generator 
        :param test_program: The program of test.
        :type test_program: Program
-        :param test_pyreader: Pyreader of test.
-        :type test_pyreader: Pyreader
+        :param test_reader: Reader of test.
+        :type test_reader: Reader
        :param fetch_list: Fetch list.
        :type fetch_list: list
        :return: An output unnormalized log probability. 
        :rtype: array
        '''
-        test_pyreader.start()
+        test_reader.start()
        epoch_loss = []
        while True:
            try:
@@ -214,7 +218,7 @@ class DeepSpeech2Model(object):
                epoch_loss.extend(np.array(each_loss[0]))

            except fluid.core.EOFException:
-                test_pyreader.reset()
+                test_reader.reset()
                break
        return np.mean(np.array(epoch_loss))

@@ -274,7 +278,7 @@ class DeepSpeech2Model(object):
        startup_prog = fluid.Program()
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
-                train_pyreader, log_probs, ctc_loss = self.create_network()
+                train_reader, log_probs, ctc_loss = self.create_network()
                # prepare optimizer
                optimizer = fluid.optimizer.AdamOptimizer(
                    learning_rate=fluid.layers.exponential_decay(
@@ -290,7 +294,7 @@ class DeepSpeech2Model(object):
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
-                test_pyreader, _, ctc_loss = self.create_network()
+                test_reader, _, ctc_loss = self.create_network()

        test_prog = test_prog.clone(for_test=True)

@@ -299,8 +303,8 @@ class DeepSpeech2Model(object):

        # init from some pretrain models, to better solve the current task
        pre_epoch = 0
-        if self._init_from_pretrain_model:
-            pre_epoch = self.init_from_pretrain_model(exe, train_program)
+        if self._init_from_pretrained_model:
+            pre_epoch = self.init_from_pretrained_model(exe, train_program)

        build_strategy = compiler.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
@@ -312,12 +316,12 @@ class DeepSpeech2Model(object):
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)

-        train_pyreader.decorate_batch_generator(train_batch_reader)
-        test_pyreader.decorate_batch_generator(dev_batch_reader)
+        train_reader.set_batch_generator(train_batch_reader)
+        test_reader.set_batch_generator(dev_batch_reader)

        # run train 
        for epoch_id in range(num_epoch):
-            train_pyreader.start()
+            train_reader.start()
            epoch_loss = []
            time_begin = time.time()
            batch_id = 0
@@ -346,7 +350,7 @@ class DeepSpeech2Model(object):

                    batch_id = batch_id + 1
                except fluid.core.EOFException:
-                    train_pyreader.reset()
+                    train_reader.reset()
                    break
            time_end = time.time()
            used_time = time_end - time_begin
@@ -359,7 +363,7 @@ class DeepSpeech2Model(object):
                    exe,
                    dev_batch_reader=dev_batch_reader,
                    test_program=test_prog,
-                    test_pyreader=test_pyreader,
+                    test_reader=test_reader,
                    fetch_list=[ctc_loss])
                print(
                    "--------Time: %f sec, epoch: %d, train loss: %f, test loss: %f"
@@ -402,10 +406,10 @@ class DeepSpeech2Model(object):
        exe = fluid.Executor(self._place)
        exe.run(startup_prog)

-        # init param from pretrain_model
-        if not self._init_from_pretrain_model:
+        # init param from pretrained_model
+        if not self._init_from_pretrained_model:
            exit("No pretrain model file path!")
-        self.init_from_pretrain_model(exe, infer_program)
+        self.init_from_pretrained_model(exe, infer_program)

        infer_results = []
        time_begin = time.time()

--- a/model_utils/model_check.py
+++ b/model_utils/model_check.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+import paddle.fluid as fluid
+
+
+def check_cuda(use_cuda, err=(
+        "\nYou can not set use_cuda = True in the model because you are "
+        "using paddlepaddle-cpu.\n"
+        "Please: 1. Install paddlepaddle-gpu to run your models on GPU or "
+        "2. Set use_cuda = False to run models on CPU.\n")):
+    """
+    Print an error and exit when GPU execution is requested but
+    ``fluid.is_compiled_with_cuda()`` reports no CUDA support.
+
+    :param use_cuda: Whether the caller intends to run on GPU. Any falsy
+                     value skips the check entirely.
+    :type use_cuda: bool
+    :param err: Message printed before the process exits.
+    :type err: string
+    """
+    if not use_cuda:
+        return
+
+    try:
+        compiled_with_cuda = fluid.is_compiled_with_cuda()
+    except AttributeError:
+        # Best effort, as in the original handler: if this fluid build does
+        # not expose is_compiled_with_cuda (presumably an older release --
+        # TODO confirm), skip the check instead of crashing. Other errors
+        # are no longer swallowed silently.
+        return
+
+    if not compiled_with_cuda:
+        print(err)
+        sys.exit(1)
+
+
+def check_version():
+    """
+    Verify the installed paddlepaddle against the minimum version.
+
+    Calls ``fluid.require_version('1.6.0')``. If that call raises any
+    ``Exception``, an explanatory message is printed and the process
+    exits with status 1.
+    """
+    try:
+        fluid.require_version('1.6.0')
+    except Exception:
+        # The message is only needed on failure, so build it here.
+        message = ("PaddlePaddle version 1.6 or higher is required, "
+                   "or a suitable develop version is satisfied as well. \n"
+                   "Please make sure the version is good with your code.")
+        print(message)
+        sys.exit(1)
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -61,17 +61,17 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride,

 def simple_rnn(input, size, param_attr=None, bias_attr=None, is_reverse=False):
    '''A simple rnn layer.
-    :param input:input layer.
-    :type input:Variable
-    :param size:Number of RNN cells.
-    :type size:int
-    :param param_attr:Parameter properties of hidden layer weights that 
+    :param input: input layer.
+    :type input: Variable
+    :param size: Dimension of RNN cells.
+    :type size: int
+    :param param_attr: Parameter properties of hidden layer weights that 
                      can be learned
-    :type param_attr:ParamAttr
-    :param bias_attr:Bias properties of hidden layer weights that can be learned
-    :type bias_attr:ParamAttr
-    :param is_reverse:Whether to calculate the inverse RNN
-    :type is_reverse:bool
+    :type param_attr: ParamAttr
+    :param bias_attr: Bias properties of hidden layer weights that can be learned
+    :type bias_attr: ParamAttr
+    :param is_reverse: Whether to calculate the inverse RNN
+    :type is_reverse: bool
    :return: A simple RNN layer.
    :rtype: Variable
    '''
@@ -112,7 +112,7 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
    :type name: string
    :param input: Input layer.
    :type input: Variable
-    :param size: Number of RNN cells.
+    :param size: Dimension of RNN cells.
    :type size: int
    :param share_weights: Whether to share input-hidden weights between
                          forward and backward directional RNNs.
@@ -206,7 +206,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
    :type name: string
    :param input: Input layer.
    :type input: Variable
-    :param size: Number of GRU cells.
+    :param size: Dimension of GRU cells.
    :type size: int
    :param act: Activation type.
    :type act: string
@@ -317,7 +317,7 @@ def rnn_group(input, size, num_stacks, num_conv_layers, use_gru,
    """RNN group with stacked bidirectional simple RNN or GRU layers.
    :param input: Input layer.
    :type input: Variable
-    :param size: Number of RNN cells in each layer.
+    :param size: Dimension of RNN cells in each layer.
    :type size: int
    :param num_stacks: Number of stacked rnn layers.
    :type num_stacks: int
@@ -373,7 +373,7 @@ def deep_speech_v2_network(audio_data,
    :type num_conv_layers: int
    :param num_rnn_layers: Number of stacking RNN layers.
    :type num_rnn_layers: int
-    :param rnn_size: RNN layer size (number of RNN cells).
+    :param rnn_size: RNN layer size (dimension of RNN cells).
    :type rnn_size: int
    :param use_gru: Use gru if set True. Use simple rnn if set False.
    :type use_gru: bool

--- a/test.py
+++ b/test.py
@@ -8,6 +8,7 @@ import functools
 import paddle.fluid as fluid
 from data_utils.data import DataGenerator
 from model_utils.model import DeepSpeech2Model
+from model_utils.model_check import check_cuda, check_version
 from utils.error_rate import char_errors, word_errors
 from utils.utility import add_arguments, print_arguments

@@ -62,6 +63,12 @@ args = parser.parse_args()

 def evaluate():
    """Evaluate on whole test data for DeepSpeech2."""
+
+    # check whether use_gpu=True is set with a CPU-only PaddlePaddle build
+    check_cuda(args.use_gpu)
+    # check if paddlepaddle version is satisfied
+    check_version()
+
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
@@ -89,7 +96,7 @@ def evaluate():
        use_gru=args.use_gru,
        share_rnn_weights=args.share_rnn_weights,
        place=place,
-        init_from_pretrain_model=args.model_path)
+        init_from_pretrained_model=args.model_path)

    # decoders only accept string encoded in utf-8
    vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list]

--- a/tools/tune.py
+++ b/tools/tune.py
@@ -103,7 +103,7 @@ def tune():
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
        place=place,
-        init_from_pretrain_model=args.model_path,
+        init_from_pretrained_model=args.model_path,
        share_rnn_weights=args.share_rnn_weights)

    # decoders only accept string encoded in utf-8

--- a/train.py
+++ b/train.py
@@ -7,6 +7,7 @@ import argparse
 import functools
 import io
 from model_utils.model import DeepSpeech2Model
+from model_utils.model_check import check_cuda, check_version
 from data_utils.data import DataGenerator
 from utils.utility import add_arguments, print_arguments

@@ -34,7 +35,7 @@ add_arg('use_gru',          bool,   False,  "Use GRUs instead of simple RNNs.")
 add_arg('is_local',         bool,   True,   "Use pserver or not.")
 add_arg('share_rnn_weights',bool,   True,   "Share input-hidden weights across "
                                           "bi-directional RNNs. Not for GRU.")
-add_arg('init_from_pretrain_model',str,
+add_arg('init_from_pretrained_model',str,
         None,
         "If None, the training starts from scratch, "
         "otherwise, it resumes from the pre-trained model.")
@@ -71,6 +72,12 @@ args = parser.parse_args()

 def train():
    """DeepSpeech2 training."""
+
+    # check whether use_gpu=True is set with a CPU-only PaddlePaddle build
+    check_cuda(args.use_gpu)
+    # check if paddlepaddle version is satisfied
+    check_version()
+
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
@@ -93,7 +100,7 @@ def train():
    train_batch_reader = train_generator.batch_reader_creator(
        manifest_path=args.train_manifest,
        batch_size=args.batch_size,
-        sortagrad=args.use_sortagrad if args.init_from_pretrain_model is None else False,
+        sortagrad=args.use_sortagrad if args.init_from_pretrained_model is None else False,
        shuffle_method=args.shuffle_method)
    dev_batch_reader = dev_generator.batch_reader_creator(
        manifest_path=args.dev_manifest,
@@ -109,7 +116,7 @@ def train():
        use_gru=args.use_gru,
        share_rnn_weights=args.share_rnn_weights,
        place=place,
-        init_from_pretrain_model=args.init_from_pretrain_model,
+        init_from_pretrained_model=args.init_from_pretrained_model,
        output_model_dir=args.output_model_dir)

    ds2_model.train(