Commit 902c35bd authored by Yibing Liu

append some changes

@@ -18,7 +18,7 @@ std::string ctc_greedy_decoder(
const std::vector<std::string> &vocabulary) {
// dimension check
size_t num_time_steps = probs_seq.size();
for (size_t i = 0; i < num_time_steps; i++) {
for (size_t i = 0; i < num_time_steps; ++i) {
VALID_CHECK_EQ(probs_seq[i].size(),
vocabulary.size() + 1,
"The shape of probs_seq does not match with "
@@ -28,7 +28,7 @@ std::string ctc_greedy_decoder(
size_t blank_id = vocabulary.size();
std::vector<size_t> max_idx_vec;
for (size_t i = 0; i < num_time_steps; i++) {
for (size_t i = 0; i < num_time_steps; ++i) {
double max_prob = 0.0;
size_t max_idx = 0;
for (size_t j = 0; j < probs_seq[i].size(); j++) {
@@ -41,14 +41,14 @@ std::string ctc_greedy_decoder(
}
std::vector<size_t> idx_vec;
for (size_t i = 0; i < max_idx_vec.size(); i++) {
for (size_t i = 0; i < max_idx_vec.size(); ++i) {
if ((i == 0) || ((i > 0) && max_idx_vec[i] != max_idx_vec[i - 1])) {
idx_vec.push_back(max_idx_vec[i]);
}
}
std::string best_path_result;
for (size_t i = 0; i < idx_vec.size(); i++) {
for (size_t i = 0; i < idx_vec.size(); ++i) {
if (idx_vec[i] != blank_id) {
best_path_result += vocabulary[idx_vec[i]];
}
@@ -65,7 +65,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
Scorer *ext_scorer) {
// dimension check
size_t num_time_steps = probs_seq.size();
for (size_t i = 0; i < num_time_steps; i++) {
for (size_t i = 0; i < num_time_steps; ++i) {
VALID_CHECK_EQ(probs_seq[i].size(),
vocabulary.size() + 1,
"The shape of probs_seq does not match with "
@@ -111,7 +111,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
for (size_t time_step = 0; time_step < num_time_steps; time_step++) {
std::vector<double> prob = probs_seq[time_step];
std::vector<std::pair<int, double>> prob_idx;
for (size_t i = 0; i < prob.size(); i++) {
for (size_t i = 0; i < prob.size(); ++i) {
prob_idx.push_back(std::pair<int, double>(i, prob[i]));
}
@@ -134,7 +134,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
if (cutoff_prob < 1.0) {
double cum_prob = 0.0;
cutoff_len = 0;
for (size_t i = 0; i < prob_idx.size(); i++) {
for (size_t i = 0; i < prob_idx.size(); ++i) {
cum_prob += prob_idx[i].second;
cutoff_len += 1;
if (cum_prob >= cutoff_prob) break;
@@ -145,7 +145,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
prob_idx.begin(), prob_idx.begin() + cutoff_len);
}
std::vector<std::pair<size_t, float>> log_prob_idx;
for (size_t i = 0; i < cutoff_len; i++) {
for (size_t i = 0; i < cutoff_len; ++i) {
log_prob_idx.push_back(std::pair<int, float>(
prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN)));
}
@@ -155,7 +155,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
auto c = log_prob_idx[index].first;
float log_prob_c = log_prob_idx[index].second;
for (size_t i = 0; i < prefixes.size() && i < beam_size; i++) {
for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
auto prefix = prefixes[i];
if (full_beam && log_prob_c + prefix->score < min_cutoff) {
@@ -222,14 +222,14 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
prefixes.end(),
prefix_compare);
for (size_t i = beam_size; i < prefixes.size(); i++) {
for (size_t i = beam_size; i < prefixes.size(); ++i) {
prefixes[i]->remove();
}
}
} // end of loop over time
// compute approximate ctc score as the return score
for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) {
for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
double approx_ctc = prefixes[i]->score;
if (ext_scorer != nullptr) {
@@ -249,14 +249,14 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
// allow for the post processing
std::vector<PathTrie *> space_prefixes;
if (space_prefixes.empty()) {
for (size_t i = 0; i < beam_size && i < prefixes.size(); i++) {
for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
space_prefixes.push_back(prefixes[i]);
}
}
std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare);
std::vector<std::pair<double, std::string>> output_vecs;
for (size_t i = 0; i < beam_size && i < space_prefixes.size(); i++) {
for (size_t i = 0; i < beam_size && i < space_prefixes.size(); ++i) {
std::vector<int> output;
space_prefixes[i]->get_path_vec(output);
// convert index to string
@@ -301,7 +301,7 @@ ctc_beam_search_decoder_batch(
// enqueue the tasks of decoding
std::vector<std::future<std::vector<std::pair<double, std::string>>>> res;
for (size_t i = 0; i < batch_size; i++) {
for (size_t i = 0; i < batch_size; ++i) {
res.emplace_back(pool.enqueue(ctc_beam_search_decoder,
probs_split[i],
beam_size,
@@ -313,7 +313,7 @@ ctc_beam_search_decoder_batch(
// get decoding results
std::vector<std::vector<std::pair<double, std::string>>> batch_results;
for (size_t i = 0; i < batch_size; i++) {
for (size_t i = 0; i < batch_size; ++i) {
batch_results.emplace_back(res[i].get());
}
return batch_results;
......
#! /usr/bin/bash
source ../../utils/utility.sh
URL='http://cloud.dlnel.org/filepub/?uuid=6c83b9d8-3255-4adf-9726-0fe0be3d0274'
MD5=28521a58552885a81cf92a1e9b133a71
TARGET=./aishell_model.tar.gz
echo "Download Aishell model ..."
download $URL $MD5 $TARGET
if [ $? -ne 0 ]; then
echo "Fail to download Aishell model!"
exit 1
fi
tar -zxvf $TARGET
exit 0
@@ -2,9 +2,8 @@
source ../../utils/utility.sh
# TODO: add urls
URL='to-be-added'
MD5=5b4af224b26c1dc4dd972b7d32f2f52a
URL='http://cloud.dlnel.org/filepub/?uuid=17404caf-cf19-492f-9707-1fad07c19aae'
MD5=ea5024a457a91179472f6dfee60e053d
TARGET=./librispeech_model.tar.gz
......
#! /usr/bin/bash
source ../../utils/utility.sh
URL=http://cloud.dlnel.org/filepub/?uuid=d21861e4-4ed6-45bb-ad8e-ae417a43195e
MD5="29e02312deb2e59b3c8686c7966d4fe3"
TARGET=./zh_giga.no_cna_cmn.prune01244.klm
echo "Download language model ..."
download $URL $MD5 $TARGET
if [ $? -ne 0 ]; then
echo "Fail to download the language model!"
exit 1
fi
exit 0
@@ -2,4 +2,3 @@ scipy==0.13.1
resampy==0.1.5
SoundFile==0.9.0.post1
python_speech_features
https://github.com/luotao1/kenlm/archive/master.zip
@@ -11,10 +11,9 @@ download() {
fi
fi
wget -c $URL -P `dirname "$TARGET"`
wget -c $URL -O "$TARGET"
md5_result=`md5sum $TARGET | awk -F[' '] '{print $1}'`
if [ ! $MD5 == $md5_result ]; then
echo "Fail to download the language model!"
return 1
fi
}
@@ -35,6 +35,8 @@ class ExternalMemory(object):
sequence layer has sequence length indicating the number
of memory slots, and size as memory slot size.
:type boot_layer: LayerOutput
:param initial_weight: Initializer for addressing weights.
:type initial_weight: LayerOutput
:param readonly: If true, the memory is read-only, and write function cannot
be called. Default is false.
:type readonly: bool
@@ -49,6 +51,7 @@ class ExternalMemory(object):
name,
mem_slot_size,
boot_layer,
initial_weight,
readonly=False,
enable_interpolation=True):
self.name = name
@@ -57,11 +60,7 @@ class ExternalMemory(object):
self.enable_interpolation = enable_interpolation
self.external_memory = paddle.layer.memory(
name=self.name, size=self.mem_slot_size, boot_layer=boot_layer)
# prepare a constant (zero) initializer for addressing weights
self.zero_addressing_init = paddle.layer.slope_intercept(
input=paddle.layer.fc(input=boot_layer, size=1),
slope=0.0,
intercept=0.0)
self.initial_weight = initial_weight
# set memory to constant when readonly=True
if self.readonly:
self.updated_external_memory = paddle.layer.mixed(
@@ -111,7 +110,7 @@ class ExternalMemory(object):
last_addressing_weight = paddle.layer.memory(
name=self.name + "_addressing_weight_" + head_name,
size=1,
boot_layer=self.zero_addressing_init)
boot_layer=self.initial_weight)
interpolated_weight = paddle.layer.interpolation(
name=self.name + "_addressing_weight_" + head_name,
input=[addressing_weight, addressing_weight],
......
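The hunks above drop the internally built zero initializer; a caller now constructs the constant-zero addressing-weight layer itself and passes it in through the new initial_weight argument. A minimal, hypothetical usage sketch (assumed names and sizes; it simply mirrors the memory_enhanced_decoder changes shown below, using the same paddle.v2 layer API):

    import paddle.v2 as paddle
    # ExternalMemory is the class patched above, assumed importable in this scope.

    paddle.init(use_gpu=False, trainer_count=1)

    # Boot layer holding the initial memory content: a sequence whose length is
    # the number of slots and whose vector size is the slot size (128 here).
    memory_boot = paddle.layer.data(
        name="memory_boot",
        type=paddle.data_type.dense_vector_sequence(128))

    # Constant-zero initializer for the addressing weights, now built by the caller.
    weight_init = paddle.layer.slope_intercept(
        input=paddle.layer.fc(input=memory_boot, size=1),
        slope=0.0,
        intercept=0.0)

    memory = ExternalMemory(
        name="bounded_memory",
        mem_slot_size=128,
        boot_layer=memory_boot,
        initial_weight=weight_init,
        readonly=False,
        enable_interpolation=True)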
@@ -125,7 +125,15 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
bounded_memory_perturbation
],
act=paddle.activation.Linear())
bounded_memory_weight_init = paddle.layer.slope_intercept(
input=paddle.layer.fc(input=bounded_memory_init, size=1),
slope=0.0,
intercept=0.0)
unbounded_memory_init = source_context
unbounded_memory_weight_init = paddle.layer.slope_intercept(
input=paddle.layer.fc(input=unbounded_memory_init, size=1),
slope=0.0,
intercept=0.0)
# prepare step function for recurrent group
def recurrent_decoder_step(cur_embedding):
@@ -136,12 +144,14 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
name="bounded_memory",
mem_slot_size=size,
boot_layer=bounded_memory_init,
initial_weight=bounded_memory_weight_init,
readonly=False,
enable_interpolation=True)
unbounded_memory = ExternalMemory(
name="unbounded_memory",
mem_slot_size=size * 2,
boot_layer=unbounded_memory_init,
initial_weight=unbounded_memory_weight_init,
readonly=True,
enable_interpolation=False)
# write bounded memory
@@ -154,7 +164,7 @@ def memory_enhanced_decoder(input, target, initial_state, source_context, size,
size=size,
act=paddle.activation.Tanh(),
bias_attr=False)
# read unbounded memory (i.e. attention mechanism)
# read unbounded memory (i.e. attention mechanism)
context = unbounded_memory.read(key_for_unbounded_memory)
# gated recurrent unit
gru_inputs = paddle.layer.fc(
......