From df2b054b13d19d467afa51aafdf1871569c6fa56 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 3 Jan 2018 11:37:55 +0800
Subject: [PATCH] follow comments refine code

---
 .../layers/MKLPackedRecurrentLayer.cpp        | 64 ++++++++-----------
 .../gserver/layers/MKLPackedRecurrentLayer.h  | 29 ++-------
 paddle/gserver/layers/MKLPackedWeight.h       | 20 +-----
 paddle/gserver/layers/RecurrentLayer.cpp      |  4 --
 4 files changed, 36 insertions(+), 81 deletions(-)

diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
index bd3c4ceb5..b4a641304 100644
--- a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
+++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
@@ -53,28 +53,19 @@ void MKLPackedRecurrentLayer::forwardBatch(int batchSize,
     REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str());
     /* forward one batch */
     for (size_t n = 0; n < batchValue_->getNumBatch(); n++) {
-      MatrixPtr batch2 = batchValue_->getBatchValue(n);
+      MatrixPtr batchValue = batchValue_->getBatchValue(n);
 
       if (n != 0) {
-        MatrixPtr batch1 =
-            batchValue_->getBatchValue(n - 1, batch2->getHeight());
+        MatrixPtr preBatchValue =
+            batchValue_->getBatchValue(n - 1, batchValue->getHeight());
 
-        // batch2->mul(*batch1, *weight_->getW(), 1, 1);
-        packed_weight_->compute(batch2, batch1);
-      }
-
-#pragma omp parallel for collapse(2)
-      for (size_t i = 0; i < batch2->getHeight(); i++) {
-        for (size_t j = 0; j < batch2->getWidth(); j++) {
-          *(batch2->getData() + i * batch2->getWidth() + j) =
-              *(batch2->getData() + i * batch2->getWidth() + j) > 0
-                  ? *(batch2->getData() + i * batch2->getWidth() + j)
-                  : 0;
-        }
+        packed_weight_->compute(batchValue, preBatchValue);
       }
+
+      Argument arg;
+      arg.value = batchValue;
+      activation_->forward(arg).check();
     }
   }
-
   batchValue_->copyBackSeq(*output_.value);
 }
@@ -94,25 +85,27 @@ void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
     REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str());
     /* backward one batch */
     for (int n = (int)numBatch - 1; n >= 0; n--) {
-      MatrixPtr batch2 = batchGrad_->getBatchValue(n);
-      MatrixPtr batch1 = batchValue_->getBatchValue(n, batch2->getHeight());
+      MatrixPtr batchGrad = batchGrad_->getBatchValue(n);
+      MatrixPtr batchValue =
+          batchValue_->getBatchValue(n, batchGrad->getHeight());
 
       Argument arg;
-      arg.value = batch1;
-      arg.grad = batch2;
+      arg.value = batchValue;
+      arg.grad = batchGrad;
       activation_->backward(arg).check();
 
       if (n != 0) {
-        batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
-        // batch1->mul(*batch2, *weightT, 1, 1);
-        packed_weightT_->compute(batch1, batch2);
+        batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight());
+        packed_weightT_->compute(batchValue, batchGrad);
       }
 
       if (backwardByBatch && weight_->getWGrad()) {
         if (n != 0) {
           /* backward weight */
-          batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight());
-          weight_->getWGrad()->mul(*batch1->getTranspose(), *batch2, 1, 1);
+          batchValue =
+              batchValue_->getBatchValue(n - 1, batchGrad->getHeight());
+          weight_->getWGrad()->mul(
+              *batchValue->getTranspose(), *batchGrad, 1, 1);
         }
       }
     }
@@ -124,19 +117,14 @@ void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
     REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str());
     for (size_t seq = 0; seq < numSequences; ++seq) {
       int len = starts[seq + 1] - starts[seq];
-      if (!reversed_) {
-        weight_->getWGrad()->mul(
-            *output_.value->subMatrix(starts[seq], len - 1)->getTranspose(),
-            *output_.grad->subMatrix(starts[seq] + 1, len - 1),
-            1,
-            1);
-      } else {
-        weight_->getWGrad()->mul(
-            *output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(),
-            *output_.grad->subMatrix(starts[seq], len - 1),
-            1,
-            1);
-      }
+      weight_->getWGrad()->mul(
+          *output_.value
+              ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1)
+              ->getTranspose(),
+          *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1,
+                                   len - 1),
+          1,
+          1);
     }
   }
 }
diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.h b/paddle/gserver/layers/MKLPackedRecurrentLayer.h
index ba6487b11..19874d538 100644
--- a/paddle/gserver/layers/MKLPackedRecurrentLayer.h
+++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.h
@@ -14,36 +14,18 @@ limitations under the License. */
 
 #pragma once
 
-#include <gflags/gflags.h>
-#include "Layer.h"
 #include "MKLPackedWeight.h"
 #include "RecurrentLayer.h"
-#include "SequenceToBatch.h"
-#include "paddle/utils/Stat.h"
 
 DECLARE_bool(rnn_use_batch);
 
 namespace paddle {
 
 /**
- * @brief MKLPackedRecurrentLayer takes 1 input layer. The output size is the
- * same with
- * input layer.
- * For each sequence [start, end] it performs the following computation:
- * \f[
- * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\
- * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end
- *
- * \f]
- * If reversed is true, the order is reversed:
- * \f[
- * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\
- * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end
- * \f]
- * There are two methods to calculate rnn. One way is to compute rnn one
- * sequence by one sequence. The other way is to reorganize the input
- * into batches, then compute rnn one batch by one batch. Users can select
- * them by rnn_use_batch flag.
+ * @brief MKLPackedRecurrentLayer is the same as RecurrentLayer, but it is
+ * optimized with MKL cblas packed gemm.
+ * More details:
+ * https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/mkl/mkl_packed.md
  */
 
 class MKLPackedRecurrentLayer : public RecurrentLayer {
@@ -66,7 +48,10 @@ protected:
                      const int* starts) override;
 
 protected:
+  /// packed_weight_ contains the same data as
+  /// RecurrentLayer::weight_, but in packed format
   std::unique_ptr<MKLPackedWeight> packed_weight_;
+  /// packed_weightT_ is the transpose of packed_weight_
   std::unique_ptr<MKLPackedWeight> packed_weightT_;
 };
 
diff --git a/paddle/gserver/layers/MKLPackedWeight.h b/paddle/gserver/layers/MKLPackedWeight.h
index cc8a33615..f77aa4dbb 100644
--- a/paddle/gserver/layers/MKLPackedWeight.h
+++ b/paddle/gserver/layers/MKLPackedWeight.h
@@ -22,7 +22,9 @@ namespace paddle {
 
 class MKLPackedWeight {
 protected:
+  /// Pointer to the weight data
   real *weight_;
+  /// Pointer to the weight data packed for cblas packed gemm
   real *packedWeight_;
   size_t height_;
   size_t width_;
@@ -41,7 +43,7 @@ public:
   void pack() { pack_(weight_); }
 
-  void compute(MatrixPtr dst, MatrixPtr src) {
+  void compute(MatrixPtr dst, const MatrixPtr src) {
     cblas_sgemm_compute(CblasRowMajor,
                         CblasNoTrans,
                         CblasPacked,
@@ -57,22 +59,6 @@ public:
                         dst->getWidth());
   }
 
-  void compute(size_t M, real *A, size_t lda, real *C, size_t ldc) {
-    cblas_sgemm_compute(CblasRowMajor,
-                        CblasNoTrans,
-                        CblasPacked,
-                        M,
-                        width_,
-                        height_,
-                        A,
-                        lda,
-                        packedWeight_,
-                        width_,
-                        1.0,
-                        C,
-                        ldc);
-  }
-
 protected:
   void pack_(real *src) {
     if (!packedWeight_) {
diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp
index 285b11b5a..6bd42c06c 100644
--- a/paddle/gserver/layers/RecurrentLayer.cpp
+++ b/paddle/gserver/layers/RecurrentLayer.cpp
@@ -13,10 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "RecurrentLayer.h"
-#include <gflags/gflags.h>
-#include "Layer.h"
-#include "SequenceToBatch.h"
-#include "paddle/utils/Stat.h"
 
 DEFINE_bool(rnn_use_batch, false, "Using the batch method for calculation.");
 
-- 
GitLab
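
Note on the packed-GEMM pattern this patch relies on: MKLPackedWeight packs the constant recurrent weight once (cblas_sgemm_pack) and then reuses the packed buffer in every cblas_sgemm_compute call, so the per-step cost of repacking the weight is avoided. The standalone sketch below only illustrates that MKL call sequence outside of Paddle; the matrix sizes, dummy data, and the three-step loop are made-up placeholders and are not part of the patch.

// Illustrative sketch (not part of the patch): pack the constant weight once,
// then reuse the packed buffer in repeated cblas_sgemm_compute calls, which is
// what MKLPackedWeight::pack_() and MKLPackedWeight::compute() wrap.
#include <mkl.h>
#include <vector>

int main() {
  const MKL_INT M = 4, N = 8, K = 8;   // C(MxN) += A(MxK) * B(KxN)
  std::vector<float> A(M * K, 1.0f);   // per-step input, changes every call
  std::vector<float> B(K * N, 0.5f);   // recurrent weight, constant
  std::vector<float> C(M * N, 0.0f);   // accumulated output

  // Pack B once (the role of MKLPackedWeight::pack_).
  float *packedB = cblas_sgemm_alloc(CblasBMatrix, M, N, K);
  cblas_sgemm_pack(CblasRowMajor, CblasBMatrix, CblasNoTrans,
                   M, N, K, /*alpha=*/1.0f, B.data(), /*ldb=*/N, packedB);

  // Reuse the packed weight every step (the role of MKLPackedWeight::compute,
  // which the layer invokes as packed_weight_->compute(batchValue, preBatchValue)).
  for (int step = 0; step < 3; ++step) {
    cblas_sgemm_compute(CblasRowMajor, CblasNoTrans, CblasPacked,
                        M, N, K,
                        A.data(), /*lda=*/K,
                        packedB, /*ldb=*/N,
                        /*beta=*/1.0f, C.data(), /*ldc=*/N);
  }

  cblas_sgemm_free(packedB);
  return 0;
}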