From df2b054b13d19d467afa51aafdf1871569c6fa56 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 3 Jan 2018 11:37:55 +0800
Subject: [PATCH] follow comments refine code

---
 .../layers/MKLPackedRecurrentLayer.cpp        | 64 ++++++++-----------
 .../gserver/layers/MKLPackedRecurrentLayer.h  | 29 ++-------
 paddle/gserver/layers/MKLPackedWeight.h       | 20 +-----
 paddle/gserver/layers/RecurrentLayer.cpp      |  4 --
 4 files changed, 36 insertions(+), 81 deletions(-)

diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
index bd3c4ceb5..b4a641304 100644
--- a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
+++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
@@ -53,28 +53,19 @@ void MKLPackedRecurrentLayer::forwardBatch(int batchSize,
     REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str());
     /* forward one batch */
     for (size_t n = 0; n < batchValue_->getNumBatch(); n++) {
-      MatrixPtr batch2 = batchValue_->getBatchValue(n);
+      MatrixPtr batchValue = batchValue_->getBatchValue(n);
 
       if (n != 0) {
-        MatrixPtr batch1 =
-            batchValue_->getBatchValue(n - 1, batch2->getHeight());
+        MatrixPtr preBatchValue =
+            batchValue_->getBatchValue(n - 1, batchValue->getHeight());
 
-        // batch2->mul(*batch1, *weight_->getW(), 1, 1);
-        packed_weight_->compute(batch2, batch1);
-      }
-
-#pragma omp parallel for collapse(2)
-      for (size_t i = 0; i < batch2->getHeight(); i++) {
-        for (size_t j = 0; j < batch2->getWidth(); j++) {
-          *(batch2->getData() + i * batch2->getWidth() + j) =
-              *(batch2->getData() + i * batch2->getWidth() + j) > 0
-                  ? *(batch2->getData() + i * batch2->getWidth() + j)
-                  : 0;
-        }
+        packed_weight_->compute(batchValue, preBatchValue);
       }
+
+      Argument arg;
+      arg.value = batchValue;
+      activation_->forward(arg).check();
     }
   }
-
   batchValue_->copyBackSeq(*output_.value);
 }
@@ -94,25 +85,27 @@ void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
     REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str());
     /* backward one batch */
     for (int n = (int)numBatch - 1; n >= 0; n--) {
-      MatrixPtr batch2 = batchGrad_->getBatchValue(n);
-      MatrixPtr batch1 = batchValue_->getBatchValue(n, batch2->getHeight());
+      MatrixPtr batchGrad = batchGrad_->getBatchValue(n);
+      MatrixPtr batchValue =
+          batchValue_->getBatchValue(n, batchGrad->getHeight());
 
       Argument arg;
-      arg.value = batch1;
-      arg.grad = batch2;
+      arg.value = batchValue;
+      arg.grad = batchGrad;
       activation_->backward(arg).check();
 
       if (n != 0) {
-        batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
-        // batch1->mul(*batch2, *weightT, 1, 1);
-        packed_weightT_->compute(batch1, batch2);
+        batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight());
+        packed_weightT_->compute(batchValue, batchGrad);
       }
 
       if (backwardByBatch && weight_->getWGrad()) {
         if (n != 0) {
           /* backward weight */
-          batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight());
-          weight_->getWGrad()->mul(*batch1->getTranspose(), *batch2, 1, 1);
+          batchValue =
+              batchValue_->getBatchValue(n - 1, batchGrad->getHeight());
+          weight_->getWGrad()->mul(
+              *batchValue->getTranspose(), *batchGrad, 1, 1);
         }
       }
     }
@@ -124,19 +117,14 @@ void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
     REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str());
     for (size_t seq = 0; seq < numSequences; ++seq) {
       int len = starts[seq + 1] - starts[seq];
-      if (!reversed_) {
-        weight_->getWGrad()->mul(
-            *output_.value->subMatrix(starts[seq], len - 1)->getTranspose(),
-            *output_.grad->subMatrix(starts[seq] + 1, len - 1),
-            1,
-            1);
-      } else {
-        weight_->getWGrad()->mul(
-            *output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(),
-            *output_.grad->subMatrix(starts[seq], len - 1),
-            1,
-            1);
-      }
+      weight_->getWGrad()->mul(
+          *output_.value
+              ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1)
+              ->getTranspose(),
+          *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1,
+                                   len - 1),
+          1,
+          1);
     }
   }
 }
diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.h b/paddle/gserver/layers/MKLPackedRecurrentLayer.h
index ba6487b11..19874d538 100644
--- a/paddle/gserver/layers/MKLPackedRecurrentLayer.h
+++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.h
@@ -14,36 +14,18 @@ limitations under the License. */
 
 #pragma once
 
-#include <gflags/gflags.h>
-#include "Layer.h"
 #include "MKLPackedWeight.h"
 #include "RecurrentLayer.h"
-#include "SequenceToBatch.h"
-#include "paddle/utils/Stat.h"
 
 DECLARE_bool(rnn_use_batch);
 
 namespace paddle {
 
 /**
- * @brief MKLPackedRecurrentLayer takes 1 input layer. The output size is the
- * same with
- * input layer.
- * For each sequence [start, end] it performs the following computation:
- * \f[
- * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\
- * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end
- *
- * \f]
- * If reversed is true, the order is reversed:
- * \f[
- * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\
- * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end
- * \f]
- * There are two methods to calculate rnn. One way is to compute rnn one
- * sequence by one sequence. The other way is to reorganize the input
- * into batches, then compute rnn one batch by one batch. Users can select
- * them by rnn_use_batch flag.
+ * @brief MKLPackedRecurrentLayer is the same as RecurrentLayer, but it is
+ * optimized with MKL cblas packed gemm.
+ * More details:
+ * https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/mkl/mkl_packed.md
  */
 
 class MKLPackedRecurrentLayer : public RecurrentLayer {
@@ -66,7 +48,10 @@ protected:
                      const int* starts) override;
 
 protected:
+  /// packed_weight_ contains the same data as
+  /// RecurrentLayer::weight_, but in packed format
   std::unique_ptr<MKLPackedWeight> packed_weight_;
+  /// packed_weightT_ is the transpose of packed_weight_
   std::unique_ptr<MKLPackedWeight> packed_weightT_;
 };
 
diff --git a/paddle/gserver/layers/MKLPackedWeight.h b/paddle/gserver/layers/MKLPackedWeight.h
index cc8a33615..f77aa4dbb 100644
--- a/paddle/gserver/layers/MKLPackedWeight.h
+++ b/paddle/gserver/layers/MKLPackedWeight.h
@@ -22,7 +22,9 @@ namespace paddle {
 
 class MKLPackedWeight {
 protected:
+  /// Pointer to the weight data
   real *weight_;
+  /// Pointer to the weight data packed for cblas packed gemm
   real *packedWeight_;
   size_t height_;
   size_t width_;
@@ -41,7 +43,7 @@ public:
   void pack() { pack_(weight_); }
 
-  void compute(MatrixPtr dst, MatrixPtr src) {
+  void compute(MatrixPtr dst, const MatrixPtr src) {
     cblas_sgemm_compute(CblasRowMajor,
                         CblasNoTrans,
                         CblasPacked,
@@ -57,22 +59,6 @@ public:
                         dst->getWidth());
   }
 
-  void compute(size_t M, real *A, size_t lda, real *C, size_t ldc) {
-    cblas_sgemm_compute(CblasRowMajor,
-                        CblasNoTrans,
-                        CblasPacked,
-                        M,
-                        width_,
-                        height_,
-                        A,
-                        lda,
-                        packedWeight_,
-                        width_,
-                        1.0,
-                        C,
-                        ldc);
-  }
-
 protected:
   void pack_(real *src) {
     if (!packedWeight_) {
diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp
index 285b11b5a..6bd42c06c 100644
--- a/paddle/gserver/layers/RecurrentLayer.cpp
+++ b/paddle/gserver/layers/RecurrentLayer.cpp
@@ -13,10 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "RecurrentLayer.h"
-#include <gflags/gflags.h>
-#include "Layer.h"
-#include "SequenceToBatch.h"
-#include "paddle/utils/Stat.h"
 
 DEFINE_bool(rnn_use_batch, false, "Using the batch method for calculation.");
 
-- 
GitLab
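
Note on the packed-GEMM pattern this patch relies on: MKLPackedWeight packs the constant recurrent weight once (cblas_sgemm_pack) and then reuses the packed buffer in every cblas_sgemm_compute call, so the per-step cost of repacking the weight is avoided. The standalone sketch below only illustrates that MKL call sequence outside of Paddle; the matrix sizes, dummy data, and the three-step loop are made-up placeholders and are not part of the patch.

// Illustrative sketch (not part of the patch): pack the constant weight once,
// then reuse the packed buffer in repeated cblas_sgemm_compute calls, which is
// what MKLPackedWeight::pack_() and MKLPackedWeight::compute() wrap.
#include <mkl.h>
#include <vector>

int main() {
  const MKL_INT M = 4, N = 8, K = 8;   // C(MxN) += A(MxK) * B(KxN)
  std::vector<float> A(M * K, 1.0f);   // per-step input, changes every call
  std::vector<float> B(K * N, 0.5f);   // recurrent weight, constant
  std::vector<float> C(M * N, 0.0f);   // accumulated output

  // Pack B once (the role of MKLPackedWeight::pack_).
  float *packedB = cblas_sgemm_alloc(CblasBMatrix, M, N, K);
  cblas_sgemm_pack(CblasRowMajor, CblasBMatrix, CblasNoTrans,
                   M, N, K, /*alpha=*/1.0f, B.data(), /*ldb=*/N, packedB);

  // Reuse the packed weight every step (the role of MKLPackedWeight::compute,
  // which the layer invokes as packed_weight_->compute(batchValue, preBatchValue)).
  for (int step = 0; step < 3; ++step) {
    cblas_sgemm_compute(CblasRowMajor, CblasNoTrans, CblasPacked,
                        M, N, K,
                        A.data(), /*lda=*/K,
                        packedB, /*ldb=*/N,
                        /*beta=*/1.0f, C.data(), /*ldc=*/N);
  }

  cblas_sgemm_free(packedB);
  return 0;
}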