diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt
index 41ead3c5ecef248830cfb0f8be360f21dcd58e7b..3d6ced713f00bd72622d8aeed3967642b6774ffe 100644
--- a/paddle/gserver/CMakeLists.txt
+++ b/paddle/gserver/CMakeLists.txt
@@ -34,6 +34,16 @@ else()
   message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations")
 endif()
 
+if(NOT WITH_MKLML)
+  file(GLOB_RECURSE MKL_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLPacked*.h")
+  file(GLOB_RECURSE MKL_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLPacked*.cpp")
+  list(REMOVE_ITEM GSERVER_HEADER ${MKL_HEADER})
+  list(REMOVE_ITEM GSERVER_SOURCES ${MKL_SOURCES})
+  message(STATUS "Skip compiling with MKLPackedLayers")
+else()
+  message(STATUS "Compile with MKLPackedLayers")
+endif()
+
 if(NOT WITH_GPU)
     list(REMOVE_ITEM GSERVER_HEADER
          layers/CudnnConvBaseLayer.h
diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dd75555fae134664d92ba9f8ffdea8af78166b7e
--- /dev/null
+++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp
@@ -0,0 +1,161 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "MKLPackedRecurrentLayer.h"
+
+namespace paddle {
+
+REGISTER_LAYER(mkl_packed_recurrent, MKLPackedRecurrentLayer);
+
+/**
+ * @brief Initialize the base RecurrentLayer and pack its weight for MKL.
+ * The transposed pack is only built here when needGradient_ is already set;
+ * otherwise backwardBatch() builds it on first use.
+ * @return false if RecurrentLayer::init() fails, true otherwise.
+ */
+bool MKLPackedRecurrentLayer::init(const LayerMap& layerMap,
+                                   const ParameterMap& parameterMap) {
+  if (!RecurrentLayer::init(layerMap, parameterMap)) return false;
+  packed_weight_.reset(new MKLPackedWeight(weight_->getW()));
+  packed_weight_->pack();
+  if (needGradient_) {
+    packed_weightT_.reset(new MKLPackedWeight(weight_->getW(), true));
+    packed_weightT_->pack();
+  }
+  return true;
+}
+
+/**
+ * @brief Run RecurrentLayer::backward(), then re-pack the weight once the
+ * base class has called incUpdate() on the weight parameter.
+ */
+void MKLPackedRecurrentLayer::backward(const UpdateCallback& callback) {
+  RecurrentLayer::backward(callback);
+  packed_weight_->pack();
+  // The transposed pack only exists once init() or backwardBatch() has built
+  // it; needGradient_ alone does not guarantee that (it can be set later).
+  if (packed_weightT_) {
+    packed_weightT_->pack();
+  }
+}
+
+/**
+ * @brief Batch-by-batch forward pass using the packed weight.
+ * For every batch n > 0, batch n-1 times the packed weight is accumulated
+ * into batch n before the activation is applied.
+ */
+void MKLPackedRecurrentLayer::forwardBatch(int batchSize,
+                                           size_t numSequences,
+                                           const int* starts) {
+  if (!batchValue_) {
+    batchValue_.reset(new SequenceToBatch(useGpu_));
+  }
+
+  batchValue_->resizeOrCreateBatch(batchSize, numSequences, starts, reversed_);
+
+  batchValue_->copyFromSeq(*output_.value);
+
+  {
+    REGISTER_TIMER_INFO("RecurrentFwBatch", getName().c_str());
+    /* forward one batch */
+    for (size_t n = 0; n < batchValue_->getNumBatch(); n++) {
+      MatrixPtr batchValue = batchValue_->getBatchValue(n);
+
+      if (n != 0) {
+        MatrixPtr preBatchValue =
+            batchValue_->getBatchValue(n - 1, batchValue->getHeight());
+
+        packed_weight_->gemm_compute(preBatchValue, batchValue);
+      }
+      Argument arg;
+      arg.value = batchValue;
+      activation_->forward(arg).check();
+    }
+  }
+  batchValue_->copyBackSeq(*output_.value);
+}
+
+/**
+ * @brief Batch-by-batch backward pass using the transposed packed weight.
+ * Propagates the output gradient through the activation and the recurrent
+ * connection, and accumulates the weight gradient when weight_ has one.
+ */
+void MKLPackedRecurrentLayer::backwardBatch(int batchSize,
+                                            size_t numSequences,
+                                            const int* starts) {
+  if (!batchGrad_) {
+    batchGrad_.reset(new SequenceToBatch(useGpu_));
+  }
+  batchGrad_->shareIndexWith(*batchValue_);
+
+  // needGradient_ may have been turned on after init() (setNeedGradient), in
+  // which case the transposed pack has not been built yet.
+  if (!packed_weightT_) {
+    packed_weightT_.reset(new MKLPackedWeight(weight_->getW(), true));
+    packed_weightT_->pack();
+  }
+
+  size_t numBatch = batchGrad_->getNumBatch();
+  bool backwardByBatch = numBatch < numSequences;
+
+  batchGrad_->copyFromSeq(*output_.grad);
+  {
+    REGISTER_TIMER_INFO("RecurrentBwData", getName().c_str());
+    /* backward one batch */
+    for (int n = static_cast<int>(numBatch) - 1; n >= 0; n--) {
+      MatrixPtr batchGrad = batchGrad_->getBatchValue(n);
+      MatrixPtr batchValue =
+          batchValue_->getBatchValue(n, batchGrad->getHeight());
+
+      Argument arg;
+      arg.value = batchValue;
+      arg.grad = batchGrad;
+      activation_->backward(arg).check();
+
+      if (n != 0) {
+        batchValue = batchGrad_->getBatchValue(n - 1, batchGrad->getHeight());
+        packed_weightT_->gemm_compute(batchGrad, batchValue);
+      }
+
+      if (backwardByBatch && weight_->getWGrad()) {
+        if (n != 0) {
+          /* backward weight */
+          batchValue =
+              batchValue_->getBatchValue(n - 1, batchGrad->getHeight());
+          weight_->getWGrad()->mul(
+              *batchValue->getTranspose(), *batchGrad, 1, 1);
+        }
+      }
+    }
+  }
+
+  batchGrad_->copyBackSeq(*output_.grad);
+
+  if (!backwardByBatch && weight_->getWGrad()) {
+    REGISTER_TIMER_INFO("RecurrentBwWeight", getName().c_str());
+    for (size_t seq = 0; seq < numSequences; ++seq) {
+      int len = starts[seq + 1] - starts[seq];
+      weight_->getWGrad()->mul(
+          *output_.value
+               ->subMatrix(reversed_ ? starts[seq] + 1 : starts[seq], len - 1)
+               ->getTranspose(),
+          *output_.grad->subMatrix(reversed_ ? starts[seq] : starts[seq] + 1,
+                                   len - 1),
+          1,
+          1);
+    }
+  }
+}
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.h b/paddle/gserver/layers/MKLPackedRecurrentLayer.h
new file mode 100644
index 0000000000000000000000000000000000000000..bded523a8fbd6ff18f28859bd2a1bf3c1a25e2a0
--- /dev/null
+++ b/paddle/gserver/layers/MKLPackedRecurrentLayer.h
@@ -0,0 +1,58 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "MKLPackedWeight.h"
+#include "RecurrentLayer.h"
+
+DECLARE_bool(rnn_use_batch);
+
+namespace paddle {
+
+/**
+ * @brief MKLPackedRecurrentLayer is almost the same as RecurrentLayer
+ * but is optimized with MKL cblas packed gemm.
+ * More details:
+ * https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/mkl/mkl_packed.md
+ */
+
+class MKLPackedRecurrentLayer : public RecurrentLayer {
+public:
+  explicit MKLPackedRecurrentLayer(const LayerConfig& config)
+      : RecurrentLayer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void backward(const UpdateCallback& callback) override;
+
+protected:
+  void forwardBatch(int batchSize,
+                    size_t numSequences,
+                    const int* starts) override;
+
+  void backwardBatch(int batchSize,
+                     size_t numSequences,
+                     const int* starts) override;
+
+protected:
+  /// packed_weight_ holds the same data as RecurrentLayer::weight_ in
+  /// MKL packed form; it is used by forwardBatch()
+  std::unique_ptr<MKLPackedWeight> packed_weight_;
+  /// packed_weightT_ is the packed transpose of the weight (backwardBatch())
+  std::unique_ptr<MKLPackedWeight> packed_weightT_;
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/MKLPackedWeight.h b/paddle/gserver/layers/MKLPackedWeight.h
new file mode 100644
index 0000000000000000000000000000000000000000..15d5093beb43e2f086601c2616ace033da34f341
--- /dev/null
+++ b/paddle/gserver/layers/MKLPackedWeight.h
@@ -0,0 +1,86 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/math/MathFunctions.h"
+#include "paddle/parameter/Parameter.h"
+#include "paddle/parameter/Weight.h"
+
+namespace paddle {
+
+class MKLPackedWeight {
+protected:
+  /// Non-owning pointer to the unpacked weight data (the source of pack())
+  real *weight_;
+  /// Buffer owned by this object: the weight in cblas packed-gemm format
+  real *packedWeight_;
+  size_t height_;
+  size_t width_;
+  bool transW_;
+
+public:
+  explicit MKLPackedWeight(MatrixPtr weight, bool transW = false)
+      : weight_(weight->getData()),
+        packedWeight_(nullptr),
+        height_(weight->getHeight()),
+        width_(weight->getWidth()),
+        transW_(transW) {}
+
+  MKLPackedWeight(const MKLPackedWeight &) = delete;  // sole buffer owner
+  MKLPackedWeight &operator=(const MKLPackedWeight &) = delete;
+  ~MKLPackedWeight() { free_(); }
+
+  void pack() { pack_(weight_); }  // (re)pack the current weight values
+
+  void gemm_compute(const MatrixPtr src, MatrixPtr dst) {  // dst += src*op(W)
+    cblas_sgemm_compute(CblasRowMajor,
+                        CblasNoTrans,
+                        CblasPacked,
+                        src->getHeight(),
+                        transW_ ? height_ : width_,
+                        transW_ ? width_ : height_,
+                        src->getData(),
+                        src->getWidth(),
+                        packedWeight_,
+                        width_,
+                        1.0,
+                        dst->getData(),
+                        dst->getWidth());
+  }
+
+protected:
+  void pack_(real *src) {
+    if (!packedWeight_) {
+      packedWeight_ = cblas_sgemm_alloc(CblasBMatrix, 1, width_, height_);
+    }
+    cblas_sgemm_pack(CblasRowMajor,
+                     CblasBMatrix,
+                     transW_ ? CblasTrans : CblasNoTrans,
+                     1,
+                     transW_ ? height_ : width_,
+                     transW_ ? width_ : height_,
+                     1.0,
+                     src,
+                     width_,
+                     packedWeight_);
+  }
+
+  void free_() {
+    if (packedWeight_) cblas_sgemm_free(packedWeight_);
+    packedWeight_ = nullptr;  // keeps a second free_() call harmless
+  }
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp
index e4c2b483d2fa4032735858dab17647592791a9c7..6bd42c06cadf755e8703f3fc299d0e6248dd1478 100644
--- a/paddle/gserver/layers/RecurrentLayer.cpp
+++ b/paddle/gserver/layers/RecurrentLayer.cpp
@@ -12,119 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <gflags/gflags.h>
-#include "Layer.h"
-#include "SequenceToBatch.h"
-#include "paddle/utils/Stat.h"
+#include "RecurrentLayer.h"
 
 DEFINE_bool(rnn_use_batch, false, "Using the batch method for calculation.");
 
 namespace paddle {
 
-/**
- * @brief RecurrentLayer takes 1 input layer. The output size is the same with
- * input layer.
- * For each sequence [start, end] it performs the following computation:
- * \f[
- * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\
- * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end
- *
- * \f]
- * If reversed is true, the order is reversed:
- * \f[
- * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\
- * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end
- * \f]
- * There are two methods to calculate rnn. One way is to compute rnn one
- * sequence by one sequence. The other way is to reorganize the input
- * into batches, then compute rnn one batch by one batch. Users can select
- * them by rnn_use_batch flag.
- */
-
-class RecurrentLayer : public Layer {
-public:
-  explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {}
-
-  bool init(const LayerMap& layerMap,
-            const ParameterMap& parameterMap) override;
-
-  void forward(PassType passType) override;
-
-  void backward(const UpdateCallback& callback) override;
-
-  void resetState() override;
-
-  void setState(LayerStatePtr state) override;
-
-  LayerStatePtr getState() override;
-
-protected:
-  /**
-   * @brief If user do not set --rnn_use_batch=true, it will
-   * compute rnn forward one sequence by one sequence in default.
-   * @param batchSize Total words number of all samples in this batch.
-   * @param numSequences The sample number.
-   * @param starts Each start position of each samples.
-   */
-  void forwardSequence(int batchSize, size_t numSequences, const int* starts);
-  /**
-   * @brief Compute rnn forward by one sequence.
-   * @param start The start position of this sequence (or sample).
-   * @param length The length of this sequence (or sample), namely the words
-   * number of this sequence.
-   */
-  void forwardOneSequence(int start, int length);
-  /**
-   * @brief Compute rnn backward one sequence by onesequence.
-   * @param batchSize Total words number of all samples in this batch.
-   * @param numSequences The sample number.
-   * @param starts Each start position of each samples.
-   */
-  void backwardSequence(int batchSize, size_t numSequences, const int* starts);
-  /**
-   * @brief Compute rnn backward by one sequence.
-   * @param start The start position of this sequence (or sample).
-   * @param length The length of this sequence (or sample), namely the words
-   * number of this sequence.
-   */
-  void backwardOneSequence(int start, int length);
-
-  /**
-   * @brief Reorganize input into batches and compute rnn forward batch
-   * by batch. It will convert batch shape to sequence after finishing forward.
-   * The batch info can refer to SequenceToBatch class.
-   * @param batchSize Total words number of all samples in this batch.
-   * @param numSequences The sample number.
-   * @param starts Each start position of each samples.
-   */
-  void forwardBatch(int batchSize, size_t numSequences, const int* starts);
-
-  /**
-   * @brief Reorganize input into batches and compute rnn forward batch
-   * by batch.
-   * @param batchSize Total words number of all samples in this batch.
-   * @param numSequences The sample number.
-   * @param starts Each start position of each samples.
-   */
-  void backwardBatch(int batchSize, size_t numSequences, const int* starts);
-
-protected:
-  std::unique_ptr<Weight> weight_;
-  std::unique_ptr<Weight> bias_;
-
-  /// frameOutput_[i] is used to hold the i-th sample of output_
-  std::vector<Argument> frameOutput_;
-  MatrixPtr prevOutput_;
-  /// Whether compute rnn by reverse.
-  bool reversed_;
-  /// If compute batch by batch, batchValue_ will be used to save the
-  /// reorganized input value.
-  std::unique_ptr<SequenceToBatch> batchValue_;
-  /// If compute batch by batch, batchGrad_ will be used to save the
-  /// gradient with respect to reorganized input value.
-  std::unique_ptr<SequenceToBatch> batchGrad_;
-};
-
 REGISTER_LAYER(recurrent, RecurrentLayer);
 
 bool RecurrentLayer::init(const LayerMap& layerMap,
@@ -260,7 +153,6 @@ void RecurrentLayer::backward(const UpdateCallback& callback) {
     bias_->getWGrad()->collectBias(*output_.grad, 1);
     bias_->getParameterPtr()->incUpdate(callback);
   }
-
   weight_->getParameterPtr()->incUpdate(callback);
 }
 
diff --git a/paddle/gserver/layers/RecurrentLayer.h b/paddle/gserver/layers/RecurrentLayer.h
new file mode 100644
index 0000000000000000000000000000000000000000..f40dbe150fa93becfc26f6ea9e55e40eaf208860
--- /dev/null
+++ b/paddle/gserver/layers/RecurrentLayer.h
@@ -0,0 +1,130 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <gflags/gflags.h>
+#include "Layer.h"
+#include "SequenceToBatch.h"
+#include "paddle/utils/Stat.h"
+
+namespace paddle {
+
+/**
+ * @brief RecurrentLayer takes 1 input layer. The output size is the same with
+ * input layer.
+ * For each sequence [start, end] it performs the following computation:
+ * \f[
+ * out_{i} = act(in_{i}) \ \ \text{for} \ i = start \\
+ * out_{i} = act(in_{i} + out_{i-1} * W) \ \ \text{for} \ start < i <= end
+ *
+ * \f]
+ * If reversed is true, the order is reversed:
+ * \f[
+ * out_{i} = act(in_{i}) \ \ \text{for} \ i = end \\
+ * out_{i} = act(in_{i} + out_{i+1} * W) \ \ \text{for} \ start <= i < end
+ * \f]
+ * There are two methods to calculate rnn. One way is to compute rnn one
+ * sequence by one sequence. The other way is to reorganize the input
+ * into batches, then compute rnn one batch by one batch. Users can select
+ * them by rnn_use_batch flag.
+ */
+
+class RecurrentLayer : public Layer {
+public:
+  explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+
+  void backward(const UpdateCallback& callback) override;
+
+  void resetState() override;
+
+  void setState(LayerStatePtr state) override;
+
+  LayerStatePtr getState() override;
+
+protected:
+  /**
+   * @brief If user do not set --rnn_use_batch=true, it will
+   * compute rnn forward one sequence by one sequence in default.
+   * @param batchSize Total words number of all samples in this batch.
+   * @param numSequences The sample number.
+   * @param starts Each start position of each samples.
+   */
+  void forwardSequence(int batchSize, size_t numSequences, const int* starts);
+  /**
+   * @brief Compute rnn forward by one sequence.
+   * @param start The start position of this sequence (or sample).
+   * @param length The length of this sequence (or sample), namely the words
+   * number of this sequence.
+   */
+  void forwardOneSequence(int start, int length);
+  /**
+   * @brief Compute rnn backward one sequence by one sequence.
+   * @param batchSize Total words number of all samples in this batch.
+   * @param numSequences The sample number.
+   * @param starts Each start position of each samples.
+   */
+  void backwardSequence(int batchSize, size_t numSequences, const int* starts);
+  /**
+   * @brief Compute rnn backward by one sequence.
+   * @param start The start position of this sequence (or sample).
+   * @param length The length of this sequence (or sample), namely the words
+   * number of this sequence.
+   */
+  void backwardOneSequence(int start, int length);
+
+  /**
+   * @brief Reorganize input into batches and compute rnn forward batch
+   * by batch. It will convert batch shape to sequence after finishing forward.
+   * The batch info can refer to SequenceToBatch class.
+   * @param batchSize Total words number of all samples in this batch.
+   * @param numSequences The sample number.
+   * @param starts Each start position of each samples.
+   */
+  virtual void forwardBatch(int batchSize,
+                            size_t numSequences,
+                            const int* starts);
+
+  /**
+   * @brief Reorganize input into batches and compute rnn backward batch
+   * by batch.
+   * @param batchSize Total words number of all samples in this batch.
+   * @param numSequences The sample number.
+   * @param starts Each start position of each samples.
+   */
+  virtual void backwardBatch(int batchSize,
+                             size_t numSequences,
+                             const int* starts);
+
+protected:
+  std::unique_ptr<Weight> weight_;
+  std::unique_ptr<Weight> bias_;
+
+  /// frameOutput_[i] is used to hold the i-th sample of output_
+  std::vector<Argument> frameOutput_;
+  MatrixPtr prevOutput_;
+  /// Whether compute rnn by reverse.
+  bool reversed_;
+  /// If compute batch by batch, batchValue_ will be used to save the
+  /// reorganized input value.
+  std::unique_ptr<SequenceToBatch> batchValue_;
+  /// If compute batch by batch, batchGrad_ will be used to save the
+  /// gradient with respect to reorganized input value.
+  std::unique_ptr<SequenceToBatch> batchGrad_;
+};
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/test_RecurrentLayer.cpp b/paddle/gserver/tests/test_RecurrentLayer.cpp
index 16ab0e6aecb6a895b20389992a44dc542eb3b00a..0e130843339a1030f86f4d48891499bac20e9ca2 100644
--- a/paddle/gserver/tests/test_RecurrentLayer.cpp
+++ b/paddle/gserver/tests/test_RecurrentLayer.cpp
@@ -222,6 +222,7 @@ TEST(Layer, RecurrentLayer) {
 #define protected public
 #include "paddle/gserver/layers/GatedRecurrentLayer.h"
 #include "paddle/gserver/layers/LstmLayer.h"
+#include "paddle/gserver/layers/RecurrentLayer.h"
 template <class T>
 class TestRecurrentLayer {
 public:
@@ -420,12 +421,151 @@ TEST(Layer, LstmLayer) {
   }
 }
 
+#ifdef PADDLE_WITH_MKLML
+
+#include "paddle/gserver/layers/MKLPackedRecurrentLayer.h"
+
+LayerPtr initMKLPackedLayer(LayerConfig layerConfig,
+                            bool reversed,
+                            int layerSize,
+                            LayerPtr dataLayer,
+                            ParameterPtr para,
+                            ParameterPtr bias = nullptr) {
+  LayerMap layerMap;
+  ParameterMap parameterMap;
+  layerMap[dataLayer->getName()] = dataLayer;
+  parameterMap[para->getName()] = para;
+  if (bias) {
+    parameterMap[bias->getName()] = bias;
+    layerConfig.set_bias_parameter_name("bias_0");
+  }
+
+  layerConfig.set_size(layerSize);
+  layerConfig.set_reversed(reversed);
+  layerConfig.add_inputs();
+  LayerInputConfig& input = *(layerConfig.mutable_inputs(0));
+  input.set_input_layer_name("layer_0");
+  input.set_input_parameter_name("para_0");
+
+  LayerPtr testLayer = Layer::create(layerConfig);
+  layerMap[testLayer->getName()] = testLayer;
+
+  testLayer->init(layerMap, parameterMap);
+  testLayer->setNeedGradient(true);
+
+  return testLayer;
+}
+
+void checkMKLPackedLayer(LayerConfig layerConfig1,
+                         LayerConfig layerConfig2,
+                         bool reversed,
+                         int layerSize,
+                         int batchSize,
+                         bool useBatch1,
+                         bool useBatch2) {
+  LayerPtr dataLayer;
+  ParameterPtr para, bias;
+
+  if (layerConfig1.type() == "recurrent") {
+    dataLayer = creatDataLayer("layer_0", batchSize, layerSize, false);
+    para = creatParameter("para_0", 0, layerSize * layerSize, false);
+    bias = nullptr;
+  } else if (layerConfig1.type() == "gated_recurrent") {
+    dataLayer = creatDataLayer("layer_0", batchSize, layerSize * 3, false);
+    para = creatParameter("para_0", 0, layerSize * layerSize * 3, false);
+    bias = creatParameterBias("bias_0", 1, layerSize * 3, false);
+  }
+
+  LayerPtr testLayer1 = initMKLPackedLayer(
+      layerConfig1, reversed, layerSize, dataLayer, para, bias);
+  LayerPtr testLayer2 = initMKLPackedLayer(
+      layerConfig2, reversed, layerSize, dataLayer, para, bias);
+
+  const VectorPtr& weightGrad =
+      (testLayer1->getParameters()[0])->getBuf(PARAMETER_GRADIENT);
+  const MatrixPtr& inputGrad = testLayer1->getPrev(0)->getOutputGrad();
+  CpuVector wgt_grad1(weightGrad->getSize());
+  CpuVector wgt_grad2(weightGrad->getSize());
+  CpuMatrix input_grad1(inputGrad->getHeight(), inputGrad->getWidth());
+  CpuMatrix input_grad2(inputGrad->getHeight(), inputGrad->getWidth());
+
+  for (int i = 0; i < 2; i++) {
+    FLAGS_rnn_use_batch = useBatch1;
+
+    testLayer1->forward(PASS_GC);
+
+    FLAGS_rnn_use_batch = useBatch2;
+    testLayer2->forward(PASS_GC);
+
+    testLayer1->getOutputGrad()->randomizeUniform();
+    testLayer2->getOutputGrad()->copyFrom(*testLayer1->getOutputGrad());
+
+    weightGrad->zero();
+    inputGrad->zero();
+    FLAGS_rnn_use_batch = useBatch1;
+    testLayer1->backward(nullptr);
+
+    wgt_grad1.copyFrom(*weightGrad);
+    input_grad1.copyFrom(*inputGrad);
+
+    weightGrad->zero();
+    inputGrad->zero();
+    FLAGS_rnn_use_batch = useBatch2;
+    testLayer2->backward(nullptr);
+
+    wgt_grad2.copyFrom(*weightGrad);
+    input_grad2.copyFrom(*inputGrad);
+
+    checkError(*testLayer1->getOutputValue(), *testLayer2->getOutputValue());
+    checkError(wgt_grad1, wgt_grad2);
+    checkError(input_grad1, input_grad2);
+  }
+}
+
+TEST(MKLPackedLayer, RecurrentLayer) {
+  LayerConfig layerConfig1;
+  LayerConfig layerConfig2;
+
+  layerConfig1.set_name("paddle-rnn");
+  layerConfig1.set_type("recurrent");
+  layerConfig1.set_active_type("relu");
+
+  layerConfig2.set_name("mkl-packed-rnn");
+  layerConfig2.set_type("mkl_packed_recurrent");
+  layerConfig2.set_active_type("relu");
+
+  FLAGS_use_gpu = false;
+
+  for (auto layerSize : {32, 64, 128, 256, 512}) {
+    for (auto batchSize : {1, 5, 100, 500}) {
+      for (auto reversed : {true, false}) {
+        for (auto paddle_use_batch : {true, false}) {
+          for (auto MKLPacked_use_batch : {true, false}) {
+            LOG(INFO) << " layerSize=" << layerSize
+                      << " batchSize=" << batchSize << " reversed=" << reversed
+                      << " paddle_use_batch=" << paddle_use_batch
+                      << " MKLPacked_use_batch=" << MKLPacked_use_batch;
+
+            checkMKLPackedLayer(layerConfig1,
+                                layerConfig2,
+                                reversed,
+                                layerSize,
+                                batchSize,
+                                paddle_use_batch,
+                                MKLPacked_use_batch);
+          }
+        }
+      }
+    }
+  }
+}
+#endif
+
 int main(int argc, char** argv) {
-  if (version::isWithGpu()) {
-    testing::InitGoogleTest(&argc, argv);
-    initMain(argc, argv);
-    return RUN_ALL_TESTS();
-  } else {
-    return 0;
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  if (!version::isWithGpu()) {
+    testing::GTEST_FLAG(filter) = "-Layer.*";
   }
+  return RUN_ALL_TESTS();
 }