From 7483087c8ca80dafe79317496710b19e8db3dca3 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Fri, 22 Sep 2017 23:23:39 +0800
Subject: [PATCH] enable mkldnn_softmax

---
 .../gserver/activations/MKLDNNActivation.cpp  |  24 +++
 paddle/gserver/activations/MKLDNNActivation.h | 137 ++++++++++++------
 paddle/gserver/tests/test_MKLDNN.cpp          |   4 +-
 python/paddle/trainer/config_parser.py        |   2 +-
 4 files changed, 123 insertions(+), 44 deletions(-)

diff --git a/paddle/gserver/activations/MKLDNNActivation.cpp b/paddle/gserver/activations/MKLDNNActivation.cpp
index ac50937ef3..5b111f4062 100644
--- a/paddle/gserver/activations/MKLDNNActivation.cpp
+++ b/paddle/gserver/activations/MKLDNNActivation.cpp
@@ -26,6 +26,25 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
  */
 #define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation
 
+/**
+ * @def DEFINE_MKLDNN_ACTIVATION
+ */
+#define DEFINE_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS)               \
+  class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) : public BASE_CLASS { \
+  private:                                                           \
+    static const std::string name;                                   \
+                                                                     \
+  public:                                                            \
+    const std::string& getName() const { return name; }              \
+  };                                                                 \
+  const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name =   \
+      "mkldnn_" #ACT_TYPE;                                           \
+  static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] {       \
+    gMKLDNNActivationRegistrar                                       \
+        .registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>(      \
+            "mkldnn_" #ACT_TYPE);                                    \
+  });
+
 /**
  * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
  */
@@ -73,6 +92,11 @@ DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f)
  */
 DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f)
 
+/**
+ * @brief MKLDNN Softmax Activation
+ */
+DEFINE_MKLDNN_ACTIVATION(softmax, MKLDNNSoftmaxActivation)
+
 ActivationFunction* MKLDNNActivation::create(const std::string& type) {
   return gMKLDNNActivationRegistrar.createByType(type);
 }
diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/gserver/activations/MKLDNNActivation.h
index 40dd8c618a..ed0dd891a0 100644
--- a/paddle/gserver/activations/MKLDNNActivation.h
+++ b/paddle/gserver/activations/MKLDNNActivation.h
@@ -36,6 +36,7 @@ protected:
   // mkldnn matrix, primitive, stream and pipeline
   MKLDNNMatrixPtr val_;
   MKLDNNMatrixPtr grad_;
+  std::shared_ptr<mkldnn::engine> engine_;
   std::shared_ptr<MKLDNNStream> stream_;
   std::shared_ptr<mkldnn::primitive> fwd_;
   std::shared_ptr<mkldnn::primitive> bwd_;
@@ -48,8 +49,44 @@ public:
   static ActivationFunction* create(const std::string& type);
   static std::vector<std::string> getAllRegisteredTypes();
   virtual const std::string& getName() const = 0;
-  virtual Error __must_check forward(Argument& act) = 0;
-  virtual Error __must_check backward(Argument& act) = 0;
+  /**
+   * reset the forward primitives
+   */
+  virtual void resetFwd(Argument& act) {
+    VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
+    cnt_ = act.value->getElementCnt();
+    pipelineFwd_.clear();
+    stream_.reset(new MKLDNNStream());
+    engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0));
+    val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
+    if (val_ == nullptr) {
+      int bs = act.getBatchSize();
+      int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1;
+      int iw = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1;
+      int ic = cnt_ / bs / ih / iw;
+      CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
+      val_ = MKLDNNMatrix::create(
+          act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_);
+      CHECK(val_);
+      val_->downSpatial();
+    }
+  }
+  /**
+   * reset the backward primitives; this cannot be merged into
+   * resetFwd, as the grad data would be changing before
+   * backward runs.
+   */
+  virtual void resetBwd(Argument& act) {}
+  virtual Error __must_check forward(Argument& act) {
+    resetFwd(act);
+    stream_->submit(pipelineFwd_);
+    return Error();
+  }
+  virtual Error __must_check backward(Argument& act) {
+    resetBwd(act);
+    stream_->submit(pipelineBwd_);
+    return Error();
+  }
 };
 
 /**
@@ -70,9 +107,7 @@ protected:
 
 public:
   MKLDNNEltwiseActivation() {}
-
   ~MKLDNNEltwiseActivation() {}
-
   virtual const std::string& getName() const = 0;
 
   // in common, the alpha of forward and backward should be equal.
@@ -93,42 +128,21 @@ public:
     return (mkldnn::algorithm)0;
   }
-  /**
-   * reshape and reset the forward primitives
-   */
-  void resetFwd(Argument& act) {
+  void resetFwd(Argument& act) override {
     if (cnt_ == act.value->getElementCnt()) {
       return;
     }
-    VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
-    cnt_ = act.value->getElementCnt();
-    stream_.reset(new MKLDNNStream());
-    auto eng = CPUEngine::Instance().getEngine();
-
-    // get algo setting
-    mkldnn::algorithm algo = getAlgo(this->getName());
+    MKLDNNActivation::resetFwd(act);
 
     // note: alpha represents the NegativeSlope when used in relu.
     float alpha = getAlpha();
     float beta = getBeta();
-
-    pipelineFwd_.clear();
-    val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
-    if (val_ == nullptr) {
-      int bs = act.getBatchSize();
-      int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1;
-      int iw = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1;
-      int ic = cnt_ / bs / ih / iw;
-      CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
-      val_ = MKLDNNMatrix::create(
-          act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, eng);
-      CHECK(val_);
-    }
+    mkldnn::algorithm algo = getAlgo(this->getName());
     auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training,
                                      algo,
                                      val_->getMemoryDesc(),
                                      alpha,
                                      beta);
-    fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng));
+    fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, *engine_));
     // use inplace for forward but save input value before submit
     inVal_ = val_;
     copyInVal_ = nullptr;
@@ -144,11 +158,7 @@ public:
     needResetBwd_ = true;
   }
 
-  /**
-   * reset the backward primitives, can not merge into resetFwd as the grad data
-   * would be changing before backward.
-   */
-  void resetBwd(Argument& act) {
+  void resetBwd(Argument& act) override {
     if (!needResetBwd_) {
       return;
     }
@@ -167,16 +177,61 @@ public:
     pipelineBwd_.clear();
     pipelineBwd_.push_back(*bwd_);
   }
+};
 
-  Error __must_check forward(Argument& act) {
-    resetFwd(act);
-    stream_->submit(pipelineFwd_);
-    return Error();
+/**
+ * @brief Base class of MKLDNN softmax Activation;
+ * only has mkldnn forward, and uses the cpu implementation for backward.
+ */
+class MKLDNNSoftmaxActivation : public MKLDNNActivation {
+  typedef mkldnn::softmax_forward softmax_fwd;
+
+private:
+  // for backward
+  MatrixPtr sftMaxSum_;
+  MatrixPtr sftMaxDot_;
+
+public:
+  MKLDNNSoftmaxActivation() {}
+  ~MKLDNNSoftmaxActivation() {}
+  virtual const std::string& getName() const = 0;
+  void resetFwd(Argument& act) override {
+    if (cnt_ == act.value->getElementCnt()) {
+      return;
+    }
+    MKLDNNActivation::resetFwd(act);
+    int axis = 1;
+    auto fwdDesc = softmax_fwd::desc(
+        mkldnn::prop_kind::forward_scoring, val_->getMemoryDesc(), axis);
+    auto fwdPD = softmax_fwd::primitive_desc(fwdDesc, *engine_);
+    fwd_.reset(new softmax_fwd(fwdPD, *val_, *val_));
+    pipelineFwd_.push_back(*fwd_);
   }
 
-  Error __must_check backward(Argument& act) {
-    resetBwd(act);
-    stream_->submit(pipelineBwd_);
+  Error __must_check backward(Argument& act) override {
+    MatrixPtr outputV = act.value;
+    MatrixPtr outputG = act.grad;
+
+    if (outputG->useGpu()) {
+      outputG->softmaxBackward(*outputV);
+    } else {
+      SetDevice device(act.deviceId);
+      Matrix::resizeOrCreate(sftMaxDot_,
+                             outputG->getHeight(),
+                             outputG->getWidth(),
+                             /* trans */ false,
+                             useGpu(act.deviceId));
+      Matrix::resizeOrCreate(sftMaxSum_,
+                             outputG->getHeight(),
+                             1,
+                             /* trans */ false,
+                             useGpu(act.deviceId));
+
+      sftMaxDot_->dotMul(*outputG, *outputV);
+      sftMaxSum_->colMerge(*sftMaxDot_);
+
+      act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
+    }
     return Error();
   }
 };
diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp
index 1bfbbde424..857d07df3e 100644
--- a/paddle/gserver/tests/test_MKLDNN.cpp
+++ b/paddle/gserver/tests/test_MKLDNN.cpp
@@ -222,8 +222,8 @@ static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
 }
 
 void testActivation(std::string& actType, const testActDesc& pm) {
-  // TODO(TJ): mkldnn_softmax not implemented, paddle do not have elu activation
-  if (actType == "mkldnn_softmax" || actType == "mkldnn_elu") {
+  // TODO(TJ): remove me when paddle supports elu activation
+  if (actType == "mkldnn_elu") {
     return;
   }
   const std::string compareTypes[] = {actType, actType.erase(0, 7)};
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 0f57b81966..098a51ab87 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1566,7 +1566,7 @@ class LayerBase(object):
         self.config = g_config.model_config.layers.add()
         assert isinstance(self.config, LayerConfig)
         use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
-        mkldnn_acts = ['relu', 'tanh']
+        mkldnn_acts = ['relu', 'tanh', 'softmax']
         if use_mkldnn and active_type in mkldnn_acts:
            active_type = "mkldnn_" + active_type
         self.config.name = name
-- 
GitLab
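
Note on the CPU backward path in MKLDNNSoftmaxActivation: since only the
MKL-DNN softmax forward primitive is used here, backward() falls back to
PaddlePaddle's existing CPU kernels (dotMul, colMerge, softmaxDerivative).
For reference, these three calls implement the standard softmax
Jacobian-vector product; per sample (one row of the batch), with softmax
output y and upstream gradient dL/dy:

    % sftMaxDot_ = (dL/dy) .* y          (elementwise product, dotMul)
    % sftMaxSum_ = row-wise sum of sftMaxDot_   (colMerge)
    % softmaxDerivative then forms, for each element j of the row:
    \frac{\partial L}{\partial x_j}
        = y_j \left( \frac{\partial L}{\partial y_j}
          - \sum_k \frac{\partial L}{\partial y_k}\, y_k \right)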
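
With the config_parser.py change, a model config never needs to name
mkldnn_softmax explicitly: any layer whose active_type is plain 'softmax'
is rewritten to 'mkldnn_softmax' when MKL-DNN is enabled. Below is a
minimal, hypothetical sketch of a v1 trainer config showing this; the layer
names and sizes are illustrative, and it assumes the trainer is launched
with the use_mkldnn flag set (e.g. --use_mkldnn=True) so that "use_mkldnn"
reaches g_command_config_args:

    # sketch.py -- illustrative config, not part of this patch
    from paddle.trainer_config_helpers import *

    settings(batch_size=128, learning_rate=1e-3)

    data = data_layer(name='input', size=784)
    hidden = fc_layer(input=data, size=256, act=ReluActivation())
    # 'softmax' is now in mkldnn_acts, so with use_mkldnn enabled the parser
    # rewrites it to 'mkldnn_softmax' and MKLDNNSoftmaxActivation runs.
    prob = fc_layer(input=hidden, size=10, act=SoftmaxActivation())

    outputs(prob)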