diff --git a/paddle/gserver/activations/MKLDNNActivation.cpp b/paddle/gserver/activations/MKLDNNActivation.cpp index 5b111f406257adc96aac215b521896bf6d258480..3b32d7e2d83f4c17725d4bbcb3303ed9721b0527 100644 --- a/paddle/gserver/activations/MKLDNNActivation.cpp +++ b/paddle/gserver/activations/MKLDNNActivation.cpp @@ -27,49 +27,52 @@ static ClassRegistrar gMKLDNNActivationRegistrar; #define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation /** - * @def DEFINE_MKLDNN_ACTIVATION + * @def BEGIN_MKLDNN_ACTIVATION */ -#define DEFINE_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ - class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) : public BASE_CLASS { \ - private: \ - static const std::string name; \ - \ - public: \ - const std::string& getName() const { return name; } \ - }; \ - const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ - "mkldnn_" #ACT_TYPE; \ - static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ - gMKLDNNActivationRegistrar \ - .registerClass( \ - "mkldnn_" #ACT_TYPE); \ +#define BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ + class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) : public BASE_CLASS { +/** + * @def END_MKLDNN_ACTIVATION + */ +#define END_MKLDNN_ACTIVATION(ACT_TYPE) \ +private: \ + static const std::string name; \ + \ +public: \ + const std::string& getName() const { return name; } \ + } \ + ; \ + const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ + "mkldnn_" #ACT_TYPE; \ + static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ + gMKLDNNActivationRegistrar \ + .registerClass( \ + "mkldnn_" #ACT_TYPE); \ }); +/** + * @def DEFINE_MKLDNN_ACTIVATION + */ +#define DEFINE_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ + BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ + END_MKLDNN_ACTIVATION(ACT_TYPE) + /** * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION */ -#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA) \ - class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \ - : public MKLDNNEltwiseActivation { \ - private: \ - static const std::string name; \ - static const float alpha; \ - static const float bwdAlpha; \ - \ - public: \ - const std::string& getName() const { return name; } \ - float getAlpha() const { return alpha; } \ - float getBwdAlpha() const { return bwdAlpha; } \ - }; \ - const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \ - "mkldnn_" #ACT_TYPE; \ - const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \ - const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \ - static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \ - gMKLDNNActivationRegistrar \ - .registerClass( \ - "mkldnn_" #ACT_TYPE); \ - }); +#define DEFINE_MKLDNN_ELTWISE_ACTIVATION( \ + ACT_TYPE, BASE_CLASS, ALPHA, BWD_ALPHA) \ + BEGIN_MKLDNN_ACTIVATION(ACT_TYPE, BASE_CLASS) \ +private: \ + static const float alpha; \ + static const float bwdAlpha; \ + \ +public: \ + float getAlpha() const { return alpha; } \ + float getBwdAlpha() const { return bwdAlpha; } \ + END_MKLDNN_ACTIVATION(ACT_TYPE) \ + const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \ + const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; /** * @brief MKLDNN Relu Activation. @@ -78,25 +81,138 @@ static ClassRegistrar gMKLDNNActivationRegistrar; * f(x) = negative_slope * x (x < 0) * @note the negative_slope should be -0.f in forward */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f) +DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, MKLDNNEltwiseActivation, -0.f, 0.f) /** * @brief MKLDNN Tanh Activation. */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f) +DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, MKLDNNEltwiseActivation, 0.f, 0.f) /** * @brief MKLDNN ELU(Exponential Linear Unit) Activation. * f(x) = x (x >= 0) * f(x) = negative_slope * (exp(x) - 1) (x < 0) */ -DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f) +DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, MKLDNNEltwiseActivation, 0.f, 0.f) + +mkldnn::algorithm MKLDNNEltwiseActivation::getAlgo(std::string type) const { + const std::map algoMap = { + {"relu", algorithm::eltwise_relu}, + {"tanh", algorithm::eltwise_tanh}, + {"elu", algorithm::eltwise_elu}}; + type.erase(0, 7); // remove mkldnn_ + algorithm algo = (algorithm)0; + mapGet(type, algoMap, &algo); + return algo; +} + +void MKLDNNEltwiseActivation::resetFwd(Argument& act) { + if (cnt_ == act.value->getElementCnt()) { + return; + } + MKLDNNActivation::resetFwd(act); + // note: alpha represents the NegativeSlope when used in relu. + float alpha = getAlpha(); + float beta = getBeta(); + algorithm algo = getAlgo(this->getName()); + auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training, + algo, + val_->getMemoryDesc(), + alpha, + beta); + fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, *engine_)); + // use inplace for forward but save input value before submit + inVal_ = val_; + copyInVal_ = nullptr; + if (act.grad && algo == algorithm::eltwise_tanh) { + // tanh need save src input for backward + inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc()); + copyInVal_ = std::make_shared(*val_, *inVal_); + CHECK(copyInVal_) << "should not be emptry"; + pipelineFwd_.push_back(*copyInVal_); + } + fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_)); + pipelineFwd_.push_back(*fwd_); + needResetBwd_ = true; +} + +void MKLDNNEltwiseActivation::resetBwd(Argument& act) { + if (!needResetBwd_) { + return; + } + VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; + needResetBwd_ = false; + algorithm algo = getAlgo(this->getName()); + float alpha = getBwdAlpha(); + float beta = getBeta(); + grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc()); + auto eng = CPUEngine::Instance().getEngine(); + auto bwdDesc = eltwise_bwd::desc( + algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta); + auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_); + CHECK(inVal_); + bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_)); + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwd_); +} /** * @brief MKLDNN Softmax Activation */ DEFINE_MKLDNN_ACTIVATION(softmax, MKLDNNSoftmaxActivation) +void MKLDNNSoftmaxActivation::resetFwd(Argument& act) { + if (cnt_ == act.value->getElementCnt()) { + return; + } + MKLDNNActivation::resetFwd(act); + int axis = 1; + auto fwdDesc = softmax_fwd::desc( + mkldnn::prop_kind::forward_scoring, val_->getMemoryDesc(), axis); + auto fwdPD = softmax_fwd::primitive_desc(fwdDesc, *engine_); + fwd_.reset(new softmax_fwd(fwdPD, *val_, *val_)); + pipelineFwd_.push_back(*fwd_); +} + +Error __must_check MKLDNNSoftmaxActivation::forward(Argument& act) { + resetFwd(act); + stream_->submit(pipelineFwd_); + real* v = act.value->getData(); + real threshold = exp(-64); +#pragma omp parallel for + for (size_t i = 0; i < act.value->getElementCnt(); ++i) { + v[i] = v[i] < threshold ? threshold : v[i]; + } + return Error(); +} + +Error __must_check MKLDNNSoftmaxActivation::backward(Argument& act) { + MatrixPtr outputV = act.value; + MatrixPtr outputG = act.grad; + + if (outputG->useGpu()) { + outputG->softmaxBackward(*outputV); + } else { + SetDevice device(act.deviceId); + Matrix::resizeOrCreate(sftMaxDot_, + outputG->getHeight(), + outputG->getWidth(), + /* trans */ false, + useGpu(act.deviceId)); + Matrix::resizeOrCreate(sftMaxSum_, + outputG->getHeight(), + 1, + /* trans */ false, + useGpu(act.deviceId)); + + sftMaxDot_->dotMul(*outputG, *outputV); + sftMaxSum_->colMerge(*sftMaxDot_); + + act.grad->softmaxDerivative(*act.value, *sftMaxSum_); + } + return Error(); +} + ActivationFunction* MKLDNNActivation::create(const std::string& type) { return gMKLDNNActivationRegistrar.createByType(type); } @@ -108,4 +224,34 @@ std::vector MKLDNNActivation::getAllRegisteredTypes() { return types; } +void MKLDNNActivation::resetFwd(Argument& act) { + VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; + cnt_ = act.value->getElementCnt(); + pipelineFwd_.clear(); + stream_.reset(new MKLDNNStream()); + engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0)); + val_ = std::dynamic_pointer_cast(act.value); + if (val_ == nullptr) { + int bs = act.getBatchSize(); + int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1; + int iw = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1; + int ic = cnt_ / bs / ih / iw; + CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw); + val_ = MKLDNNMatrix::create( + act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_); + CHECK(val_); + val_->downSpatial(); + } +} + +Error __must_check MKLDNNActivation::forward(Argument& act) { + resetFwd(act); + stream_->submit(pipelineFwd_); + return Error(); +} +Error __must_check MKLDNNActivation::backward(Argument& act) { + resetBwd(act); + stream_->submit(pipelineBwd_); + return Error(); +} } // namespace paddle diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/gserver/activations/MKLDNNActivation.h index ed0dd891a0f0a7e8d5d53a203aa5977b70809fdd..dd16421fd6e93b49c30b1d3b601f95980afec57b 100644 --- a/paddle/gserver/activations/MKLDNNActivation.h +++ b/paddle/gserver/activations/MKLDNNActivation.h @@ -52,41 +52,15 @@ public: /** * reset the forward primitives */ - virtual void resetFwd(Argument& act) { - VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; - cnt_ = act.value->getElementCnt(); - pipelineFwd_.clear(); - stream_.reset(new MKLDNNStream()); - engine_.reset(new mkldnn::engine(mkldnn::engine::cpu, 0)); - val_ = std::dynamic_pointer_cast(act.value); - if (val_ == nullptr) { - int bs = act.getBatchSize(); - int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1; - int iw = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1; - int ic = cnt_ / bs / ih / iw; - CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw); - val_ = MKLDNNMatrix::create( - act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, *engine_); - CHECK(val_); - val_->downSpatial(); - } - } + virtual void resetFwd(Argument& act); /** * reset the backward primitives, * can not merge this functions into resetFwd as the grad data * would be changing before backward. */ virtual void resetBwd(Argument& act) {} - virtual Error __must_check forward(Argument& act) { - resetFwd(act); - stream_->submit(pipelineFwd_); - return Error(); - } - virtual Error __must_check backward(Argument& act) { - resetBwd(act); - stream_->submit(pipelineBwd_); - return Error(); - } + virtual Error __must_check forward(Argument& act); + virtual Error __must_check backward(Argument& act); }; /** @@ -96,6 +70,7 @@ public: class MKLDNNEltwiseActivation : public MKLDNNActivation { typedef mkldnn::eltwise_forward eltwise_fwd; typedef mkldnn::eltwise_backward eltwise_bwd; + typedef mkldnn::algorithm algorithm; protected: // save the forward primitive desc, which can be used backward @@ -115,68 +90,9 @@ public: virtual float getAlpha() const = 0; virtual float getBwdAlpha() const = 0; virtual float getBeta() const { return 0.f; } - virtual mkldnn::algorithm getAlgo(const std::string& type) const { - if (type == "mkldnn_relu") { - return mkldnn::algorithm::eltwise_relu; - } else if (type == "mkldnn_tanh") { - return mkldnn::algorithm::eltwise_tanh; - } else if (type == "mkldnn_elu") { - return mkldnn::algorithm::eltwise_elu; - } else { - LOG(FATAL) << "Unkown eltwise activation type: " << type; - } - return (mkldnn::algorithm)0; - } - - void resetFwd(Argument& act) override { - if (cnt_ == act.value->getElementCnt()) { - return; - } - MKLDNNActivation::resetFwd(act); - // note: alpha represents the NegativeSlope when used in relu. - float alpha = getAlpha(); - float beta = getBeta(); - mkldnn::algorithm algo = getAlgo(this->getName()); - auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training, - algo, - val_->getMemoryDesc(), - alpha, - beta); - fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, *engine_)); - // use inplace for forward but save input value before submit - inVal_ = val_; - copyInVal_ = nullptr; - if (act.grad && algo == mkldnn::algorithm::eltwise_tanh) { - // tanh need save src input for backward - inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc()); - copyInVal_ = std::make_shared(*val_, *inVal_); - CHECK(copyInVal_) << "should not be emptry"; - pipelineFwd_.push_back(*copyInVal_); - } - fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_)); - pipelineFwd_.push_back(*fwd_); - needResetBwd_ = true; - } - - void resetBwd(Argument& act) override { - if (!needResetBwd_) { - return; - } - VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; - needResetBwd_ = false; - mkldnn::algorithm algo = getAlgo(this->getName()); - float alpha = getBwdAlpha(); - float beta = getBeta(); - grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc()); - auto eng = CPUEngine::Instance().getEngine(); - auto bwdDesc = eltwise_bwd::desc( - algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta); - auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_); - CHECK(inVal_); - bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_)); - pipelineBwd_.clear(); - pipelineBwd_.push_back(*bwd_); - } + virtual algorithm getAlgo(std::string type) const; + void resetFwd(Argument& act) override; + void resetBwd(Argument& act) override; }; /** @@ -195,45 +111,9 @@ public: MKLDNNSoftmaxActivation() {} ~MKLDNNSoftmaxActivation() {} virtual const std::string& getName() const = 0; - void resetFwd(Argument& act) override { - if (cnt_ == act.value->getElementCnt()) { - return; - } - MKLDNNActivation::resetFwd(act); - int axis = 1; - auto fwdDesc = softmax_fwd::desc( - mkldnn::prop_kind::forward_scoring, val_->getMemoryDesc(), axis); - auto fwdPD = softmax_fwd::primitive_desc(fwdDesc, *engine_); - fwd_.reset(new softmax_fwd(fwdPD, *val_, *val_)); - pipelineFwd_.push_back(*fwd_); - } - - Error __must_check backward(Argument& act) override { - MatrixPtr outputV = act.value; - MatrixPtr outputG = act.grad; - - if (outputG->useGpu()) { - outputG->softmaxBackward(*outputV); - } else { - SetDevice device(act.deviceId); - Matrix::resizeOrCreate(sftMaxDot_, - outputG->getHeight(), - outputG->getWidth(), - /* trans */ false, - useGpu(act.deviceId)); - Matrix::resizeOrCreate(sftMaxSum_, - outputG->getHeight(), - 1, - /* trans */ false, - useGpu(act.deviceId)); - - sftMaxDot_->dotMul(*outputG, *outputV); - sftMaxSum_->colMerge(*sftMaxDot_); - - act.grad->softmaxDerivative(*act.value, *sftMaxSum_); - } - return Error(); - } + void resetFwd(Argument& act) override; + Error __must_check forward(Argument& act) override; + Error __must_check backward(Argument& act) override; }; } // namespace paddle diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 588310fbeecdf297f2eb24a6aadb26a87d25b1ff..0023b4d0f5da500f380ecb836b7c54e050b13d67 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -3637,7 +3637,7 @@ void CpuMatrix::oneHotCrossEntropy(Matrix& output, IVector& label) { for (size_t i = 0; i < numSamples; ++i, out += dim) { CHECK_GE(lbl[i], 0); CHECK_LT((size_t)lbl[i], dim); - cost[i] = -std::log(std::max(out[lbl[i]], real(FLT_MIN))); + cost[i] = -std::log(out[lbl[i]]); } } @@ -3652,7 +3652,7 @@ void CpuMatrix::oneHotCrossEntropyBp(Matrix& output, IVector& label) { real* grad = getData(); int* lbl = label.getData(); for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim) { - grad[lbl[i]] -= 1 / std::max(out[lbl[i]], real(FLT_MIN)); + grad[lbl[i]] -= 1 / out[lbl[i]]; } }