Commit 9d692e3b authored by tensor-tang

add gtest for MKLDNN activation and pass them

Parent 24f13b1a
@@ -22,9 +22,12 @@ limitations under the License. */
 #include <type_traits>
 #include "paddle/parameter/Argument.h"
 #include "paddle/utils/ClassRegistrar.h"
 #include "paddle/utils/Logging.h"
+#ifdef PADDLE_USE_MKLDNN
+#include "MKLDNNActivation.h"
+#endif

 namespace paddle {

 static ClassRegistrar<ActivationFunction> gActivationRegistrar;
@@ -456,6 +459,12 @@ Error __must_check backward(Argument& act) {
 END_DEFINE_ACTIVATION(log)

 ActivationFunction* ActivationFunction::create(const std::string& type) {
+#ifdef PADDLE_USE_MKLDNN
+  if (!type.empty() && type.compare(0, 7, "mkldnn_") == 0) {
+    return MKLDNNActivation::create(type);
+  }
+#endif
   return gActivationRegistrar.createByType(type);
 }
...
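With this change, ActivationFunction::create() routes any type name that starts with the "mkldnn_" prefix to MKLDNNActivation::create(), while everything else still goes through gActivationRegistrar. A minimal sketch of how callers could exercise both paths (assumes a build with PADDLE_USE_MKLDNN; the include path and the wrapper function are illustrative and not part of this patch):

#include <memory>
#include "paddle/gserver/activations/ActivationFunction.h"  // assumed header location

void createBothKinds() {
  // "mkldnn_relu" starts with the 7-character prefix "mkldnn_", so create()
  // forwards it to MKLDNNActivation::create().
  std::unique_ptr<paddle::ActivationFunction> dnnAct(
      paddle::ActivationFunction::create("mkldnn_relu"));
  // "relu" carries no prefix, so it is still created by gActivationRegistrar.
  std::unique_ptr<paddle::ActivationFunction> cpuAct(
      paddle::ActivationFunction::create("relu"));
}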
@@ -29,20 +29,23 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
 /**
  * @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
  */
-#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA) \
+#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA) \
   class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \
       : public MKLDNNEltwiseActivation { \
   private: \
     static const std::string name; \
     static const float alpha; \
+    static const float bwdAlpha; \
 \
   public: \
     const std::string& getName() const { return name; } \
     float getAlpha() const { return alpha; } \
+    float getBwdAlpha() const { return bwdAlpha; } \
   }; \
   const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \
       "mkldnn_" #ACT_TYPE; \
   const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \
+  const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \
   static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \
     gMKLDNNActivationRegistrar \
         .registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>( \
@@ -54,21 +57,21 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
  * Actually mkldnn_relu is Leaky Relu.
  *  f(x) = x                   (x >= 0)
  *  f(x) = negative_slope * x  (x <  0)
- * @note the negative_slope should be -0.f
+ * @note the negative_slope should be -0.f in forward
  */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f)

 /**
  * @brief MKLDNN Tanh Activation.
  */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f)

 /**
  * @brief MKLDNN ELU(Exponential Linear Unit) Activation.
  *  f(x) = x                              (x >= 0)
  *  f(x) = negative_slope * (exp(x) - 1)  (x <  0)
  */
-DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f)
+DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f)

 ActivationFunction* MKLDNNActivation::create(const std::string& type) {
   return gMKLDNNActivationRegistrar.createByType(type);
...
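For orientation, with the new BWD_ALPHA parameter an invocation such as DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f) expands to roughly the class below. This is a simplified sketch of the macro body shown above: the concrete class name is whatever MKLDNN_ACTIVATION_CLASS_NAME(relu) produces, and the registration InitFunction is elided.

class MkldnnReluActivation : public MKLDNNEltwiseActivation {  // illustrative name only
private:
  static const std::string name;
  static const float alpha;
  static const float bwdAlpha;

public:
  const std::string& getName() const { return name; }
  float getAlpha() const { return alpha; }        // forward alpha (negative slope): -0.f
  float getBwdAlpha() const { return bwdAlpha; }  // backward alpha: 0.f
};
const std::string MkldnnReluActivation::name = "mkldnn_relu";
const float MkldnnReluActivation::alpha = -0.f;
const float MkldnnReluActivation::bwdAlpha = 0.f;
// ...plus an InitFunction that registers the class under "mkldnn_relu".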
@@ -30,6 +30,9 @@ class MKLDNNActivation : public ActivationFunction {
 protected:
   // input value element count
   size_t cnt_;
+  // should not merge resetBwd into resetFwd,
+  // because the grad data would be changing before backward
+  bool needResetBwd_;
   // mkldnn matrix, primitive, stream and pipeline
   MKLDNNMatrixPtr val_;
   MKLDNNMatrixPtr grad_;
@@ -40,7 +43,7 @@ protected:
   std::vector<mkldnn::primitive> pipelineBwd_;

 public:
-  MKLDNNActivation() : cnt_(0) {}
+  MKLDNNActivation() : cnt_(0), needResetBwd_(true) {}
   ~MKLDNNActivation() {}
   static ActivationFunction* create(const std::string& type);
   static std::vector<std::string> getAllRegisteredTypes();
@@ -57,19 +60,43 @@ class MKLDNNEltwiseActivation : public MKLDNNActivation {
   typedef mkldnn::eltwise_forward eltwise_fwd;
   typedef mkldnn::eltwise_backward eltwise_bwd;

+protected:
+  // save the forward primitive desc, which can be reused in backward
+  std::shared_ptr<eltwise_fwd::primitive_desc> fwdPD_;
+  // eltwise_bwd needs the src input value
+  MKLDNNMatrixPtr inVal_;
+  // used for copying the input data
+  std::shared_ptr<mkldnn::reorder> copyInVal_;
+
 public:
   MKLDNNEltwiseActivation() {}
   ~MKLDNNEltwiseActivation() {}
   virtual const std::string& getName() const = 0;
+  // in general, the alpha of forward and backward should be equal;
+  // but for relu, to avoid negative values, they should be opposite
   virtual float getAlpha() const = 0;
+  virtual float getBwdAlpha() const = 0;
   virtual float getBeta() const { return 0.f; }
+  virtual mkldnn::algorithm getAlgo(const std::string& type) const {
+    if (type == "mkldnn_relu") {
+      return mkldnn::algorithm::eltwise_relu;
+    } else if (type == "mkldnn_tanh") {
+      return mkldnn::algorithm::eltwise_tanh;
+    } else if (type == "mkldnn_elu") {
+      return mkldnn::algorithm::eltwise_elu;
+    } else {
+      LOG(FATAL) << "Unknown eltwise activation type: " << type;
+    }
+    return (mkldnn::algorithm)0;
+  }
+
   /**
-   * reshape and reset the forward and backward primitives
+   * reshape and reset the forward primitives
    */
-  void resetPrimitives(Argument& act) {
+  void resetFwd(Argument& act) {
     if (cnt_ == act.value->getElementCnt()) {
       return;
     }
@@ -78,21 +105,13 @@ public:
     auto eng = CPUEngine::Instance().getEngine();

     // get algo setting
-    mkldnn::algorithm algo;
-    if (this->getName() == "mkldnn_relu") {
-      algo = mkldnn::algorithm::eltwise_relu;
-    } else if (this->getName() == "mkldnn_tanh") {
-      algo = mkldnn::algorithm::eltwise_tanh;
-    } else if (this->getName() == "mkldnn_elu") {
-      algo = mkldnn::algorithm::eltwise_elu;
-    } else {
-      LOG(FATAL) << "Unkown eltwise activation type: " << this->getName();
-    }
+    mkldnn::algorithm algo = getAlgo(this->getName());
     // note: alpha represents the NegativeSlope when used in relu.
     float alpha = getAlpha();
     float beta = getBeta();

     /// forward
+    pipelineFwd_.clear();
     val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
     if (val_ == nullptr) {
       int bs = act.getBatchSize();
@@ -109,33 +128,52 @@ public:
                                    val_->getMemoryDesc(),
                                    alpha,
                                    beta);
-    auto fwdPD = eltwise_fwd::primitive_desc(fwdDesc, eng);
-    // inplace buffer, dst = src
-    fwd_.reset(new eltwise_fwd(fwdPD, *val_, *val_));
-    pipelineFwd_.clear();
+    fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng));
+    // use inplace for forward, but save the input value before submit
+    inVal_ = val_;
+    if (act.grad) {
+      // only copy when backward is needed
+      inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
+      copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
+      CHECK(copyInVal_) << "should not be empty";
+      pipelineFwd_.push_back(*copyInVal_);
+    }
+    fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_));
     pipelineFwd_.push_back(*fwd_);
+    needResetBwd_ = true;
+  }

-    /// backward
-    if (act.grad == nullptr) {
-      grad_ = nullptr;
+  /**
+   * reset the backward primitives; cannot be merged into resetFwd as the grad
+   * data would be changing before backward
+   */
+  void resetBwd(Argument& act) {
+    if (!needResetBwd_) {
       return;
     }
+    needResetBwd_ = false;
+    mkldnn::algorithm algo = getAlgo(this->getName());
+    float alpha = getBwdAlpha();
+    float beta = getBeta();
     grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
+    auto eng = CPUEngine::Instance().getEngine();
     auto bwdDesc = eltwise_bwd::desc(
         algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
-    auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, fwdPD);
-    bwd_.reset(new eltwise_bwd(bwdPD, *val_, *grad_, *grad_));
+    auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_);
+    CHECK(inVal_);
+    bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_));
     pipelineBwd_.clear();
     pipelineBwd_.push_back(*bwd_);
   }

   Error __must_check forward(Argument& act) {
-    resetPrimitives(act);
+    resetFwd(act);
     stream_->submit(pipelineFwd_);
     return Error();
   }

   Error __must_check backward(Argument& act) {
+    resetBwd(act);
     stream_->submit(pipelineBwd_);
     return Error();
   }
...
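Taken together, the header now separates the two reset paths: resetFwd rebuilds the forward primitives only when the element count of act.value changes (copying the input into inVal_ via a reorder, so that eltwise_bwd still sees the pre-activation values even though the forward runs in place), and resetBwd rebuilds the backward primitives only when needResetBwd_ was set by such a rebuild. A condensed restatement of that call sequence, as comments rather than runnable code:

// per mini-batch, from the layer that owns this activation:
//
//   act->forward(output);
//     resetFwd(output):  skipped if output.value's element count is unchanged;
//                        otherwise rebuild fwdPD_/fwd_, enqueue copyInVal_ before
//                        fwd_ in pipelineFwd_ (only when output.grad exists),
//                        and set needResetBwd_ = true
//     stream_->submit(pipelineFwd_);
//
//   act->backward(output);
//     resetBwd(output):  skipped unless needResetBwd_ is set; otherwise build the
//                        backward primitive from fwdPD_ and the saved inVal_
//     stream_->submit(pipelineBwd_);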
@@ -64,15 +64,17 @@ void MKLDNNTester::reset(const TestConfig& dnn,
         configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i]));
   }
   refLayer_ = testLayers_[REF];
-  dnnLayer_ = std::dynamic_pointer_cast<MKLDNNLayer>(testLayers_[DNN]);
-  CHECK(dnnLayer_);
-  // for comparison with Paddle reference results,
-  // need manually add cpu device output for test
-  dnnLayer_->addOutputArgument(CPU_DEVICE);
+  dnnLayer_ = testLayers_[DNN];
   EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
   EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
   setInputImgSize();
+
+  // for comparison with Paddle reference results,
+  // need manually add cpu device output for test
+  MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
+  if (dnnLayer) {
+    dnnLayer->addOutputArgument(CPU_DEVICE);
+  }
 }
@@ -122,7 +124,7 @@ void MKLDNNTester::randomTopDiffs() {
 void MKLDNNTester::checkForward() {
   VLOG(MKLDNN_ALL) << "Check Forward";
   printTopDatas();
-  double delta = compareMatrix(dnnLayer_->getOutput(-1).value,
+  double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
                                refLayer_->getOutputValue());
   EXPECT_LE(fabs(delta), eps_);
 }
@@ -155,7 +157,10 @@ void MKLDNNTester::checkBackwardWgts() {
   vector<VectorPtr> dnnWgts;  // used to temporarily save mkldnn weights
   saveWgt(parameters_[DNN], dnnWgts);

-  dnnLayer_->convertWeightsToPaddle();
+  MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
+  if (dnnLayer) {
+    dnnLayer->convertWeightsToPaddle();
+  }
   for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
     const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
     const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
@@ -322,6 +327,10 @@ void MKLDNNTester::runOnce() {
   // and clearTopDatas(REF) should be covered by ref layers
   clearBotDiffs(REF);
   clearWgtDiffs(REF);
+  // it is necessary to clear the bottom diffs when only the activation is of dnn type
+  if (configs_[DNN].layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) {
+    clearBotDiffs(DNN);
+  }
 }
@@ -333,8 +342,19 @@ void MKLDNNTester::run(const TestConfig& dnn,
                        float epsilon,
                        bool log,
                        int level) {
-  VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type()
-                     << " vs " << ref.layerConfig.type();
+  CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 ||
+        dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0)
+      << "should be an MKLDNN layer or an MKLDNN activation";
+  if (dnn.layerConfig.type() == ref.layerConfig.type()) {
+    VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: "
+                       << dnn.layerConfig.active_type() << " vs "
+                       << ref.layerConfig.active_type();
+  } else {
+    VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: "
+                       << dnn.layerConfig.type() << " vs "
+                       << ref.layerConfig.type();
+  }
   ih_ = inputImgH;
   iw_ = inputImgW;
   iter_ = iter;
...
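Both the tester's new CHECK and the create() dispatch rely on the same prefix test, s.compare(0, 7, "mkldnn_") == 0, which compares the first seven characters of s against "mkldnn_". A standalone sketch of that check in plain standard C++ (independent of Paddle; the helper name is illustrative):

#include <cassert>
#include <string>

// Returns true when `type` names an MKLDNN layer or activation,
// i.e. when it starts with the 7-character prefix "mkldnn_".
static bool isMkldnnType(const std::string& type) {
  return !type.empty() && type.compare(0, 7, "mkldnn_") == 0;
}

int main() {
  assert(isMkldnnType("mkldnn_relu"));
  assert(!isMkldnnType("relu"));
  assert(!isMkldnnType(""));  // the empty-string guard mirrors ActivationFunction::create()
  return 0;
}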
@@ -41,8 +41,7 @@ protected:
   vector<LayerMap> layerMaps_;
   vector<vector<ParameterPtr>> parameters_;
   vector<LayerPtr> testLayers_;
-  LayerPtr refLayer_;
-  MKLDNNLayerPtr dnnLayer_;
+  LayerPtr refLayer_, dnnLayer_;

   /// run some iterations; all the results should pass
   size_t iter_;
...
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <vector>
 #include "MKLDNNTester.h"
 #include "ModelConfig.pb.h"
+#include "paddle/gserver/activations/MKLDNNActivation.h"
 #include "paddle/math/MathUtils.h"

 using namespace paddle;  // NOLINT
@@ -190,7 +191,7 @@ void testPoolLayer(const testPoolDesc& pm) {
   }
 }

-TEST(MkldnnLayer, PoolLayer) {
+TEST(MKLDNNLayer, PoolLayer) {
   /* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw */
   testPoolLayer({2, 1, 4, 4, 2, 2, 3, 3, 0, 0, 2, 2});
   testPoolLayer({10, 8, 16, 16, 8, 8, 2, 2, 0, 0, 2, 2});
@@ -202,6 +203,49 @@ TEST(MKLDNNLayer, PoolLayer) {
   testPoolLayer({2, 8, 56, 56, 29, 29, 3, 3, 1, 1, 2, 2});
 }

+struct testActDesc {
+  int bs, ch;
+  int ih, iw;
+};
+
+static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
+  cfg.biasSize = 0;
+  cfg.layerConfig.set_type("addto");
+  cfg.layerConfig.set_size(pm.ch * pm.ih * pm.iw);
+  cfg.inputDefs.push_back(
+      {INPUT_DATA,
+       "layer_0",
+       /* size of input layer= */ size_t(pm.ch * pm.ih * pm.iw),
+       0});
+  cfg.layerConfig.add_inputs();
+}
+
+void testActivation(std::string& type, const testActDesc& pm) {
+  const std::string compareTypes[] = {type, type.erase(0, 7)};
+  TestConfig cfg;
+  getAddtoConfig(cfg, pm);
+
+  TestConfig ref = cfg;
+  cfg.layerConfig.set_active_type(compareTypes[0]);
+  ref.layerConfig.set_active_type(compareTypes[1]);
+  MKLDNNTester tester;
+  for (auto bs : {pm.bs, 1}) {
+    tester.run(cfg, ref, bs, pm.ih, pm.iw);
+  }
+}
+
+TEST(MKLDNNActivation, Activations) {
+  auto types = MKLDNNActivation::getAllRegisteredTypes();
+  // TODO(TJ): mkldnn_softmax is not implemented yet; Paddle does not have an elu activation
+  std::set<string> excluded{"mkldnn_softmax", "mkldnn_elu"};
+  for (auto type : types) {
+    if (excluded.count(type)) {
+      continue;
+    }
+    testActivation(type, {16, 64, 32, 32});
+  }
+}
+
 // TODO(TJ): add branch test
 int main(int argc, char** argv) {
...
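The registered-type loop above covers every MKLDNN activation at once; when debugging a single type, a focused test can reuse the same helpers. A hypothetical example (not part of this patch), relying on testActivation() taking a mutable string because it strips the "mkldnn_" prefix in place:

TEST(MKLDNNActivation, ReluOnly) {
  std::string type = "mkldnn_relu";
  // bs, ch, ih, iw
  testActivation(type, {2, 4, 8, 8});
}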