提交 9d692e3b 编写于 作者: T tensor-tang

add gtest for MKLDNN activation and pass them

上级 24f13b1a
......@@ -22,9 +22,12 @@ limitations under the License. */
#include <type_traits>
#include "paddle/parameter/Argument.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_USE_MKLDNN
#include "MKLDNNActivation.h"
#endif
namespace paddle {
static ClassRegistrar<ActivationFunction> gActivationRegistrar;
......@@ -456,6 +459,12 @@ Error __must_check backward(Argument& act) {
END_DEFINE_ACTIVATION(log)
/**
 * Create an activation function from its registered type name.
 *
 * When built with PADDLE_USE_MKLDNN, names that start with "mkldnn_" are
 * forwarded to MKLDNNActivation::create; every other name is looked up in
 * gActivationRegistrar.
 *
 * @param type activation type name.
 * @return whatever the selected registrar's createByType/create returns.
 */
ActivationFunction* ActivationFunction::create(const std::string& type) {
#ifdef PADDLE_USE_MKLDNN
  // compare() over the first 7 characters is non-zero for an empty or
  // shorter string, so no separate emptiness test is required.
  const bool hasMkldnnPrefix = (type.compare(0, 7, "mkldnn_") == 0);
  if (hasMkldnnPrefix) {
    return MKLDNNActivation::create(type);
  }
#endif
  return gActivationRegistrar.createByType(type);
}
......
......@@ -29,20 +29,23 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
/**
* @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
*/
#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA) \
#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA) \
class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \
: public MKLDNNEltwiseActivation { \
private: \
static const std::string name; \
static const float alpha; \
static const float bwdAlpha; \
\
public: \
const std::string& getName() const { return name; } \
float getAlpha() const { return alpha; } \
float getBwdAlpha() const { return bwdAlpha; } \
}; \
const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \
"mkldnn_" #ACT_TYPE; \
const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \
const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \
static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \
gMKLDNNActivationRegistrar \
.registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>( \
......@@ -54,21 +57,21 @@ static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
* Actually mkldnn_relu is Leaky Relu.
* f(x) = x (x >= 0)
* f(x) = negative_slope * x (x < 0)
* @note the negative_slope should be -0.f
* @note the negative_slope should be -0.f in forward
*/
DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f)
DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f)
/**
* @brief MKLDNN Tanh Activation.
*/
DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f)
DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f)
/**
* @brief MKLDNN ELU(Exponential Linear Unit) Activation.
* f(x) = x (x >= 0)
* f(x) = negative_slope * (exp(x) - 1) (x < 0)
*/
DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f)
DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f)
ActivationFunction* MKLDNNActivation::create(const std::string& type) {
return gMKLDNNActivationRegistrar.createByType(type);
......
......@@ -30,6 +30,9 @@ class MKLDNNActivation : public ActivationFunction {
protected:
// input value element count
size_t cnt_;
// should not merge the resetBwd into resetFwd,
// because the grad data would be changing before backward.
bool needResetBwd_;
// mkldnn matrix, primitive, stream and pipeline
MKLDNNMatrixPtr val_;
MKLDNNMatrixPtr grad_;
......@@ -40,7 +43,7 @@ protected:
std::vector<mkldnn::primitive> pipelineBwd_;
public:
MKLDNNActivation() : cnt_(0) {}
MKLDNNActivation() : cnt_(0), needResetBwd_(true) {}
~MKLDNNActivation() {}
static ActivationFunction* create(const std::string& type);
static std::vector<std::string> getAllRegisteredTypes();
......@@ -57,19 +60,43 @@ class MKLDNNEltwiseActivation : public MKLDNNActivation {
typedef mkldnn::eltwise_forward eltwise_fwd;
typedef mkldnn::eltwise_backward eltwise_bwd;
protected:
// save the forward primitive desc, which can be used backward
std::shared_ptr<eltwise_fwd::primitive_desc> fwdPD_;
// eltwise_bwd need src input value
MKLDNNMatrixPtr inVal_;
// use for copy data
std::shared_ptr<mkldnn::reorder> copyInVal_;
public:
MKLDNNEltwiseActivation() {}
~MKLDNNEltwiseActivation() {}
virtual const std::string& getName() const = 0;
// In general, the forward and backward alpha should be equal,
// but for relu they should be opposite, to avoid negative values.
virtual float getAlpha() const = 0;
virtual float getBwdAlpha() const = 0;
virtual float getBeta() const { return 0.f; }
/**
 * Map an eltwise activation type name to the MKL-DNN algorithm it uses.
 *
 * @param type activation name; "mkldnn_relu", "mkldnn_tanh" and "mkldnn_elu"
 *             are the names handled here.
 * @return the matching mkldnn::algorithm value.
 * @note any other name is reported through LOG(FATAL). The trailing return
 *       is presumably never reached in that case (glog-style fatal logging,
 *       TODO confirm) and only exists so that every path returns a value.
 */
virtual mkldnn::algorithm getAlgo(const std::string& type) const {
  if (type == "mkldnn_relu") {
    return mkldnn::algorithm::eltwise_relu;
  }
  if (type == "mkldnn_tanh") {
    return mkldnn::algorithm::eltwise_tanh;
  }
  if (type == "mkldnn_elu") {
    return mkldnn::algorithm::eltwise_elu;
  }
  LOG(FATAL) << "Unknown eltwise activation type: " << type;
  return static_cast<mkldnn::algorithm>(0);
}
/**
* reshape and reset the forward and backward primitives
* reshape and reset the forward primitives
*/
void resetPrimitives(Argument& act) {
void resetFwd(Argument& act) {
if (cnt_ == act.value->getElementCnt()) {
return;
}
......@@ -78,21 +105,13 @@ public:
auto eng = CPUEngine::Instance().getEngine();
// get algo setting
mkldnn::algorithm algo;
if (this->getName() == "mkldnn_relu") {
algo = mkldnn::algorithm::eltwise_relu;
} else if (this->getName() == "mkldnn_tanh") {
algo = mkldnn::algorithm::eltwise_tanh;
} else if (this->getName() == "mkldnn_elu") {
algo = mkldnn::algorithm::eltwise_elu;
} else {
LOG(FATAL) << "Unkown eltwise activation type: " << this->getName();
}
mkldnn::algorithm algo = getAlgo(this->getName());
// note: alpha represents the NegativeSlope when used in relu.
float alpha = getAlpha();
float beta = getBeta();
/// forward
pipelineFwd_.clear();
val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
if (val_ == nullptr) {
int bs = act.getBatchSize();
......@@ -109,33 +128,52 @@ public:
val_->getMemoryDesc(),
alpha,
beta);
auto fwdPD = eltwise_fwd::primitive_desc(fwdDesc, eng);
// inplace buffer, dst = src
fwd_.reset(new eltwise_fwd(fwdPD, *val_, *val_));
pipelineFwd_.clear();
fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng));
// use inplace for forward but save input value before submit
inVal_ = val_;
if (act.grad) {
// only copy when need do backward
inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
CHECK(copyInVal_) << "should not be emptry";
pipelineFwd_.push_back(*copyInVal_);
}
fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_));
pipelineFwd_.push_back(*fwd_);
needResetBwd_ = true;
}
/// backward
if (act.grad == nullptr) {
grad_ = nullptr;
/**
* reset the backward primitives, can not merge into resetFwd as the grad data
* would be changing before backward.
*/
void resetBwd(Argument& act) {
if (!needResetBwd_) {
return;
}
needResetBwd_ = false;
mkldnn::algorithm algo = getAlgo(this->getName());
float alpha = getBwdAlpha();
float beta = getBeta();
grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
auto eng = CPUEngine::Instance().getEngine();
auto bwdDesc = eltwise_bwd::desc(
algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, fwdPD);
bwd_.reset(new eltwise_bwd(bwdPD, *val_, *grad_, *grad_));
auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_);
CHECK(inVal_);
bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_));
pipelineBwd_.clear();
pipelineBwd_.push_back(*bwd_);
}
Error __must_check forward(Argument& act) {
resetPrimitives(act);
resetFwd(act);
stream_->submit(pipelineFwd_);
return Error();
}
/**
 * Run the backward pass of this eltwise activation.
 *
 * resetBwd(act) is called first (it returns immediately unless
 * needResetBwd_ is set), then pipelineBwd_ is submitted to stream_.
 *
 * @param act argument handed to resetBwd, which reads act.grad.
 * @return a default-constructed Error (presumably meaning success).
 */
Error __must_check backward(Argument& act) {
resetBwd(act);
stream_->submit(pipelineBwd_);
return Error();
}
......
......@@ -64,15 +64,17 @@ void MKLDNNTester::reset(const TestConfig& dnn,
configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i]));
}
refLayer_ = testLayers_[REF];
dnnLayer_ = std::dynamic_pointer_cast<MKLDNNLayer>(testLayers_[DNN]);
CHECK(dnnLayer_);
// for comparison with Paddle reference results,
// need manually add cpu device output for test
dnnLayer_->addOutputArgument(CPU_DEVICE);
dnnLayer_ = testLayers_[DNN];
EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
setInputImgSize();
// for comparison with Paddle reference results,
// need manually add cpu device output for test
MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
if (dnnLayer) {
dnnLayer->addOutputArgument(CPU_DEVICE);
}
}
void MKLDNNTester::setInputImgSize() {
......@@ -122,7 +124,7 @@ void MKLDNNTester::randomTopDiffs() {
void MKLDNNTester::checkForward() {
VLOG(MKLDNN_ALL) << "Check Forward";
printTopDatas();
double delta = compareMatrix(dnnLayer_->getOutput(-1).value,
double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value,
refLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_);
}
......@@ -155,7 +157,10 @@ void MKLDNNTester::checkBackwardWgts() {
vector<VectorPtr> dnnWgts; // used to temply save mkldnn weights
saveWgt(parameters_[DNN], dnnWgts);
dnnLayer_->convertWeightsToPaddle();
MKLDNNLayerPtr dnnLayer = std::dynamic_pointer_cast<MKLDNNLayer>(dnnLayer_);
if (dnnLayer) {
dnnLayer->convertWeightsToPaddle();
}
for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
......@@ -322,6 +327,10 @@ void MKLDNNTester::runOnce() {
// and clearTopDatas(REF) should be covered by ref layers
clearBotDiffs(REF);
clearWgtDiffs(REF);
// it is necessary to clear bottom diffs when only activation is dnn type
if (configs_[DNN].layerConfig.active_type().compare(0, 7, "mkldnn_") == 0) {
clearBotDiffs(DNN);
}
}
void MKLDNNTester::run(const TestConfig& dnn,
......@@ -333,8 +342,19 @@ void MKLDNNTester::run(const TestConfig& dnn,
float epsilon,
bool log,
int level) {
VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type()
<< " vs " << ref.layerConfig.type();
CHECK(dnn.layerConfig.type().compare(0, 7, "mkldnn_") == 0 ||
dnn.layerConfig.active_type().compare(0, 7, "mkldnn_") == 0)
<< "should be MKLDNN layer or MKLDNN activation";
if (dnn.layerConfig.type() == ref.layerConfig.type()) {
VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: "
<< dnn.layerConfig.active_type() << " vs "
<< ref.layerConfig.active_type();
} else {
VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: "
<< dnn.layerConfig.type() << " vs "
<< ref.layerConfig.type();
}
ih_ = inputImgH;
iw_ = inputImgW;
iter_ = iter;
......
......@@ -41,8 +41,7 @@ protected:
vector<LayerMap> layerMaps_;
vector<vector<ParameterPtr>> parameters_;
vector<LayerPtr> testLayers_;
LayerPtr refLayer_;
MKLDNNLayerPtr dnnLayer_;
LayerPtr refLayer_, dnnLayer_;
/// run some iterations, all the result should pass
size_t iter_;
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <vector>
#include "MKLDNNTester.h"
#include "ModelConfig.pb.h"
#include "paddle/gserver/activations/MKLDNNActivation.h"
#include "paddle/math/MathUtils.h"
using namespace paddle; // NOLINT
......@@ -190,7 +191,7 @@ void testPoolLayer(const testPoolDesc& pm) {
}
}
TEST(MkldnnLayer, PoolLayer) {
TEST(MKLDNNLayer, PoolLayer) {
/* bs, ch, ih, iw, oh, ow, fh, fw, ph, pw, sh, sw*/
testPoolLayer({2, 1, 4, 4, 2, 2, 3, 3, 0, 0, 2, 2});
testPoolLayer({10, 8, 16, 16, 8, 8, 2, 2, 0, 0, 2, 2});
......@@ -202,6 +203,49 @@ TEST(MkldnnLayer, PoolLayer) {
testPoolLayer({2, 8, 56, 56, 29, 29, 3, 3, 1, 1, 2, 2});
}
/**
 * Input description for one activation test case.
 *
 * Kept as a plain aggregate so that it can be brace-initialized in the
 * order {bs, ch, ih, iw}.
 */
struct testActDesc {
  int bs;  // batch size handed to MKLDNNTester::run
  int ch;  // presumably the channel count; a factor of the layer size
  int ih;  // input image height
  int iw;  // input image width
};
/**
 * Fill @p cfg with a bias-free "addto" layer fed by a single data input.
 *
 * The layer size and the size of the input "layer_0" are both
 * pm.ch * pm.ih * pm.iw.
 *
 * @param cfg test configuration to fill in.
 * @param pm  description supplying ch, ih and iw.
 */
static void getAddtoConfig(TestConfig& cfg, const testActDesc& pm) {
  // the layer and its only input share the same element count
  const int elemCnt = pm.ch * pm.ih * pm.iw;

  cfg.biasSize = 0;
  cfg.layerConfig.set_type("addto");
  cfg.layerConfig.set_size(elemCnt);

  cfg.inputDefs.push_back({INPUT_DATA,
                           "layer_0",
                           /* size of input layer= */ size_t(elemCnt),
                           0});
  cfg.layerConfig.add_inputs();
}
/**
 * Compare an MKLDNN activation with its Paddle reference on an "addto" layer.
 *
 * Both configurations are identical except for the activation type: the DNN
 * side uses @p type as given, the reference side uses @p type with its first
 * 7 characters (the "mkldnn_" prefix) removed.
 *
 * @param type MKLDNN activation name, expected to start with "mkldnn_".
 * @param pm   batch size and input shape of the test case.
 */
void testActivation(const std::string& type, const testActDesc& pm) {
  // Build the reference name from a copy. The caller's string is left
  // untouched and the result no longer depends on the evaluation order of
  // initializer clauses.
  std::string refType = type;
  refType.erase(0, 7);

  TestConfig cfg;
  getAddtoConfig(cfg, pm);
  TestConfig ref = cfg;
  cfg.layerConfig.set_active_type(type);
  ref.layerConfig.set_active_type(refType);

  MKLDNNTester tester;
  // run once with the requested batch size and once with batch size 1
  for (auto bs : {pm.bs, 1}) {
    tester.run(cfg, ref, bs, pm.ih, pm.iw);
  }
}
/**
 * Run testActivation for every registered MKLDNN activation type except the
 * ones listed as skipped.
 */
TEST(MKLDNNActivation, Activations) {
  // TODO(TJ): mkldnn_softmax not implemented, paddle do not have elu activation
  const std::set<string> skipped{"mkldnn_softmax", "mkldnn_elu"};
  const testActDesc desc = {16, 64, 32, 32};

  // Iterate by value on purpose: testActivation takes a non-const reference.
  for (auto actType : MKLDNNActivation::getAllRegisteredTypes()) {
    if (skipped.find(actType) != skipped.end()) {
      continue;
    }
    testActivation(actType, desc);
  }
}
// TODO(TJ): add branch test
int main(int argc, char** argv) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册