Commit e1954857 authored by tensor-tang

fix bug: merge grad must happen before backward activation.

Also add a branch net config that compares the MKLDNN result with the CPU result.
Parent 698071cc
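In short: when a layer's output feeds several branches, the branch gradients have to be summed into the output grad before backwardActivation() runs, so that the activation derivative is applied to the merged gradient rather than to a single branch. A minimal standalone toy (plain C++, not PaddlePaddle or MKL-DNN code; shapes and values are made up) showing how the two orderings diverge:

#include <cassert>
#include <vector>

int main() {
  // gradients arriving from two branches that share one output
  std::vector<float> gradBranch1 = {1.f, 2.f};
  std::vector<float> gradBranch2 = {3.f, 4.f};
  std::vector<float> actDeriv = {0.f, 1.f};  // e.g. a ReLU derivative mask

  std::vector<float> good(2), bad(2);
  for (size_t i = 0; i < 2; ++i) {
    // correct: merge the branch gradients first, then apply the derivative
    good[i] = (gradBranch1[i] + gradBranch2[i]) * actDeriv[i];
    // buggy ordering: only part of the gradient passes through the derivative
    bad[i] = gradBranch1[i] * actDeriv[i] + gradBranch2[i];
  }
  assert(good[0] == 0.f && bad[0] == 3.f);  // the results differ
  return 0;
}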
@@ -67,8 +67,14 @@ protected:
   // merge grad primitive
   std::shared_ptr<mkldnn::primitive> mergeGrad_;
+  std::vector<mkldnn::primitive> pipelineMergeGrad_;
   // tmp input argument to save input grad, only used to merge grad
   Argument tmpInArg_;
+  // mkldnn sum does not support different formats
+  // (refer to https://github.com/01org/mkl-dnn/issues/134),
+  // so the reorder is created manually and kept in a tmp MKLDNNMatrix
+  MKLDNNMatrixPtr tmpOutGrad_;
+  std::shared_ptr<mkldnn::primitive> tmpCvt_;
 public:
   explicit MKLDNNLayer(const LayerConfig& config)
@@ -148,9 +154,17 @@ public:
     if (needResetBwd_) {
       VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
       pipelineBwd_.clear();
+      pipelineMergeGrad_.clear();
+      mergeGrad_ = nullptr;
       resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
       needResetBwd_ = false;
     }
+    // merge grad must be done before backward activation
+    if (mergeGrad_) {
+      REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str());
+      stream_->submit(pipelineMergeGrad_);
+    }
     {
       REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
       backwardActivation();
@@ -262,6 +276,7 @@ protected:
                         mkldnn::memory::primitive_desc pd) {
     CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
     mergeGrad_ = nullptr;
+    pipelineMergeGrad_.clear();
     out = MKLDNNMatrix::create(output_.grad, pd);
     if (outputMap_.size() <= 1) {
       return;
@@ -272,6 +287,7 @@ protected:
     for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
       MKLDNNMatrixPtr src =
           std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
+      VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first;
       CHECK(src) << "should be MKLDNNMatrix";
       auto srcDims = src->getDims();
       auto dstDims = out->getDims();
@@ -283,9 +299,26 @@ protected:
       srcs.push_back(*src);
       scales.push_back(1.0);
     }
-    auto sumPD = mkldnn::sum::primitive_desc(pd.desc(), scales, srcPDs);
-    mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *out));
-    pipelineBwd_.insert(pipelineBwd_.begin(), *mergeGrad_);
+    // TODO(TJ): remove me when mkldnn sum supports different formats
+    for (size_t i = 1; i < srcPDs.size(); ++i) {
+      CHECK(srcPDs[0] == srcPDs[i]);
+    }
+    tmpOutGrad_ = nullptr;
+    tmpCvt_ = nullptr;
+    if (out->getPrimitiveDesc() != srcPDs[0]) {
+      tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
+      tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
+      CHECK(tmpCvt_);
+      pipelineMergeGrad_.push_back(*tmpCvt_);
+    } else {
+      tmpOutGrad_ = out;
+    }
+    auto sumPD = mkldnn::sum::primitive_desc(
+        tmpOutGrad_->getMemoryDesc(), scales, srcPDs);
+    mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_));
+    pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
   }
   /**
@@ -299,7 +332,7 @@ protected:
     const MatrixPtr& grad =
         input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
     in = MKLDNNMatrix::create(grad, pd);
-    auto arg = input->getOutput(this->getName());
+    Argument& arg = input->getOutput(this->getName());
     arg.grad = std::dynamic_pointer_cast<Matrix>(in);
   }
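The last hunk matters because `auto` deduces a value type here: `arg` was a copy of the Argument, so assigning `arg.grad` never reached the input layer's real output. A tiny self-contained illustration of that pitfall (the Layer, Argument, and Matrix below are hypothetical stand-ins, not the PaddlePaddle types):

#include <cassert>
#include <memory>

struct Matrix {};
struct Argument { std::shared_ptr<Matrix> grad; };

struct Layer {
  Argument out;
  Argument& getOutput() { return out; }
};

int main() {
  Layer layer;

  auto copy = layer.getOutput();         // `auto` drops the reference: a copy
  copy.grad = std::make_shared<Matrix>();
  assert(layer.out.grad == nullptr);     // the layer's real Argument is untouched

  Argument& ref = layer.getOutput();     // bind a reference, as the fix does
  ref.grad = std::make_shared<Matrix>();
  assert(layer.out.grad != nullptr);     // now the layer actually receives the grad
  return 0;
}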
......
@@ -48,6 +48,13 @@ if(WITH_MKLDNN)
                 --config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False
                 --use_gpu=False
         WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
+  add_test(NAME test_CompareMKLDNNandCPU_Banches
+    COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
+        ${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU
+                --config_file_a=trainer/tests/sample_trainer_config_branch_net.conf --use_mkldnn_a=True
+                --config_file_b=trainer/tests/sample_trainer_config_branch_net.conf --use_mkldnn_b=False
+                --use_gpu=False
+        WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
 ############### test_CompareTwoOpts ###################
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(batch_size = 256,
         learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
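# This branch net exercises layers whose output feeds multiple branches:
# two conv branches are merged by concat_layer, then two conv+pool branches
# are merged by addto_layer, so output gradients must be merged during backward.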
data = data_layer(name="input", size=784)
tmp = img_conv_layer(input=data,
                     num_channels=1,
                     filter_size=3,
                     num_filters=32,
                     padding=1,
                     shared_biases=True,
                     act=ReluActivation())
a1 = img_conv_layer(input=tmp,
                    filter_size=1,
                    num_filters=32,
                    padding=0,
                    shared_biases=True,
                    act=ReluActivation())
a2 = img_conv_layer(input=tmp,
                    filter_size=3,
                    num_filters=32,
                    padding=1,
                    shared_biases=True,
                    act=ReluActivation())
tmp = concat_layer(input=[a1, a2])
tmp = img_pool_layer(input=tmp,
                     num_channels=64,
                     pool_size=3,
                     stride=2,
                     padding=1,
                     pool_type=AvgPooling())
b1 = img_conv_layer(input=tmp,
                    filter_size=3,
                    num_filters=64,
                    padding=1,
                    shared_biases=True,
                    act=ReluActivation())
b1 = img_pool_layer(input=b1,
                    pool_size=3,
                    stride=1,
                    padding=1,
                    pool_type=MaxPooling())
b2 = img_conv_layer(input=tmp,
                    filter_size=5,
                    num_filters=64,
                    padding=2,
                    shared_biases=True,
                    act=ReluActivation())
b2 = img_pool_layer(input=b2,
                    pool_size=5,
                    stride=1,
                    padding=2,
                    pool_type=MaxPooling())
tmp = addto_layer(input=[b1, b2],
                  act=ReluActivation(),
                  bias_attr=False)
tmp = img_pool_layer(input=tmp,
                     pool_size=3,
                     stride=2,
                     padding=1,
                     pool_type=MaxPooling())
tmp = fc_layer(input=tmp, size=64,
               bias_attr=False,
               act=TanhActivation())
output = fc_layer(input=tmp, size=10,
                  bias_attr=True,
                  act=SoftmaxActivation())
lbl = data_layer(name="label", size=10)
cost = classification_cost(input=output, label=lbl)
outputs(cost)