diff --git a/.gitignore b/.gitignore index 9622ab78e0e0556ec2b4cc974fee93ff680d54d2..4f21fefda9f64a0392881971a715b97c234030e3 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ cmake-build-* # generated while compiling python/paddle/v2/framework/core.so +paddle/pybind/pybind.h CMakeFiles cmake_install.cmake paddle/.timestamp diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 8d5d533126c9b7fa84c725d614cf3486126d0284..4823dc3e91390002aefac70f7931b4197db05789 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -26,9 +26,9 @@ set(IGNORE_PATTERN .*ImportanceSampler.* .*cblas\\.h.* .*\\.pb\\.txt - .*LtrDataProvider.* .*MultiDataProvider.* - .*pb.*) + .*pb.* + .*pybind.h) # add_style_check_target # diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index e3892849abe21fc207d2fcbe4adc65184ba771f4..c6570b89aedfaac1aef9b00e889b0b3ed21d8d65 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -34,7 +34,7 @@ Kernel实现 | CPU、GPU共享Kernel实现在`.h`文件中,否则,CPU 注册Op | Op注册实现在`.cc`文件;Kernel注册CPU实现在`.cc`文件中,GPU实现在`.cu`文件中 -实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。 +实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。**系统会根据文件名自动构建op和其对应的Python扩展。** 下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 @@ -224,45 +224,15 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, ### 5. 编译 -- 简单**无特殊依赖**的OP无需修改CMakeList.txt文件。[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt) 会自动将 `paddle/operators` 目录下新增的 `*_op.cc` 文件加入编译。 -- 较为复杂、**有额外依赖** 的operator仍需要修改[paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt)。如,`mul_op` 依赖 `math_function`,需要在`CMakeLists.txt`中添加如下内容: +运行下面命令可以进行编译: - ``` - op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) + - ``` - -- 运行下面命令可以进行编译: - - ``` - make mul_op - ``` +``` +make mul_op +``` ## 绑定Python -- 绑定Python - - 在 [`paddle/pybind/pybind.cc -`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc) 使用`USE_OP`告知编译器需要链接的Op,具体解释参考[代码注释](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81)。 - - ``` - USE_OP(mul); - ``` - 如果只实现了CPU版本,则使用`USE_CPU_ONLY_OP`: - - ``` - USE_CPU_ONLY_OP(gather); - ``` - - 如果OP不带Kernel,则使用`USE_NO_KENREL_OP`: - - ``` - USE_NO_KENREL_OP(recurrent); - ``` - - - - 生成库 - - `paddle/operators` 目录下新增的 `*_op.cc` 文件会被自动添加链接到生成的lib库中。 +系统会对新增的op自动绑定Python,并链接到生成的lib库中。 ## 实现单元测试 @@ -367,3 +337,10 @@ make test ARGS="-R test_mul_op -V" ```bash ctest -R test_mul_op ``` + +## 注意事项 + +- 为每个Op创建单独的`*_op.h`(如有)、`*_op.cc`和`*_op.cu`(如有)。不允许一个文件中包含多个Op,这将会导致编译出错。 +- 注册Op时的类型名,需要和该Op的名字一样。即不允许在`A_op.cc`里面,注册`REGISTER_OP(B, ...)`等,这将会导致单元测试出错。 +- 如果Op没有实现GPU Kernel,请不要创建空的`*_op.cu`,这将会导致单元测试出错。 +- 如果多个Op依赖一些共用的函数,可以创建非`*_op.*`格式的文件来存放,如`gather.h`文件。 diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 568f4e89819c8345d8908634f6fa56f09483a763..fac5cd20aa7f9db0792f8102bb442192ab1ad63f 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -51,18 +51,15 @@ bool operator==(const LoD& a, const LoD& b); * LoDTensor (Level of details Tensor) * see https://en.wikipedia.org/wiki/Level_of_details for reference. */ -class LoDTensor { +class LoDTensor : public Tensor { public: LoDTensor() {} - LoDTensor(const LoD& lod, Tensor* t) : lod_(lod), tensor_(t) {} - void set_lod(const LoD& lod) { lod_ = lod; } - - void set_tensor(Tensor* tensor) { tensor_ = tensor; } + explicit LoDTensor(const LoD& lod) : lod_(lod) {} - Tensor& tensor() { return *tensor_; } + void set_lod(const LoD& lod) { lod_ = lod; } - LoD lod() { return lod_; } + LoD lod() const { return lod_; } /* * Get a element from LoD. @@ -104,7 +101,6 @@ class LoDTensor { private: LoD lod_; - Tensor* tensor_; // not owned }; } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 1da8553134f377f7a4fbe8008d12fe8d4a0e47f4..7915326b27a22e9280e3f09d9bbfc2a58f46aff7 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -36,69 +36,64 @@ class LoDTensorTester : public ::testing::Test { ASSERT_EQ(lod.size(), 3UL); - tensor.Resize({20 /*batch size*/, 128 /*dim*/}); + lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/}); // malloc memory - tensor.mutable_data(place); + lod_tensor_.mutable_data(place); - lod_tensor.set_lod(lod); - lod_tensor.set_tensor(&tensor); + lod_tensor_.set_lod(lod); } protected: platform::CPUPlace place; - Tensor tensor; - LoDTensor lod_tensor; + LoDTensor lod_tensor_; }; -TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); } +TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); } TEST_F(LoDTensorTester, NumElements) { - ASSERT_EQ(lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(lod_tensor_.NumElements(0), 2UL); + ASSERT_EQ(lod_tensor_.NumElements(1), 4UL); + ASSERT_EQ(lod_tensor_.NumElements(2), 8UL); } TEST_F(LoDTensorTester, SliceLevels) { // slice 1 level for (size_t level = 0; level < 3UL; ++level) { - LoDTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.SliceLevels(level, level + 1); ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level)); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level)); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } // slice 2 level for (size_t level = 0; level < 2UL; ++level) { - LoDTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.SliceLevels(level, level + 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level)); - ASSERT_EQ(new_lod_tensor.NumElements(1), lod_tensor.NumElements(level + 1)); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor_.NumElements(level + 1)); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } } TEST_F(LoDTensorTester, SliceInLevel) { size_t level = 0; - LoDTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.SliceInLevel(level, 0, 2); EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL); EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL); EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); level = 1; - new_lod_tensor = lod_tensor; + new_lod_tensor = lod_tensor_; new_lod_tensor.SliceInLevel(level, 0, 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } } // namespace framework diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 1079a36a2e7b24f6f8a5bcbb296355567305a765..97e69cdb2e5e1e64031c899f5e04020665485ba8 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -26,18 +26,16 @@ __global__ void test(size_t* a, int size) { } TEST(LoDTensor, LoDInGPU) { - paddle::framework::Tensor tensor; paddle::framework::LoDTensor lod_tensor; paddle::platform::GPUPlace place(0); paddle::framework::LoD src_lod; src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14}); - tensor.Resize({14, 16}); - tensor.mutable_data(place); + lod_tensor.Resize({14, 16}); + lod_tensor.mutable_data(place); lod_tensor.set_lod(src_lod); - lod_tensor.set_tensor(&tensor); CHECK_EQ(lod_tensor.lod_element(0, 2), 4); CHECK_EQ(lod_tensor.lod_element(0, 4), 8); diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index e1e122091f7759b1a68f1f982bc2a35e8241f9f0..c57537be4bf67a8db6a49669ab8d2ed1b1324bdc 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -186,6 +186,48 @@ void OperatorBase::GenerateTemporaryNames() { } } +template <> +const Tensor* InferShapeContext::Input(const std::string& name) const { + auto* var = InputVar(name); + return var == nullptr ? nullptr : GetTensorFromVar(var); +} + +template <> +const std::vector InferShapeContext::MultiInput( + const std::string& name) const { + auto names = op().Inputs(name); + std::vector res; + res.reserve(names.size()); + std::transform(names.begin(), names.end(), std::back_inserter(res), + [&](const std::string& sub_name) { + auto var = scope_.FindVar(sub_name); + return var == nullptr ? nullptr : GetTensorFromVar(var); + }); + return res; +} + +template <> +Tensor* ExecutionContext::Output(const std::string& name) const { + auto* var = OutputVar(name); + return var == nullptr ? nullptr : const_cast(GetTensorFromVar(var)); +} + +template <> +std::vector ExecutionContext::MultiOutput( + const std::string& name) const { + auto names = op().Outputs(name); + std::vector res; + res.reserve(names.size()); + std::transform(names.begin(), names.end(), std::back_inserter(res), + [&](const std::string& sub_name) { + auto var = scope().FindVar(sub_name); + return var == nullptr + ? nullptr + : const_cast(GetTensorFromVar(var)); + }); + return res; +} + void OpProtoAndCheckerMaker::Validate() { validated_ = true; CheckNoDuplicatedInOutAttrs(); diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 4600b06009bcef7d0774d25b816aac4733f30795..adae7bfc3d7d31b1ed0373f01db4ef80343a08f7 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -22,6 +22,7 @@ limitations under the License. */ #include "op_info.h" #include "paddle/framework/attribute.h" #include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" #include "paddle/framework/scope.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" @@ -326,11 +327,27 @@ class InferShapeContext { return res; } + const Tensor* GetTensorFromVar(const Variable* var) const { + if (var->IsType()) { + return &var->Get(); + } + PADDLE_ENFORCE(var->IsType(), + "The Input(%s) must be LoDTensor or Tensor."); + return &var->Get(); + } + private: const OperatorBase& op_; const Scope& scope_; }; +template <> +const Tensor* InferShapeContext::Input(const std::string& name) const; + +template <> +const std::vector InferShapeContext::MultiInput( + const std::string& name) const; + template struct EigenDeviceConverter; @@ -363,9 +380,37 @@ class ExecutionContext : public InferShapeContext { return device_context_; } + // redefine Output function, + // use Variable::Get instead of Variable::GetMutable + template + T* Output(const std::string& name) const { + auto var = OutputVar(name); + return var == nullptr ? nullptr : const_cast(&var->Get()); + } + + // redefine MultiOutput function. + // use Variable::Get instead of Variable::GetMutable + template + std::vector MultiOutput(const std::string& name) const { + auto names = op().Outputs(name); + std::vector res; + res.reserve(names.size()); + std::transform( + names.begin(), names.end(), std::back_inserter(res), + [&](const std::string& sub_name) { return Output(sub_name); }); + return res; + } + const platform::DeviceContext* device_context_; }; +template <> +Tensor* ExecutionContext::Output(const std::string& name) const; + +template <> +std::vector ExecutionContext::MultiOutput( + const std::string& name) const; + class OpKernel { public: /** diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f8c06c5f868f8d48a9a222b92315ee0ef2cf265e --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -0,0 +1,543 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNConvLayer.h" +#include "paddle/math/MathUtils.h" +#include "paddle/utils/Logging.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; + +namespace paddle { + +REGISTER_LAYER(mkldnn_conv, MKLDNNConvLayer); + +bool MKLDNNConvLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(config_.shared_biases()) << "Only support shared biases yet"; + + oc_ = config_.num_filters(); + const ConvConfig& conf = config_.inputs(0).conv_conf(); + ic_ = conf.channels(); + fw_ = conf.filter_size(); + fh_ = conf.filter_size_y(); + pw_ = conf.padding(); + ph_ = conf.padding_y(); + dw_ = conf.dilation(); + dh_ = conf.dilation_y(); + sw_ = conf.stride(); + sh_ = conf.stride_y(); + gp_ = conf.groups(); + oh_ = conf.output_y(); + ow_ = conf.output_x(); + ih_ = conf.img_size_y(); + iw_ = conf.img_size(); + caffeMode_ = conf.caffe_mode(); + CHECK(caffeMode_) << "Only support caffe mode yet"; + CHECK(dh_ == 1 && dw_ == 1) << "Only support dilation 1 yet"; + // check group setting + CHECK_EQ((oc_ / gp_) * gp_, oc_) << "group is indivisible for oc"; + CHECK_EQ((ic_ / gp_) * gp_, ic_) << "group is indivisible for ic"; + + // create weight + size_t height = oc_ / gp_; + size_t width = ic_ * fh_ * fw_; + CHECK_EQ(parameters_[0]->getSize(), height * width); + weight_ = + std::unique_ptr(new Weight(height, width, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MKLDNNConvLayer::convertWeightsFromPaddle() { + if (hasInitedWgt_) { + return; + } + + CHECK(wgtVal_) << "should have been initialized"; + // the paddle weight format is oihw or goihw + auto targetDim = wgtVal_->getDims(); + auto srcFmt = (gp_ == 1) ? memory::format::oihw : memory::format::goihw; + wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); + hasInitedWgt_ = true; +} + +void MKLDNNConvLayer::convertWeightsToPaddle() { + CHECK(wgtVal_) << "should have been initialized"; + auto targetDim = wgtVal_->getDims(); + auto dstFmt = (gp_ == 1) ? memory::format::oihw : memory::format::goihw; + wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); +} + +void MKLDNNConvLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); + + // cal output sizes + // oc can not be changed + int fh = (fh_ - 1) * dh_ + 1; + int fw = (fw_ - 1) * dw_ + 1; + oh = outputSize(ih, fh, ph_, sh_, caffeMode_); + ow = outputSize(iw, fw, pw_, sw_, caffeMode_); + + reshapeOutput(oh, ow); + resizeOutput(bs, oc * oh * ow); + + printSizeInfo(); +} + +void MKLDNNConvLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetFwdPD(fwdPD_); + + resetFwdBuffers(fwdPD_, in, wgt, bias, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + + printValueFormatFlow(); +} + +void MKLDNNConvLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr bwdWgtPD; + std::shared_ptr bwdDataPD; + + resetBwdWgtPD(bwdWgtPD); + + resetBwdDataPD(bwdDataPD); + + resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + printGradFormatFlow(); +} + +void MKLDNNConvLayer::updateInputData() { + cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +} + +void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt, + memory::dims& bias, + memory::dims& stride, + memory::dims& dilation, + memory::dims& padL, + memory::dims& padR) { + wgt = (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_} + : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_}; + bias = memory::dims{oc_}; + stride = memory::dims{sh_, sw_}; + padL = memory::dims{ph_, pw_}; + padR = getPaddingR(); + // note: mkldnn dilation start from 0 + dilation = memory::dims{dh_ - 1, dw_ - 1}; +} + +void MKLDNNConvLayer::resetFwdPD( + std::shared_ptr& pd) { + // dims for conv + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + + prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring + : prop_kind::forward_training; + algorithm algo = algorithm::convolution_direct; + padding_kind padKind = padding_kind::zero; + conv_fwd::desc fwdDesc = + biases_ && biases_->getW() + ? conv_fwd::desc(pk, + algo, + MKLDNNMatrix::createMemoryDesc(inDims), + MKLDNNMatrix::createMemoryDesc(wgtDims), + MKLDNNMatrix::createMemoryDesc(biasDims), + MKLDNNMatrix::createMemoryDesc(outDims), + strides, + dilations, + padL, + padR, + padKind) + : conv_fwd::desc(pk, + algo, + MKLDNNMatrix::createMemoryDesc(inDims), + MKLDNNMatrix::createMemoryDesc(wgtDims), + MKLDNNMatrix::createMemoryDesc(outDims), + strides, + dilations, + padL, + padR, + padKind); + pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_)); +} + +void MKLDNNConvLayer::resetFwdBuffers( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(pd); + resetInValue(pd, in); + + resetWgtBiasValue(pd, wgt, bias); + + resetOutValue(pd, out); +} + +void MKLDNNConvLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (cvtInVal_) { + pipeline.push_back(*cvtInVal_); + } + + if (bias) { + fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out)); + } else { + fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out)); + } + pipeline.push_back(*fwd_); + + if (cvtOutVal_) { + pipeline.push_back(*cvtOutVal_); + } +} + +void MKLDNNConvLayer::resetInValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& in) { + const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; + in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); + + // create buffer and reorder if input value do not match + cpuInVal_ = nullptr; + cvtInVal_ = nullptr; + if (inputIsOnlyMKLDNN()) { + MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast(inMat); + CHECK(dnnIn) << "Input should be MKLDNNMatrix"; + if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) { + CHECK_EQ(dnnIn->getFormat(), format::nc); + CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format"; + // create a new one with nchw format and same data + memory::dims inDims = memory::dims{bs_, ic_, 1, 1}; + dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_); + CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()); + } + in = dnnIn; + } else { + const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; + cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_); + if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) { + // create new mkldnn matrix + in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc()); + cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in); + CHECK(cvtInVal_) << "should not be emptry"; + } else { + in = cpuInVal_; + } + } +} + +void MKLDNNConvLayer::resetWgtBiasValue( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc()); + VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat(); + + bias = nullptr; + if (biases_ && biases_->getW()) { + bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()); + } +} + +void MKLDNNConvLayer::resetOutValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc()); + + // change original output value from cpu matrix to mkldnn matrix + output_.value = std::dynamic_pointer_cast(out); + + // create reorder if output value has cpu device and pd do not match + cpuOutVal_ = nullptr; + cpuOutVal_ = nullptr; + if (!outputIsOnlyMKLDNN()) { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); + if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); + CHECK(cvtOutVal_) << "should not be emptry"; + } else { + // CPU output share the same data of MKLDNN output + cpuOut->setData(out->getData()); + cpuOutVal_ = out; + } + } +} + +void MKLDNNConvLayer::resetBwdWgtPD( + std::shared_ptr& pd) { + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + + // create backward weight using input, output and weight value memory desc + CHECK(inVal_) << "Should have input value"; + CHECK(outVal_) << "Should have output value"; + CHECK(wgtVal_) << "Should have weight value"; + algorithm algo = algorithm::convolution_direct; + padding_kind padKind = padding_kind::zero; + auto bwdWgtDesc = biasVal_ != nullptr + ? conv_bwdWgt::desc(algo, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padKind) + : conv_bwdWgt::desc(algo, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padKind); + pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); + CHECK(pd->src_primitive_desc() == inVal_->getPrimitiveDesc()) + << "primitive desc of in value should equal"; + CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad should equal the out value"; + CHECK(pd->diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc()) + << "primitive desc of weight grad should equal the weight value"; +} + +void MKLDNNConvLayer::resetBwdDataPD( + std::shared_ptr& pd) { + if (inputLayers_[0]->getOutput().grad == nullptr) { + return; + } + + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + CHECK(inVal_) << "Should have input value"; + CHECK(outVal_) << "Should have output value"; + // create backward data using input and output value memory desc + // but using weight memory desc with any format + auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, + inVal_->getMemoryDesc(), + MKLDNNMatrix::createMemoryDesc(wgtDims), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padding_kind::zero); + pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); + CHECK(pd->diff_src_primitive_desc() == inVal_->getPrimitiveDesc()) + << "primitive desc of in grad should equal the in value"; + CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad should equal"; +} + +void MKLDNNConvLayer::resetBwdBuffers( + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(wgtPD); + resetOutGrad(wgtPD, out); + + resetWgtBiasGrad(wgtPD, wgt, bias); + + resetInGrad(dataPD, in); + + resetWgtValBwdData(dataPD, wgtValBwdData_); +} + +void MKLDNNConvLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (cvtOutGrad_) { + pipeline.push_back(*cvtOutGrad_); + } + + // add bwdWgt handle + if (bias) { + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)); + } else { + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt)); + } + pipeline.push_back(*bwdWgt_); + + if (dataPD == nullptr) { + return; + } + + if (cvtWgtVal_) { + pipeline.push_back(*cvtWgtVal_); + } + + // add bwdData handle + CHECK(wgtValBwdData_) << "Should have weight memory"; + bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in)); + pipeline.push_back(*bwdData_); + + if (cvtInGrad_) { + pipeline.push_back(*cvtInGrad_); + } +} + +void MKLDNNConvLayer::resetOutGrad( + std::shared_ptr& wgtPD, MKLDNNMatrixPtr& out) { + const MatrixPtr& outMat = output_.grad; + out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc()); + CHECK(outVal_ != nullptr && + out->getPrimitiveDesc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad and value should be equal"; + + // TODO(TJ): merge outgrad + // create reorder if has output grad does not match + cpuOutGrad_ = nullptr; + cvtOutGrad_ = nullptr; + if (!outputIsOnlyMKLDNN()) { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; + // same PrimitiveDesc with cpuInVal_ + CHECK(cpuOutVal_); + cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); + if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) { + outMat->setData(cpuOut->getData()); + out = cpuOutGrad_; + } else { + cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); + CHECK(cvtOutGrad_); + } + } +} + +void MKLDNNConvLayer::resetWgtBiasGrad( + std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getWGrad(), + wgtPD->diff_weights_primitive_desc()); + CHECK(nullptr != wgtVal_ && + wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc()) + << "primitive desc of weight grad and value should be equal"; + VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat(); + + if (biasVal_ == nullptr) { + return; + } + bias = MKLDNNMatrix::create(biases_->getWGrad(), + wgtPD->diff_bias_primitive_desc()); + CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc()) + << "primitive desc of bias grad should equal the bias value"; +} + +void MKLDNNConvLayer::resetInGrad( + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in) { + if (dataPD == nullptr) { + return; + } + + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done + in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad, + dataPD->diff_src_primitive_desc()); + CHECK(nullptr != inVal_ && + in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) + << "primitive desc of input grad and value should be equal"; + + // create reorder if has output grad does not match + cpuInGrad_ = nullptr; + cvtInGrad_ = nullptr; + if (!inputIsOnlyMKLDNN()) { + const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); + // same PrimitiveDesc with cpuInVal_ + CHECK(cpuInVal_); + cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc()); + if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) { + const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE); + in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc()); + cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_); + CHECK(cvtInGrad_); + } else { + in = cpuInGrad_; + } + } +} + +void MKLDNNConvLayer::resetWgtValBwdData( + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt) { + if (dataPD == nullptr) { + return; + } + + // create new weight value for backward data, and create reorder if necessary + // since the primitive_desc would be different with wgtVal_ + CHECK(wgtVal_) << "should have weight value"; + if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) { + wgtValBwdData_ = + MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc()); + cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_); + CHECK(cvtWgtVal_); + } else { + wgtValBwdData_ = wgtVal_; + } + VLOG(MKLDNN_FMTS) << "weight value format for backward data" + << wgtValBwdData_->getFormat(); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..f84f2f737c47a1b8adc2b83360a0396ffbc6ae24 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -0,0 +1,253 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { +typedef mkldnn::convolution_forward conv_fwd; +typedef mkldnn::convolution_backward_weights conv_bwdWgt; +typedef mkldnn::convolution_backward_data conv_bwdData; + +/** + * @brief A subclass of MKLDNNLayer conv layer. + * + * The config file api is mkldnn_conv + */ +class MKLDNNConvLayer : public MKLDNNLayer { +protected: + // padding height and width + int ph_, pw_; + // stride height and width + int sh_, sw_; + // dilation height and width + int dh_, dw_; + // filter(kenerl) height and width + int fh_, fw_; + // group number + int gp_; + + // in resetBwdData, the format of wgtValBwdData_ is different with wgtVal_ + MKLDNNMatrixPtr wgtValBwdData_; + // convert handle from wgtVal_ to wgtValBwdData_ + std::shared_ptr cvtWgtVal_; + + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + + // MKLDNNMatrixPtr which should be created from CPU Device + MKLDNNMatrixPtr cpuInVal_; + MKLDNNMatrixPtr cpuInGrad_; + MKLDNNMatrixPtr cpuOutVal_; + MKLDNNMatrixPtr cpuOutGrad_; + // convert handle between CPU device and MKLDNN device + std::shared_ptr cvtInVal_; + std::shared_ptr cvtInGrad_; + std::shared_ptr cvtOutVal_; + std::shared_ptr cvtOutGrad_; + + // whether the weight has been init + bool hasInitedWgt_; + + // true by default, which impact the calculation of output image size. + // details can refer to mathUtil.h + bool caffeMode_; + + // weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + +public: + explicit MKLDNNConvLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), caffeMode_(true) {} + + ~MKLDNNConvLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void updateInputData() override; + + void updateWeights(const UpdateCallback& callback) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; + + void printSizeInfo() override { + MKLDNNLayer::printSizeInfo(); + VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ + << ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ + << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_; + } + + void printValueFormatFlow() override { + if (cpuInVal_) { + VLOG(MKLDNN_FMTS) << cpuInVal_->getFormat() << " >>>"; + } + MKLDNNLayer::printValueFormatFlow(); + if (cpuOutVal_) { + VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat(); + } + } + + void printGradFormatFlow() override { + if (cpuInGrad_) { + VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<"; + } + MKLDNNLayer::printGradFormatFlow(); + if (cpuOutGrad_) { + VLOG(MKLDNN_FMTS) << " <<< " << cpuOutGrad_->getFormat(); + } + } + +protected: + /** + * load the dims settings of this conv + */ + void loadConvSettings(mkldnn::memory::dims& wgt, + mkldnn::memory::dims& bias, + mkldnn::memory::dims& stride, + mkldnn::memory::dims& dilation, + mkldnn::memory::dims& padL, + mkldnn::memory::dims& padR); + + /** + * reset the forward primitive descriptor. + */ + void resetFwdPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in forward. + */ + void resetFwdBuffers(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the forward pipeline. + */ + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of input value + */ + void resetInValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight and bias value + */ + void resetWgtBiasValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of output value + */ + void resetOutValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& out); + + /** + * reset the backward weight primitive descriptor. + */ + void resetBwdWgtPD(std::shared_ptr& pd); + /** + * reset the backward data primitive descriptor. + */ + void resetBwdDataPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in backward. + */ + void resetBwdBuffers(std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the backward pipeline. + */ + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of output grad + */ + void resetOutGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& out); + /** + * reset MKLDNNMatrix of weight and bias grad + */ + void resetWgtBiasGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of input grad + */ + void resetInGrad(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight value for backward data + * since the primitive_desc would be different with wgtVal_ + */ + void resetWgtValBwdData(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt); + + /** + * get padding_r according to + * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ + * test_convolution_forward_common.hpp + * @note: mkldnn dilation start from 0 while paddle start from 1 + */ + mkldnn::memory::dims getPaddingR() const { + mkldnn::memory::dims padR = {ph_, pw_}; + for (int i = 0; i < 2; ++i) { + if ((ih_ - ((fh_ - 1) * dh_ + 1) + ph_ + padR[0]) / sh_ + 1 != oh_) { + ++padR[0]; + } + if ((iw_ - ((fw_ - 1) * dw_ + 1) + pw_ + padR[1]) / sw_ + 1 != ow_) { + ++padR[1]; + } + } + return padR; + } +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index e1d2270df24331914f3a51acc90a518084b3ce4e..e70802881e3f22160a87b7a4babda07ffbcf9d6f 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "MKLDNNTester.h" #include "ModelConfig.pb.h" +#include "paddle/math/MathUtils.h" using namespace paddle; // NOLINT @@ -63,6 +64,83 @@ TEST(MKLDNNLayer, FcLayer) { testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); } +struct testConvDesc { + int bs, gp; + int ic, ih, iw; + int oc, oh, ow; + int fh, fw; + int ph, pw; + int sh, sw; + int dh, dw; +}; + +void testConvLayer(const testConvDesc& pm) { + const std::string compareTypes[] = {"mkldnn_conv", "exconv"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_num_filters(pm.oc); + cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow); + // cfg.layerConfig.set_partial_sum(1); // TODO: check it + cfg.layerConfig.set_shared_biases(true); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.fh * pm.fw / pm.gp)}); + LayerInputConfig* input = cfg.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_groups(pm.gp); + conv->set_img_size(pm.iw); + conv->set_img_size_y(pm.ih); + conv->set_output_x(pm.ow); + conv->set_output_y(pm.oh); + conv->set_filter_size(pm.fw); + conv->set_filter_size_y(pm.fh); + conv->set_channels(pm.ic); + conv->set_padding(pm.pw); + conv->set_padding_y(pm.ph); + conv->set_stride(pm.sw); + conv->set_stride_y(pm.sh); + conv->set_dilation(pm.dw); + conv->set_dilation_y(pm.dh); + conv->set_caffe_mode(true); + conv->set_filter_channels(conv->channels() / conv->groups()); + CHECK_EQ(conv->filter_channels() * pm.gp, conv->channels()) + << "it is indivisible"; + + int fh = (pm.fh - 1) * pm.dh + 1; + int fw = (pm.fw - 1) * pm.dw + 1; + int ow = outputSize(pm.iw, fw, pm.pw, pm.sw, true); + int oh = outputSize(pm.ih, fh, pm.ph, pm.sh, true); + CHECK_EQ(ow, pm.ow) << "output size check failed"; + CHECK_EQ(oh, pm.oh) << "output size check failed"; + + MKLDNNTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MKLDNNLayer, ConvLayer) { + /* bs, gp, ic, ih, iw, oc, oh, ow, fh, fw, ph, pw, sh, sw, dh, dw */ + testConvLayer({2, 1, 3, 32, 32, 16, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({2, 1, 8, 16, 16, 8, 16, 16, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({3, 1, 16, 32, 32, 3, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({8, 1, 16, 18, 18, 32, 18, 18, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({16, 1, 1, 42, 31, 32, 23, 11, 4, 5, 3, 2, 2, 3, 1, 1}); + testConvLayer({2, 1, 8, 16, 16, 8, 8, 8, 3, 3, 1, 1, 2, 2, 1, 1}); + testConvLayer({3, 1, 8, 13, 13, 8, 7, 7, 3, 3, 1, 1, 2, 2, 1, 1}); + // with groups + testConvLayer({2, 2, 4, 5, 5, 8, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({2, 3, 3, 5, 5, 3, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({4, 4, 16, 3, 3, 16, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1}); +} + // TODO(TJ): add branch test int main(int argc, char** argv) { diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index c4063e5069854242d9f93886b66580385557ca73..0778bb63b7b3bca9b3d2647ca43dad72d783950a 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -49,6 +49,27 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg)); } +std::shared_ptr MKLDNNMatrix::createReorder(const MKLDNNMatrixPtr& src, + const MKLDNNMatrixPtr& dst, + bool checkData) { + if (src == dst || src->getPrimitiveDesc() == dst->getPrimitiveDesc()) { + return nullptr; + } + + if (checkData && (src->getData() == dst->getData())) { + LOG(FATAL) << "can not create reorder with inplace data"; + return nullptr; + } + + memory::dims srcDims = src->getDims(); + memory::dims dstDims = dst->getDims(); + CHECK_EQ(srcDims.size(), dstDims.size()); + for (size_t i = 0; i < srcDims.size(); ++i) { + CHECK_EQ(srcDims[i], dstDims[i]); + } + return std::make_shared(*src, *dst); +} + void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, memory::format srcFmt, memory::dims targetDim) { diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index eef3b429e6fa0087aeac3f5aed9dff983b06e826..0aa130b4a0d458ad78d5d1330164af9e73b22a44 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -52,6 +52,31 @@ public: mkldnn::engine& eg, mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + /** + * Create Memory descriptor. + * default with any format and f32 dtype + */ + static mkldnn::memory::desc createMemoryDesc( + const mkldnn::memory::dims& dims, + const mkldnn::memory::format& fmt = mkldnn::memory::format::any, + const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) { + return mkldnn::memory::desc(dims, dtype, fmt); + } + + /** + * Create reorder primitive. + * Create a mkldnn::reorder handle for converting src MKLDNNMatrix to dst. + * checkData: for whether to check the data handle of src and dst is the same. + * if true, means check it and do not want support inplace reorder; + * otherwise do not check data which means the created reorder + * maybe inplace buffer and do not guarantee the logical is correct + * since not all format or conversion support inplace. + */ + static std::shared_ptr createReorder( + const MKLDNNMatrixPtr& src, + const MKLDNNMatrixPtr& dst, + bool checkData = true); + public: /** * Reorder this MKLDNNMatrix from other format. diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f9ea25ab045a02be5ab9ed81ef9c679126d3a188..5b65584327e8afca383d2171cb42e3984baa8654 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -1,5 +1,7 @@ file(GLOB GENERAL_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") string(REPLACE ".cc" "" GENERAL_OPS "${GENERAL_OPS}") +set(pybind_file ${PADDLE_SOURCE_DIR}/paddle/pybind/pybind.h) +file(WRITE ${pybind_file} "// Generated by the paddle/operator/CMakeLists.txt. DO NOT EDIT!\n\n") function(op_library TARGET) # op_library is a function to create op library. The interface is same as # cc_library. But it handle split GPU/CPU code and link some common library @@ -7,10 +9,11 @@ function(op_library TARGET) set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE) set(cc_srcs) set(cu_srcs) - set(op_common_deps operator op_registry) + set(op_common_deps operator op_registry math_function) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) + set(pybind_flag 0) cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -46,22 +49,40 @@ function(op_library TARGET) cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) endif() + + # net_op doesn't need pybind + if ("${TARGET}" STREQUAL "net_op") + set(pybind_flag 1) + endif() + + # pybind USE_NO_KERNEL_OP + file(READ ${TARGET}.cc TARGET_CONTENT) + string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") + string(REPLACE "_op" "" TARGET "${TARGET}") + if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") + file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") + set(pybind_flag 1) + endif() + + # pybind USE_CPU_ONLY_OP + list(LENGTH cu_srcs cu_srcs_len) + if (${pybind_flag} EQUAL 0 AND ${cu_srcs_len} EQUAL 0) + file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") + set(pybind_flag 1) + endif() + + # pybind USE_OP + if (${pybind_flag} EQUAL 0) + file(APPEND ${pybind_file} "USE_OP(${TARGET});\n") + endif() endfunction() add_subdirectory(math) set(DEPS_OPS - identity_op - minus_op - mul_op - recurrent_op - scale_op) -op_library(identity_op DEPS scale_op) -op_library(minus_op DEPS scale_op) -op_library(mul_op DEPS math_function) + recurrent_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS framework_proto tensor operator net_op) -op_library(scale_op DEPS net_op) + DEPS framework_proto tensor net_op) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index 9ca04d402879b6a955d849a32175194df82b65c8..4a6c6381b0341dd3531aa4c09024530ee67bb4f9 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -34,7 +34,7 @@ class AccuracyOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(inference->dims()[0], label->dims()[0], "inference size must be the same as label size"); - ctx.Output("Accuracy")->Resize({1}); + ctx.Output("Accuracy")->Resize({1}); } }; diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 8dbd47cf0dfbc265032a9966343eed5c7bd8692e..b43c09d4f09c7f87cc60290bdd2a99cbe46f0d5c 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -26,7 +26,8 @@ class AddOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), ctx.Input("Y")->dims(), "Two input of Add Op's dimension must be same."); - ctx.Output("Out")->Resize(ctx.Input("X")->dims()); + ctx.Output("Out")->Resize( + ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc index 0ebefbab26ec8fdf316f852fbb7f6d9f3bbc48eb..72fd179354a4be76a37e4571da168d844f7ce384 100644 --- a/paddle/operators/concat_op.cc +++ b/paddle/operators/concat_op.cc @@ -26,7 +26,7 @@ class ConcatOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { auto ins = ctx.MultiInput("X"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); size_t axis = static_cast(ctx.Attr("axis")); size_t n = ins.size(); diff --git a/paddle/operators/concat_op.cu b/paddle/operators/concat_op.cu deleted file mode 100644 index 38fee7473dbb2ba97fe95b6632db7a1749cf3bbe..0000000000000000000000000000000000000000 --- a/paddle/operators/concat_op.cu +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#define EIGEN_USE_GPU -#include "paddle/operators/concat_op.h" - -namespace ops = paddle::operators; -// TODO(Yancey1989) Add GPU kernel diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc index 3d0af97aae79089d10d94cb636775c5c6c092d56..253b17d8a1b88eccc58fc458ae8274d2bbd1c323 100644 --- a/paddle/operators/cos_sim_op.cc +++ b/paddle/operators/cos_sim_op.cc @@ -46,9 +46,9 @@ class CosSimOp : public framework::OperatorWithKernel { " just 1 (which will be broadcasted to match Input(X))."); // resize tensor - ctx.Output("Out")->Resize({x_dims[0], 1}); - ctx.Output("XNorm")->Resize({x_dims[0], 1}); - ctx.Output("YNorm")->Resize({y_dims[0], 1}); + ctx.Output("Out")->Resize({x_dims[0], 1}); + ctx.Output("XNorm")->Resize({x_dims[0], 1}); + ctx.Output("YNorm")->Resize({y_dims[0], 1}); } }; @@ -131,8 +131,10 @@ class CosSimOpGrad : public framework::OperatorWithKernel { "Shape of Input(Out@Grad) must be [X.Dim(0), 1]."); // resize tensor - auto *x_grad = ctx.Output(framework::GradVarName("X")); - auto *y_grad = ctx.Output(framework::GradVarName("Y")); + auto *x_grad = + ctx.Output(framework::GradVarName("X")); + auto *y_grad = + ctx.Output(framework::GradVarName("Y")); if (x_grad) x_grad->Resize(x_dims); if (y_grad) y_grad->Resize(y_dims); } diff --git a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc index 1742925545d29df5d7df719faaea3b754680ab61..e37c582adbe5b9e728f683d97cc51063ce80c3a2 100644 --- a/paddle/operators/elementwise_mul_op.cc +++ b/paddle/operators/elementwise_mul_op.cc @@ -31,7 +31,7 @@ class ElementWiseMulOp : public framework::OperatorWithKernel { auto y_dim = ctx.Input("Y")->dims(); PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(), "Rank of first input must >= rank of second input.") - ctx.Output("Out")->Resize(x_dim); + ctx.Output("Out")->Resize(x_dim); } }; @@ -80,8 +80,10 @@ class ElementWiseMulOpGrad : public framework::OperatorWithKernel { auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); - auto *x_grad = ctx.Output(framework::GradVarName("X")); - auto *y_grad = ctx.Output(framework::GradVarName("Y")); + auto *x_grad = + ctx.Output(framework::GradVarName("X")); + auto *y_grad = + ctx.Output(framework::GradVarName("Y")); PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(), "Rank of first input must >= rank of second input.") diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 9d51f6e3a16fe96125599bb440d40237aeb9a028..0c9734892aac216709d380ec66acadf792761b14 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -23,7 +23,7 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("Dst")->Resize( + ctx.Output("Dst")->Resize( ctx.Input("Src")->dims()); } }; diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 123bed296c462c30bddd3bfbd530098fdbfe4856..8883d6d5fed2d8900364cf713a1e8d8b290ef83e 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -28,7 +28,7 @@ class GatherOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); framework::DDim output_dims(ctx.Input("X")->dims()); output_dims[0] = batch_size; - ctx.Output("Out")->Resize(output_dims); + ctx.Output("Out")->Resize(output_dims); } }; @@ -38,7 +38,7 @@ class GatherGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X_grad = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); X_grad->Resize(X->dims()); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 3d76516405960c502a46997108049b2db5cab6bf..25b0776a37488e876b0cc88aa3f2aa68e33fb270 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -44,7 +44,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& context) const override { - auto* tensor = context.Output("Out"); + auto* tensor = context.Output("Out"); auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 94d40890a765413e88a35a6ad995ca97ac84dcda..b3d15f1ec99813d242c86c99faa7385795eef3b1 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -25,7 +25,7 @@ class LookupTableOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &context) const override { auto table_t = context.Input("W"); auto ids_t = context.Input("Ids"); - auto output_t = context.Output("Out"); + auto output_t = context.Output("Out"); output_t->Resize({ids_t->dims()[0], table_t->dims()[1]}); } @@ -56,7 +56,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &context) const override { auto table = context.Input("W"); - auto d_table = context.Output(framework::GradVarName("W")); + auto d_table = + context.Output(framework::GradVarName("W")); d_table->Resize(table->dims()); } }; diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index d3d0e55a674587fb04f43f24d0790de4358f035a..3e523d31b682d70825d50c2a57b6e98cbf29dcd3 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -25,7 +25,7 @@ class MeanOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input of MeanOp must be initialized."); - ctx.Output("Out")->Resize({1}); + ctx.Output("Out")->Resize({1}); } }; @@ -45,7 +45,7 @@ class MeanGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output(framework::GradVarName("X")) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index a4876feb2edf77bd422fa2a7687b0fa7d55dae47..8a583f24edf40ce805ca216ab014bd169f9236df 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -33,7 +33,7 @@ class MinusOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( left_tensor->numel(), right_tensor->numel(), "Minus operator must take two tensor with same num of elements"); - ctx.Output("Out")->Resize(left_tensor->dims()); + ctx.Output("Out")->Resize(left_tensor->dims()); } }; diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 710a56a0e8e2d17162d7d000df226f1537104eb9..015e13de9a09bcd1931ccf91413b6a3f484f82bb 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { using framework::Tensor; +using framework::LoDTensor; class MulOp : public framework::OperatorWithKernel { public: @@ -45,7 +46,8 @@ class MulOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_mat_dims[1], y_mat_dims[0], "First matrix's width must be equal with second matrix's height."); - ctx.Output("Out")->Resize({x_mat_dims[0], y_mat_dims[1]}); + ctx.Output("Out")->Resize( + {x_mat_dims[0], y_mat_dims[1]}); } }; @@ -94,8 +96,10 @@ class MulOpGrad : public framework::OperatorWithKernel { auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); - auto *x_grad = ctx.Output(framework::GradVarName("X")); - auto *y_grad = ctx.Output(framework::GradVarName("Y")); + auto *x_grad = + ctx.Output(framework::GradVarName("X")); + auto *y_grad = + ctx.Output(framework::GradVarName("Y")); auto x_mat_dims = framework::flatten_to_2d(x_dims, Attr("x_num_col_dims")); diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/onehot_cross_entropy_op.cc similarity index 92% rename from paddle/operators/cross_entropy_op.cc rename to paddle/operators/onehot_cross_entropy_op.cc index ab1e1c101a10e09a81f7785d2f1514822e3bdf15..a9baada1cd4cd3af793bf1b0af7b029417e62b08 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/onehot_cross_entropy_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/cross_entropy_op.h" +#include "paddle/operators/onehot_cross_entropy_op.h" namespace paddle { namespace operators { @@ -29,7 +29,7 @@ class OnehotCrossEntropyOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(X->dims().size(), 2, "X's dimension must be 2."); PADDLE_ENFORCE_EQ(label->dims().size(), 1, "label's dimension must be 1."); PADDLE_ENFORCE_EQ(X->dims()[0], label->dims()[0]); - ctx.Output("Y")->Resize({X->dims()[0]}); + ctx.Output("Y")->Resize({X->dims()[0], 1}); } }; @@ -39,7 +39,7 @@ class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto dX = ctx.Output(framework::GradVarName("X")); + auto dX = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); dX->Resize(X->dims()); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/onehot_cross_entropy_op.cu similarity index 100% rename from paddle/operators/cross_entropy_op.cu rename to paddle/operators/onehot_cross_entropy_op.cu diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/onehot_cross_entropy_op.h similarity index 100% rename from paddle/operators/cross_entropy_op.h rename to paddle/operators/onehot_cross_entropy_op.h diff --git a/paddle/operators/pad_op.cc b/paddle/operators/pad_op.cc index 7e78b6ec133981494a65b5e16316ae8fdbd61a60..6cf7bd6f35b7592c41983efd75c1628043070687 100644 --- a/paddle/operators/pad_op.cc +++ b/paddle/operators/pad_op.cc @@ -34,7 +34,8 @@ class PadOp : public framework::OperatorWithKernel { for (int i = 0; i < x_dim.size(); ++i) { out_dims[i] = x_dim[i] + paddings[i * 2] + paddings[i * 2 + 1]; } - ctx.Output("Out")->Resize(framework::make_ddim(out_dims)); + ctx.Output("Out")->Resize( + framework::make_ddim(out_dims)); } }; @@ -95,9 +96,9 @@ class PadOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) should not be null"); auto x_dims = ctx.Input("X")->dims(); - auto *x_grad = ctx.Output(framework::GradVarName("X")); - if (x_grad != nullptr) { - x_grad->Resize(x_dims); + auto *x_g = ctx.Output(framework::GradVarName("X")); + if (x_g != nullptr) { + x_g->Resize(x_dims); } } }; diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index e826703c60ca82e1fe690eb78c3d4f92981ef3a2..d3413d7cb9305732e9ddf3cb1bc267f7203097f3 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -26,10 +26,11 @@ namespace operators { using Scope = framework::Scope; using Variable = framework::Variable; using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; void RecurrentAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) - ->GetMutable() + ->GetMutable() ->dims()[0]; CreateScopes(scope); auto step_scopes = GetStepScopes(scope); @@ -88,7 +89,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // the weight are located in parent scope for (auto& var_name : input.second) { if (!step_scope.FindVar(var_name)) { - step_scope.NewVar(var_name)->GetMutable(); + step_scope.NewVar(var_name)->GetMutable(); } } } @@ -106,11 +107,12 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { void RecurrentAlgorithm::InitMemories(Scope* step_scope, bool infer_shape_mode) const { for (auto& attr : arg_->memories) { - Tensor* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable(); + auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable(); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, "memory [%s]'s boot variable [%s] not exists", attr.var, attr.boot_var); - Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable(); + auto* boot_mem = + step_scope->FindVar(attr.boot_var)->GetMutable(); if (infer_shape_mode) { pre_mem->Resize(boot_mem->dims()); PADDLE_ENFORCE_EQ(pre_mem->dims().size(), 2); @@ -192,9 +194,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( "memory variable [%s] does not exists", attr.var); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, "boot variable [%s] does not exists", attr.boot_var); - Tensor* mem_grad = step_scope->NewVar(attr.var)->GetMutable(); - Tensor* boot_mem_grad = - step_scope->NewVar(attr.boot_var)->GetMutable(); + auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable(); + auto* boot_mem_grad = + step_scope->NewVar(attr.boot_var)->GetMutable(); if (infer_shape_mode) { boot_mem_grad->Resize(mem_grad->dims()); } else { @@ -205,7 +207,7 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) - ->GetMutable() + ->GetMutable() ->dims()[0]; auto step_scopes = GetStepScopes(scope); rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc index b7061153d2bf13982f14f233e87a87daeeebf5fd..d2817020921dfd3c044922de6a0f2ae0307936bd 100644 --- a/paddle/operators/reshape_op.cc +++ b/paddle/operators/reshape_op.cc @@ -46,7 +46,7 @@ class ReshapeOp : public framework::OperatorWithKernel { std::transform(shape.begin(), shape.end(), shape_int64.begin(), [](int a) { return static_cast(a); }); auto out_dims = framework::make_ddim(shape_int64); - ctx.Output("Out")->Resize(out_dims); + ctx.Output("Out")->Resize(out_dims); } }; @@ -90,7 +90,7 @@ class ReshapeGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) shouldn't be null."); auto dims = ctx.Input("X")->dims(); - auto *d_in = ctx.Output(framework::GradVarName("X")); + auto *d_in = ctx.Output(framework::GradVarName("X")); d_in->Resize(dims); } }; diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index 97872c67ac99fbf6c9c177d52f1d4069163e8548..6c082cb1825e04accb09019fef28eb2ec6523a5b 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -21,6 +21,7 @@ namespace rnn { namespace f = paddle::framework; using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, const size_t seq_len, @@ -31,7 +32,7 @@ void SegmentInputs(const std::vector& step_scopes, PADDLE_ENFORCE(input_var != nullptr, "input link [%s] is not in scope.", inlinks[i].external); - Tensor* input = input_var->GetMutable(); + LoDTensor* input = input_var->GetMutable(); f::DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); @@ -40,6 +41,8 @@ void SegmentInputs(const std::vector& step_scopes, Tensor* step_input = step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable(); if (!infer_shape_mode) { + // The input of operators of each step is Tensor here. + // Maybe need to modify Slice function. *step_input = input->Slice(j, j + 1); } step_input->Resize(step_dims); @@ -54,21 +57,23 @@ void ConcatOutputs(const std::vector& step_scopes, auto output_var = step_scopes[0]->FindVar(outlinks[i].external); PADDLE_ENFORCE(output_var != nullptr, "output link [%s] is not in scope.", outlinks[i].external); - Tensor* output = output_var->GetMutable(); + LoDTensor* output = output_var->GetMutable(); if (infer_shape_mode) { auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal); PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i].internal); - f::DDim step_dims = step_scope_var->template GetMutable()->dims(); + f::DDim step_dims = + step_scope_var->template GetMutable()->dims(); std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); output->Resize(f::make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); for (size_t j = 0; j < seq_len; j++) { - Tensor* step_output = - step_scopes[j]->FindVar(outlinks[i].internal)->GetMutable(); + LoDTensor* step_output = step_scopes[j] + ->FindVar(outlinks[i].internal) + ->GetMutable(); // TODO(luotao02) data type and platform::DeviceContext() should set // correctly (output->Slice(j, j + 1)) @@ -94,8 +99,8 @@ void LinkMemories(const std::vector& scopes, auto scope = scopes[step_id]; auto linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { - auto mem = scope->FindVar(attr.pre_var)->GetMutable(); - auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); + auto mem = scope->FindVar(attr.pre_var)->GetMutable(); + auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); if (infer_shape_mode) { mem->Resize(linked_mem->dims()); } else { diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index fa8f0ff1a858143af427b51025279c726f1628e0..c6101685a3205a7a7459347ea5b0cc8487656550 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -37,7 +37,7 @@ class RowwiseAddOp : public framework::OperatorWithKernel { framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims, "The width of two operands must be same"); PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1"); - ctx.Output("Out")->Resize(x_dims); + ctx.Output("Out")->Resize(x_dims); } }; @@ -76,8 +76,8 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims, "The width of two operands must be same"); - auto *dx = ctx.Output(framework::GradVarName("X")); - auto *db = ctx.Output(framework::GradVarName("b")); + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *db = ctx.Output(framework::GradVarName("b")); if (dx) dx->Resize(x_dims); if (db) db->Resize(b_dims); } diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index ea991f683d841b3dc4624a0d8aa3c88367fd3c6d..35e6b70ba94a645da2b99093b2354acfb0ef2771 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -28,7 +28,7 @@ class ScaleOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { auto *in = ctx.Input("X"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); out->Resize(in->dims()); } }; diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index f901edefa22dc9a252e87116df756d04767a7162..0f7510983e2e34485110719c92d26cbc78cd850c 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -35,7 +35,8 @@ class ScatterOp : public framework::OperatorWithKernel { framework::DDim data_dim(ctx.Input("Updates")->dims()); for (int i = 1; i < data_dim.size(); ++i) PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input("Updates")->dims()[i]); - ctx.Output("Out")->Resize(ctx.Input("Ref")->dims()); + ctx.Output("Out")->Resize( + ctx.Input("Ref")->dims()); } }; @@ -45,9 +46,11 @@ class ScatterGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); + auto *dUpdates = + ctx.Output(framework::GradVarName("Updates")); auto *Updates = ctx.Input("Updates"); - auto *dRef = ctx.Output(framework::GradVarName("Ref")); + auto *dRef = + ctx.Output(framework::GradVarName("Ref")); auto *Ref = ctx.Input("Ref"); dRef->Resize(Ref->dims()); diff --git a/paddle/operators/sequence_avg_pool_op.cc b/paddle/operators/sequence_avg_pool_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c15a5833deba2e198f6cb724bda7e3306c56e461 --- /dev/null +++ b/paddle/operators/sequence_avg_pool_op.cc @@ -0,0 +1,90 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sequence_avg_pool_op.h" + +namespace paddle { +namespace operators { + +class SequenceAvgPoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input of SequenceAvgPoolOp" + "must be initialized."); + auto* x = ctx.Input("X"); + auto dims = x->dims(); + auto lod = x->lod(); + PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now."); + PADDLE_ENFORCE_GE( + dims[0], + /*batch size = */ static_cast(lod[0].size() - 1), + "The first dimension of Input(X) must be large than batch size."); + dims[0] = lod[0].size() - 1; + ctx.Output("Out")->Resize({dims}); + } +}; + +class SequenceAvgPoolOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SequenceAvgPoolOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of SequenceAvgPoolOp."); + AddOutput("Out", "The output of SequenceAvgPoolOp."); + AddComment(R"DOC( + SequenceAvgPoolOp averages features of all time-steps of each instance. + More detailed comments will be added later. + )DOC"); + } +}; + +class SequenceAvgPoolGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Gradient of Out should not be null"); + auto og_dims = + ctx.Input(framework::GradVarName("Out"))->dims(); + auto x_dims = ctx.Input("X")->dims(); + PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(), + "The rank of output grad must equal to Input(X)."); + for (int64_t i = 1; i < og_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch."); + } + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + x_grad->Resize(x_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(sequence_avg_pool, ops::SequenceAvgPoolOp, + ops::SequenceAvgPoolOpMaker, sequence_avg_pool_grad, + ops::SequenceAvgPoolGradOp); +REGISTER_OP_CPU_KERNEL( + sequence_avg_pool, + ops::SequenceAvgPoolKernel); +REGISTER_OP_CPU_KERNEL( + sequence_avg_pool_grad, + ops::SequenceAvgPoolGradKernel); diff --git a/paddle/operators/sequence_avg_pool_op.cu b/paddle/operators/sequence_avg_pool_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..bc9d1611fccd17c99b914b6ef59995288a9ebbd6 --- /dev/null +++ b/paddle/operators/sequence_avg_pool_op.cu @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU + +#include "paddle/operators/sequence_avg_pool_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + sequence_avg_pool, + ops::SequenceAvgPoolKernel); +REGISTER_OP_GPU_KERNEL( + sequence_avg_pool_grad, + ops::SequenceAvgPoolGradKernel); diff --git a/paddle/operators/sequence_avg_pool_op.h b/paddle/operators/sequence_avg_pool_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6e343b87e2938399409498407ac46b2416dc2231 --- /dev/null +++ b/paddle/operators/sequence_avg_pool_op.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +template +using EigenMatrix = framework::EigenMatrix; + +template +class SequenceAvgPoolKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in = context.Input("X"); + auto* out = context.Output("Out"); + + auto dims = in->dims(); + auto lod = in->lod(); + int64_t w = in->numel() / dims[0]; + + out->mutable_data(context.GetPlace()); + auto place = context.GetEigenDevice(); + for (int i = 0; i < static_cast(lod[0].size()) - 1; ++i) { + Tensor in_t = in->Slice(static_cast(lod[0][i]), + static_cast(lod[0][i + 1])); + Tensor out_t = out->Slice(i, i + 1); + int64_t h = static_cast(lod[0][i + 1] - lod[0][i]); + auto in_e = EigenMatrix::From(in_t, {h, w}); + auto out_e = EigenMatrix::From(out_t, {h, w}); + out_e.device(place) = in_e.mean(Eigen::array({{0}})); + } + } +}; + +template +class SequenceAvgPoolGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in = context.Output("X"); + auto* in_g = context.Output(framework::GradVarName("X")); + auto* out_g = context.Input(framework::GradVarName("Out")); + + auto dims = in->dims(); + auto lod = in->lod(); + int64_t w = in->numel() / dims[0]; + + in_g->mutable_data(context.GetPlace()); + auto place = context.GetEigenDevice(); + for (int i = 0; i < static_cast(lod[0].size()) - 1; ++i) { + auto in_g_t = in_g->Slice(static_cast(lod[0][i]), + static_cast(lod[0][i + 1])); + auto out_g_t = out_g->Slice(i, i + 1); + int64_t h = static_cast(lod[0][i + 1] - lod[0][i]); + auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); + auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); + Eigen::DSizes bcast(h, w); + in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index ad267e7f087943ff3b8326a7baf2ce3955fa51c2..7997bf690710b3675f4014790db8cc7fc06946d3 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -23,10 +23,11 @@ class SGDOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE( - ctx.Input("param")->dims() == ctx.Input("grad")->dims(), - "Two input of SGD Op's dimension must be same."); - ctx.Output("param_out")->Resize(ctx.Input("param")->dims()); + PADDLE_ENFORCE_EQ(ctx.Input("param")->dims(), + ctx.Input("grad")->dims(), + "Two input of SGD Op's dimension must be same."); + ctx.Output("param_out") + ->Resize(ctx.Input("param")->dims()); } }; diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index 761c6de8d4d2150b30b97b58da95da3d5f33db63..de6a1ba77382306923ee238e5cf309c791f9f216 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -23,7 +23,8 @@ class SigmoidOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("Y")->Resize(ctx.Input("X")->dims()); + ctx.Output("Y")->Resize( + ctx.Input("X")->dims()); } }; @@ -44,7 +45,7 @@ class SigmoidOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output(framework::GradVarName("X")) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("Y")->dims()); } }; diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 7166b2f60be8a6088ab3a81686f7bed1b7181d97..239d3d141e1076a0a6a943f340311b17aa6f542a 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -25,7 +25,8 @@ class SoftmaxOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, "The input of softmax op must be a matrix."); - ctx.Output("Y")->Resize(ctx.Input("X")->dims()); + ctx.Output("Y")->Resize( + ctx.Input("X")->dims()); } }; @@ -71,7 +72,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { ctx.Input(framework::GradVarName("Y"))->dims(), "Input(Y) and its gradients should have a same shape."); - ctx.Output(framework::GradVarName("X")) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index 9f51d3efa8ecba894a1023b9de2df451ca85916c..ebe5bd352e99e298fb86355730feed77b236d2bd 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -48,9 +48,9 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "First dimension of target must be equal to input " "or to 1."); - ctx.Output("sub_result") + ctx.Output("sub_result") ->Resize({x_dims[0], x->numel() / x_dims[0]}); - ctx.Output("Out")->Resize({x_dims[0], 1}); + ctx.Output("Out")->Resize({x_dims[0], 1}); } }; @@ -94,8 +94,10 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(out_dims[1], 1, "Second dimension of output gradient " "must be 1."); - auto* x_grad = ctx.Output(framework::GradVarName("X")); - auto* y_grad = ctx.Output(framework::GradVarName("Y")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + auto* y_grad = + ctx.Output(framework::GradVarName("Y")); if (x_grad) x_grad->Resize(x_dims); if (y_grad) y_grad->Resize(y_dims); } diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 5805826ee8a555ca6dfc1ca81feaadffea9e1012..7170e7256c206d338ef1f6f94d5d1889ca92a3de 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -23,7 +23,7 @@ class SumOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { auto ins = ctx.MultiInput("X"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); int N = ins.size(); auto in_dim = ins[0]->dims(); @@ -55,7 +55,8 @@ class SumGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto outputs = ctx.MultiOutput(framework::GradVarName("X")); + auto outputs = + ctx.MultiOutput(framework::GradVarName("X")); auto dims = ctx.Input(framework::GradVarName("Out"))->dims(); for (auto output : outputs) { output->Resize(dims); diff --git a/paddle/operators/top_k_op.cc b/paddle/operators/top_k_op.cc index 38d2f0a09aec751734864947a2f3cfa20107e22f..ff0e77a344ede7709a805d7dca4397eb49fa300c 100644 --- a/paddle/operators/top_k_op.cc +++ b/paddle/operators/top_k_op.cc @@ -35,8 +35,8 @@ class TopkOp : public framework::OperatorWithKernel { framework::DDim dims = input->dims(); dims[dims.size() - 1] = k; - ctx.Output("Out")->Resize(dims); - ctx.Output("Indices")->Resize(dims); + ctx.Output("Out")->Resize(dims); + ctx.Output("Indices")->Resize(dims); } }; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index b8fbc9b52aecdb5c8d985b5de9bcd7cb85835b60..ed7973693619e7765643dda824100afd82616470 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -50,7 +50,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { void InferShape(const framework::InferShapeContext& ctx) const override { PADDLE_ENFORCE(Attr("min") < Attr("max"), "uniform_random's min must less then max"); - auto* tensor = ctx.Output("Out"); + auto* tensor = ctx.Output("Out"); auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 2ad1e817990b35e2d36f261fcf7f40728168794d..a7a38339fb2c8689778b0a86d3713f67e1447a80 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -23,6 +23,7 @@ limitations under the License. */ #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/pybind/pybind.h" #include "paddle/pybind/tensor_py.h" #include "paddle/string/to_string.h" #include "pybind11/numpy.h" @@ -31,35 +32,6 @@ limitations under the License. */ namespace py = pybind11; -USE_OP(add); -USE_OP(onehot_cross_entropy); -USE_OP(sgd); -USE_OP(mul); -USE_OP(elementwise_mul); -USE_OP(mean); -USE_OP(sigmoid); -USE_OP(softmax); -USE_OP(rowwise_add); -USE_OP(fill_zeros_like); -USE_NO_KERNEL_OP(recurrent); -USE_OP(gaussian_random); -USE_OP(uniform_random); -USE_OP(lookup_table); -USE_OP(scale); -USE_NO_KERNEL_OP(identity); -USE_OP(minus); -USE_OP(cos_sim); -USE_CPU_ONLY_OP(gather); -USE_OP(pad); -USE_CPU_ONLY_OP(scatter); -USE_OP(accuracy); -USE_CPU_ONLY_OP(concat); -USE_OP(top_k); -USE_OP(squared_l2_distance); -USE_OP(sum); -USE_OP(reshape); -USE_OP(rank_loss); - namespace paddle { namespace framework { @@ -125,27 +97,21 @@ PYBIND11_PLUGIN(core) { return self.data()[offset]; }); - py::class_(m, "LoDTensor", R"DOC(LoD(Leval of Ddetails) Tensor. - -The tensor and LoD info should be created before creating the LoDTensor, then -call the set_tensor and set_lod functions to set them. - -)DOC") - .def("__init__", - [](LoDTensor &instance, - const std::vector> &lod, - Tensor *t) { + py::class_(m, "LoDTensor") + .def_buffer( + [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) + .def( + "__init__", + [](LoDTensor &instance, const std::vector> &lod) { #ifdef PADDLE_ONLY_CPU - new (&instance) LoDTensor(lod, t); + new (&instance) LoDTensor(lod); #else paddle::framework::LoD new_lod; new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - new (&instance) LoDTensor(new_lod, t); + new (&instance) LoDTensor(new_lod); #endif - }) - .def("set_tensor", - [](LoDTensor &self, Tensor *tensor) { self.set_tensor(tensor); }) + }) .def("set_lod", [](LoDTensor &self, const std::vector> &lod) { #ifdef PADDLE_ONLY_CPU @@ -157,9 +123,6 @@ call the set_tensor and set_lod functions to set them. self.set_lod(new_lod); #endif }) - .def("tensor", - [](LoDTensor &self) -> Tensor & { return self.tensor(); }, - py::return_value_policy::reference) .def("lod", [](LoDTensor &self) -> std::vector> { #ifdef PADDLE_ONLY_CPU return self.lod(); @@ -188,9 +151,6 @@ All parameter, weight, gradient are variables in Paddle. [](Variable &var, int val) -> void { *var.GetMutable() = val; }) .def("get_int", [](const Variable &var) -> int { return var.Get(); }) .def("get_tensor", - [](Variable &self) -> Tensor * { return self.GetMutable(); }, - py::return_value_policy::reference) - .def("get_lod_tensor", [](Variable &self) -> LoDTensor * { return self.GetMutable(); }, diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 4f68a8953446ffa0510df65c5b214d09b913cff8..a9e1d6d2e06d56f837690ec95fa8f8d41a90725f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2055,20 +2055,26 @@ class ConvLayerBase(LayerBase): if num_filters is not None: self.config.num_filters = num_filters + use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - # Automatically select cudnn_type for GPU and exconv for CPU + # Automatically select cudnn_type for GPU, exconv for CPU + # and mkldnn_conv for MKLDNN # if set type=conv, but still reserve the way user specify - # exconv or cudnn_conv manually. + # exconv, mkldnn_conv or cudnn_conv manually. if self.layer_type == "cudnn_conv": config_assert(use_gpu, "cudnn_conv only support GPU") + if self.layer_type == "mkldnn_conv": + config_assert(use_mkldnn, "mkldnn_conv only support MKLDNN") + if (use_gpu == 1 and self.layer_type != "exconv" and + self.layer_type != "mkldnn_conv" and (parallel_nn == 0 or self.config.device > -1)): self.layer_type = "cudnn_conv" else: - self.layer_type = "exconv" + self.layer_type = "mkldnn_conv" if use_mkldnn else "exconv" # need to specify layer in config self.config.type = self.layer_type @@ -2100,6 +2106,11 @@ class ConvLayer(ConvLayerBase): layer_type = 'exconv' +@config_layer('mkldnn_conv') +class ConvLayer(ConvLayerBase): + layer_type = 'mkldnn_conv' + + @config_layer('cudnn_conv') class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index c2fc102a8b8de82da5c3fc5fee273790325908f8..253e7b8a24465da63a7eacd7983eb831251e6230 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -8,20 +8,22 @@ class TestCrossEntropy(OpTest): self.op_type = "onehot_cross_entropy" batch_size = 30 class_num = 10 + X = numpy.random.uniform(0.1, 1.0, [batch_size, class_num]).astype("float32") - label = (class_num / 2) * numpy.ones(batch_size).astype("int32") - self.inputs = {'X': X, 'label': label} - Y = [] - for i in range(0, batch_size): - Y.append(-numpy.log(X[i][label[i]])) - self.outputs = {'Y': numpy.array(Y).astype("float32")} + labels = numpy.random.randint(0, class_num, batch_size, dtype="int32") + + cross_entropy = numpy.asmatrix( + [[-numpy.log(X[i][labels[i]])] for i in range(X.shape[0])], + dtype="float32") + self.inputs = {"X": X, "label": labels} + self.outputs = {"Y": cross_entropy} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y') + self.check_grad(["X"], "Y") if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py index f26ed4964c521be1cd839b39d7244f96c653cb1a..8cd93b35d7d1cb7d3b4a19e0e402ef576f1c0982 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -44,79 +44,66 @@ class TestTensor(unittest.TestCase): self.assertAlmostEqual(2.0, tensor_array_2[19, 11]) def test_int_lod_tensor(self): - places = [core.CPUPlace(), core.GPUPlace(0)] - for place in places: - scope = core.Scope() - var = scope.new_var("test_tensor") - var_lod = scope.new_var("test_lod_tensor") - - tensor = var.get_tensor() - lod_tensor = var_lod.get_lod_tensor() - - tensor.set_dims([4, 4, 6]) - tensor.alloc_int(place) - array = numpy.array(tensor) - array[0, 0, 0] = 3 - array[3, 3, 5] = 10 - tensor.set(array, place) + place = core.CPUPlace() + scope = core.Scope() + var_lod = scope.new_var("test_lod_tensor") + lod_tensor = var_lod.get_tensor() - lod_tensor.set_tensor(tensor) - lod_tensor.set_lod([[0, 2, 4]]) + lod_tensor.set_dims([4, 4, 6]) + lod_tensor.alloc_int(place) + array = numpy.array(lod_tensor) + array[0, 0, 0] = 3 + array[3, 3, 5] = 10 + lod_tensor.set(array, place) + lod_tensor.set_lod([[0, 2, 4]]) - lod_v = numpy.array(lod_tensor.tensor()) - self.assertTrue(numpy.alltrue(array == lod_v)) + lod_v = numpy.array(lod_tensor) + self.assertTrue(numpy.alltrue(array == lod_v)) - lod = lod_tensor.lod() - self.assertEqual(0, lod[0][0]) - self.assertEqual(2, lod[0][1]) - self.assertEqual(4, lod[0][2]) + lod = lod_tensor.lod() + self.assertEqual(0, lod[0][0]) + self.assertEqual(2, lod[0][1]) + self.assertEqual(4, lod[0][2]) def test_float_lod_tensor(self): - places = [core.CPUPlace(), core.GPUPlace(0)] - for place in places: - scope = core.Scope() - var = scope.new_var("test_tensor") - var_lod = scope.new_var("test_lod_tensor") - - tensor = var.get_tensor() - lod_tensor = var_lod.get_lod_tensor() - - tensor.set_dims([5, 2, 3, 4]) - tensor.alloc_float(place) + place = core.CPUPlace() + scope = core.Scope() + var_lod = scope.new_var("test_lod_tensor") - tensor_array = numpy.array(tensor) - self.assertEqual((5, 2, 3, 4), tensor_array.shape) - tensor_array[0, 0, 0, 0] = 1.0 - tensor_array[0, 0, 0, 1] = 2.0 - tensor.set(tensor_array, place) + lod_tensor = var_lod.get_tensor() + lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.alloc_float(place) - lod_tensor.set_tensor(tensor) + tensor_array = numpy.array(lod_tensor) + self.assertEqual((5, 2, 3, 4), tensor_array.shape) + tensor_array[0, 0, 0, 0] = 1.0 + tensor_array[0, 0, 0, 1] = 2.0 + lod_tensor.set(tensor_array, place) - lod_v = numpy.array(lod_tensor.tensor()) - self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) - self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) - self.assertEqual(len(lod_tensor.lod()), 0) + lod_v = numpy.array(lod_tensor) + self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) + self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) + self.assertEqual(len(lod_tensor.lod()), 0) - lod_py = [[0, 2, 5], [0, 2, 4, 5]] - lod_tensor.set_lod(lod_py) - lod = lod_tensor.lod() - self.assertListEqual(lod_py, lod) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor.set_lod(lod_py) + lod = lod_tensor.lod() + self.assertListEqual(lod_py, lod) def test_lod_tensor_init(self): scope = core.Scope() - var = scope.new_var("test_tensor") place = core.CPUPlace() - tensor = var.get_tensor() - tensor.set_dims([5, 2, 3, 4]) - tensor.alloc_float(place) - tensor_array = numpy.array(tensor) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor = core.LoDTensor(lod_py) + + lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.alloc_float(place) + tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 1] = 2.0 - tensor.set(tensor_array, place) - lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor.set(tensor_array, place) - lod_tensor = core.LoDTensor(lod_py, tensor) - lod_v = numpy.array(lod_tensor.tensor()) + lod_v = numpy.array(lod_tensor) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) self.assertListEqual(lod_py, lod_tensor.lod())