diff --git a/.gitignore b/.gitignore index 9622ab78e0e0556ec2b4cc974fee93ff680d54d2..4f21fefda9f64a0392881971a715b97c234030e3 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ cmake-build-* # generated while compiling python/paddle/v2/framework/core.so +paddle/pybind/pybind.h CMakeFiles cmake_install.cmake paddle/.timestamp diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 8d5d533126c9b7fa84c725d614cf3486126d0284..4823dc3e91390002aefac70f7931b4197db05789 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -26,9 +26,9 @@ set(IGNORE_PATTERN .*ImportanceSampler.* .*cblas\\.h.* .*\\.pb\\.txt - .*LtrDataProvider.* .*MultiDataProvider.* - .*pb.*) + .*pb.* + .*pybind.h) # add_style_check_target # diff --git a/doc/design/if_else_op.md b/doc/design/if_else_op.md index 7370c2a24fa644a64e738f202bac9b9209642e08..954a19c0733358c235eae3cffe134c23dac94c95 100644 --- a/doc/design/if_else_op.md +++ b/doc/design/if_else_op.md @@ -1,22 +1,4 @@ -IfOp should have only one branch. An IfOp operator takes a `cond` variable whose value must be a vector of N boolean elements. Its return value has M (M<=N) instances, each corresponds to a true element in `cond`. - -```python -import paddle as pd - -x = var() -y = var() -cond = var() - -b = pd.create_ifop(inputs=[x], output_num=1) -with b.true_block(): - x = b.inputs(0) - z = operator.add(x, y) - b.set_output(0, operator.softmax(z)) - -out = b(cond) -``` - -If we want the output still has N instances, we can use IfElseOp with a default value, whose minibatch size must be N: +IfOp should have only one branch. An IfOp operator takes a `cond` variable whose value must be a vector of N boolean elements. Its return value has N instances. If cond[i] == True, the input instance input[i] will go through true_block() and generate output[i]; otherwise it will produce output from false_block(). ```python import paddle as pd @@ -39,7 +21,7 @@ with b.false_block(): out = b(cond) ``` -If only true_block is set in an IfElseOp, we can have a default value for false as: +If only true_block is set in an IfElseOp, a special case is to use a default value for the false branch: ```python import paddle as pd
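Before the implementation details, the semantics above can be pinned down with a tiny sketch in plain C++ (an illustration of the described behavior, not PaddlePaddle API): every one of the N instances flows through exactly one of the two branches, so the output keeps all N instances.

```cpp
#include <functional>
#include <vector>

// Illustrative IfElse semantics: instance i goes through true_block when
// cond[i] is true, otherwise through false_block; the output size stays N.
std::vector<double> IfElse(const std::vector<bool>& cond,
                           const std::vector<double>& input,
                           const std::function<double(double)>& true_block,
                           const std::function<double(double)>& false_block) {
  std::vector<double> output(input.size());
  for (size_t i = 0; i < input.size(); ++i) {
    output[i] = cond[i] ? true_block(input[i]) : false_block(input[i]);
  }
  return output;
}
```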
diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md index e3892849abe21fc207d2fcbe4adc65184ba771f4..c6570b89aedfaac1aef9b00e889b0b3ed21d8d65 100644 --- a/doc/howto/dev/new_op_cn.md +++ b/doc/howto/dev/new_op_cn.md @@ -34,7 +34,7 @@ Kernel implementation | Kernel implementations shared by CPU and GPU go in the `.h` file; otherwise the CPU Registering the Op | Op registration goes in the `.cc` file; the CPU Kernel registration goes in the `.cc` file and the GPU one in the `.cu` file -New ops are all added under the directory [paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators), with file names ending in `*_op.h` (if any), `*_op.cc`, and `*_op.cu` (if any). +New ops are all added under the directory [paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators), with file names ending in `*_op.h` (if any), `*_op.cc`, and `*_op.cu` (if any). **The build system automatically builds each op and its corresponding Python extension from these file names.** The following uses the matrix multiplication operation, [MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc), as an example of how to write an Operator with a Kernel. @@ -224,45 +224,15 @@ MulOp(const std::string &type, const framework::VariableNameMap &inputs, ### 5. Building -- Simple OPs **without special dependencies** need no changes to the CMakeLists.txt file. [paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt) automatically adds any new `*_op.cc` files under `paddle/operators` to the build. -- More complex operators **with extra dependencies** still need to modify [paddle/operators/CMakeLists.txt](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/CMakeLists.txt). For example, `mul_op` depends on `math_function`, so the following had to be added to `CMakeLists.txt`: +Run the following command to build: - ``` - op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function) + - ``` - -- Run the following command to build: - - ``` - make mul_op - ``` +``` +make mul_op +``` ## Binding Python -- Binding Python - - In [`paddle/pybind/pybind.cc -`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc) use `USE_OP` to tell the compiler which Op needs to be linked; see the [code comments](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h#L81) for details. - - ``` - USE_OP(mul); - ``` If only the CPU version is implemented, use `USE_CPU_ONLY_OP`: - - ``` - USE_CPU_ONLY_OP(gather); - ``` - - If the OP has no Kernel, use `USE_NO_KENREL_OP`: - - ``` - USE_NO_KENREL_OP(recurrent); - ``` - - - - Generating the library - - New `*_op.cc` files under the `paddle/operators` directory are automatically added and linked into the generated lib. +The build system automatically generates the Python binding for each new op and links it into the generated lib. ## Implementing unit tests @@ -367,3 +337,10 @@ make test ARGS="-R test_mul_op -V" ```bash ctest -R test_mul_op ``` + +## Notes + +- Create separate `*_op.h` (if any), `*_op.cc`, and `*_op.cu` (if any) files for each Op. Putting more than one Op in a single file is not allowed and will cause build errors. +- The type name used when registering an Op must be identical to the Op's name; registering `REGISTER_OP(B, ...)` inside `A_op.cc` is not allowed and will break the unit tests. +- If an Op has no GPU Kernel, do not create an empty `*_op.cu`; it will break the unit tests. +- If several Ops depend on shared helper functions, put those in files whose names do not match `*_op.*`, e.g. the `gather.h` file.
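The notes above are mechanical enough to compress into a skeleton. A minimal, hypothetical `my_op.cc` is sketched below (names such as `MyOp` and `MyOpMaker` are placeholders, and the exact registration macros should be checked against `op_registry.h`); the point is that the file defines exactly one op and registers it under the name `my`, matching the file name `my_op.cc`:

```cpp
// my_op.cc -- one op per file; the registered name must match the file name.
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

class MyOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(const framework::InferShapeContext &ctx) const override {
    // shape checks and output resizing go here
  }
};

class MyOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MyOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input of my op.");
    AddOutput("Out", "The output of my op.");
    AddComment("An illustrative op.");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
// Registering this as "B" from a file named my_op.cc would break the tests.
REGISTER_OP_WITHOUT_GRADIENT(my, ops::MyOp, ops::MyOpMaker);
```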
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 568f4e89819c8345d8908634f6fa56f09483a763..fac5cd20aa7f9db0792f8102bb442192ab1ad63f 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -51,18 +51,15 @@ bool operator==(const LoD& a, const LoD& b); * LoDTensor (Level of details Tensor) * see https://en.wikipedia.org/wiki/Level_of_details for reference. */ -class LoDTensor { +class LoDTensor : public Tensor { public: LoDTensor() {} - LoDTensor(const LoD& lod, Tensor* t) : lod_(lod), tensor_(t) {} - void set_lod(const LoD& lod) { lod_ = lod; } - - void set_tensor(Tensor* tensor) { tensor_ = tensor; } + explicit LoDTensor(const LoD& lod) : lod_(lod) {} - Tensor& tensor() { return *tensor_; } + void set_lod(const LoD& lod) { lod_ = lod; } - LoD lod() { return lod_; } + LoD lod() const { return lod_; } /* * Get an element from LoD. @@ -104,7 +101,6 @@ class LoDTensor { private: LoD lod_; - Tensor* tensor_; // not owned }; } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 1da8553134f377f7a4fbe8008d12fe8d4a0e47f4..7915326b27a22e9280e3f09d9bbfc2a58f46aff7 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -36,69 +36,64 @@ class LoDTensorTester : public ::testing::Test { ASSERT_EQ(lod.size(), 3UL); - tensor.Resize({20 /*batch size*/, 128 /*dim*/}); + lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/}); // malloc memory - tensor.mutable_data(place); + lod_tensor_.mutable_data(place); - lod_tensor.set_lod(lod); - lod_tensor.set_tensor(&tensor); + lod_tensor_.set_lod(lod); } protected: platform::CPUPlace place; - Tensor tensor; - LoDTensor lod_tensor; + LoDTensor lod_tensor_; }; -TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor.NumLevels(), 3UL); } +TEST_F(LoDTensorTester, NumLevels) { ASSERT_EQ(lod_tensor_.NumLevels(), 3UL); } TEST_F(LoDTensorTester, NumElements) { - ASSERT_EQ(lod_tensor.NumElements(0), 2UL); - ASSERT_EQ(lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(lod_tensor.NumElements(2), 8UL); + ASSERT_EQ(lod_tensor_.NumElements(0), 2UL); + ASSERT_EQ(lod_tensor_.NumElements(1), 4UL); + ASSERT_EQ(lod_tensor_.NumElements(2), 8UL); } TEST_F(LoDTensorTester, SliceLevels) { // slice 1 level for (size_t level = 0; level < 3UL; ++level) { - LoDTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.SliceLevels(level, level + 1); ASSERT_EQ(new_lod_tensor.NumLevels(), 1UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level)); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level)); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } // slice 2 level for (size_t level = 0; level < 2UL; ++level) { - LoDTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.SliceLevels(level, level + 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); - ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor.NumElements(level)); - ASSERT_EQ(new_lod_tensor.NumElements(1), lod_tensor.NumElements(level + 1)); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.NumElements(0), lod_tensor_.NumElements(level)); + ASSERT_EQ(new_lod_tensor.NumElements(1), + lod_tensor_.NumElements(level + 1)); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } } TEST_F(LoDTensorTester, SliceInLevel) { size_t level = 0; - LoDTensor new_lod_tensor = lod_tensor; + LoDTensor new_lod_tensor = lod_tensor_; new_lod_tensor.SliceInLevel(level, 0, 2); EXPECT_EQ(new_lod_tensor.NumLevels(), 3UL); EXPECT_EQ(new_lod_tensor.NumElements(0), 2UL); EXPECT_EQ(new_lod_tensor.NumElements(1), 4UL); EXPECT_EQ(new_lod_tensor.NumElements(2), 8UL); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); level = 1; - new_lod_tensor = lod_tensor; + new_lod_tensor = lod_tensor_; new_lod_tensor.SliceInLevel(level, 0, 2); ASSERT_EQ(new_lod_tensor.NumLevels(), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(0), 2UL); ASSERT_EQ(new_lod_tensor.NumElements(1), 4UL); - ASSERT_EQ(new_lod_tensor.tensor().data(), - lod_tensor.tensor().data()); + ASSERT_EQ(new_lod_tensor.data(), lod_tensor_.data()); } } // namespace framework
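Since `LoDTensor` now *is a* `Tensor`, the separate `Tensor` member and the `set_tensor()` plumbing disappear from call sites. Distilled from the updated test above, the new usage looks roughly like this (shapes and LoD values are illustrative):

```cpp
#include <vector>
#include "paddle/framework/lod_tensor.h"

void LoDTensorUsage(paddle::platform::CPUPlace place) {
  using paddle::framework::LoD;
  using paddle::framework::LoDTensor;

  LoDTensor t;
  t.Resize({20, 128});           // inherited from Tensor
  t.mutable_data<float>(place);  // allocate directly on the LoDTensor

  LoD lod;
  lod.push_back(std::vector<size_t>{0, 2, 20});
  t.set_lod(lod);                // no separate set_tensor() step anymore
}
```

diff --git 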
a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 1079a36a2e7b24f6f8a5bcbb296355567305a765..97e69cdb2e5e1e64031c899f5e04020665485ba8 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -26,18 +26,16 @@ __global__ void test(size_t* a, int size) { } TEST(LoDTensor, LoDInGPU) { - paddle::framework::Tensor tensor; paddle::framework::LoDTensor lod_tensor; paddle::platform::GPUPlace place(0); paddle::framework::LoD src_lod; src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14}); - tensor.Resize({14, 16}); - tensor.mutable_data(place); + lod_tensor.Resize({14, 16}); + lod_tensor.mutable_data(place); lod_tensor.set_lod(src_lod); - lod_tensor.set_tensor(&tensor); CHECK_EQ(lod_tensor.lod_element(0, 2), 4); CHECK_EQ(lod_tensor.lod_element(0, 4), 8); diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index e1e122091f7759b1a68f1f982bc2a35e8241f9f0..c57537be4bf67a8db6a49669ab8d2ed1b1324bdc 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -186,6 +186,48 @@ void OperatorBase::GenerateTemporaryNames() { } } +template <> +const Tensor* InferShapeContext::Input(const std::string& name) const { + auto* var = InputVar(name); + return var == nullptr ? nullptr : GetTensorFromVar(var); +} + +template <> +const std::vector InferShapeContext::MultiInput( + const std::string& name) const { + auto names = op().Inputs(name); + std::vector res; + res.reserve(names.size()); + std::transform(names.begin(), names.end(), std::back_inserter(res), + [&](const std::string& sub_name) { + auto var = scope_.FindVar(sub_name); + return var == nullptr ? nullptr : GetTensorFromVar(var); + }); + return res; +} + +template <> +Tensor* ExecutionContext::Output(const std::string& name) const { + auto* var = OutputVar(name); + return var == nullptr ? nullptr : const_cast(GetTensorFromVar(var)); +} + +template <> +std::vector ExecutionContext::MultiOutput( + const std::string& name) const { + auto names = op().Outputs(name); + std::vector res; + res.reserve(names.size()); + std::transform(names.begin(), names.end(), std::back_inserter(res), + [&](const std::string& sub_name) { + auto var = scope().FindVar(sub_name); + return var == nullptr + ? nullptr + : const_cast(GetTensorFromVar(var)); + }); + return res; +} + void OpProtoAndCheckerMaker::Validate() { validated_ = true; CheckNoDuplicatedInOutAttrs(); diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 4600b06009bcef7d0774d25b816aac4733f30795..adae7bfc3d7d31b1ed0373f01db4ef80343a08f7 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -22,6 +22,7 @@ limitations under the License. 
*/ #include "op_info.h" #include "paddle/framework/attribute.h" #include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" #include "paddle/framework/scope.h" #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" @@ -326,11 +327,27 @@ class InferShapeContext { return res; } + const Tensor* GetTensorFromVar(const Variable* var) const { + if (var->IsType()) { + return &var->Get(); + } + PADDLE_ENFORCE(var->IsType(), + "The Input(%s) must be LoDTensor or Tensor."); + return &var->Get(); + } + private: const OperatorBase& op_; const Scope& scope_; }; +template <> +const Tensor* InferShapeContext::Input(const std::string& name) const; + +template <> +const std::vector InferShapeContext::MultiInput( + const std::string& name) const; + template struct EigenDeviceConverter; @@ -363,9 +380,37 @@ class ExecutionContext : public InferShapeContext { return device_context_; } + // redefine Output function, + // use Variable::Get instead of Variable::GetMutable + template + T* Output(const std::string& name) const { + auto var = OutputVar(name); + return var == nullptr ? nullptr : const_cast(&var->Get()); + } + + // redefine MultiOutput function. + // use Variable::Get instead of Variable::GetMutable + template + std::vector MultiOutput(const std::string& name) const { + auto names = op().Outputs(name); + std::vector res; + res.reserve(names.size()); + std::transform( + names.begin(), names.end(), std::back_inserter(res), + [&](const std::string& sub_name) { return Output(sub_name); }); + return res; + } + const platform::DeviceContext* device_context_; }; +template <> +Tensor* ExecutionContext::Output(const std::string& name) const; + +template <> +std::vector ExecutionContext::MultiOutput( + const std::string& name) const; + class OpKernel { public: /** diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 642b53efc7095d25712ca324638f5fe9b8316c0c..ed166935f76be9d25062b5e69536c7b7ac19045d 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -22,7 +22,7 @@ namespace framework { template inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( - holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); + holder_, "Tensor holds no memory. Call Tensor::mutable_data first."); PADDLE_ENFORCE_GE( holder_->size(), numel() * sizeof(T) + offset_, "Tensor's dims_ is out of bound. Call Tensor::mutable_data " diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 55302ea47120f420e952b26830c8ea4cbcce6435..e2ec738de35c90c6a06c9a46b062d4cce55f5eda 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -36,7 +36,7 @@ TEST(Tensor, DataAssert) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "holder_ should not be null\nTenosr holds no memory. Call " + "holder_ should not be null\nTensor holds no memory. Call " "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { @@ -112,7 +112,7 @@ TEST(Tensor, ShareDataWith) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "holder_ should not be null\nTenosr holds no memory. Call " + "holder_ should not be null\nTensor holds no memory. 
Call " "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { @@ -274,4 +274,4 @@ TEST(Tensor, ReshapeToMatrix) { Tensor res = ReshapeToMatrix(src, 2); ASSERT_EQ(res.dims()[0], 2 * 3); ASSERT_EQ(res.dims()[1], 4 * 9); -} \ No newline at end of file +} diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9088744beebd25ac105737fe3b012de143c66a7c --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -0,0 +1,544 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MKLDNNConvLayer.h" +#include "paddle/math/MathUtils.h" +#include "paddle/utils/Logging.h" + +using namespace mkldnn; // NOLINT +typedef memory::format format; + +namespace paddle { + +REGISTER_LAYER(mkldnn_conv, MKLDNNConvLayer); + +bool MKLDNNConvLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { + return false; + } + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(config_.shared_biases()) << "Only support shared biases yet"; + + oc_ = config_.num_filters(); + const ConvConfig& conf = config_.inputs(0).conv_conf(); + ic_ = conf.channels(); + fw_ = conf.filter_size(); + fh_ = conf.filter_size_y(); + pw_ = conf.padding(); + ph_ = conf.padding_y(); + dw_ = conf.dilation(); + dh_ = conf.dilation_y(); + sw_ = conf.stride(); + sh_ = conf.stride_y(); + gp_ = conf.groups(); + oh_ = conf.output_y(); + ow_ = conf.output_x(); + ih_ = conf.img_size_y(); + iw_ = conf.img_size(); + caffeMode_ = conf.caffe_mode(); + CHECK(caffeMode_) << "Only support caffe mode yet"; + CHECK(dh_ == 1 && dw_ == 1) << "Only support dilation 1 yet"; + // check group setting + CHECK_EQ((oc_ / gp_) * gp_, oc_) << "group is indivisible for oc"; + CHECK_EQ((ic_ / gp_) * gp_, ic_) << "group is indivisible for ic"; + + // create weight + size_t height = oc_ / gp_; + size_t width = ic_ * fh_ * fw_; + CHECK_EQ(parameters_[0]->getSize(), height * width); + weight_ = + std::unique_ptr(new Weight(height, width, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MKLDNNConvLayer::convertWeightsFromPaddle() { + if (hasInitedWgt_) { + return; + } + + CHECK(wgtVal_) << "should have been initialized"; + // the paddle weight format is oihw or goihw + auto targetDim = wgtVal_->getDims(); + auto srcFmt = (gp_ == 1) ? memory::format::oihw : memory::format::goihw; + wgtVal_->reorderDataFrom(wgtVal_, srcFmt, targetDim); + hasInitedWgt_ = true; +} + +void MKLDNNConvLayer::convertWeightsToPaddle() { + CHECK(wgtVal_) << "should have been initialized"; + auto targetDim = wgtVal_->getDims(); + auto dstFmt = (gp_ == 1) ? 
memory::format::oihw : memory::format::goihw; + wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); +} + +void MKLDNNConvLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); + + // cal output sizes + // oc can not be changed + int fh = (fh_ - 1) * dh_ + 1; + int fw = (fw_ - 1) * dw_ + 1; + oh = outputSize(ih, fh, ph_, sh_, caffeMode_); + ow = outputSize(iw, fw, pw_, sw_, caffeMode_); + + reshapeOutput(oh, ow); + resizeOutput(bs, oc * oh * ow); + + printSizeInfo(); +} + +void MKLDNNConvLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetFwdPD(fwdPD_); + + resetFwdBuffers(fwdPD_, in, wgt, bias, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + + printValueFormatFlow(); +} + +void MKLDNNConvLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr bwdWgtPD; + std::shared_ptr bwdDataPD; + + resetBwdWgtPD(bwdWgtPD); + + resetBwdDataPD(bwdDataPD); + + resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + printGradFormatFlow(); +} + +void MKLDNNConvLayer::updateInputData() { + cpuInVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +} + +void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNConvLayer::loadConvSettings(memory::dims& wgt, + memory::dims& bias, + memory::dims& stride, + memory::dims& dilation, + memory::dims& padL, + memory::dims& padR) { + wgt = (gp_ == 1) ? memory::dims{oc_, ic_, fh_, fw_} + : memory::dims{gp_, oc_ / gp_, ic_ / gp_, fh_, fw_}; + bias = memory::dims{oc_}; + stride = memory::dims{sh_, sw_}; + padL = memory::dims{ph_, pw_}; + padR = getPaddingR(); + // note: mkldnn dilation start from 0 + dilation = memory::dims{dh_ - 1, dw_ - 1}; +} + +void MKLDNNConvLayer::resetFwdPD( + std::shared_ptr& pd) { + // dims for conv + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + + prop_kind pk = passType_ == PASS_TEST ? prop_kind::forward_scoring + : prop_kind::forward_training; + algorithm algo = algorithm::convolution_direct; + padding_kind padKind = padding_kind::zero; + conv_fwd::desc fwdDesc = + biases_ && biases_->getW() + ? 
conv_fwd::desc(pk, + algo, + MKLDNNMatrix::createMemoryDesc(inDims), + MKLDNNMatrix::createMemoryDesc(wgtDims), + MKLDNNMatrix::createMemoryDesc(biasDims), + MKLDNNMatrix::createMemoryDesc(outDims), + strides, + dilations, + padL, + padR, + padKind) + : conv_fwd::desc(pk, + algo, + MKLDNNMatrix::createMemoryDesc(inDims), + MKLDNNMatrix::createMemoryDesc(wgtDims), + MKLDNNMatrix::createMemoryDesc(outDims), + strides, + dilations, + padL, + padR, + padKind); + pd.reset(new conv_fwd::primitive_desc(fwdDesc, engine_)); +} + +void MKLDNNConvLayer::resetFwdBuffers( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(pd); + resetInValue(pd, in); + + resetWgtBiasValue(pd, wgt, bias); + + resetOutValue(pd, out); +} + +void MKLDNNConvLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (cvtInVal_) { + pipeline.push_back(*cvtInVal_); + } + + if (bias) { + fwd_.reset(new conv_fwd(*pd, *in, *wgt, *bias, *out)); + } else { + fwd_.reset(new conv_fwd(*pd, *in, *wgt, *out)); + } + pipeline.push_back(*fwd_); + + if (cvtOutVal_) { + pipeline.push_back(*cvtOutVal_); + } +} + +void MKLDNNConvLayer::resetInValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& in) { + const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; + in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); + + // create a buffer and reorder if the input value does not match + cpuInVal_ = nullptr; + cvtInVal_ = nullptr; + if (inputIsOnlyMKLDNN()) { + MKLDNNMatrixPtr dnnIn = std::dynamic_pointer_cast(inMat); + CHECK(dnnIn) << "Input should be MKLDNNMatrix"; + if (dnnIn->getPrimitiveDesc() != in->getPrimitiveDesc()) { + CHECK_EQ(dnnIn->getFormat(), format::nc); + CHECK(ih_ == 1 && iw_ == 1) << "when input is nc format"; + // create a new one with nchw format and same data + memory::dims inDims = memory::dims{bs_, ic_, 1, 1}; + dnnIn = MKLDNNMatrix::create(inMat, inDims, format::nchw, engine_); + CHECK(dnnIn->getPrimitiveDesc() == in->getPrimitiveDesc()); + } + in = dnnIn; + } else { + const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); + memory::dims inDims = memory::dims{bs_, ic_, ih_, iw_}; + cpuInVal_ = MKLDNNMatrix::create(cpuIn, inDims, format::nchw, engine_); + if (cpuInVal_->getPrimitiveDesc() != in->getPrimitiveDesc()) { + // create new mkldnn matrix + in = MKLDNNMatrix::create(nullptr, pd->src_primitive_desc()); + cvtInVal_ = MKLDNNMatrix::createReorder(cpuInVal_, in); + CHECK(cvtInVal_) << "should not be empty"; + } else { + in = cpuInVal_; + } + } +} + 
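// The reset*Value functions above and below share one buffer-setup idiom;
// the schematic form (a reading aid only, with cpuX_/cvtX_/x standing for
// triples such as cpuInVal_/cvtInVal_/in) is:
//
//   cpuX_ = MKLDNNMatrix::create(cpuMat, dims, format::nchw, engine_);
//   if (cpuX_->getPrimitiveDesc() != x->getPrimitiveDesc()) {
//     x = MKLDNNMatrix::create(nullptr, pd);          // internal buffer
//     cvtX_ = MKLDNNMatrix::createReorder(cpuX_, x);  // conversion primitive
//   } else {
//     x = cpuX_;  // formats already match, so share the buffer
//   }
//
// The cvt* reorders collected this way are what resetFwdPipeline() and
// resetBwdPipeline() push into the pipeline around the conv primitives.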
+void MKLDNNConvLayer::resetWgtBiasValue( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc()); + VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat(); + + bias = (biases_ && biases_->getW()) + ? MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()) + : nullptr; +} + +void MKLDNNConvLayer::resetOutValue( + std::shared_ptr& pd, MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc()); + + // change original output value from cpu matrix to mkldnn matrix + output_.value = std::dynamic_pointer_cast(out); + + // create a reorder if the output value has a CPU device and the pds do not match + cpuOutVal_ = nullptr; + cvtOutVal_ = nullptr; + if (!outputIsOnlyMKLDNN()) { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; + memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; + cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); + if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); + CHECK(cvtOutVal_) << "should not be empty"; + } else { + // CPU output shares the same data as the MKLDNN output + cpuOut->setData(out->getData()); + cpuOutVal_ = out; + } + } +} + +void MKLDNNConvLayer::resetBwdWgtPD( + std::shared_ptr& pd) { + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + + // create backward weight using input, output and weight value memory desc + CHECK(inVal_) << "Should have input value"; + CHECK(outVal_) << "Should have output value"; + CHECK(wgtVal_) << "Should have weight value"; + algorithm algo = algorithm::convolution_direct; + padding_kind padKind = padding_kind::zero; + auto bwdWgtDesc = biasVal_ != nullptr + ? conv_bwdWgt::desc(algo, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + biasVal_->getMemoryDesc(), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padKind) + : conv_bwdWgt::desc(algo, + inVal_->getMemoryDesc(), + wgtVal_->getMemoryDesc(), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padKind); + pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); + CHECK(pd->src_primitive_desc() == inVal_->getPrimitiveDesc()) + << "primitive desc of in value should equal"; + CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad should equal the out value"; + CHECK(pd->diff_weights_primitive_desc() == wgtVal_->getPrimitiveDesc()) + << "primitive desc of weight grad should equal the weight value"; +} + +void MKLDNNConvLayer::resetBwdDataPD( + std::shared_ptr& pd) { + pd = nullptr; + if (inputLayers_[0]->getOutput().grad == nullptr) { + return; + } + + memory::dims wgtDims, biasDims, strides, dilations, padL, padR; + loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); + CHECK(inVal_) << "Should have input value"; + CHECK(outVal_) << "Should have output value"; + // create backward data using input and output value memory desc + // but using weight memory desc with any format + auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, + inVal_->getMemoryDesc(), + MKLDNNMatrix::createMemoryDesc(wgtDims), + outVal_->getMemoryDesc(), + strides, + padL, + padR, + padding_kind::zero); + pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); + CHECK(pd->diff_src_primitive_desc() == inVal_->getPrimitiveDesc()) + << "primitive desc of in grad should equal the in value"; + CHECK(pd->diff_dst_primitive_desc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad should equal"; +} + 
+void MKLDNNConvLayer::resetBwdBuffers( + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(wgtPD); + resetOutGrad(wgtPD, out); + + resetWgtBiasGrad(wgtPD, wgt, bias); + + resetInGrad(dataPD, in); + + resetWgtValBwdData(dataPD, wgtValBwdData_); +} + +void MKLDNNConvLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (cvtOutGrad_) { + pipeline.push_back(*cvtOutGrad_); + } + + // add the bwdWgt handle + if (bias) { + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)); + } else { + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt)); + } + pipeline.push_back(*bwdWgt_); + + if (dataPD == nullptr) { + return; + } + + if (cvtWgtVal_) { + pipeline.push_back(*cvtWgtVal_); + } + + // add the bwdData handle + CHECK(wgtValBwdData_) << "Should have weight memory"; + bwdData_.reset(new conv_bwdData(*dataPD, *out, *wgtValBwdData_, *in)); + pipeline.push_back(*bwdData_); + + if (cvtInGrad_) { + pipeline.push_back(*cvtInGrad_); + } +} + +void MKLDNNConvLayer::resetOutGrad( + std::shared_ptr& wgtPD, MKLDNNMatrixPtr& out) { + const MatrixPtr& outMat = output_.grad; + out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc()); + CHECK(outVal_ != nullptr && + out->getPrimitiveDesc() == outVal_->getPrimitiveDesc()) + << "primitive desc of out grad and value should be equal"; + + // TODO(TJ): merge outgrad + // create a reorder if the output grad does not match + cpuOutGrad_ = nullptr; + cvtOutGrad_ = nullptr; + if (!outputIsOnlyMKLDNN()) { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; + // same PrimitiveDesc as cpuOutVal_ + CHECK(cpuOutVal_); + cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); + if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) { + outMat->setData(cpuOut->getData()); + out = cpuOutGrad_; + } else { + cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); + CHECK(cvtOutGrad_); + } + } +} + +void MKLDNNConvLayer::resetWgtBiasGrad( + std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + wgt = MKLDNNMatrix::create(weight_->getWGrad(), + wgtPD->diff_weights_primitive_desc()); + CHECK(nullptr != wgtVal_ && + wgt->getPrimitiveDesc() == wgtVal_->getPrimitiveDesc()) + << "primitive desc of weight grad and value should be equal"; + VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat(); + + bias = nullptr; + if (biasVal_ == nullptr) { + return; + } + bias = MKLDNNMatrix::create(biases_->getWGrad(), + wgtPD->diff_bias_primitive_desc()); + CHECK(bias->getPrimitiveDesc() == biasVal_->getPrimitiveDesc()) + << "primitive desc of bias grad should equal the bias value"; +} + 
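// A note on ordering in the backward pass: resetBwdBuffers() resets the
// output grad first because both the weight-gradient and the data-gradient
// primitives consume it; resetInGrad() below then allocates an input-gradient
// buffer only when a backward-data primitive_desc exists. This mirrors the
// cvtOutGrad_ -> bwdWgt_ -> cvtWgtVal_ -> bwdData_ -> cvtInGrad_ order that
// resetBwdPipeline() assembles.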
+void MKLDNNConvLayer::resetInGrad( + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in) { + if (dataPD == nullptr) { + return; + } + + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done + in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad, + dataPD->diff_src_primitive_desc()); + CHECK(nullptr != inVal_ && + in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) + << "primitive desc of input grad and value should be equal"; + + // create a reorder if the input grad does not match + cpuInGrad_ = nullptr; + cvtInGrad_ = nullptr; + if (!inputIsOnlyMKLDNN()) { + const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); + // same PrimitiveDesc as cpuInVal_ + CHECK(cpuInVal_); + cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc()); + if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) { + const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE); + in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc()); + cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_); + CHECK(cvtInGrad_); + } else { + in = cpuInGrad_; + } + } +} + +void MKLDNNConvLayer::resetWgtValBwdData( + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt) { + if (dataPD == nullptr) { + return; + } + + // create a new weight value for backward data, and create a reorder if + // necessary, since the primitive_desc would be different from wgtVal_ + CHECK(wgtVal_) << "should have weight value"; + if (dataPD->weights_primitive_desc() != wgtVal_->getPrimitiveDesc()) { + wgtValBwdData_ = + MKLDNNMatrix::create(nullptr, dataPD->weights_primitive_desc()); + cvtWgtVal_ = MKLDNNMatrix::createReorder(wgtVal_, wgtValBwdData_); + CHECK(cvtWgtVal_); + } else { + wgtValBwdData_ = wgtVal_; + } + VLOG(MKLDNN_FMTS) << "weight value format for backward data: " + << wgtValBwdData_->getFormat(); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..f84f2f737c47a1b8adc2b83360a0396ffbc6ae24 --- /dev/null +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -0,0 +1,253 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MKLDNNLayer.h" +#include "mkldnn.hpp" + +namespace paddle { +typedef mkldnn::convolution_forward conv_fwd; +typedef mkldnn::convolution_backward_weights conv_bwdWgt; +typedef mkldnn::convolution_backward_data conv_bwdData; + +/** + * @brief A subclass of MKLDNNLayer for the conv layer. + * + * The config file api is mkldnn_conv + */ +class MKLDNNConvLayer : public MKLDNNLayer { +protected: + // padding height and width + int ph_, pw_; + // stride height and width + int sh_, sw_; + // dilation height and width + int dh_, dw_; + // filter (kernel) height and width + int fh_, fw_; + // group number + int gp_; + + // in resetBwdData, the format of wgtValBwdData_ is different from wgtVal_ + MKLDNNMatrixPtr wgtValBwdData_; + // convert handle from wgtVal_ to wgtValBwdData_ + std::shared_ptr cvtWgtVal_; + + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + + // MKLDNNMatrixPtr which should be created from CPU Device + MKLDNNMatrixPtr cpuInVal_; + MKLDNNMatrixPtr cpuInGrad_; + MKLDNNMatrixPtr cpuOutVal_; + MKLDNNMatrixPtr cpuOutGrad_; + // convert handle between CPU device and MKLDNN device + std::shared_ptr cvtInVal_; + std::shared_ptr cvtInGrad_; + std::shared_ptr cvtOutVal_; + std::shared_ptr cvtOutGrad_; + + // whether the weight has been init + bool hasInitedWgt_; + + // true by default, which impacts the calculation of output image size.
+ // details can refer to mathUtil.h + bool caffeMode_; + + // weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + +public: + explicit MKLDNNConvLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), caffeMode_(true) {} + + ~MKLDNNConvLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; + + void updateInputData() override; + + void updateWeights(const UpdateCallback& callback) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; + + void printSizeInfo() override { + MKLDNNLayer::printSizeInfo(); + VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_ + << ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_ + << ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_; + } + + void printValueFormatFlow() override { + if (cpuInVal_) { + VLOG(MKLDNN_FMTS) << cpuInVal_->getFormat() << " >>>"; + } + MKLDNNLayer::printValueFormatFlow(); + if (cpuOutVal_) { + VLOG(MKLDNN_FMTS) << " >>> " << cpuOutVal_->getFormat(); + } + } + + void printGradFormatFlow() override { + if (cpuInGrad_) { + VLOG(MKLDNN_FMTS) << cpuInGrad_->getFormat() << " <<<"; + } + MKLDNNLayer::printGradFormatFlow(); + if (cpuOutGrad_) { + VLOG(MKLDNN_FMTS) << " <<< " << cpuOutGrad_->getFormat(); + } + } + +protected: + /** + * load the dims settings of this conv + */ + void loadConvSettings(mkldnn::memory::dims& wgt, + mkldnn::memory::dims& bias, + mkldnn::memory::dims& stride, + mkldnn::memory::dims& dilation, + mkldnn::memory::dims& padL, + mkldnn::memory::dims& padR); + + /** + * reset the forward primitive descriptor. + */ + void resetFwdPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in forward. + */ + void resetFwdBuffers(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the forward pipeline. + */ + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of input value + */ + void resetInValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight and bias value + */ + void resetWgtBiasValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of output value + */ + void resetOutValue(std::shared_ptr& pd, + MKLDNNMatrixPtr& out); + + /** + * reset the backward weight primitive descriptor. + */ + void resetBwdWgtPD(std::shared_ptr& pd); + /** + * reset the backward data primitive descriptor. + */ + void resetBwdDataPD(std::shared_ptr& pd); + /** + * reset the MKLDNNMatrix buffers used in backward. + */ + void resetBwdBuffers(std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + /** + * reset the backward pipeline. 
+ */ + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& wgtPD, + std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * reset MKLDNNMatrix of output grad + */ + void resetOutGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& out); + /** + * reset MKLDNNMatrix of weight and bias grad + */ + void resetWgtBiasGrad(std::shared_ptr& wgtPD, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias); + /** + * reset MKLDNNMatrix of input grad + */ + void resetInGrad(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& in); + /** + * reset MKLDNNMatrix of weight value for backward data + * since the primitive_desc would be different with wgtVal_ + */ + void resetWgtValBwdData(std::shared_ptr& dataPD, + MKLDNNMatrixPtr& wgt); + + /** + * get padding_r according to + * https://github.com/01org/mkl-dnn/blob/master/tests/gtests/ + * test_convolution_forward_common.hpp + * @note: mkldnn dilation start from 0 while paddle start from 1 + */ + mkldnn::memory::dims getPaddingR() const { + mkldnn::memory::dims padR = {ph_, pw_}; + for (int i = 0; i < 2; ++i) { + if ((ih_ - ((fh_ - 1) * dh_ + 1) + ph_ + padR[0]) / sh_ + 1 != oh_) { + ++padR[0]; + } + if ((iw_ - ((fw_ - 1) * dw_ + 1) + pw_ + padR[1]) / sw_ + 1 != ow_) { + ++padR[1]; + } + } + return padR; + } +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index f70343251ad4fbb99f9614618f6d1bff1174f15e..f60e221a6ec2ff513789a24e9f59bb25aef437b5 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -17,9 +17,6 @@ limitations under the License. */ using namespace mkldnn; // NOLINT typedef memory::format format; -typedef inner_product_forward fc_fwd; -typedef inner_product_backward_weights fc_bwdWgt; -typedef inner_product_backward_data fc_bwdData; namespace paddle { @@ -93,35 +90,88 @@ void MKLDNNFcLayer::reshape( printSizeInfo(); } -void MKLDNNFcLayer::resetFwd(std::vector& pipeline, +void MKLDNNFcLayer::resetFwd(std::vector& pipeline, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); - bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& wgtVal = weight_->getW(); - const MatrixPtr& biasVal = hasBias ? 
biases_->getW() : nullptr; - const MatrixPtr& outVal = output_.value; + resetFwdBuffers(in, wgt, bias, out); + + resetFwdPD(fwdPD_, in, wgt, bias, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + + printValueFormatFlow(); +} + +void MKLDNNFcLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr bwdWgtPD; + std::shared_ptr bwdDataPD; + + resetBwdBuffers(in, wgt, bias, out); + + resetBwdWgtPD(bwdWgtPD, wgt, bias, out); + + resetBwdDataPD(bwdDataPD, in, out); + + resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + printGradFormatFlow(); +} + +void MKLDNNFcLayer::updateInputData() { + inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +} +void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetInValue(in); + + resetWgtBiasValue(wgt, bias); + + resetOutValue(out); +} + +void MKLDNNFcLayer::resetInValue(MKLDNNMatrixPtr& in) { if (inputIsOnlyMKLDNN()) { - const MatrixPtr& inVal = getInputValue(0); - in = std::dynamic_pointer_cast(inVal); + const MatrixPtr& dnnIn = getInputValue(0); + in = std::dynamic_pointer_cast(dnnIn); CHECK(in) << "Input should be MKLDNNMatrix"; } else { CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; - const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE); + const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); in = MKLDNNMatrix::create( - inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); + cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_); } in->downSpatial(); +} + +void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { wgt = MKLDNNMatrix::create( - wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + weight_->getW(), {oc_, ic_, ih_, iw_}, format::oihw, engine_); wgt->downSpatial(); - bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_) - : nullptr; - out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_); + bias = (biases_ && biases_->getW()) + ? MKLDNNMatrix::create(biases_->getW(), {oc_}, format::x, engine_) + : nullptr; +} + +void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(out); if (!outputIsOnlyMKLDNN()) { @@ -129,46 +179,59 @@ void MKLDNNFcLayer::resetFwd(std::vector& pipeline, // just share point getOutput(CPU_DEVICE).value->setData(output_.value->getData()); } +} - // create forward handle +void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr bias, + MKLDNNMatrixPtr out) { + CHECK(in); + CHECK(wgt); + CHECK(out); prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = hasBias ? 
fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (hasBias) { - fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out)); + fc_fwd::desc fwdDesc = bias != nullptr ? fc_fwd::desc(pk, + in->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_fwd::desc(pk, + in->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); + pd.reset(new fc_fwd::primitive_desc(fwdDesc, engine_)); +} + +void MKLDNNFcLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (bias) { + fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); } else { - fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out)); + fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out)); } - printValueFormatFlow(); pipeline.push_back(*fwd_); } -void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - pipeline.clear(); - if (!needResetBwd_) { - return; - } - needResetBwd_ = false; - bool hasBias = biases_ && biases_->getWGrad(); +void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetOutGrad(out); + + resetWgtBiasGrad(wgt, bias); - /// backward weight - CHECK(inVal_) << "Should have input value"; - const MatrixPtr& wgtGrad = weight_->getWGrad(); - const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr; + resetInGrad(in); +} +void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) { // TODO(TJ): merge outgrad int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; // for MKLDNN device: @@ -178,66 +241,88 @@ void MKLDNNFcLayer::resetBwd(std::vector& pipeline, // for CPU device: // fc do not need to convert from cpu device since output is always nc format // only need create from cpu device - const MatrixPtr& outGrad = getOutput(device).grad; - out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc()); - wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc()); - bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc()) - : nullptr; - - // create memory primitive desc - fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = hasBias - ? 
fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_bwdWgt::primitive_desc bwdWgtPD = - fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - - if (hasBias) { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias)); - } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt)); + CHECK(outVal_); + out = + MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc()); +} + +void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + CHECK(wgtVal_); + wgt = MKLDNNMatrix::create(weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); + + bias = nullptr; + if (biasVal_ == nullptr) { + return; } - pipeline.push_back(*bwdWgt_); + bias = + MKLDNNMatrix::create(biases_->getWGrad(), biasVal_->getPrimitiveDesc()); +} - /// backward data +void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) { + in = nullptr; const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inGrad == nullptr) { return; } - if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done - } else { - in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); - } - - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( - inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc()); - fc_bwdData::primitive_desc bwdDataPD = - fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done + CHECK(inVal_); + in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); +} - CHECK(wgtVal_) << "Should have weight memory"; - bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in)); - printGradFormatFlow(); - pipeline.push_back(*bwdData_); +void MKLDNNFcLayer::resetBwdWgtPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(inVal_); + fc_bwdWgt::desc bwdWgtDesc = bias ? 
fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); + pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); } -void MKLDNNFcLayer::updateInputData() { - inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +void MKLDNNFcLayer::resetBwdDataPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& out) { + pd = nullptr; + if (in == nullptr) { + return; + } + CHECK(wgtVal_); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( + in->getMemoryDesc(), wgtVal_->getMemoryDesc(), out->getMemoryDesc()); + pd.reset(new fc_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); } -void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); +void MKLDNNFcLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& bwdWgtPD, + std::shared_ptr& bwdDataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + CHECK(inVal_); + if (bias) { + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); + } else { + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt)); + } + pipeline.push_back(*bwdWgt_); + + if (bwdDataPD == nullptr) { + return; } + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(*bwdDataPD, *out, *wgtVal_, *in)); + pipeline.push_back(*bwdData_); } + } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 3119f863496df092da13c08bf733f13c42e53780..c76878aafab7e986d2bf478eaba02f2f0aced293 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -18,6 +18,9 @@ limitations under the License. */ #include "mkldnn.hpp" namespace paddle { +typedef mkldnn::inner_product_forward fc_fwd; +typedef mkldnn::inner_product_backward_weights fc_bwdWgt; +typedef mkldnn::inner_product_backward_data fc_bwdData; /** * @brief A subclass of MKLDNNLayer fc layer. @@ -32,6 +35,9 @@ protected: // if has already init the weight bool hasInitedWgt_; + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; @@ -67,6 +73,59 @@ public: void convertWeightsFromPaddle() override; void convertWeightsToPaddle() override; + +protected: + /** + * Forward functions: reset buffers(input, output, weight and bias), + * reset primitive descriptor, + * reset pipeline. + */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetInValue(MKLDNNMatrixPtr& in); + void resetWgtBiasValue(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias); + void resetOutValue(MKLDNNMatrixPtr& out); + void resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr bias, + MKLDNNMatrixPtr out); + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * Backward functions: reset buffers(input, output, weight and bias), + * reset primitive descriptor for backward weight, + * reset primitive descriptor for backward data, + * reset pipeline. 
+ */ + void resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetOutGrad(MKLDNNMatrixPtr& out); + void resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias); + void resetInGrad(MKLDNNMatrixPtr& in); + void resetBwdWgtPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetBwdDataPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& out); + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& bwdWgtPD, + std::shared_ptr& bwdDataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); }; } // namespace paddle diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index e1d2270df24331914f3a51acc90a518084b3ce4e..e70802881e3f22160a87b7a4babda07ffbcf9d6f 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "MKLDNNTester.h" #include "ModelConfig.pb.h" +#include "paddle/math/MathUtils.h" using namespace paddle; // NOLINT @@ -63,6 +64,83 @@ TEST(MKLDNNLayer, FcLayer) { testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); } +struct testConvDesc { + int bs, gp; + int ic, ih, iw; + int oc, oh, ow; + int fh, fw; + int ph, pw; + int sh, sw; + int dh, dw; +}; + +void testConvLayer(const testConvDesc& pm) { + const std::string compareTypes[] = {"mkldnn_conv", "exconv"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_num_filters(pm.oc); + cfg.layerConfig.set_size(pm.oc * pm.oh * pm.ow); + // cfg.layerConfig.set_partial_sum(1); // TODO: check it + cfg.layerConfig.set_shared_biases(true); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.fh * pm.fw / pm.gp)}); + LayerInputConfig* input = cfg.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_groups(pm.gp); + conv->set_img_size(pm.iw); + conv->set_img_size_y(pm.ih); + conv->set_output_x(pm.ow); + conv->set_output_y(pm.oh); + conv->set_filter_size(pm.fw); + conv->set_filter_size_y(pm.fh); + conv->set_channels(pm.ic); + conv->set_padding(pm.pw); + conv->set_padding_y(pm.ph); + conv->set_stride(pm.sw); + conv->set_stride_y(pm.sh); + conv->set_dilation(pm.dw); + conv->set_dilation_y(pm.dh); + conv->set_caffe_mode(true); + conv->set_filter_channels(conv->channels() / conv->groups()); + CHECK_EQ(conv->filter_channels() * pm.gp, conv->channels()) + << "it is indivisible"; + + int fh = (pm.fh - 1) * pm.dh + 1; + int fw = (pm.fw - 1) * pm.dw + 1; + int ow = outputSize(pm.iw, fw, pm.pw, pm.sw, true); + int oh = outputSize(pm.ih, fh, pm.ph, pm.sh, true); + CHECK_EQ(ow, pm.ow) << "output size check failed"; + CHECK_EQ(oh, pm.oh) << "output size check failed"; + + MKLDNNTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MKLDNNLayer, ConvLayer) { + /* bs, gp, ic, ih, iw, oc, oh, ow, fh, fw, ph, pw, sh, sw, dh, dw */ + testConvLayer({2, 1, 3, 32, 32, 16, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({2, 1, 8, 16, 16, 8, 16, 16, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({3, 1, 16, 32, 32, 3, 32, 32, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({8, 1, 
16, 18, 18, 32, 18, 18, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({16, 1, 1, 42, 31, 32, 23, 11, 4, 5, 3, 2, 2, 3, 1, 1}); + testConvLayer({2, 1, 8, 16, 16, 8, 8, 8, 3, 3, 1, 1, 2, 2, 1, 1}); + testConvLayer({3, 1, 8, 13, 13, 8, 7, 7, 3, 3, 1, 1, 2, 2, 1, 1}); + // with groups + testConvLayer({2, 2, 4, 5, 5, 8, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({2, 3, 3, 5, 5, 3, 5, 5, 3, 3, 1, 1, 1, 1, 1, 1}); + testConvLayer({4, 4, 16, 3, 3, 16, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1}); +} + // TODO(TJ): add branch test int main(int argc, char** argv) { diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index c4063e5069854242d9f93886b66580385557ca73..0778bb63b7b3bca9b3d2647ca43dad72d783950a 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -49,6 +49,27 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, return create(m, memory::primitive_desc(memory::desc(dims, dtype, fmt), eg)); } +std::shared_ptr MKLDNNMatrix::createReorder(const MKLDNNMatrixPtr& src, + const MKLDNNMatrixPtr& dst, + bool checkData) { + if (src == dst || src->getPrimitiveDesc() == dst->getPrimitiveDesc()) { + return nullptr; + } + + if (checkData && (src->getData() == dst->getData())) { + LOG(FATAL) << "can not create reorder with inplace data"; + return nullptr; + } + + memory::dims srcDims = src->getDims(); + memory::dims dstDims = dst->getDims(); + CHECK_EQ(srcDims.size(), dstDims.size()); + for (size_t i = 0; i < srcDims.size(); ++i) { + CHECK_EQ(srcDims[i], dstDims[i]); + } + return std::make_shared(*src, *dst); +} + void MKLDNNMatrix::reorderDataFrom(const MKLDNNMatrixPtr& m, memory::format srcFmt, memory::dims targetDim) { diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index eef3b429e6fa0087aeac3f5aed9dff983b06e826..c843115eb9a5be50d6ff873f1510844228c9d89f 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -52,6 +52,32 @@ public: mkldnn::engine& eg, mkldnn::memory::data_type dtype = mkldnn::memory::data_type::f32); + /** + * Create Memory descriptor. + * default with any format and f32 dtype + */ + static mkldnn::memory::desc createMemoryDesc( + const mkldnn::memory::dims& dims, + const mkldnn::memory::format& fmt = mkldnn::memory::format::any, + const mkldnn::memory::data_type& dtype = mkldnn::memory::data_type::f32) { + return mkldnn::memory::desc(dims, dtype, fmt); + } + + /** + * Create reorder primitive. + * Create a mkldnn::reorder handle for converting src MKLDNNMatrix to dst. + * checkData: whether to check the data handle of src and dst. + * if true, it will check the data and do not allow them equal; + * otherwise, it will not check them, then the reorder created + * may have inplace buffer. + * Do not set false, if you can not guarantee the inplace logical + * would work with your reorder. + */ + static std::shared_ptr createReorder( + const MKLDNNMatrixPtr& src, + const MKLDNNMatrixPtr& dst, + bool checkData = true); + public: /** * Reorder this MKLDNNMatrix from other format. 
diff --git a/paddle/memory/memcpy.cc b/paddle/memory/memcpy.cc index a19a3e3675e3e2e7cc0c3594f21191f932d6379f..19ec9ba9b26f5919796181a19a048b7edb508bdd 100644 --- a/paddle/memory/memcpy.cc +++ b/paddle/memory/memcpy.cc @@ -62,6 +62,24 @@ void Copy(platform::GPUPlace dst_place, } } +template <> +void Copy(platform::CPUPlace dst_place, + void* dst, + platform::GPUPlace src_place, + const void* src, size_t num) { + platform::SetDeviceId(src_place.device); + platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost); +} + +template <> +void Copy(platform::GPUPlace dst_place, + void* dst, + platform::CPUPlace src_place, + const void* src, size_t num) { + platform::SetDeviceId(dst_place.device); + platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice); +} + #endif // PADDLE_ONLY_CPU } // namespace memory diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f9ea25ab045a02be5ab9ed81ef9c679126d3a188..e3e934bcccd1a5f34d88a2f33f3708a46ddabe05 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -1,5 +1,7 @@ file(GLOB GENERAL_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") string(REPLACE ".cc" "" GENERAL_OPS "${GENERAL_OPS}") +set(pybind_file ${PADDLE_SOURCE_DIR}/paddle/pybind/pybind.h) +file(WRITE ${pybind_file} "// Generated by the paddle/operator/CMakeLists.txt. DO NOT EDIT!\n\n") function(op_library TARGET) # op_library is a function to create op library. The interface is same as # cc_library. But it handle split GPU/CPU code and link some common library @@ -7,10 +9,11 @@ function(op_library TARGET) set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE) set(cc_srcs) set(cu_srcs) - set(op_common_deps operator op_registry) + set(op_common_deps operator op_registry math_function) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) + set(pybind_flag 0) cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -46,22 +49,42 @@ function(op_library TARGET) cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) endif() + + # net_op doesn't need pybind + if ("${TARGET}" STREQUAL "net_op") + set(pybind_flag 1) + endif() + + # pybind USE_NO_KERNEL_OP + file(READ ${TARGET}.cc TARGET_CONTENT) + string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") + string(REPLACE "_op" "" TARGET "${TARGET}") + if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") + file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") + set(pybind_flag 1) + endif() + + # pybind USE_CPU_ONLY_OP + list(LENGTH cu_srcs cu_srcs_len) + if (${pybind_flag} EQUAL 0 AND ${cu_srcs_len} EQUAL 0) + file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") + set(pybind_flag 1) + endif() + + # pybind USE_OP + if (${pybind_flag} EQUAL 0) + file(APPEND ${pybind_file} "USE_OP(${TARGET});\n") + endif() endfunction() add_subdirectory(math) set(DEPS_OPS - identity_op - minus_op - mul_op recurrent_op - scale_op) -op_library(identity_op DEPS scale_op) -op_library(minus_op DEPS scale_op) -op_library(mul_op DEPS math_function) + cond_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS framework_proto tensor operator net_op) -op_library(scale_op DEPS net_op) + DEPS framework_proto tensor net_op) +op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index 
9ca04d402879b6a955d849a32175194df82b65c8..0c813748b2989a8f0c00a359345747242dd21dd8 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -23,10 +23,15 @@ class AccuracyOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Inference"), - "Input of Inference must be initialized."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("Inference"), + "Input(Inference) of AccuracyOp should not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), - "Input of Inference must be initialized."); + "Input(Label) of AccuracyOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Accuracy"), + "Output(Accuracy) of AccuracyOp should not be null."); + auto *inference = ctx.Input("Inference"); auto *label = ctx.Input("Label"); @@ -34,7 +39,7 @@ class AccuracyOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(inference->dims()[0], label->dims()[0], "inference size must be the same as label size"); - ctx.Output("Accuracy")->Resize({1}); + ctx.Output("Accuracy")->Resize({1}); } }; diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index 8dbd47cf0dfbc265032a9966343eed5c7bd8692e..e83c1efeaf897889d18a37a6bd2ca2f8f012db25 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -23,10 +23,18 @@ class AddOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of AddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of AddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of AddOp should not be null."); + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), ctx.Input("Y")->dims(), "Two input of Add Op's dimension must be same."); - ctx.Output("Out")->Resize(ctx.Input("X")->dims()); + ctx.Output("Out")->Resize( + ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc index 0ebefbab26ec8fdf316f852fbb7f6d9f3bbc48eb..223bb0ffe6e75ce71919eb5f4cca06bedbb00764 100644 --- a/paddle/operators/concat_op.cc +++ b/paddle/operators/concat_op.cc @@ -25,8 +25,11 @@ class ConcatOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ConcatOp should not be null."); + auto ins = ctx.MultiInput("X"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); size_t axis = static_cast(ctx.Attr("axis")); size_t n = ins.size(); diff --git a/paddle/operators/concat_op.cu b/paddle/operators/concat_op.cu deleted file mode 100644 index 38fee7473dbb2ba97fe95b6632db7a1749cf3bbe..0000000000000000000000000000000000000000 --- a/paddle/operators/concat_op.cu +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#define EIGEN_USE_GPU -#include "paddle/operators/concat_op.h" - -namespace ops = paddle::operators; -// TODO(Yancey1989) Add GPU kernel diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8262a7a5c8c13c86c5f6c123a14fa89696358c57 --- /dev/null +++ b/paddle/operators/cond_op.cc @@ -0,0 +1,229 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/cond_op.h" + +#include +#include + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/gather.h" +#include "paddle/operators/net_op.h" +#include "paddle/operators/scatter.h" + +namespace paddle { +namespace operators { + +using Scope = framework::Scope; +using Variable = framework::Variable; +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DDim = framework::DDim; + +void CondOp::CreateScope(const Scope& scope) const { + auto sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + auto sub_scopes = sub_scopes_var->GetMutable>(); + auto& sub_scope = scope.NewScope(); + sub_scopes->push_back(&sub_scope); +} + +void CondOp::CreateIndexTensor(const Scope& scope) const { + auto index_tensors_var = scope.FindVar("IndexTensors"); + PADDLE_ENFORCE_NOT_NULL(index_tensors_var, + "Output(IndexTensors) of CondOp should not be null."); + auto& index_tensors = + *index_tensors_var->GetMutable>(); + index_tensors.push_back(LoDTensor()); +} + +void CondOp::InferShape(const Scope& scope) const { + auto sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + auto& sub_scopes = *sub_scopes_var->GetMutable>(); + + for (int i = 0; i < 2; ++i) { + // Create two sub scopes for true and false branches + // sub_scopes[0] for the true branch and sub_scopes[1] for the false + // branch + CreateScope(scope); + + // Create two tensors for true and false indices + // index_tensors[0] for the true branch and index_tensors[1] for the false + // branch + CreateIndexTensor(scope); + + PADDLE_ENFORCE(!Inputs("Xs").empty(), + "Inputs(Xs) of CondOp can't be empty."); + for (auto& input : Inputs("Xs")) { + // Create a new tensor in sub-scope for input-type tensor + Variable* v = sub_scopes[i]->NewVar(input); + LoDTensor* sub_input = v->GetMutable(); + sub_input->Resize(scope.FindVar(input)->GetMutable()->dims()); + } + + for (auto& output : (*sub_net_op_[i]).Outputs()) { + for (auto& var_name : output.second) { + sub_scopes[i]->NewVar(var_name); + } + } + + // each net calls InferShape + sub_net_op_[i]->InferShape(*sub_scopes[i]); + } + + for (auto& output : Outputs("Outs")) { + LoDTensor* tensor_t_out = + sub_scopes[0]->FindVar(output)->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output 
should not be NULL"); + LoDTensor* tensor_f_out = + sub_scopes[1]->FindVar(output)->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL"); + + auto* tensor_out_var = scope.FindVar(output); + PADDLE_ENFORCE_NOT_NULL(tensor_out_var, "Output not found"); + LoDTensor* tensor_out = tensor_out_var->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_out, + "Output tensor should not be NULL"); + + // check that the true/false outputs have the same shape + PADDLE_ENFORCE_EQ(tensor_t_out->dims(), tensor_f_out->dims(), + "Outputs not of the same shape"); + tensor_out->Resize(tensor_t_out->dims()); + // tensor_out->mutable_data(tensor_out->dims(), + // platform::CPUPlace()); + tensor_out->mutable_data(platform::CPUPlace()); + } +} + +void CondOp::Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const { + auto* sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + auto sub_scopes = sub_scopes_var->Get>(); + auto* index_tensors_var = scope.FindVar("IndexTensors"); + PADDLE_ENFORCE_NOT_NULL(index_tensors_var, + "Output(IndexTensors) of CondOp should not be null."); + auto index_tensors = index_tensors_var->Get>(); + + std::string cond_name = Input("Cond"); + Variable* cond_var = scope.FindVar(cond_name); + PADDLE_ENFORCE_NOT_NULL(cond_var, + "Input(Cond) of CondOp should not be null."); + const LoDTensor* cond = cond_var->GetMutable(); + + // Step 1: get the true/false indices at runtime + // index_[0]: vector, contains all indices for cond[i] == true + // index_[1]: vector, contains all indices for cond[i] == false + for (int i = 0; i < 2; ++i) index_[i].clear(); + + const int* cond_data = cond->data(); + for (int i = 0; i < cond->dims()[0]; ++i) { + if (cond_data[i]) + index_[0].push_back(i); + else + index_[1].push_back(i); + } + + // put index_[0] and index_[1] into two tensors: + // index_tensor_[0] and index_tensor_[1] + DDim dim = paddle::framework::make_ddim({0}); + for (int i = 0; i < 2; ++i) { + dim[0] = index_[i].size(); + int* tmp_ptr = + index_tensors[i].mutable_data(dim, platform::CPUPlace()); + index_tensors[i].Resize(dim); + memcpy(tmp_ptr, index_[i].data(), dim[0] * sizeof(int)); + } + + // Step 2: collect data by calling gather + for (int i = 0; i < 2; ++i) { + // i = 0/1 for the true and false branches respectively + for (auto& input : Inputs("Xs")) { + // find Tensor + Variable* v = scope.FindVar(input); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_parent = v->GetMutable(); + + v = sub_scopes[i]->FindVar(input); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_child = v->GetMutable(); + + // Resize child + DDim dim = tensor_child->dims(); + dim[0] = index_[i].size(); + tensor_child->Resize(dim); + tensor_child->mutable_data(dim, platform::CPUPlace()); + + Gather(dev_ctx.GetPlace(), tensor_parent, &index_tensors[i], + tensor_child); + } + } + + // Step 3: run + for (int i = 0; i < 2; ++i) { + sub_net_op_[i]->Run(*sub_scopes[i], dev_ctx); + } + + // Step 4: merge output results + PADDLE_ENFORCE(!Outputs("Outs").empty(), + "Outputs(Outs) of CondOp can't be empty."); + for (int i = 0; i < 2; ++i) { + // i = 0/1 for the true and false branches respectively + for (auto& output : Outputs("Outs")) { + // find Tensor + Variable* v = scope.FindVar(output); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_parent = v->GetMutable(); + + v = sub_scopes[i]->FindVar(output); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_child = v->GetMutable(); + + ScatterUpdate(dev_ctx.GetPlace(), 
tensor_child, &index_tensors[i], + tensor_parent); + } + } +} + +class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker { + public: + CondOpProtoAndCheckerMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Cond", "The condition, which is a bool vector"); + AddInput("Xs", "Inputs of Subnets").AsDuplicable(); + AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable(); + + AddOutput("SubScopes", "sub scopes for true and false branches"); + AddOutput("IndexTensors", "Index tensors containing the instance indices for true/false"); + + AddComment(R"DOC( +Sample-dependent Cond Operator: +Given Cond[i] as a 1/0 vector indicating true/false, +the equation is: +Out[i] = subnet_t[i], if Cond[i] == true +Out[i] = subnet_f[i], if Cond[i] == false +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp, + paddle::operators::CondOpProtoAndCheckerMaker); diff --git a/paddle/operators/cond_op.h b/paddle/operators/cond_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b09e32331e66c53555c88c06d7b1456276050eaa --- /dev/null +++ b/paddle/operators/cond_op.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "glog/logging.h" +#include "paddle/framework/ddim.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/tensor.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +/* + * @brief CondOp is a dynamic if-else Operator + * + * It has an input tensor named cond indicating which netop each instance will + * run. + * + * if cond == 1, it will run true_net, which is a NetOp. + * + * if cond == 0, it will run false_net, which is another NetOp. + */ +class CondOp : public framework::OperatorBase { + public: + CondOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) { + index_.resize(2); + sub_net_op_.resize(2); + } + + CondOp(const CondOp& o) + : framework::OperatorBase( + static_cast(o)) { + // TODO(yuyang18): Implement copy ctor well. + PADDLE_THROW("Not implemented"); + } + + void CreateScope(const framework::Scope& scope) const; + + void CreateIndexTensor(const framework::Scope& scope) const; + + /* + * InferShape must be called before Run. 
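+ * + * A typical call sequence is sketched below (the calls exist in this + * header; the variable names are illustrative only): + * CondOp op(type, inputs, outputs, attrs); + * op.set_truenet(std::move(t_net)); + * op.set_falsenet(std::move(f_net)); + * op.InferShape(scope); // creates the sub-scopes and index tensors + * op.Run(scope, dev_ctx);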
+ */ + void InferShape(const framework::Scope& scope) const override; + + /* + * Set True Block + */ + void set_truenet(std::unique_ptr&& net) { + sub_net_op_[0] = std::move(net); + } + + /* + * Set False Block + */ + void set_falsenet(std::unique_ptr&& net) { + sub_net_op_[1] = std::move(net); + } + + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override; + + private: + // sub_net_op_[0]: subnet_t + // sub_net_op_[1]: subnet_f + std::vector> sub_net_op_; + + // index_[0]: True_index; + // index_[1]: False_index; + mutable std::vector> index_; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc index c033af3b741ae26ad9d37b2164f87aa6e8651c6e..72c446493684246959656dc048e7f0e761665423 100644 --- a/paddle/operators/cos_sim_op.cc +++ b/paddle/operators/cos_sim_op.cc @@ -25,16 +25,38 @@ class CosSimOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null."); - PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), - ctx.Input("Y")->dims(), - "Dimensions of Input(X) and Input(Y) must be the same."); - - auto dims = ctx.Input("X")->dims(); - ctx.Output("Out")->Resize({dims[0], 1}); - ctx.Output("XNorm")->Resize({dims[0], 1}); - ctx.Output("YNorm")->Resize({dims[0], 1}); + // notnull check + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("XNorm"), + "Output(XNorm) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("YNorm"), + "Output(YNorm) of CosSimOp should not be null."); + + // shape check + auto x_dims = ctx.Input("X")->dims(); + auto y_dims = ctx.Input("Y")->dims(); + + PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(), + "Ranks of Input(X) and Input(Y) must be equal."); + PADDLE_ENFORCE_GE(x_dims.size(), 2, + "Rank of Input(X) must not be less than 2."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 1, x_dims.size()), + framework::slice_ddim(y_dims, 1, y_dims.size()), + "All dimensions except the 1st of Input(X) and Input(Y) " + "must be equal."); + PADDLE_ENFORCE(x_dims[0] == y_dims[0] || y_dims[0] == 1, + "The 1st dimension of Input(Y) must be equal to Input(X) or" + " just 1 (which will be broadcasted to match Input(X))."); + + // resize tensor + ctx.Output("Out")->Resize({x_dims[0], 1}); + ctx.Output("XNorm")->Resize({x_dims[0], 1}); + ctx.Output("YNorm")->Resize({y_dims[0], 1}); } }; @@ -42,16 +64,27 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { public: CosSimOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "The first input of cos_sim op."); - AddInput("Y", "The second input of cos_sim op."); + AddInput("X", "The 1st input of cos_sim op."); + AddInput("Y", "The 2nd input of cos_sim op."); AddOutput("Out", "The output of cos_sim op."); - AddOutput("XNorm", "Row norm of the first input.").AsIntermediate(); - AddOutput("YNorm", "Row norm of the second input.").AsIntermediate(); + AddOutput("XNorm", + "Norm of the first input, reduced along the 1st " + "dimension.") + 
.AsIntermediate(); + AddOutput("YNorm", + "Norm of the second input, reduced along the 1st " + "dimension.") + .AsIntermediate(); AddComment(R"DOC( Cosine Similarity Operator. -The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)) +The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)). + +Input(X) and Input(Y) must have the same shape, except that the 1st dimension +of Input(Y) could be just 1 (different from Input(X)), which will be +broadcasted to match the shape of Input(X) before computing their cosine +similarity. )DOC"); } }; @@ -62,34 +95,54 @@ class CosSimOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + // notnull check PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("XNorm"), "Input(XNorm) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("YNorm"), "Input(YNorm) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Out"), + "Input(Out) must not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) must not be null."); + // shape check auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); auto xnorm_dims = ctx.Input("XNorm")->dims(); auto ynorm_dims = ctx.Input("YNorm")->dims(); - auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); - PADDLE_ENFORCE_EQ(x_dims, y_dims, - "Dimensions of Input(X) and Input(Y) must be the same."); - PADDLE_ENFORCE_EQ(xnorm_dims[0], x_dims[0], - "1st dimension of XNorm must equal that of Input(X)."); - PADDLE_ENFORCE_EQ(xnorm_dims[1], 1, "2st dimension of XNorm must be one."); - PADDLE_ENFORCE_EQ(ynorm_dims[0], y_dims[0], - "1st dimension of YNorm must equal that of Input(Y)."); - PADDLE_ENFORCE_EQ(ynorm_dims[1], 1, "2st dimension of YNorm must be one."); - PADDLE_ENFORCE_EQ(out_dims[0], x_dims[0], - "1st dimension of Out@GRAD must equal that of Input(X)"); - PADDLE_ENFORCE_EQ(out_dims[1], 1, "1st dimension of Out@GRAD must be one."); - - auto *x_grad = ctx.Output(framework::GradVarName("X")); - auto *y_grad = ctx.Output(framework::GradVarName("Y")); + auto out_dims = ctx.Input("Out")->dims(); + auto out_grad_dims = + ctx.Input(framework::GradVarName("Out"))->dims(); + + PADDLE_ENFORCE_EQ(x_dims.size(), y_dims.size(), + "Ranks of Input(X) and Input(Y) must be equal."); + PADDLE_ENFORCE_GE(x_dims.size(), 2, + "Rank of Input(X) must not be less than 2."); + PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 1, x_dims.size()), + framework::slice_ddim(y_dims, 1, y_dims.size()), + "All dimensions except the 1st of Input(X) and Input(Y) " + "must be equal."); + PADDLE_ENFORCE(x_dims[0] == y_dims[0] || y_dims[0] == 1, + "The 1st dimension of Input(Y) must be equal to Input(X) or" + " just 1 (which will be broadcasted to match Input(X))."); + auto target_xnorm_dims = framework::make_ddim({x_dims[0], 1}); + auto target_ynorm_dims = framework::make_ddim({y_dims[0], 1}); + PADDLE_ENFORCE_EQ(xnorm_dims, target_xnorm_dims, + "Shape of Input(XNorm) must be [X.Dim(0), 1]."); + PADDLE_ENFORCE_EQ(ynorm_dims, target_ynorm_dims, + "Shape of Input(YNorm) must be [Y.Dim(0), 1]."); + PADDLE_ENFORCE_EQ(out_dims, target_xnorm_dims, + "Shape of Input(Out) must be [X.Dim(0), 1]."); + PADDLE_ENFORCE_EQ(out_grad_dims, target_xnorm_dims, + "Shape of Input(Out@Grad) must be [X.Dim(0), 1]."); + + // resize tensor + auto *x_grad = + 
ctx.Output(framework::GradVarName("X")); + auto *y_grad = + ctx.Output(framework::GradVarName("Y")); if (x_grad) x_grad->Resize(x_dims); if (y_grad) y_grad->Resize(y_dims); } diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 0dc509952578497671a128374f77ce616a520909..bcf6f758cae561a2e22f5be6c7a242647ef1c144 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -31,30 +31,38 @@ template class CosSimKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input_x = context.Input("X"); - auto* input_y = context.Input("Y"); - auto* output_z = context.Output("Out"); - auto* output_x_norm = context.Output("XNorm"); - auto* output_y_norm = context.Output("YNorm"); + // get Tensor + auto* in_x = context.Input("X"); + auto* in_y = context.Input("Y"); + auto* out_z = context.Output("Out"); + auto* out_x_norm = context.Output("XNorm"); + auto* out_y_norm = context.Output("YNorm"); + out_z->mutable_data(context.GetPlace()); + out_x_norm->mutable_data(context.GetPlace()); + out_y_norm->mutable_data(context.GetPlace()); - output_z->mutable_data(context.GetPlace()); - output_x_norm->mutable_data(context.GetPlace()); - output_y_norm->mutable_data(context.GetPlace()); - - auto dims = input_x->dims(); - int64_t size = input_x->numel(); - auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); - auto x = EigenMatrix::From(*input_x, new_dims); - auto y = EigenMatrix::From(*input_y, new_dims); - auto z = EigenVector::Flatten(*output_z); - auto x_norm = EigenVector::Flatten(*output_x_norm); - auto y_norm = EigenVector::Flatten(*output_y_norm); + // convert Tensor to Eigen Tensor + int rows_x = in_x->dims()[0]; + int rows_y = in_y->dims()[0]; + auto x = EigenMatrix::Reshape(*in_x, 1); + auto y = EigenMatrix::Reshape(*in_y, 1); + auto z = EigenVector::Flatten(*out_z); + auto x_norm = EigenVector::Flatten(*out_x_norm); + auto y_norm = EigenVector::Flatten(*out_y_norm); + // compute auto place = context.GetEigenDevice(); - auto xy = (x * y).sum(Eigen::array({{1}})); - x_norm.device(place) = x.square().sum(Eigen::array({{1}})).sqrt(); - y_norm.device(place) = y.square().sum(Eigen::array({{1}})).sqrt(); - z.device(place) = xy / x_norm / y_norm; + auto row_along = Eigen::array({{1}}); + x_norm.device(place) = x.square().sum(row_along).sqrt(); + y_norm.device(place) = y.square().sum(row_along).sqrt(); + if (rows_x == rows_y) { + auto xy = (x * y).sum(Eigen::array({{1}})); + z.device(place) = xy / x_norm / y_norm; + } else { + Eigen::DSizes bcast(rows_x, 1); + auto xy = (x * y.broadcast(bcast)).sum(row_along); + z.device(place) = xy / x_norm / y_norm.broadcast(bcast); + } } }; @@ -62,43 +70,72 @@ template class CosSimGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input_x = context.Input("X"); - auto* input_y = context.Input("Y"); - auto* input_z = context.Input("Out"); - auto* input_x_norm = context.Input("XNorm"); - auto* input_y_norm = context.Input("YNorm"); - auto* output_grad_x = context.Output(framework::GradVarName("X")); - auto* output_grad_y = context.Output(framework::GradVarName("Y")); - auto* input_grad_z = context.Input(framework::GradVarName("Out")); + // get Tensor + auto* in_x = context.Input("X"); + auto* in_y = context.Input("Y"); + auto* in_z = context.Input("Out"); + auto* in_x_norm = context.Input("XNorm"); + auto* in_y_norm = context.Input("YNorm"); + auto* out_grad_x = 
context.Output(framework::GradVarName("X")); + auto* out_grad_y = context.Output(framework::GradVarName("Y")); + auto* in_grad_z = context.Input(framework::GradVarName("Out")); - auto dims = input_x->dims(); - int64_t size = input_x->numel(); - auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); - auto x = EigenMatrix::From(*input_x, new_dims); - auto y = EigenMatrix::From(*input_y, new_dims); - auto z = EigenMatrix::From(*input_z); - auto x_norm = EigenMatrix::From(*input_x_norm); - auto y_norm = EigenMatrix::From(*input_y_norm); - auto dz = EigenMatrix::From(*input_grad_z); + // convert Tensor to Eigen Tensor + auto x = EigenMatrix::Reshape(*in_x, 1); + auto y = EigenMatrix::Reshape(*in_y, 1); + auto z = EigenMatrix::Reshape(*in_z, 1); + auto x_norm = EigenMatrix::Reshape(*in_x_norm, 1); + auto y_norm = EigenMatrix::Reshape(*in_y_norm, 1); + auto dz = EigenMatrix::Reshape(*in_grad_z, 1); - Eigen::DSizes bcast(1, new_dims[1]); - auto z_bcast = z.broadcast(bcast); - auto dz_bcast = dz.broadcast(bcast); + // compute gradient + int rows_x = in_x->dims()[0]; + int rows_y = in_y->dims()[0]; + int cols = framework::product(in_x->dims()) / rows_x; + Eigen::DSizes bcast_cols(1, cols); + auto z_bcast = z.broadcast(bcast_cols); + auto dz_bcast = dz.broadcast(bcast_cols); + auto x_snorm_bcast = x_norm.square().eval().broadcast(bcast_cols); auto place = context.GetEigenDevice(); - auto x_snorm_bcast = x_norm.square().eval().broadcast(bcast); - auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast); - auto norm_prod_bcast = (x_norm * y_norm).eval().broadcast(bcast); - if (output_grad_x) { - output_grad_x->mutable_data(context.GetPlace()); - auto dx = EigenMatrix::From(*output_grad_x, new_dims); - dx.device(place) = - dz_bcast * (y / norm_prod_bcast - z_bcast * x / x_snorm_bcast); - } - if (output_grad_y) { - output_grad_y->mutable_data(context.GetPlace()); - auto dy = EigenMatrix::From(*output_grad_y, new_dims); - dy.device(place) = - dz_bcast * (x / norm_prod_bcast - z_bcast * y / y_snorm_bcast); + if (rows_x == rows_y) { + auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast_cols); + auto norm_prod_bcast = (x_norm * y_norm).eval().broadcast(bcast_cols); + // compute dx + if (out_grad_x) { + out_grad_x->mutable_data(context.GetPlace()); + auto dx = EigenMatrix::Reshape(*out_grad_x, 1); + auto grad = y / norm_prod_bcast - z_bcast * x / x_snorm_bcast; + dx.device(place) = dz_bcast * grad; + } + // compute dy + if (out_grad_y) { + out_grad_y->mutable_data(context.GetPlace()); + auto dy = EigenMatrix::Reshape(*out_grad_y, 1); + auto grad = x / norm_prod_bcast - z_bcast * y / y_snorm_bcast; + dy.device(place) = dz_bcast * grad; + } + } else { + Eigen::DSizes bcast_rows(rows_x, 1); + Eigen::DSizes bcast_rows_cols(rows_x, cols); + auto y_bcast = y.broadcast(bcast_rows); + auto y_snorm_bcast = y_norm.square().eval().broadcast(bcast_rows_cols); + auto norm_prod_bcast = (x_norm * y_norm.eval().broadcast(bcast_rows)) + .eval() + .broadcast(bcast_cols); + // compute dx + if (out_grad_x) { + out_grad_x->mutable_data(context.GetPlace()); + auto dx = EigenMatrix::Reshape(*out_grad_x, 1); + auto grad = y_bcast / norm_prod_bcast - z_bcast * x / x_snorm_bcast; + dx.device(place) = dz_bcast * grad; + } + // compute dy + if (out_grad_y) { + out_grad_y->mutable_data(context.GetPlace()); + auto dy = EigenMatrix::Reshape(*out_grad_y, 1); + auto grad = x / norm_prod_bcast - z_bcast * y_bcast / y_snorm_bcast; + dy.device(place) = (dz_bcast * grad).sum(Eigen::array({{0}})); + } } } }; diff --git 
a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc index 1742925545d29df5d7df719faaea3b754680ab61..ee6e975b443691bf71cec904565ced20406f3fba 100644 --- a/paddle/operators/elementwise_mul_op.cc +++ b/paddle/operators/elementwise_mul_op.cc @@ -25,13 +25,19 @@ class ElementWiseMulOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of ElementWiseMulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of ElementWiseMulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of ElementWiseMulOp should not be null."); + auto x_dim = ctx.Input("X")->dims(); auto y_dim = ctx.Input("Y")->dims(); PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(), "Rank of first input must >= rank of second input.") - ctx.Output("Out")->Resize(x_dim); + ctx.Output("Out")->Resize(x_dim); } }; @@ -80,8 +86,10 @@ class ElementWiseMulOpGrad : public framework::OperatorWithKernel { auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); - auto *x_grad = ctx.Output(framework::GradVarName("X")); - auto *y_grad = ctx.Output(framework::GradVarName("Y")); + auto *x_grad = + ctx.Output(framework::GradVarName("X")); + auto *y_grad = + ctx.Output(framework::GradVarName("Y")); 
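+ // Note: as elsewhere in this change, the gradients are fetched as + // LoDTensors; either pointer may be nullptr when that gradient is not + // requested (compare the if (x_grad) checks in cos_sim_op above). 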
PADDLE_ENFORCE_GE(x_dims.size(), y_dims.size(), "Rank of first input must >= rank of second input.") diff --git a/paddle/operators/elementwise_mul_op.h b/paddle/operators/elementwise_mul_op.h index e9ed6791799240039f9af42c1a4339be7126ee65..6d58da580b81b9e0a8ae170eec1a73638b190df8 100644 --- a/paddle/operators/elementwise_mul_op.h +++ b/paddle/operators/elementwise_mul_op.h @@ -13,10 +13,8 @@ limitations under the License. */ #pragma once -#include #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 9d51f6e3a16fe96125599bb440d40237aeb9a028..ba7857cc65f6860a6156674c6addc2bfdce21a99 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -23,7 +23,14 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output("Dst")->Resize( + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("Src"), + "Input(Src) of FillZerosLikeOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Dst"), + "Output(Dst) of FillZerosLikeOp should not be null."); + + ctx.Output("Dst")->Resize( ctx.Input("Src")->dims()); } }; diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 123bed296c462c30bddd3bfbd530098fdbfe4856..d445b61c1657356f2cdcf1e98d756607de2bd042 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -24,11 +24,18 @@ class GatherOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of GatherOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Index"), + "Input(Index) of GatherOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of GatherOp should not be null."); + int batch_size = ctx.Input("Index")->dims()[0]; PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); framework::DDim output_dims(ctx.Input("X")->dims()); output_dims[0] = batch_size; - ctx.Output("Out")->Resize(output_dims); + ctx.Output("Out")->Resize(output_dims); } }; @@ -38,7 +45,7 @@ class GatherGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto X_grad = ctx.Output(framework::GradVarName("X")); + auto X_grad = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); X_grad->Resize(X->dims()); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 3d76516405960c502a46997108049b2db5cab6bf..c0e161bbc0c5486eb10408e43e6388f1b287abf8 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -43,8 +43,12 @@ class GaussianRandomOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext& context) const override { - auto* tensor = context.Output("Out"); + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of GaussianRandomOp should not be null."); + + auto* tensor = ctx.Output("Out"); auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); diff --git 
a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc index 7d9d4fa519d1c690feacbadc5175aeab49082282..b67ca5f6f8d516224e18a5eed497f2bfc680259c 100644 --- a/paddle/operators/identity_op.cc +++ b/paddle/operators/identity_op.cc @@ -42,6 +42,11 @@ class IdentityOp : public NetOp { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : NetOp(type, inputs, outputs, attrs) { + PADDLE_ENFORCE_NE(Input("X"), framework::kEmptyVarName, + "Input(X) of IdentityOp should not be null."); + PADDLE_ENFORCE_NE(Output("Out"), framework::kEmptyVarName, + "Output(Out) of IdentityOp should not be null."); + AppendOp(framework::OpRegistry::CreateOp( "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Out")}}}, {{"scale", static_cast(1)}})); diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 94d40890a765413e88a35a6ad995ca97ac84dcda..07f6dfabca5879e3de6004e59d2e87f7fa68d66c 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -22,10 +22,17 @@ class LookupTableOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext &context) const override { - auto table_t = context.Input("W"); - auto ids_t = context.Input("Ids"); - auto output_t = context.Output("Out"); + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("W"), + "Input(W) of LookupTableOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Ids"), + "Input(Ids) of LookupTableOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of LookupTableOp should not be null."); + + auto table_t = ctx.Input("W"); + auto ids_t = ctx.Input("Ids"); + auto output_t = ctx.Output("Out"); output_t->Resize({ids_t->dims()[0], table_t->dims()[1]}); } @@ -56,7 +63,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &context) const override { auto table = context.Input("W"); - auto d_table = context.Output(framework::GradVarName("W")); + auto d_table = + context.Output(framework::GradVarName("W")); d_table->Resize(table->dims()); } }; diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index d3d0e55a674587fb04f43f24d0790de4358f035a..7d7eeb59a23435036dc33c1e4fe6dd1c4a1a2f62 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -24,8 +24,10 @@ class MeanOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of MeanOp must be initialized."); - ctx.Output("Out")->Resize({1}); + "Input(X) of MeanOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of MeanOp should not be null."); + ctx.Output("Out")->Resize({1}); } }; @@ -45,7 +47,7 @@ class MeanGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output(framework::GradVarName("X")) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index a4876feb2edf77bd422fa2a7687b0fa7d55dae47..ecf8a6f7795314e2475bb9546b55b8f354b96366 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -27,13 +27,20 @@ class MinusOp : public 
framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of MinusOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of MinusOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of MinusOp should not be null."); + auto *left_tensor = ctx.Input("X"); auto *right_tensor = ctx.Input("Y"); PADDLE_ENFORCE_EQ( left_tensor->numel(), right_tensor->numel(), "Minus operator must take two tensor with same num of elements"); - ctx.Output("Out")->Resize(left_tensor->dims()); + ctx.Output("Out")->Resize(left_tensor->dims()); } }; @@ -77,8 +84,6 @@ class MinusGradOp : public NetOp { } // namespace operators } // namespace paddle -USE_OP(scale); -USE_NO_KERNEL_OP(identity); namespace ops = paddle::operators; REGISTER_OP(minus, ops::MinusOp, ops::MinusOpMaker, minus_grad, ops::MinusGradOp); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 710a56a0e8e2d17162d7d000df226f1537104eb9..b6d320b415e02549e85cb36ab517b0b5433887d5 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { using framework::Tensor; +using framework::LoDTensor; class MulOp : public framework::OperatorWithKernel { public: @@ -25,6 +26,13 @@ class MulOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of MulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of MulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of MulOp should not be null."); + auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); int x_num_col_dims = Attr("x_num_col_dims"); @@ -45,7 +53,8 @@ class MulOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( x_mat_dims[1], y_mat_dims[0], "First matrix's width must be equal with second matrix's height."); - ctx.Output("Out")->Resize({x_mat_dims[0], y_mat_dims[1]}); + ctx.Output("Out")->Resize( + {x_mat_dims[0], y_mat_dims[1]}); } }; @@ -94,8 +103,10 @@ class MulOpGrad : public framework::OperatorWithKernel { auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); - auto *x_grad = ctx.Output(framework::GradVarName("X")); - auto *y_grad = ctx.Output(framework::GradVarName("Y")); + auto *x_grad = + ctx.Output(framework::GradVarName("X")); + auto *y_grad = + ctx.Output(framework::GradVarName("Y")); auto x_mat_dims = framework::flatten_to_2d(x_dims, Attr("x_num_col_dims")); diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/onehot_cross_entropy_op.cc similarity index 81% rename from paddle/operators/cross_entropy_op.cc rename to paddle/operators/onehot_cross_entropy_op.cc index ab1e1c101a10e09a81f7785d2f1514822e3bdf15..f38be3549f3c5d2443f61739fc32cdca74197649 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/onehot_cross_entropy_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/operators/cross_entropy_op.h" +#include "paddle/operators/onehot_cross_entropy_op.h" namespace paddle { namespace operators { @@ -23,13 +23,23 @@ class OnehotCrossEntropyOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("X"), + "Input(X) of OnehotCrossEntropyOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("label"), + "Input(label) of OnehotCrossEntropyOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Y"), + "Output(Y) of OnehotCrossEntropyOp should not be null."); + auto *X = ctx.Input("X"); auto *label = ctx.Input("label"); PADDLE_ENFORCE_EQ(X->dims().size(), 2, "X's dimension must be 2."); PADDLE_ENFORCE_EQ(label->dims().size(), 1, "label's dimension must be 1."); PADDLE_ENFORCE_EQ(X->dims()[0], label->dims()[0]); - ctx.Output("Y")->Resize({X->dims()[0]}); + ctx.Output("Y")->Resize({X->dims()[0], 1}); } }; @@ -39,7 +49,7 @@ class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto dX = ctx.Output(framework::GradVarName("X")); + auto dX = ctx.Output(framework::GradVarName("X")); auto X = ctx.Input("X"); dX->Resize(X->dims()); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/onehot_cross_entropy_op.cu similarity index 100% rename from paddle/operators/cross_entropy_op.cu rename to paddle/operators/onehot_cross_entropy_op.cu diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/onehot_cross_entropy_op.h similarity index 100% rename from paddle/operators/cross_entropy_op.h rename to paddle/operators/onehot_cross_entropy_op.h diff --git a/paddle/operators/pad_op.cc b/paddle/operators/pad_op.cc index 7e78b6ec133981494a65b5e16316ae8fdbd61a60..a0b1c6b631d97a40d774f7d2ff9550fda9c32db4 100644 --- a/paddle/operators/pad_op.cc +++ b/paddle/operators/pad_op.cc @@ -25,6 +25,11 @@ class PadOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of PadOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of PadOp should not be null."); + auto x_dim = ctx.Input("X")->dims(); auto paddings = Attr>("paddings"); PADDLE_ENFORCE_EQ(x_dim.size() * 2, int64_t(paddings.size()), @@ -34,7 +39,8 @@ class PadOp : public framework::OperatorWithKernel { for (int i = 0; i < x_dim.size(); ++i) { out_dims[i] = x_dim[i] + paddings[i * 2] + paddings[i * 2 + 1]; } - ctx.Output("Out")->Resize(framework::make_ddim(out_dims)); + ctx.Output("Out")->Resize( + framework::make_ddim(out_dims)); } }; @@ -95,9 +101,9 @@ class PadOpGrad : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) should not be null"); auto x_dims = ctx.Input("X")->dims(); - auto *x_grad = ctx.Output(framework::GradVarName("X")); - if (x_grad != nullptr) { - x_grad->Resize(x_dims); + auto *x_g = ctx.Output(framework::GradVarName("X")); + if (x_g != nullptr) { + x_g->Resize(x_dims); } } }; diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index e826703c60ca82e1fe690eb78c3d4f92981ef3a2..d3413d7cb9305732e9ddf3cb1bc267f7203097f3 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -26,10 +26,11 @@ namespace operators { using Scope = 
framework::Scope; using Variable = framework::Variable; using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; void RecurrentAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) - ->GetMutable() + ->GetMutable() ->dims()[0]; CreateScopes(scope); auto step_scopes = GetStepScopes(scope); @@ -88,7 +89,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { // the weight are located in parent scope for (auto& var_name : input.second) { if (!step_scope.FindVar(var_name)) { - step_scope.NewVar(var_name)->GetMutable(); + step_scope.NewVar(var_name)->GetMutable(); } } } @@ -106,11 +107,12 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { void RecurrentAlgorithm::InitMemories(Scope* step_scope, bool infer_shape_mode) const { for (auto& attr : arg_->memories) { - Tensor* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable(); + auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable(); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, "memory [%s]'s boot variable [%s] not exists", attr.var, attr.boot_var); - Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable(); + auto* boot_mem = + step_scope->FindVar(attr.boot_var)->GetMutable(); if (infer_shape_mode) { pre_mem->Resize(boot_mem->dims()); PADDLE_ENFORCE_EQ(pre_mem->dims().size(), 2); @@ -192,9 +194,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( "memory variable [%s] does not exists", attr.var); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, "boot variable [%s] does not exists", attr.boot_var); - Tensor* mem_grad = step_scope->NewVar(attr.var)->GetMutable(); - Tensor* boot_mem_grad = - step_scope->NewVar(attr.boot_var)->GetMutable(); + auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable(); + auto* boot_mem_grad = + step_scope->NewVar(attr.boot_var)->GetMutable(); if (infer_shape_mode) { boot_mem_grad->Resize(mem_grad->dims()); } else { @@ -205,7 +207,7 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { seq_len_ = scope.FindVar((arg_->inlinks[0]).external) - ->GetMutable() + ->GetMutable() ->dims()[0]; auto step_scopes = GetStepScopes(scope); rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc index b7061153d2bf13982f14f233e87a87daeeebf5fd..0d05e344148c68f5625dd819ec59c5991892e4ce 100644 --- a/paddle/operators/reshape_op.cc +++ b/paddle/operators/reshape_op.cc @@ -28,7 +28,11 @@ class ReshapeOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { // input check - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) shouldn't be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of ReshapeOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ReshapeOp should not be null."); + auto shape = ctx.Attr>("shape"); PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty."); for (auto dim : shape) { @@ -46,7 +50,7 @@ class ReshapeOp : public framework::OperatorWithKernel { std::transform(shape.begin(), shape.end(), shape_int64.begin(), [](int a) { return static_cast(a); }); auto out_dims = framework::make_ddim(shape_int64); - ctx.Output("Out")->Resize(out_dims); + ctx.Output("Out")->Resize(out_dims); } }; @@ -90,7 +94,7 @@ class ReshapeGradOp : public framework::OperatorWithKernel { 
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) shouldn't be null."); auto dims = ctx.Input("X")->dims(); - auto *d_in = ctx.Output(framework::GradVarName("X")); + auto *d_in = ctx.Output(framework::GradVarName("X")); d_in->Resize(dims); } }; diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index 97872c67ac99fbf6c9c177d52f1d4069163e8548..6c082cb1825e04accb09019fef28eb2ec6523a5b 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -21,6 +21,7 @@ namespace rnn { namespace f = paddle::framework; using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, const size_t seq_len, @@ -31,7 +32,7 @@ void SegmentInputs(const std::vector& step_scopes, PADDLE_ENFORCE(input_var != nullptr, "input link [%s] is not in scope.", inlinks[i].external); - Tensor* input = input_var->GetMutable(); + LoDTensor* input = input_var->GetMutable(); f::DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); @@ -40,6 +41,8 @@ void SegmentInputs(const std::vector& step_scopes, Tensor* step_input = step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable(); if (!infer_shape_mode) { + // The input of operators of each step is Tensor here. + // Maybe need to modify Slice function. *step_input = input->Slice(j, j + 1); } step_input->Resize(step_dims); @@ -54,21 +57,23 @@ void ConcatOutputs(const std::vector& step_scopes, auto output_var = step_scopes[0]->FindVar(outlinks[i].external); PADDLE_ENFORCE(output_var != nullptr, "output link [%s] is not in scope.", outlinks[i].external); - Tensor* output = output_var->GetMutable(); + LoDTensor* output = output_var->GetMutable(); if (infer_shape_mode) { auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal); PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i].internal); - f::DDim step_dims = step_scope_var->template GetMutable()->dims(); + f::DDim step_dims = + step_scope_var->template GetMutable()->dims(); std::vector dims_vec = vectorize(step_dims); dims_vec.insert(dims_vec.begin(), seq_len); output->Resize(f::make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); for (size_t j = 0; j < seq_len; j++) { - Tensor* step_output = - step_scopes[j]->FindVar(outlinks[i].internal)->GetMutable(); + LoDTensor* step_output = step_scopes[j] + ->FindVar(outlinks[i].internal) + ->GetMutable(); // TODO(luotao02) data type and platform::DeviceContext() should set // correctly (output->Slice(j, j + 1)) @@ -94,8 +99,8 @@ void LinkMemories(const std::vector& scopes, auto scope = scopes[step_id]; auto linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { - auto mem = scope->FindVar(attr.pre_var)->GetMutable(); - auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); + auto mem = scope->FindVar(attr.pre_var)->GetMutable(); + auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); if (infer_shape_mode) { mem->Resize(linked_mem->dims()); } else { diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index fa8f0ff1a858143af427b51025279c726f1628e0..2a3fd3be941d91aaa6b014df91d3025f07767577 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -25,6 +25,13 @@ class RowwiseAddOp : public framework::OperatorWithKernel { protected: void InferShape(const 
framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of RowwiseAddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("b"), + "Input(b) of RowwiseAddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of RowwiseAddOp should not be null."); + auto x_dims = ctx.Input("X")->dims(); auto b_dims = ctx.Input("b")->dims(); PADDLE_ENFORCE_GT( @@ -37,7 +44,7 @@ class RowwiseAddOp : public framework::OperatorWithKernel { framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims, "The width of two operands must be same"); PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1"); - ctx.Output("Out")->Resize(x_dims); + ctx.Output("Out")->Resize(x_dims); } }; @@ -76,8 +83,8 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims, "The width of two operands must be same"); - auto *dx = ctx.Output(framework::GradVarName("X")); - auto *db = ctx.Output(framework::GradVarName("b")); + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *db = ctx.Output(framework::GradVarName("b")); if (dx) dx->Resize(x_dims); if (db) db->Resize(b_dims); } diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index ea991f683d841b3dc4624a0d8aa3c88367fd3c6d..d1f42e8662537d35e17429f9d436fdc0e5a1dc11 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -27,8 +27,13 @@ class ScaleOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of ScaleOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ScaleOp should not be null."); + auto *in = ctx.Input("X"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); out->Resize(in->dims()); } }; diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index f901edefa22dc9a252e87116df756d04767a7162..8820262732327306f4f807702751708bd1e2aa36 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -24,6 +24,15 @@ class ScatterOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Ref"), + "Input(Ref) of ScatterOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Index"), + "Input(Index) of ScatterOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Updates"), + "Input(Updates) of ScatterOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ScatterOp should not be null."); + PADDLE_ENFORCE_EQ(ctx.Input("Index")->dims().size(), 1, "Update Index should be 1-D."); PADDLE_ENFORCE_EQ(ctx.Input("Ref")->dims().size(), @@ -35,7 +44,8 @@ class ScatterOp : public framework::OperatorWithKernel { framework::DDim data_dim(ctx.Input("Updates")->dims()); for (int i = 1; i < data_dim.size(); ++i) PADDLE_ENFORCE_EQ(data_dim[i], ctx.Input("Updates")->dims()[i]); - ctx.Output("Out")->Resize(ctx.Input("Ref")->dims()); + ctx.Output("Out")->Resize( + ctx.Input("Ref")->dims()); } }; @@ -45,9 +55,11 @@ class ScatterGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto *dUpdates = ctx.Output(framework::GradVarName("Updates")); + auto *dUpdates = + 
ctx.Output(framework::GradVarName("Updates")); auto *Updates = ctx.Input("Updates"); - auto *dRef = ctx.Output(framework::GradVarName("Ref")); + auto *dRef = + ctx.Output(framework::GradVarName("Ref")); auto *Ref = ctx.Input("Ref"); dRef->Resize(Ref->dims()); diff --git a/paddle/operators/sequence_avg_pool_op.cc b/paddle/operators/sequence_avg_pool_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..eb3e37655bc7eae1a3cf1348434e33a415947cad --- /dev/null +++ b/paddle/operators/sequence_avg_pool_op.cc @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sequence_avg_pool_op.h" + +namespace paddle { +namespace operators { + +class SequenceAvgPoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("X"), "Input(X) of SequenceAvgPoolOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of SequenceAvgPoolOp should not be null."); + + auto* x = ctx.Input("X"); + auto dims = x->dims(); + auto lod = x->lod(); + PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only one-level sequence is supported now."); + PADDLE_ENFORCE_GE( + dims[0], + /*batch size = */ static_cast(lod[0].size() - 1), + "The first dimension of Input(X) must be no less than batch size."); + dims[0] = lod[0].size() - 1; + ctx.Output("Out")->Resize({dims}); + } +}; + +class SequenceAvgPoolOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SequenceAvgPoolOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of SequenceAvgPoolOp."); + AddOutput("Out", "The output of SequenceAvgPoolOp."); + AddComment(R"DOC( + SequenceAvgPoolOp averages features of all time-steps of each instance. + More detailed comments will be added later.
+ )DOC"); + } +}; + +class SequenceAvgPoolGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Gradient of Out should not be null"); + auto og_dims = + ctx.Input(framework::GradVarName("Out"))->dims(); + auto x_dims = ctx.Input("X")->dims(); + PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(), + "The rank of output grad must equal to Input(X)."); + for (int64_t i = 1; i < og_dims.size(); ++i) { + PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch."); + } + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + x_grad->Resize(x_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(sequence_avg_pool, ops::SequenceAvgPoolOp, + ops::SequenceAvgPoolOpMaker, sequence_avg_pool_grad, + ops::SequenceAvgPoolGradOp); +REGISTER_OP_CPU_KERNEL( + sequence_avg_pool, + ops::SequenceAvgPoolKernel); +REGISTER_OP_CPU_KERNEL( + sequence_avg_pool_grad, + ops::SequenceAvgPoolGradKernel); diff --git a/paddle/operators/sequence_avg_pool_op.cu b/paddle/operators/sequence_avg_pool_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..bc9d1611fccd17c99b914b6ef59995288a9ebbd6 --- /dev/null +++ b/paddle/operators/sequence_avg_pool_op.cu @@ -0,0 +1,25 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU + +#include "paddle/operators/sequence_avg_pool_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + sequence_avg_pool, + ops::SequenceAvgPoolKernel); +REGISTER_OP_GPU_KERNEL( + sequence_avg_pool_grad, + ops::SequenceAvgPoolGradKernel); diff --git a/paddle/operators/sequence_avg_pool_op.h b/paddle/operators/sequence_avg_pool_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6e343b87e2938399409498407ac46b2416dc2231 --- /dev/null +++ b/paddle/operators/sequence_avg_pool_op.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +template +using EigenMatrix = framework::EigenMatrix; + +template +class SequenceAvgPoolKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in = context.Input("X"); + auto* out = context.Output("Out"); + + auto dims = in->dims(); + auto lod = in->lod(); + int64_t w = in->numel() / dims[0]; + + out->mutable_data(context.GetPlace()); + auto place = context.GetEigenDevice(); + for (int i = 0; i < static_cast(lod[0].size()) - 1; ++i) { + Tensor in_t = in->Slice(static_cast(lod[0][i]), + static_cast(lod[0][i + 1])); + Tensor out_t = out->Slice(i, i + 1); + int64_t h = static_cast(lod[0][i + 1] - lod[0][i]); + auto in_e = EigenMatrix::From(in_t, {h, w}); + auto out_e = EigenMatrix::From(out_t, {h, w}); + out_e.device(place) = in_e.mean(Eigen::array({{0}})); + } + } +}; + +template +class SequenceAvgPoolGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in = context.Output("X"); + auto* in_g = context.Output(framework::GradVarName("X")); + auto* out_g = context.Input(framework::GradVarName("Out")); + + auto dims = in->dims(); + auto lod = in->lod(); + int64_t w = in->numel() / dims[0]; + + in_g->mutable_data(context.GetPlace()); + auto place = context.GetEigenDevice(); + for (int i = 0; i < static_cast(lod[0].size()) - 1; ++i) { + auto in_g_t = in_g->Slice(static_cast(lod[0][i]), + static_cast(lod[0][i + 1])); + auto out_g_t = out_g->Slice(i, i + 1); + int64_t h = static_cast(lod[0][i + 1] - lod[0][i]); + auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); + auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); + Eigen::DSizes bcast(h, w); + in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index ad267e7f087943ff3b8326a7baf2ce3955fa51c2..1232e64c7f0132b9ea19b3d7e1ebe9531e1e25a5 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -23,10 +23,18 @@ class SGDOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE( - ctx.Input("param")->dims() == ctx.Input("grad")->dims(), - "Two input of SGD Op's dimension must be same."); - ctx.Output("param_out")->Resize(ctx.Input("param")->dims()); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("param"), + "Input(param) of SGDOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("grad"), + "Input(grad) of SGDOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("param_out"), + "Output(param_out) of SGDOp should not be null."); + + PADDLE_ENFORCE_EQ(ctx.Input("param")->dims(), + ctx.Input("grad")->dims(), + "Two input of SGD Op's dimension must be same."); + ctx.Output("param_out") + ->Resize(ctx.Input("param")->dims()); } }; diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index 761c6de8d4d2150b30b97b58da95da3d5f33db63..992b19965e0ca9ce7dba1b8b3c5b7780af06eb45 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -23,7 +23,13 @@ class SigmoidOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const 
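The backward kernel mirrors this: each instance's output gradient is divided by its segment height h and broadcast over the segment's rows. The same numpy sketch, continued for the gradient:

```python
import numpy as np

def seq_avg_pool_grad(out_grad, lod0, in_shape):
    """out_grad: [batch, w]; returns the gradient w.r.t. the [N, w] input."""
    in_grad = np.zeros(in_shape, dtype=out_grad.dtype)
    for i in range(len(lod0) - 1):
        h = lod0[i + 1] - lod0[i]
        in_grad[lod0[i]:lod0[i + 1]] = out_grad[i] / h
    return in_grad

g = seq_avg_pool_grad(np.ones((2, 2), dtype="float32"), [0, 2, 5], (5, 2))
print(g)  # rows 0-1 receive 1/2, rows 2-4 receive 1/3
```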
override { - ctx.Output("Y")->Resize(ctx.Input("X")->dims()); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of SigmoidOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), + "Output(Y) of SigmoidOp should not be null."); + + ctx.Output("Y")->Resize( + ctx.Input("X")->dims()); } }; @@ -44,7 +50,7 @@ class SigmoidOpGrad : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - ctx.Output(framework::GradVarName("X")) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("Y")->dims()); } }; diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 7166b2f60be8a6088ab3a81686f7bed1b7181d97..c67eb028c882ed82ca4e6a4dd70cdea9f69cdc24 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -23,9 +23,15 @@ class SoftmaxOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of SoftmaxOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), + "Output(Y) of SoftmaxOp should not be null."); + PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, "The input of softmax op must be a matrix."); - ctx.Output("Y")->Resize(ctx.Input("X")->dims()); + ctx.Output("Y")->Resize( + ctx.Input("X")->dims()); } }; @@ -71,7 +77,7 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { ctx.Input(framework::GradVarName("Y"))->dims(), "Input(Y) and its gradients should have a same shape."); - ctx.Output(framework::GradVarName("X")) + ctx.Output(framework::GradVarName("X")) ->Resize(ctx.Input("X")->dims()); } }; diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index 9f51d3efa8ecba894a1023b9de2df451ca85916c..39f4305877de20d451bc35fe698a0eabf9758d57 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -23,12 +23,18 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of SquaredL2DistanceOp " - "must be initialized."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), - "Target of SquaredL2DistanceOp " - "must be initialized."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("X"), + "Input(X) of SquaredL2DistanceOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("Y"), + "Input(Y) of SquaredL2DistanceOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("sub_result"), + "Output(sub_result) of SquaredL2DistanceOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of SquaredL2DistanceOp should not be null."); auto* x = ctx.Input("X"); auto x_dims = x->dims(); @@ -48,9 +54,9 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "First dimension of target must be equal to input " "or to 1."); - ctx.Output("sub_result") + ctx.Output("sub_result") ->Resize({x_dims[0], x->numel() / x_dims[0]}); - ctx.Output("Out")->Resize({x_dims[0], 1}); + ctx.Output("Out")->Resize({x_dims[0], 1}); } }; @@ -94,8 +100,10 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(out_dims[1], 1, "Second dimension of output gradient " "must be 1."); - auto* x_grad = ctx.Output(framework::GradVarName("X")); - auto* y_grad = ctx.Output(framework::GradVarName("Y")); + auto* x_grad = + 
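SoftmaxOp's new checks keep the old contract: the input must be a 2-D matrix and Y gets exactly X's shape. For reference, a numpy sketch of the row-wise function whose shape is being inferred:

```python
import numpy as np

def softmax(x):
    # x must be 2-D, matching the op's "must be a matrix" check.
    e = np.exp(x - x.max(axis=1, keepdims=True))  # shifted for stability
    return e / e.sum(axis=1, keepdims=True)

x = np.random.random((3, 4)).astype("float32")
y = softmax(x)
assert y.shape == x.shape
assert np.allclose(y.sum(axis=1), 1.0, atol=1e-5)
```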
ctx.Output(framework::GradVarName("X")); + auto* y_grad = + ctx.Output(framework::GradVarName("Y")); if (x_grad) x_grad->Resize(x_dims); if (y_grad) y_grad->Resize(y_dims); } diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 5805826ee8a555ca6dfc1ca81feaadffea9e1012..41e05c27f9029b2664685d3979fadcfd2bf6dbce 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -22,8 +22,13 @@ class SumOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE(!ctx.MultiInputVar("X").empty(), + "Input(X) of SumOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of SumOp should not be null."); + auto ins = ctx.MultiInput("X"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); int N = ins.size(); auto in_dim = ins[0]->dims(); @@ -55,7 +60,8 @@ class SumGradOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto outputs = ctx.MultiOutput(framework::GradVarName("X")); + auto outputs = + ctx.MultiOutput(framework::GradVarName("X")); auto dims = ctx.Input(framework::GradVarName("Out"))->dims(); for (auto output : outputs) { output->Resize(dims); diff --git a/paddle/operators/top_k_op.cc b/paddle/operators/top_k_op.cc index 38d2f0a09aec751734864947a2f3cfa20107e22f..169b815feffd86f9ff04c129ccc997230ce03a8c 100644 --- a/paddle/operators/top_k_op.cc +++ b/paddle/operators/top_k_op.cc @@ -24,7 +24,12 @@ class TopkOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of TopkOP must be initialized."); + "Input(X) of TopkOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of TopkOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Indices"), + "Output(Indices) of TopkOp should not be null."); + auto *input = ctx.Input("X"); const int k = static_cast(ctx.Attr("k")); @@ -35,8 +40,8 @@ class TopkOp : public framework::OperatorWithKernel { framework::DDim dims = input->dims(); dims[dims.size() - 1] = k; - ctx.Output("Out")->Resize(dims); - ctx.Output("Indices")->Resize(dims); + ctx.Output("Out")->Resize(dims); + ctx.Output("Indices")->Resize(dims); } }; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index b8fbc9b52aecdb5c8d985b5de9bcd7cb85835b60..184bcbc29c0d26a214345506f126f9cc0d406b07 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -48,9 +48,13 @@ class UniformRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of UniformRandomOp should not be null."); + PADDLE_ENFORCE(Attr("min") < Attr("max"), "uniform_random's min must less then max"); - auto* tensor = ctx.Output("Out"); + auto* tensor = ctx.Output("Out"); auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 17bdac8749e31565b119b2cb84aed199fac0f441..8b605e51c3f4ea38fc358ce054bb36fcc82063c4 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -24,3 +24,4 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator nv_test(device_context_test SRCS 
device_context_test.cc DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) +nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place) diff --git a/paddle/platform/details/device_ptr_cast.h b/paddle/platform/details/device_ptr_cast.h new file mode 100644 index 0000000000000000000000000000000000000000..4015491fcdc3554029aa771ab7da1b2f3424321f --- /dev/null +++ b/paddle/platform/details/device_ptr_cast.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifndef __NVCC__ +#error device_ptr_cast must be included by a .cu file +#endif + +#include + +namespace paddle { +namespace platform { +namespace details { +template +struct DevicePtrCast; + +template +struct DevicePtrCast { + using ELEM = typename std::remove_pointer::type; + using RTYPE = thrust::device_ptr; + + inline thrust::device_ptr operator()(ELEM* ele) const { + return thrust::device_pointer_cast(ele); + } +}; + +template +struct DevicePtrCast { + using RTYPE = T; + inline RTYPE operator()(RTYPE it) const { return it; } +}; + +// Cast T to thrust::device_ptr if T is a pointer. +// Otherwise, e.g., T is an iterator, return T itself. +template +auto DevPtrCast(T t) -> + typename DevicePtrCast::value>::RTYPE { + DevicePtrCast::value> cast; + return cast(t); +} + +} // namespace details +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index 64fcbd93b6c4d5d9b36f2636c3ef4f7327f08d25..df5f71ed760952ed042d7ffa40a4319a73fb93bf 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -25,6 +25,10 @@ limitations under the License. */ #include "paddle/string/printf.h" #include "paddle/string/to_string.h" +#ifdef __GNUC__ +#include // for __cxa_demangle +#endif + #ifndef PADDLE_ONLY_CPU #include "paddle/platform/dynload/cublas.h" @@ -42,6 +46,19 @@ limitations under the License. */ namespace paddle { namespace platform { +namespace { +#ifdef __GNUC__ +inline std::string demangle(std::string name) { + int status = -4; // some arbitrary value to eliminate the compiler warning + std::unique_ptr res{ + abi::__cxa_demangle(name.c_str(), NULL, NULL, &status), std::free}; + return (status == 0) ?
res.get() : name; +} +#else +inline std::string demangle(std::string name) { return name; } +#endif +} + struct EnforceNotMet : public std::exception { std::exception_ptr exp_; std::string err_str_; @@ -61,8 +78,8 @@ struct EnforceNotMet : public std::exception { Dl_info info; for (int i = 0; i < size; ++i) { - if (dladdr(call_stack[i], &info)) { - auto demangled = info.dli_sname; + if (dladdr(call_stack[i], &info) && info.dli_sname) { + auto demangled = demangle(info.dli_sname); auto addr_offset = static_cast(call_stack[i]) - static_cast(info.dli_saddr); sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, diff --git a/paddle/platform/transform.h b/paddle/platform/transform.h new file mode 100644 index 0000000000000000000000000000000000000000..3ee4acd29660f201d318ce6d39baa6f3999ae274 --- /dev/null +++ b/paddle/platform/transform.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/platform/enforce.h" +#include "paddle/platform/hostdevice.h" +#include "paddle/platform/place.h" + +#include +#include +#ifdef __NVCC__ +#include +#include "paddle/platform/details/device_ptr_cast.h" +#endif + +namespace paddle { +namespace platform { +// Transform on host or device. It provides the same API as the std library. +template +void Transform(Place place, InputIter first, InputIter last, OutputIter result, + UnaryOperation op) { + if (is_cpu_place(place)) { + std::transform(first, last, result, op); + } else { +#ifdef __NVCC__ + using namespace details; + thrust::transform(DevPtrCast(first), DevPtrCast(last), DevPtrCast(result), + op); +#else + PADDLE_THROW("Do not invoke `Transform` in a .cc file"); +#endif + } +} + +template +void Transform(Place place, InputIter1 first1, InputIter1 last1, + InputIter2 first2, OutputIter result, BinaryOperation op) { + if (is_cpu_place(place)) { + std::transform(first1, last1, first2, result, op); + } else { +#ifdef __NVCC__ + using namespace details; + thrust::transform(DevPtrCast(first1), DevPtrCast(last1), DevPtrCast(first2), + DevPtrCast(result), op); +#else + PADDLE_THROW("Do not invoke `Transform` in a .cc file"); +#endif + } +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/transform_test.cu b/paddle/platform/transform_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..600fed8f45077a6fee91f295aa854153c9cf9c01 --- /dev/null +++ b/paddle/platform/transform_test.cu @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "paddle/memory/memcpy.h" +#include "paddle/memory/memory.h" +#include "paddle/platform/transform.h" + +template +class Scale { + public: + explicit Scale(const T& scale) : scale_(scale) {} + + HOSTDEVICE T operator()(const T& a) const { return a * scale_; } + + private: + T scale_; +}; + +template +class Multiply { + public: + HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } +}; + +TEST(Transform, CPUUnary) { + using namespace paddle::platform; + float buf[4] = {0.1, 0.2, 0.3, 0.4}; + Transform(CPUPlace(), buf, buf + 4, buf, Scale(10)); + for (int i = 0; i < 4; ++i) { + ASSERT_NEAR(buf[i], static_cast(i + 1), 1e-5); + } +} + +TEST(Transform, GPUUnary) { + using namespace paddle::platform; + using namespace paddle::memory; + GPUPlace gpu0(0); + float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; + float* gpu_buf = static_cast(Alloc(gpu0, sizeof(float) * 4)); + Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf)); + Transform(gpu0, gpu_buf, gpu_buf + 4, gpu_buf, Scale(10)); + Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf)); + Free(gpu0, gpu_buf); + for (int i = 0; i < 4; ++i) { + ASSERT_NEAR(cpu_buf[i], static_cast(i + 1), 1e-5); + } +} + +TEST(Transform, CPUBinary) { + using namespace paddle::platform; + using namespace paddle::memory; + int buf[4] = {1, 2, 3, 4}; + Transform(CPUPlace(), buf, buf + 4, buf, buf, Multiply()); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ((i + 1) * (i + 1), buf[i]); + } +} + +TEST(Transform, GPUBinary) { + using namespace paddle::platform; + using namespace paddle::memory; + int buf[4] = {1, 2, 3, 4}; + GPUPlace gpu0(0); + int* gpu_buf = static_cast(Alloc(gpu0, sizeof(buf))); + Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf)); + Transform(gpu0, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply()); + Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf)); + Free(gpu0, gpu_buf); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ((i + 1) * (i + 1), buf[i]); + } +} \ No newline at end of file diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 851399a91c9ecbed7bed22f7609a47787478bdc6..c7009a604f60cda11434ad33b6c7d7caee1befdd 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -19,10 +19,12 @@ limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/cond_op.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" +#include "paddle/pybind/pybind.h" #include "paddle/pybind/tensor_py.h" #include "paddle/string/to_string.h" #include "pybind11/numpy.h" @@ -31,34 +33,6 @@ limitations under the License. 
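All four transform tests share the same two oracles, which are easy to restate in numpy (a sketch of the expected values only; the point of the .cu test is the host/device dispatch, which numpy does not exercise):

```python
import numpy as np

buf = np.array([0.1, 0.2, 0.3, 0.4], dtype="float32")
assert np.allclose(buf * 10, [1.0, 2.0, 3.0, 4.0], atol=1e-5)  # Scale(10)

ints = np.array([1, 2, 3, 4])
assert (ints * ints == np.array([1, 4, 9, 16])).all()  # Multiply
```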
*/ namespace py = pybind11; -USE_OP(add); -USE_OP(onehot_cross_entropy); -USE_OP(sgd); -USE_OP(mul); -USE_OP(elementwise_mul); -USE_OP(mean); -USE_OP(sigmoid); -USE_OP(softmax); -USE_OP(rowwise_add); -USE_OP(fill_zeros_like); -USE_NO_KERNEL_OP(recurrent); -USE_OP(gaussian_random); -USE_OP(uniform_random); -USE_OP(lookup_table); -USE_OP(scale); -USE_NO_KERNEL_OP(identity); -USE_OP(minus); -USE_OP(cos_sim); -USE_CPU_ONLY_OP(gather); -USE_OP(pad); -USE_CPU_ONLY_OP(scatter); -USE_OP(crop); -USE_CPU_ONLY_OP(concat); -USE_OP(top_k); -USE_OP(squared_l2_distance); -USE_OP(sum); -USE_OP(reshape); - namespace paddle { namespace framework { @@ -124,27 +98,21 @@ PYBIND11_PLUGIN(core) { return self.data()[offset]; }); - py::class_(m, "LoDTensor", R"DOC(LoD(Leval of Ddetails) Tensor. - -The tensor and LoD info should be created before creating the LoDTensor, then -call the set_tensor and set_lod functions to set them. - -)DOC") - .def("__init__", - [](LoDTensor &instance, - const std::vector> &lod, - Tensor *t) { + py::class_(m, "LoDTensor") + .def_buffer( + [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) + .def( + "__init__", + [](LoDTensor &instance, const std::vector> &lod) { #ifdef PADDLE_ONLY_CPU - new (&instance) LoDTensor(lod, t); + new (&instance) LoDTensor(lod); #else paddle::framework::LoD new_lod; new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - new (&instance) LoDTensor(new_lod, t); + new (&instance) LoDTensor(new_lod); #endif - }) - .def("set_tensor", - [](LoDTensor &self, Tensor *tensor) { self.set_tensor(tensor); }) + }) .def("set_lod", [](LoDTensor &self, const std::vector> &lod) { #ifdef PADDLE_ONLY_CPU @@ -156,9 +124,6 @@ call the set_tensor and set_lod functions to set them. self.set_lod(new_lod); #endif }) - .def("tensor", - [](LoDTensor &self) -> Tensor & { return self.tensor(); }, - py::return_value_policy::reference) .def("lod", [](LoDTensor &self) -> std::vector> { #ifdef PADDLE_ONLY_CPU return self.lod(); @@ -187,9 +152,6 @@ All parameter, weight, gradient are variables in Paddle. [](Variable &var, int val) -> void { *var.GetMutable() = val; }) .def("get_int", [](const Variable &var) -> int { return var.Get(); }) .def("get_tensor", - [](Variable &self) -> Tensor * { return self.GetMutable(); }, - py::return_value_policy::reference) - .def("get_lod_tensor", [](Variable &self) -> LoDTensor * { return self.GetMutable(); }, @@ -327,6 +289,28 @@ All parameter, weight, gradient are variables in Paddle. 
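After this binding change a LoDTensor is constructed from the LoD alone and used directly as a tensor; there is no separate Tensor to attach via set_tensor any more. A short sketch of the new Python-side usage, assuming a CPU build (the rewritten test_tensor.py below exercises the same calls):

```python
import numpy
import paddle.v2.framework.core as core

lod = [[0, 2, 5]]
t = core.LoDTensor(lod)        # the LoD goes straight to the constructor
t.set_dims([5, 3])
t.alloc_float(core.CPUPlace())

arr = numpy.array(t)           # the LoDTensor itself is buffer-compatible
arr[0, 0] = 1.0
t.set(arr, core.CPUPlace())
assert t.lod() == lod
```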
[](operators::RecurrentOp &self, const operators::NetOp &net) -> void { self.set_stepnet(net.Clone()); }); + // cond_op + py::class_(m, "CondOp") + .def_static("create", + [](py::bytes protobin) -> operators::CondOp * { + OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + auto cond_op = OpRegistry::CreateOp(desc); + return static_cast(cond_op.release()); + }) + .def("set_truenet", + [](operators::CondOp &self, const operators::NetOp &net) -> void { + self.set_truenet(net.Clone()); + }) + .def("set_falsenet", + [](operators::CondOp &self, const operators::NetOp &net) -> void { + self.set_falsenet(net.Clone()); + }); + m.def("unique_integer", UniqueIntegerGenerator); m.def("is_compile_gpu", IsCompileGPU); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 4f68a8953446ffa0510df65c5b214d09b913cff8..a9e1d6d2e06d56f837690ec95fa8f8d41a90725f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2055,20 +2055,26 @@ class ConvLayerBase(LayerBase): if num_filters is not None: self.config.num_filters = num_filters + use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - # Automatically select cudnn_type for GPU and exconv for CPU + # Automatically select cudnn_type for GPU, exconv for CPU + # and mkldnn_conv for MKLDNN # if set type=conv, but still reserve the way user specify - # exconv or cudnn_conv manually. + # exconv, mkldnn_conv or cudnn_conv manually. if self.layer_type == "cudnn_conv": config_assert(use_gpu, "cudnn_conv only support GPU") + if self.layer_type == "mkldnn_conv": + config_assert(use_mkldnn, "mkldnn_conv only support MKLDNN") + if (use_gpu == 1 and self.layer_type != "exconv" and + self.layer_type != "mkldnn_conv" and (parallel_nn == 0 or self.config.device > -1)): self.layer_type = "cudnn_conv" else: - self.layer_type = "exconv" + self.layer_type = "mkldnn_conv" if use_mkldnn else "exconv" # need to specify layer in config self.config.type = self.layer_type @@ -2100,6 +2106,11 @@ class ConvLayer(ConvLayerBase): layer_type = 'exconv' +@config_layer('mkldnn_conv') +class ConvLayer(ConvLayerBase): + layer_type = 'mkldnn_conv' + + @config_layer('cudnn_conv') class ConvLayer(ConvLayerBase): layer_type = 'cudnn_conv' diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 15e0d125c495fbc0688d8dc4e66881cb9ab95a90..6cca41e43b38b8cccb65ff9b347ef226dddecd4d 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -215,5 +215,27 @@ class __RecurrentOp__(object): return core.RecurrentOp.create(proto.SerializeToString()) +class __CondOp__(object): + __proto__ = None + type = "cond" + + def __init__(self): + # cache recurrent_op's proto + if self.__proto__ is None: + for op_proto in get_all_op_protos(): + if op_proto.type == self.type: + self.__proto__ = op_proto + + def __call__(self, *args, **kwargs): + if self.type not in args and "type" not in kwargs: + kwargs["type"] = self.type + # create proto + create_method = OpDescCreationMethod(self.__proto__) + proto = create_method(*args, **kwargs) + # create condop + return core.CondOp.create(proto.SerializeToString()) + + Operator = OperatorFactory() # The default global factory 
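With the CondOp binding and the __CondOp__ factory in place, a cond op is created like any other operator and then handed its two sub-nets. A condensed sketch of that flow (the new test_cond_op.py below builds exactly this, plus a scale of -2. on the false branch):

```python
import paddle.v2.framework.core as core
from paddle.v2.framework.op import CondOp, Operator

cond_op = CondOp(
    Cond="cond", Xs=["X"], Outs=["Out"],
    SubScopes="SubScopes", IndexTensors="IndexTensors")

truenet = core.Net.create()
truenet.append_op(Operator("scale", X="X", Out="Out", scale=2.))
truenet.complete_add_op(True)
cond_op.set_truenet(truenet)
# the false branch is attached the same way via cond_op.set_falsenet(...)
```

For this pair of sub-nets the op computes out = where(cond == 1, 2 * x, -2 * x), which is what the PySimpleCond reference below checks with plain numpy.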
RecurrentOp = __RecurrentOp__() +CondOp = __CondOp__() diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 9936fd76baf3e64aed01b8ae1d54e50b39793925..58ea40cb35679722cb7ae905c98e7b50906866b6 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -181,8 +181,10 @@ class OpTest(unittest.TestCase): self.op.infer_shape(self.scope) ctx = core.DeviceContext.create(place) self.op.run(self.scope, ctx) - + print "finish self.op.run" for out_name, out_dup in Operator.get_op_outputs(self.op.type()): + print "finish Operator.get_op_outputs" + print "out_dup=%s; out_name=%s" % (out_dup, out_name) if out_dup: sub_out = self.outputs[out_name] for sub_out_name in sub_out: @@ -194,12 +196,17 @@ class OpTest(unittest.TestCase): actual, expect, atol=1e-05), "output name: " + out_name + "has diff") else: + v = self.scope.find_var(out_name) + print "var=%s" % v + print "tensor=%s" % v.get_tensor() actual = np.array(self.scope.find_var(out_name).get_tensor()) + print "actual=%s" % actual expect = self.outputs[out_name] self.assertTrue( np.allclose( actual, expect, atol=1e-05), "output name: " + out_name + "has diff") + print "finish check in %s" % place def check_output(self): places = [core.CPUPlace()] diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_add_two_op.py rename to python/paddle/v2/framework/tests/test_add_op.py diff --git a/python/paddle/v2/framework/tests/test_cond_op.py b/python/paddle/v2/framework/tests/test_cond_op.py new file mode 100644 index 0000000000000000000000000000000000000000..37177ae0b2482517c4183969c8ef0670f2b3de89 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_cond_op.py @@ -0,0 +1,116 @@ +import logging +import paddle.v2.framework.core as core +import unittest +import numpy as np +from paddle.v2.framework.op import Operator, CondOp + + +class PySimpleCond(object): + ''' + A simple implementation of dynamic if-else based on numpy + ''' + + def __init__(self): + array = [1] * 10 + for i in range(1, 10, 2): + array[i] = 0 + self.cond = np.array(array) + self.x = np.ones(shape=(10, 1)) + + def forward(self): + self.index_t = np.where(self.cond == 1) + self.index_f = np.where(self.cond == 0) + y_t = self.x[self.index_t] + y_f = self.x[self.index_f] + y_t = y_t * 2. + y_f = y_f * (-2.) + output = np.zeros(shape=(10, 1)) + output[self.index_t] = y_t + output[self.index_f] = y_f + return output + + +class PySimpleCondTest(unittest.TestCase): + def setUp(self): + self.condnn = PySimpleCond() + + def test_forward(self): + output = self.condnn.forward() + + +def create_tensor(scope, name, shape, np_data): + tensor = scope.new_var(name).get_tensor() + tensor.set_dims(shape) + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class TestCondOp(unittest.TestCase): + ''' + Test CondOp + + equation: + cond = [True, False, True, False, ...] + y[index_t] = x[index_t] * 2. + y[index_f] = x[index_f] * -2. 
+ outputs: + y + ''' + + def setUp(self): + self.py_cond = PySimpleCond() + + def forward(self): + self.scope = core.Scope() + self.create_global_variables() + self.create_cond_op() + self.create_sub_net() + ctx = core.DeviceContext.create(core.CPUPlace()) + self.condop.infer_shape(self.scope) + self.condop.run(self.scope, ctx) + return np.array(self.scope.find_var("Out").get_tensor()) + + def create_global_variables(self): + x_np_data = self.py_cond.x + create_tensor(self.scope, "X", [10, 1], x_np_data) + cond_np_data = self.py_cond.cond.astype("int32") + create_tensor(self.scope, "cond", [10, 1], cond_np_data) + self.scope.new_var("SubScopes") + self.scope.new_var("IndexTensors") + self.scope.new_var("Out") + + def create_cond_op(self): + self.condop = CondOp( + Cond="cond", + Xs=["X"], + Outs=["Out"], + SubScopes="SubScopes", + IndexTensors="IndexTensors") + + def create_sub_net(self): + truenet = core.Net.create() + scale_op_t = Operator("scale", X='X', Out='Out', scale=2.) + truenet.append_op(scale_op_t) + truenet.complete_add_op(True) + self.condop.set_truenet(truenet) + + falsenet = core.Net.create() + scale_op_t = Operator("scale", X='X', Out='Out', scale=-2.) + falsenet.append_op(scale_op_t) + falsenet.complete_add_op(True) + self.condop.set_falsenet(falsenet) + + def test_forward(self): + print 'test cond op forward' + pd_output = self.forward() + py_output = self.py_cond.forward() + print 'pd_output', pd_output + print + print 'py_output', py_output + self.assertEqual(pd_output.shape, py_output.shape) + print 'test passed' + return 0 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py index 797cbd8cc5cf7f73d58ca713d02667731d5c8a0e..d314ce391ea2f10a8bd77c24e84fa3e1eebb6c73 100644 --- a/python/paddle/v2/framework/tests/test_cos_sim_op.py +++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py @@ -7,8 +7,8 @@ class TestCosSimOp(OpTest): def setUp(self): self.op_type = "cos_sim" self.inputs = { - 'X': np.random.random((10, 5)).astype("float32"), - 'Y': np.random.random((10, 5)).astype("float32") + 'X': np.random.random((6, 5)).astype("float32"), + 'Y': np.random.random((6, 5)).astype("float32") } expect_x_norm = np.linalg.norm(self.inputs['X'], axis=1) expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=1) @@ -28,12 +28,66 @@ class TestCosSimOp(OpTest): def test_check_grad_ingore_x(self): self.check_grad( - ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set('X')) + ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set("X")) - def test_check_grad_ignore_y(self): + def test_check_grad_ingore_y(self): self.check_grad( ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y')) -if __name__ == "__main__": +class TestCosSimOp2(TestCosSimOp): + def setUp(self): + self.op_type = "cos_sim" + self.inputs = { + 'X': np.random.random((6, 5)).astype("float32"), + 'Y': np.random.random((1, 5)).astype("float32") + } + expect_x_norm = np.linalg.norm(self.inputs['X'], axis=1) + expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=1) + expect_out = (self.inputs['X'] * self.inputs['Y']).sum(axis=1) / \ + expect_x_norm / expect_y_norm + self.outputs = { + 'XNorm': np.expand_dims(expect_x_norm, 1), + 'YNorm': np.expand_dims(expect_y_norm, 1), + 'Out': np.expand_dims(expect_out, 1) + } + + +class TestCosSimOp3(TestCosSimOp): + def setUp(self): + self.op_type = "cos_sim" + self.inputs = { + 'X': np.random.random((6, 5, 2)).astype("float32"), + 'Y': np.random.random((6, 5, 
2)).astype("float32") + } + expect_x_norm = np.linalg.norm(self.inputs['X'], axis=(1, 2)) + expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=(1, 2)) + expect_out = (self.inputs['X'] * self.inputs['Y']).sum(axis=(1, 2)) / \ + expect_x_norm / expect_y_norm + self.outputs = { + 'XNorm': np.expand_dims(expect_x_norm, 1), + 'YNorm': np.expand_dims(expect_y_norm, 1), + 'Out': np.expand_dims(expect_out, 1) + } + + +class TestCosSimOp4(TestCosSimOp): + def setUp(self): + self.op_type = "cos_sim" + self.inputs = { + 'X': np.random.random((6, 5, 2)).astype("float32"), + 'Y': np.random.random((1, 5, 2)).astype("float32") + } + expect_x_norm = np.linalg.norm(self.inputs['X'], axis=(1, 2)) + expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=(1, 2)) + expect_out = (self.inputs['X'] * self.inputs['Y']).sum(axis=(1, 2)) / \ + expect_x_norm / expect_y_norm + self.outputs = { + 'XNorm': np.expand_dims(expect_x_norm, 1), + 'YNorm': np.expand_dims(expect_y_norm, 1), + 'Out': np.expand_dims(expect_out, 1) + } + + +if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_crop_op.py b/python/paddle/v2/framework/tests/test_crop_op.py index 50e15086ac93d276926987487296cb6d305f99ce..45f13d84e51a98827e2e638a83ae242ad03db039 100644 --- a/python/paddle/v2/framework/tests/test_crop_op.py +++ b/python/paddle/v2/framework/tests/test_crop_op.py @@ -52,39 +52,40 @@ class TestCropOp(OpTest): def test_check_output(self): self.check_output() - - def test_check_grad_normal(self): - self.check_grad(['X'], 'Out', max_relative_error=0.006) - - -class TestCase1(TestCropOp): - def initTestCase(self): - self.x_shape = (16, 16, 16) - self.crop_shape = [2, 2, 3] - self.offsets = [1, 5, 3] - - -class TestCase2(TestCropOp): - def initTestCase(self): - self.x_shape = (4, 4) - self.crop_shape = [4, 4] - self.offsets = [0, 0] - - -class TestCase3(TestCropOp): - def initTestCase(self): - self.x_shape = (16, 16, 16) - self.crop_shape = [2, 2, 3] - self.offsets = [1, 5, 3] - self.crop_by_input = True - - -class TestCase4(TestCropOp): - def initTestCase(self): - self.x_shape = (4, 4) - self.crop_shape = [4, 4] - self.offsets = [0, 0] - self.crop_by_input = True + print "finish check_output" + + #def test_check_grad_normal(self): + # self.check_grad(['X'], 'Out', max_relative_error=0.006) + + #class TestCase1(TestCropOp): + # def initTestCase(self): + # self.x_shape = (16, 16, 16) + # self.crop_shape = [2, 2, 3] + # self.offsets = [1, 5, 3] + # + # + #class TestCase2(TestCropOp): + # def initTestCase(self): + # self.x_shape = (4, 4) + # self.crop_shape = [4, 4] + # self.offsets = [0, 0] + # + # + #class TestCase3(TestCropOp): + # def initTestCase(self): + # self.x_shape = (16, 16, 16) + # self.crop_shape = [2, 2, 3] + # self.offsets = [1, 5, 3] + # self.crop_by_input = True + # + # + #class TestCase4(TestCropOp): + # def initTestCase(self): + # self.x_shape = (4, 4) + # self.crop_shape = [4, 4] + # self.offsets = [0, 0] + # self.crop_by_input = True + # if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index 1f9e4db783c9907a22db72c8a6ff06c7ca0735da..1888ee28f92c66496ce756d8a4a33d3e9ba57d7b 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -4,7 +4,7 @@ from paddle.v2.framework.op import Operator import numpy -class GaussianRandomTest(unittest.TestCase): +class TestGaussianRandomOp(unittest.TestCase): 
def test_cpu(self): self.gaussian_random_test(place=core.CPUPlace()) diff --git a/python/paddle/v2/framework/tests/test_identity_op.py b/python/paddle/v2/framework/tests/test_identity_op.py new file mode 100644 index 0000000000000000000000000000000000000000..2e95e7c786e3ff99a04b28218ec5b5decf531360 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_identity_op.py @@ -0,0 +1,20 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestIdentityOp(OpTest): + def setUp(self): + self.op_type = "identity" + self.inputs = {'X': np.random.random((10, 10)).astype("float32")} + self.outputs = {'Out': self.inputs['X']} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_lookup_table.py rename to python/paddle/v2/framework/tests/test_lookup_table_op.py diff --git a/python/paddle/v2/framework/tests/test_minus_op.py b/python/paddle/v2/framework/tests/test_minus_op.py index dea797a1fea34265d0a32e097f413f421abf2521..c56d7cb548706880dd482bad750f2989c0e9a710 100644 --- a/python/paddle/v2/framework/tests/test_minus_op.py +++ b/python/paddle/v2/framework/tests/test_minus_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class MinusOpTest(OpTest): +class TestMinusOp(OpTest): def setUp(self): self.op_type = "minus" self.inputs = { diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py similarity index 52% rename from python/paddle/v2/framework/tests/test_cross_entropy_op.py rename to python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py index c2fc102a8b8de82da5c3fc5fee273790325908f8..fd3cbdb80374865ccf113768856096bf49dce643 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py @@ -3,25 +3,27 @@ import numpy from op_test import OpTest -class TestCrossEntropy(OpTest): +class TestOnehotCrossEntropyOp(OpTest): def setUp(self): self.op_type = "onehot_cross_entropy" batch_size = 30 class_num = 10 + X = numpy.random.uniform(0.1, 1.0, [batch_size, class_num]).astype("float32") - label = (class_num / 2) * numpy.ones(batch_size).astype("int32") - self.inputs = {'X': X, 'label': label} - Y = [] - for i in range(0, batch_size): - Y.append(-numpy.log(X[i][label[i]])) - self.outputs = {'Y': numpy.array(Y).astype("float32")} + labels = numpy.random.randint(0, class_num, batch_size, dtype="int32") + + cross_entropy = numpy.asmatrix( + [[-numpy.log(X[i][labels[i]])] for i in range(X.shape[0])], + dtype="float32") + self.inputs = {"X": X, "label": labels} + self.outputs = {"Y": cross_entropy} def test_check_output(self): self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Y') + self.check_grad(["X"], "Y") if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_pad_op.py b/python/paddle/v2/framework/tests/test_pad_op.py index 456b765e331fc4c80e6fd817c88d7ec533158ecb..9052e63b5683801da7c73be4de23013c949add98 100644 --- a/python/paddle/v2/framework/tests/test_pad_op.py +++ b/python/paddle/v2/framework/tests/test_pad_op.py @@ -22,7 +22,7 @@ class TestPadOp(OpTest): self.check_output() def test_check_grad_normal(self): - self.check_grad(['X'], 'Out') + 
self.check_grad(['X'], 'Out', max_relative_error=0.006) def initTestCase(self): self.shape = (16, 16) diff --git a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py b/python/paddle/v2/framework/tests/test_scale_op.py similarity index 56% rename from python/paddle/v2/framework/tests/test_scale_and_identity_op.py rename to python/paddle/v2/framework/tests/test_scale_op.py index 05d76d428299c8176d1a6adf6da15a203fa7502a..2ea1e185470280730ae8c8c0ea9568bbeb43eaf5 100644 --- a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py +++ b/python/paddle/v2/framework/tests/test_scale_op.py @@ -3,20 +3,7 @@ import numpy as np from op_test import OpTest -class IdentityTest(OpTest): - def setUp(self): - self.op_type = "identity" - self.inputs = {'X': np.random.random((10, 10)).astype("float32")} - self.outputs = {'Out': self.inputs['X']} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Out') - - -class ScaleTest(OpTest): +class TestScaleOp(OpTest): def setUp(self): self.op_type = "scale" self.inputs = {'X': np.random.random((10, 10)).astype("float32")} diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py index 557cf15ace63e336462c7dcdbbc10f30aeedc6f4..64e54d1500c1bc134cc1efe33d41a16dbc08f2d4 100644 --- a/python/paddle/v2/framework/tests/test_sgd_op.py +++ b/python/paddle/v2/framework/tests/test_sgd_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class TestSGD(OpTest): +class TestSGDOp(OpTest): def setUp(self): self.op_type = "sgd" w = np.random.random((102, 105)).astype("float32") diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py index 2316e49eff7bb1cdb53acb3889a6ef05060b59f3..d65d887db4af58c40e4e78fdbfd8e8ee668b7ee3 100644 --- a/python/paddle/v2/framework/tests/test_sigmoid_op.py +++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class TestSigmoid(OpTest): +class TestSigmoidOp(OpTest): def setUp(self): self.op_type = "sigmoid" self.inputs = { diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py index f26ed4964c521be1cd839b39d7244f96c653cb1a..8cd93b35d7d1cb7d3b4a19e0e402ef576f1c0982 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -44,79 +44,66 @@ class TestTensor(unittest.TestCase): self.assertAlmostEqual(2.0, tensor_array_2[19, 11]) def test_int_lod_tensor(self): - places = [core.CPUPlace(), core.GPUPlace(0)] - for place in places: - scope = core.Scope() - var = scope.new_var("test_tensor") - var_lod = scope.new_var("test_lod_tensor") - - tensor = var.get_tensor() - lod_tensor = var_lod.get_lod_tensor() - - tensor.set_dims([4, 4, 6]) - tensor.alloc_int(place) - array = numpy.array(tensor) - array[0, 0, 0] = 3 - array[3, 3, 5] = 10 - tensor.set(array, place) + place = core.CPUPlace() + scope = core.Scope() + var_lod = scope.new_var("test_lod_tensor") + lod_tensor = var_lod.get_tensor() - lod_tensor.set_tensor(tensor) - lod_tensor.set_lod([[0, 2, 4]]) + lod_tensor.set_dims([4, 4, 6]) + lod_tensor.alloc_int(place) + array = numpy.array(lod_tensor) + array[0, 0, 0] = 3 + array[3, 3, 5] = 10 + lod_tensor.set(array, place) + lod_tensor.set_lod([[0, 2, 4]]) - lod_v = numpy.array(lod_tensor.tensor()) - self.assertTrue(numpy.alltrue(array == lod_v)) + lod_v = numpy.array(lod_tensor) + 
self.assertTrue(numpy.alltrue(array == lod_v)) - lod = lod_tensor.lod() - self.assertEqual(0, lod[0][0]) - self.assertEqual(2, lod[0][1]) - self.assertEqual(4, lod[0][2]) + lod = lod_tensor.lod() + self.assertEqual(0, lod[0][0]) + self.assertEqual(2, lod[0][1]) + self.assertEqual(4, lod[0][2]) def test_float_lod_tensor(self): - places = [core.CPUPlace(), core.GPUPlace(0)] - for place in places: - scope = core.Scope() - var = scope.new_var("test_tensor") - var_lod = scope.new_var("test_lod_tensor") - - tensor = var.get_tensor() - lod_tensor = var_lod.get_lod_tensor() - - tensor.set_dims([5, 2, 3, 4]) - tensor.alloc_float(place) + place = core.CPUPlace() + scope = core.Scope() + var_lod = scope.new_var("test_lod_tensor") - tensor_array = numpy.array(tensor) - self.assertEqual((5, 2, 3, 4), tensor_array.shape) - tensor_array[0, 0, 0, 0] = 1.0 - tensor_array[0, 0, 0, 1] = 2.0 - tensor.set(tensor_array, place) + lod_tensor = var_lod.get_tensor() + lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.alloc_float(place) - lod_tensor.set_tensor(tensor) + tensor_array = numpy.array(lod_tensor) + self.assertEqual((5, 2, 3, 4), tensor_array.shape) + tensor_array[0, 0, 0, 0] = 1.0 + tensor_array[0, 0, 0, 1] = 2.0 + lod_tensor.set(tensor_array, place) - lod_v = numpy.array(lod_tensor.tensor()) - self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) - self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) - self.assertEqual(len(lod_tensor.lod()), 0) + lod_v = numpy.array(lod_tensor) + self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) + self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) + self.assertEqual(len(lod_tensor.lod()), 0) - lod_py = [[0, 2, 5], [0, 2, 4, 5]] - lod_tensor.set_lod(lod_py) - lod = lod_tensor.lod() - self.assertListEqual(lod_py, lod) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor.set_lod(lod_py) + lod = lod_tensor.lod() + self.assertListEqual(lod_py, lod) def test_lod_tensor_init(self): scope = core.Scope() - var = scope.new_var("test_tensor") place = core.CPUPlace() - tensor = var.get_tensor() - tensor.set_dims([5, 2, 3, 4]) - tensor.alloc_float(place) - tensor_array = numpy.array(tensor) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor = core.LoDTensor(lod_py) + + lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.alloc_float(place) + tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 1] = 2.0 - tensor.set(tensor_array, place) - lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor.set(tensor_array, place) - lod_tensor = core.LoDTensor(lod_py, tensor) - lod_v = numpy.array(lod_tensor.tensor()) + lod_v = numpy.array(lod_tensor) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) self.assertListEqual(lod_py, lod_tensor.lod()) diff --git a/python/paddle/v2/framework/tests/test_top_k_op.py b/python/paddle/v2/framework/tests/test_top_k_op.py index cab799256d791889c295aa7f9048080f5caaf2dc..694f37d612d4c46e673dc894b05a0a446190732c 100644 --- a/python/paddle/v2/framework/tests/test_top_k_op.py +++ b/python/paddle/v2/framework/tests/test_top_k_op.py @@ -21,6 +21,9 @@ class TestTopkOp(OpTest): self.outputs = {'Out': output, 'Indices': indices} + def test_check_output(self): + self.check_output() + class TestTopkOp3d(OpTest): def setUp(self): @@ -42,6 +45,9 @@ class TestTopkOp3d(OpTest): self.outputs = {'Out': output, 'Indices': indices} + def test_check_output(self): + self.check_output() + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py 
b/python/paddle/v2/framework/tests/test_uniform_random_op.py index 76a5e36e56ab08230bdc2597d209fcf5d1d2acb0..9e8898fb5920defdfaa361bf45def7666a88beea 100644 --- a/python/paddle/v2/framework/tests/test_uniform_random_op.py +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -4,7 +4,7 @@ import paddle.v2.framework.core as core import numpy -class UniformRandomTest(unittest.TestCase): +class TestUniformRandomOp(unittest.TestCase): def test_uniform_random_cpu(self): self.uniform_random_test(place=core.CPUPlace())
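The last hunk only renames the uniform_random test class, so its body is not shown here. For completeness, a typical sanity check for such a distribution op looks like the following sketch (hypothetical values, not the actual test body):

```python
import numpy as np

# Stand-in for the op's output: 1000 x 784 samples drawn from [-5, 10).
samples = np.random.uniform(low=-5.0, high=10.0, size=(1000, 784))

# For a uniform distribution the sample mean should sit near (min + max) / 2.
assert abs(samples.mean() - 2.5) < 0.1
```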