diff --git a/.travis.yml b/.travis.yml index 3fadf8deb787bfb1d055496c5987798297f80bbf..e217c8f5a740ef5ab7315656ed7839ffa219c805 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,6 @@ addons: - python - python-pip - python2.7-dev - - python-numpy - python-wheel - libboost-dev - curl @@ -35,8 +34,8 @@ before_install: - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt - - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker + - sudo pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt + - sudo pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - go get -u github.com/alecthomas/gometalinter diff --git a/CMakeLists.txt b/CMakeLists.txt index ad559672ad2f83a3d62cdf332b47c6cf1e730f70..08237cd850ae20c515a39c8783a18deaac431626 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,8 +65,8 @@ if(NOT CMAKE_BUILD_TYPE) endif() if(ANDROID) - if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") - message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21") + if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16") + message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16") endif() set(WITH_GPU OFF CACHE STRING diff --git a/doc/design/functions_operators_layers.md b/doc/design/functions_operators_layers.md index 7a2e8fd0ace2e3f4462b15215de22c31e944b7cb..d23ba56b5773a36d448a99e4abdebc1475ed789c 100644 --- a/doc/design/functions_operators_layers.md +++ b/doc/design/functions_operators_layers.md @@ -86,12 +86,13 @@ def layer.fc(X): We'd like to have Python bindings to operators in package `paddle.operator`, and Python compositions of operators in package `paddle.layer`. So we have the following concepts in above illustrative example: -``` + | C++ functions/functors | mul | add | | | +|------------------------|--------------|--------------|-------------|----------| | C++ operator class | mulOp | addOp | FCOp | | | Python binding | operator.mul | operator.add | operator.fc | | | Python function | | | | layer.fc | -``` + This is how we differentiate layer and operators in PaddlePaddle: diff --git a/doc/design/ops/dist_train.md b/doc/design/ops/dist_train.md new file mode 100644 index 0000000000000000000000000000000000000000..fa3c5d7990213cf2b0d236e66e592dd2699da876 --- /dev/null +++ b/doc/design/ops/dist_train.md @@ -0,0 +1,106 @@ +# Design Doc: Operation Graph Based Parameter Server + +## Abstract + +We propose an approach to implement the parameter server. In this +approach, there is no fundamental difference between the trainer and +the parameter server: they both run subgraphs, but subgraphs of +different purposes. + +## Background + +The previous implementations of the parameter server does not run a +subgraph. parameter initialization, optimizer computation, network +communication and checkpointing are implemented twice on both the +trainer and the parameter server. + +It would be great if we can write code once and use them on both the +trainer and the parameter server: reduces code duplication and +improves extensibility. Given that after the current refactor, we are +representing everything as a computing graph on the +trainer. Representing everything as a computing graph on the parameter +server becomes a natural extension. + +## Design + +### Graph Converter + +The *graph converter* converts the user-defined operation (OP) graph +into subgraphs to be scheduled on different nodes with the following +steps: + +1. OP placement: the OPs will be placed on different nodes according + to heuristic that minimizes estimated total computation + time. Currently we will use a simple heuristic that puts parameter + varable on parameter server workers and everything else on trainer + workers. + +1. Add communication OPs to enable the communication between nodes. + +We will need these OPs: *Send*, *Recv*, *Enqueue*, *Dequeue*. + +Below is an example of converting the user defined graph to the +subgraphs for the trainer and the parameter server: + + + +After converting: + + + +1. The parameter variable W and it's optimizer subgraph are placed on the parameter server. +1. Operators are added to the subgraphs. + - *Send* sends data to the connected *Recv* operator. The + scheduler on the receive node will only schedule *Recv* operator + to run when the *Send* operator has ran (the *Send* OP will mark + the *Recv* OP runnable automatically). + - *Enueue* enqueues the input variable, it can block until space + become available in the queue. + - *Dequeue* outputs configurable numbers of tensors from the + queue. It will block until the queue have the required number of + tensors. + + +### Benefits + +- Model parallelism become easier to implement: it's an extension to + the trainer - parameter server approach. we already have the + communication OPs, but need to extend the graph converter's + placement functionality. + +- User-defined optimizer is easier to add - user can now express it as + a subgraph. + +- No more duplication logic inside the trainer and the parameter + server mentioned in the background section. + +### Challenges + +- It might be hard for the graph converter to cut a general graph + (without any hint for which subgraph is the optimizer). We may need + to label which subgraph inside the OP graph is the optimizer. + +- It's important to balance the parameter shards of on multiple + parameter server. If a single parameter is very big (some + word-embedding, fully connected, softmax layer), we need to + automatically partition the single parameter onto different + parameter servers when possible (only element-wise optimizer depends + on the parameter variable). + +### Discussion + +- In the "Aync SGD" figure, the "W" variable on the parameter server + could be read and wrote concurrently, what is our locking strategy? + E.g., each variable have a lock cpp method to be invoked by every + OP, or, have a lock OP. + +- Can the Enqueue OP be implemented under our current tensor design + (puts the input tensor into the queue tensor)? + +- *Dequeue* OP will have variable numbers of output (depends on the + `min_count` attribute), does our current design support it? (similar + question for the *Add* OP) + + +### References: +[1] [TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf) diff --git a/doc/design/ops/src/dist-graph.graffle b/doc/design/ops/src/dist-graph.graffle new file mode 100644 index 0000000000000000000000000000000000000000..941399c6ced8d5f65b6c595522b770c88259df4b Binary files /dev/null and b/doc/design/ops/src/dist-graph.graffle differ diff --git a/doc/design/ops/src/dist-graph.png b/doc/design/ops/src/dist-graph.png new file mode 100644 index 0000000000000000000000000000000000000000..3546b09f1c2ee3e4f60f519d5e47f823f08051a7 Binary files /dev/null and b/doc/design/ops/src/dist-graph.png differ diff --git a/doc/design/ops/src/local-graph.graffle b/doc/design/ops/src/local-graph.graffle new file mode 100644 index 0000000000000000000000000000000000000000..19e509bd9af3c1e9a3f5e0f16ddd281457a339c5 Binary files /dev/null and b/doc/design/ops/src/local-graph.graffle differ diff --git a/doc/design/ops/src/local-graph.png b/doc/design/ops/src/local-graph.png new file mode 100644 index 0000000000000000000000000000000000000000..ada51200f793a9bb18911e7d63cfdb3244b967d7 Binary files /dev/null and b/doc/design/ops/src/local-graph.png differ diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst index 36e5d420c986fc8d88eefee4aa221dba0a0480f2..731a63f945c29ba78538b3d71289b234e569354d 100644 --- a/doc/howto/dev/write_docs_cn.rst +++ b/doc/howto/dev/write_docs_cn.rst @@ -5,15 +5,13 @@ PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。 -如何构建PaddlePaddle的文档 -========================== +如何构建文档 +============ -PaddlePaddle的文档构建有直接构建和基于Docker构建两种方式,我们提供了一个构建脚本build_docs.sh来进行构建。 -PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使用基于Docker来构建PaddlePaddle的文档。 +PaddlePaddle的文档构建有两种方式。 - -使用Docker构建PaddlePaddle的文档 --------------------------------- +使用Docker构建 +-------------- 使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即 @@ -21,58 +19,46 @@ PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使 cd TO_YOUR_PADDLE_CLONE_PATH cd paddle/scripts/tools/build_docs - bash build_docs.sh with_docker - -编译完成后,会在当前目录生成两个子目录\: - -* doc 英文文档目录 -* doc_cn 中文文档目录 + sh build_docs.sh +编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。 打开浏览器访问对应目录下的index.html即可访问本地文档。 - - -直接构建PaddlePaddle的文档 --------------------------- - -因为PaddlePaddle的v2 api文档生成过程依赖于py_paddle Python包,用户需要首先确认py_paddle包已经安装。 - -.. code-block:: bash - - python -c "import py_paddle" - -如果提示错误,那么用户需要在本地编译安装PaddlePaddle,请参考 `源码编译文档 `_ 。 -注意,用户在首次编译安装PaddlePaddle时,请将WITH_DOC选项关闭。在编译安装正确之后,请再次确认py_paddle包已经安装,即可进行下一步操作。 +直接构建 +-------- 如果提示正确,可以执行以下命令编译生成文档,即 .. code-block:: bash cd TO_YOUR_PADDLE_CLONE_PATH - cd paddle/scripts/tools/build_docs - bash build_docs.sh local - -编译完成之后,会在当前目录生成两个子目录\: - -* doc 英文文档目录 -* doc_cn 中文文档目录 + mkdir -p build + cd build + cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON + make gen_proto_py + make paddle_docs paddle_docs_cn +编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。 打开浏览器访问对应目录下的index.html即可访问本地文档。 -如何书写PaddlePaddle的文档 -========================== +如何书写文档 +============ PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程进行书写。 -如何更新www.paddlepaddle.org文档 -================================ +如何更新文档主题 +================ + +PaddlePaddle文档主题在 `TO_YOUR_PADDLE_CLONE_PATH/doc_theme` 文件夹下,包含所有和前端网页设计相关的文件。 -开发者给PaddlePaddle代码增加的注释以PR的形式提交到github中,提交方式可参见 `贡献文档 `_ 。 +如何更新doc.paddlepaddle.org +============================ + +更新的文档以PR的形式提交到github中,提交方式参见 `贡献文档 `_ 。 目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 `_ 和 `英文文档 `_ 。 - .. _cmake: https://cmake.org/ .. _sphinx: http://www.sphinx-doc.org/en/1.4.8/ diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index c0838d9b759110fd706577386d2c81bda6876223..3371962c635c3731f00a6af2a6e287ece33397cd 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -9,6 +9,7 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor) +nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) cc_test(variable_test SRCS variable_test.cc) diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index cde3dfa1d3d19b1bee9fd23dad52ecbbe628c3a9..2b788a76cafe198abb9aed8ba842e37cc6ff73a6 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -45,7 +45,19 @@ class GreaterThanChecker { public: explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {} void operator()(T& value) const { - PADDLE_ENFORCE(value > lower_bound_, "larger_than check fail"); + PADDLE_ENFORCE(value > lower_bound_, "larger_than check fails."); + } + + private: + T lower_bound_; +}; + +template +class EqualGreaterThanChecker { + public: + explicit EqualGreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {} + void operator()(T& value) const { + PADDLE_ENFORCE_GE(value, lower_bound_, "equal_larger_than check fails."); } private: @@ -115,6 +127,11 @@ class TypedAttrChecker { return *this; } + TypedAttrChecker& EqualGreaterThan(const T& lower_bound) { + value_checkers_.push_back(EqualGreaterThanChecker(lower_bound)); + return *this; + } + // we can add more common limits, like LessThan(), Between()... TypedAttrChecker& SetDefault(const T& default_value) { diff --git a/paddle/framework/backward.md b/paddle/framework/backward.md index 8aa6728a95bc464ab8884986f0cec6c817d3303b..c762811dfc190b255e0a3389885a081ce8315caf 100644 --- a/paddle/framework/backward.md +++ b/paddle/framework/backward.md @@ -2,20 +2,20 @@ ## Motivation -In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the fundmental gradient operators/expressions together with chain rule . Every forward network need a backward network to construct the full computation graph, the operator/expression's backward pass will be generated respect to forward pass. - +In Neural Network, the backpropagation algorithm follows the chain rule, so we need to compound the gradient operators/expressions together with the chain rule. Every forward network needs a backward network to construct the full computation graph, the operator/expression's backward pass will be generated respect to forward pass. + ## Backward Operator Registry -A backward network is built up with several backward operators. Backward operators take forward operators' inputs, outputs and output gradients and then calculate its input gradients. +A backward network is built up with several backward operators. Backward operators take forward operators' inputs outputs, and output gradients and then calculate its input gradients. | | forward operator | backward operator | ---------------------- | ---------------- |------------------------- | | **Operator::inputs_** | Inputs | Inputs, Outputs, OutputGradients | | **Operator::outputs_** | Outputs | InputGradients | - In most cases, there is a one-to-one correspondence between forward and backward operators. These correspondences are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and make operators pluggable, the registry mechanism is introduced. + In most cases, there is a one-to-one correspondence between the forward and backward operators. These correspondences are recorded by a global hash map(`OpInfoMap`). To follow the philosophy of minimum core and make operators pluggable, the registry mechanism is introduced. -For example, we have got a `mul_op`, and we can register it's information and corresponding backward operator by the following macro: +For example, we have got a `mul_op`, and we can register its information and corresponding backward operator by the following macro: ```cpp REGISTER_OP(mul, MulOp, MulOpMaker, mul_grad, MulOpGrad); @@ -27,17 +27,17 @@ REGISTER_OP(mul, MulOp, MulOpMaker, mul_grad, MulOpGrad); ## Backward Opeartor Creating -Given a certain forward operator, we can get its corresponding backward opeartor by calling: +Given a certain forward operator, we can get its corresponding backward operator by calling: ```cpp OperatorBase* bwd_op = BuildGradOp(const OperatorBase* fwd_op); -``` +``` The function `BuildGradOp` will sequentially execute following processes: 1. Get the `type_` of given forward operator, and then get the corresponding backward operator's type by looking up the `OpInfoMap`. -2. Build two maps named `inputs` and `outputs` to temporary storage backward operator's inputs and outputs. Copy forward operator's `inputs_` and `outputs_` to map `inputs`, except these are not necessary for gradient computing. +2. Build two maps named `inputs` and `outputs` to temporary storage backward operator's inputs and outputs. Copy forward operator's `inputs_` and `outputs_` to map `inputs`, except these, are not necessary for gradient computing. 3. Add forward inputs' gradient variables into map `output`, adding forward outputs' gradient variables into map `input`. @@ -49,31 +49,31 @@ A backward network is a series of backward operators. The main idea of building In our design, the network itself is also a kind of operator. So the operators contained by a big network may be some small network. -given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`,`InputGradients`. +given a forward network, it generates the backward network. We only care about the Gradients—`OutputGradients`, `InputGradients`. 1. Op - when the input forward network is a Op, return its gradient Operator Immediately. + when the input forward network is an Op, return its gradient Operator Immediately. 2. NetOp - when the input forward network is a NetOp, it need to call the sub NetOp/Operators backward function recursively. During the process, we need to collect the `OutputGradients` name according to forward NetOp. + when the input forward network is a NetOp, it needs to call the sub NetOp/Operators backward function recursively. During the process, we need to collect the `OutputGradients` name according to the forward NetOp. - **shared variable**. As illustrated in the pictures, two operator's `Output` `Gradient` will overwirte their shared input variable. + **shared variable**. As illustrated in the pictures, two operator's `Output` `Gradient` will overwrite their shared input variable.

-
+
- 1. shared variable in two operators. + 1. Shared variable in operators.

- Share variable between operators or same input variable used in multiple operators lead to a duplicate gradient variable. As demo show above, we need to rename gradient name recursively, and add a generic add operator replace the overwirte links. + Share variable between operators or same input variable used in multiple operators leads to a duplicate gradient variable. As demo show above, we need to rename gradient name recursively and add a generic add operator replace the overwrite links.

-
+
- 2. replace shared variable gradient with `Add` Operator + 2. Replace shared variable's gradient with `Add` operator.

diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 85b7de79743bb0390d66b8999f2e8342a51d14a9..fc3d508553c0e966978b28d58127bdbff10d45f1 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -283,5 +283,14 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { DDim::DDim(std::initializer_list init_list) { *this = make_ddim(init_list); } + +DDim flatten_to_2d(const DDim& src, int num_col_dims) { + int rank = src.size(); + return make_ddim({product(slice_ddim(src, 0, num_col_dims)), + product(slice_ddim(src, num_col_dims, rank))}); +} + +DDim flatten_to_1d(const DDim& src) { return make_ddim({product(src)}); } + } // namespace framework } // namespace paddle diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index db30c523948b1d437615aa0e9bfecb5e25569296..ca29e7e8c7776de6adf3e3b0e8f11f0d4d8487c3 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -115,6 +115,12 @@ int arity(const DDim& ddim); std::ostream& operator<<(std::ostream&, const DDim&); +// Reshape a tensor to a matrix. The matrix's first dimension(column length) +// will be the product of tensor's first `num_col_dims` dimensions. +DDim flatten_to_2d(const DDim& src, int num_col_dims); + +DDim flatten_to_1d(const DDim& src); + } // namespace framework } // namespace paddle diff --git a/paddle/framework/eigen.h b/paddle/framework/eigen.h index 2d8d9ae10c56e0632414a5bbc754d35bfa9ce6a5..54bbeafcabdeeb1e2c1017c156b3512c83dada3a 100644 --- a/paddle/framework/eigen.h +++ b/paddle/framework/eigen.h @@ -63,20 +63,35 @@ struct EigenTensor { template -struct EigenMatrix : public EigenTensor {}; +struct EigenMatrix : public EigenTensor { + static typename EigenMatrix::Type Reshape(Tensor& tensor, int num_col_dims) { + int rank = tensor.dims_.size(); + PADDLE_ENFORCE(num_col_dims > 0 && num_col_dims < rank, + "`num_col_dims` must be between (0, rank_of_tensor)."); + return EigenMatrix::From(tensor, + flatten_to_2d(tensor.dims(), num_col_dims)); + } + + static typename EigenMatrix::ConstType Reshape(const Tensor& tensor, + int num_col_dims) { + int rank = tensor.dims_.size(); + PADDLE_ENFORCE(num_col_dims > 0 && num_col_dims < rank, + "`num_col_dims` must be between (0, rank_of_tensor)."); + return EigenMatrix::From(tensor, + flatten_to_2d(tensor.dims(), num_col_dims)); + } +}; template struct EigenVector : public EigenTensor { // Flatten reshapes a Tensor into an EigenVector. static typename EigenVector::Type Flatten(Tensor& tensor) { - return EigenVector::From( - tensor, make_ddim({static_cast(product(tensor.dims_))})); + return EigenVector::From(tensor, {product(tensor.dims_)}); } static typename EigenVector::ConstType Flatten(const Tensor& tensor) { - return EigenVector::From( - tensor, make_ddim({static_cast(product(tensor.dims_))})); + return EigenVector::From(tensor, {product(tensor.dims_)}); } }; diff --git a/paddle/framework/eigen_test.cc b/paddle/framework/eigen_test.cc index dc1957691b1a202826e10e84c21ac8874df9e378..bc4a2db32cfba66bef2c444e1f822e0d2a57b91e 100644 --- a/paddle/framework/eigen_test.cc +++ b/paddle/framework/eigen_test.cc @@ -108,5 +108,24 @@ TEST(Eigen, Matrix) { } } +TEST(Eigen, MatrixReshape) { + Tensor t; + float* p = t.mutable_data({2, 3, 6, 4}, platform::CPUPlace()); + for (int i = 0; i < 2 * 3 * 6 * 4; ++i) { + p[i] = static_cast(i); + } + + EigenMatrix::Type em = EigenMatrix::Reshape(t, 2); + + ASSERT_EQ(2 * 3, em.dimension(0)); + ASSERT_EQ(6 * 4, em.dimension(1)); + + for (int i = 0; i < 2 * 3; i++) { + for (int j = 0; j < 6 * 4; j++) { + ASSERT_NEAR(i * 6 * 4 + j, em(i, j), 1e-6f); + } + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/images/duplicate_op2.graffle b/paddle/framework/images/duplicate_op2.graffle index 2b658085d6a55d368c320051ba7f94ec2900f13c..ede3bca30ae17d5af52505fd94dc2f79b23b57e0 100644 Binary files a/paddle/framework/images/duplicate_op2.graffle and b/paddle/framework/images/duplicate_op2.graffle differ diff --git a/paddle/framework/images/duplicate_op2.png b/paddle/framework/images/duplicate_op2.png index c5588015d1450fd8c1bda3580680d884494868bb..4e872dc2caf3b0cbd0d5176f11a14801b538dc86 100644 Binary files a/paddle/framework/images/duplicate_op2.png and b/paddle/framework/images/duplicate_op2.png differ diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 154068fef69bc96edbd85b731fe8091b3b1ff823..568f4e89819c8345d8908634f6fa56f09483a763 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -18,8 +18,10 @@ #ifndef PADDLE_ONLY_CPU #include #include +#include #endif +#include #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/enforce.h" @@ -32,7 +34,8 @@ template using Vector = std::vector; #else template -using Vector = thrust::host_vector; +using Vector = thrust::host_vector< + T, thrust::system::cuda::experimental::pinned_allocator>; #endif using LoD = std::vector>; diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..1079a36a2e7b24f6f8a5bcbb296355567305a765 --- /dev/null +++ b/paddle/framework/lod_tensor_test.cu @@ -0,0 +1,52 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include +#include "paddle/framework/lod_tensor.h" +#include "paddle/platform/assert.h" + +#include + +__global__ void test(size_t* a, int size) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; + i += blockDim.x * gridDim.x) { + a[i] *= 2; + } +} + +TEST(LoDTensor, LoDInGPU) { + paddle::framework::Tensor tensor; + paddle::framework::LoDTensor lod_tensor; + paddle::platform::GPUPlace place(0); + + paddle::framework::LoD src_lod; + src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14}); + + tensor.Resize({14, 16}); + tensor.mutable_data(place); + + lod_tensor.set_lod(src_lod); + lod_tensor.set_tensor(&tensor); + CHECK_EQ(lod_tensor.lod_element(0, 2), 4); + CHECK_EQ(lod_tensor.lod_element(0, 4), 8); + + auto lod = lod_tensor.lod(); + + test<<<1, 8>>>(lod[0].data(), lod[0].size()); + cudaDeviceSynchronize(); + + for (size_t i = 0; i < src_lod[0].size(); ++i) { + CHECK_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2); + } +} diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 790cfc4746b1d34da413fa3c29a266f962c6dde6..e1e122091f7759b1a68f1f982bc2a35e8241f9f0 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -123,6 +123,15 @@ OperatorBase::OperatorBase(const std::string& type, CheckAllInputOutputSet(); } +std::vector OperatorBase::InputVars() const { + std::vector ret_val; + for (auto& o : outputs_) { + ret_val.reserve(ret_val.size() + o.second.size()); + ret_val.insert(ret_val.end(), o.second.begin(), o.second.end()); + } + return ret_val; +} + std::vector OperatorBase::OutputVars(bool has_intermediate) const { std::vector ret_val; if (has_intermediate) { diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 9a98d4d3be0d1cb875d614b263f1e4365ede4113..4600b06009bcef7d0774d25b816aac4733f30795 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -94,11 +94,14 @@ class OperatorBase { const VariableNameMap& Inputs() const { return inputs_; } const VariableNameMap& Outputs() const { return outputs_; } + //! Get a input with argument's name described in `op_proto` std::string Input(const std::string& name) const; //! Get a input which has multiple variables. const std::vector& Inputs(const std::string& name) const; + std::vector InputVars() const; + //! Get a output with argument's name described in `op_proto` std::string Output(const std::string& name) const; //! Get an output which has multiple variables. @@ -311,9 +314,9 @@ class InferShapeContext { } template - std::vector MultiOutput(const std::string& name) const { + std::vector MultiOutput(const std::string& name) const { auto names = op_.Outputs(name); - std::vector res; + std::vector res; res.reserve(names.size()); std::transform(names.begin(), names.end(), std::back_inserter(res), [&](const std::string& sub_name) { diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 657d3e6628b9aa68bd10bbfc1557305ce6f41c83..20f019892b7ca374b6a4482d2a48a5e6332c25bd 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -43,6 +43,9 @@ class Tensor { template friend struct EigenTensor; + template + friend struct EigenMatrix; + template friend struct EigenVector; @@ -81,6 +84,9 @@ class Tensor { /*! Return the dimensions of the memory block. */ inline const DDim& dims() const; + /*! Return the numel of the memory block. */ + inline int64_t numel() const; + /*! Resize the dimensions of the memory block. */ inline Tensor& Resize(const DDim& dims); @@ -168,6 +174,12 @@ class Tensor { /*! points to dimensions of memory block. */ DDim dims_; + /** + * A cache of the number of elements in a tensor. + * Would be 0 for an uninitialized tensor. + */ + int64_t numel_; + /** * @brief A PlaceHolder may be shared by more than one tensor. * diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index f2929137feb8a88954c0107a22d4095bb1599265..ed166935f76be9d25062b5e69536c7b7ac19045d 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -24,7 +24,7 @@ inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( holder_, "Tensor holds no memory. Call Tensor::mutable_data first."); PADDLE_ENFORCE_GE( - holder_->size(), product(dims_) * sizeof(T) + offset_, + holder_->size(), numel() * sizeof(T) + offset_, "Tensor's dims_ is out of bound. Call Tensor::mutable_data " "first to re-allocate memory.\n" "or maybe the required data-type mismatches the data already stored."); @@ -54,11 +54,11 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) { template inline T* Tensor::mutable_data(platform::Place place) { static_assert(std::is_pod::value, "T must be POD"); - PADDLE_ENFORCE_GT(product(dims_), 0, + PADDLE_ENFORCE_GT(numel(), 0, "Tensor's numel must be larger than zero to call " "Tensor::mutable_data. Call Tensor::set_dim first."); /* some versions of boost::variant don't have operator!= */ - int64_t size = product(dims_) * sizeof(T); + int64_t size = numel() * sizeof(T); if (holder_ == nullptr || !(holder_->place() == place) || holder_->size() < size + offset_) { if (platform::is_cpu_place(place)) { @@ -97,7 +97,7 @@ inline void Tensor::CopyFrom(const Tensor& src, auto dst_ptr = static_cast(mutable_data(dst_place)); - auto size = product(src.dims_) * sizeof(T); + auto size = src.numel() * sizeof(T); if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { memory::Copy(boost::get(dst_place), dst_ptr, @@ -131,7 +131,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { PADDLE_ENFORCE_LT(begin_idx, end_idx, "Begin index must be less than end index."); PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1."); - size_t base = product(dims_) / dims_[0]; + size_t base = numel() / dims_[0]; Tensor dst; dst.holder_ = holder_; DDim dst_dims = dims_; @@ -143,10 +143,21 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { inline Tensor& Tensor::Resize(const DDim& dims) { dims_ = dims; + numel_ = product(dims_); return *this; } inline const DDim& Tensor::dims() const { return dims_; } +inline int64_t Tensor::numel() const { return numel_; } + +template +inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { + Tensor res; + res.ShareDataWith(src); + res.Resize(flatten_to_2d(src.dims(), num_col_dims)); + return res; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index da0a4d636398721e6fa87ad0cf5074cbfe110a05..8491536e6f57fd8c1ce67bd633e7e93cdeedc0e9 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -264,3 +264,16 @@ TEST(Tensor, CopyFrom) { } #endif } + +TEST(Tensor, ReshapeToMatrix) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + int* src_ptr = src.mutable_data({2, 3, 4, 9}, CPUPlace()); + for (int i = 0; i < 2 * 3 * 4 * 9; ++i) { + src_ptr[i] = i; + } + Tensor res = ReshapeToMatrix(src, 2); + ASSERT_EQ(res.dims()[0], 2 * 3); + ASSERT_EQ(res.dims()[1], 4 * 9); +} \ No newline at end of file diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/gserver/layers/BatchNormBaseLayer.cpp index 1ceaaaa206ee3cbc5421238574c7f310011ccaa5..f7a80e23e1bd49549bec57b360587adc6b423794 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.cpp +++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp @@ -62,14 +62,18 @@ void BatchNormBaseLayer::calFeatureMapSize() { const ImageConfig& conf = config_.inputs(0).image_conf(); imageH_ = inputLayers_[0]->getOutput().getFrameHeight(); imageW_ = inputLayers_[0]->getOutput().getFrameWidth(); + imageD_ = inputLayers_[0]->getOutput().getFrameDepth(); + + if (0 == imageD_) imageD_ = conf.img_size_z(); if (imageH_ == 0 && imageW_ == 0) { imageH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); imageW_ = conf.img_size(); } else { getOutput().setFrameHeight(imageH_); getOutput().setFrameWidth(imageW_); + getOutput().setFrameDepth(imageD_); } - imgPixels_ = imageH_ * imageW_; + imgPixels_ = imageH_ * imageW_ * imageD_; } } // namespace paddle diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h index 230bafc31d96bbd49481a7ed135be6888688627e..e721d2d267a31cae46407673b8b1281e87055608 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -80,6 +80,7 @@ protected: /// Height or width of input image feature. /// Both of them are 1 if the input is fully-connected layer. + int imageD_; int imageH_; int imageW_; /// Height * Width. diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index 44ba2c4b7d1562d2ce839b5f4b4de1af35e6925f..49a9540c0b6e36b59ed786287ff5c4569b69a6a5 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -37,7 +37,7 @@ bool CudnnBatchNormLayer::init(const LayerMap& layerMap, } void CudnnBatchNormLayer::reshape(int batchSize) { - hl_tensor_reshape(ioDesc_, batchSize, channels_, imageH_, imageW_); + hl_tensor_reshape(ioDesc_, batchSize, channels_, imageH_ * imageD_, imageW_); } void CudnnBatchNormLayer::forward(PassType passType) { @@ -104,7 +104,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { EPS, batchSize, channels_, - imageH_, + imageH_ * imageD_, imageW_); } } diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp index 1b59ed60c57fe3bbfa814befa8a63408a2621715..3eea638649e8ebfdd7efa18615977a9e1344c695 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/gserver/layers/DeConv3DLayer.cpp @@ -53,27 +53,27 @@ bool DeConv3DLayer::init(const LayerMap &layerMap, size_t DeConv3DLayer::getSize() { CHECK_NE(inputLayers_.size(), 0UL); - outputH_.clear(); - outputW_.clear(); - outputD_.clear(); + imgSizeW_.clear(); + imgSizeH_.clear(); + imgSizeD_.clear(); N_.clear(); NOut_.clear(); size_t layerSize = 0; for (size_t i = 0; i < inputLayers_.size(); ++i) { - outputW_.push_back( - imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true)); - outputH_.push_back(imageSize( - imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); - outputD_.push_back(imageSize( - imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); - NOut_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); - N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); + imgSizeW_.push_back( + imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i], true)); + imgSizeH_.push_back(imageSize( + outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true)); + imgSizeD_.push_back(imageSize( + outputD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true)); + NOut_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]); + N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]); CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize); layerSize += NOut_[i] * numFilters_; } - getOutput().setFrameHeight(outputH_[0]); - getOutput().setFrameWidth(outputW_[0]); - getOutput().setFrameDepth(outputD_[0]); + getOutput().setFrameHeight(imgSizeH_[0]); + getOutput().setFrameWidth(imgSizeW_[0]); + getOutput().setFrameDepth(imgSizeD_[0]); return layerSize; } @@ -103,9 +103,9 @@ void DeConv3DLayer::forward(PassType passType) { } colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(), numFilters_, - outputD_[i], - outputH_[i], - outputW_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], filterSizeZ_[i], filterSizeY_[i], filterSize_[i], @@ -144,9 +144,9 @@ void DeConv3DLayer::backward(const UpdateCallback &callback) { colBuf_->vol2Col( getOutputGrad()->getData() + n * getOutputGrad()->getStride(), numFilters_, - outputD_[i], - outputH_[i], - outputW_[i], + imgSizeD_[i], + imgSizeH_[i], + imgSizeW_[i], filterSizeZ_[i], filterSizeY_[i], filterSize_[i], diff --git a/paddle/gserver/layers/DetectionOutputLayer.cpp b/paddle/gserver/layers/DetectionOutputLayer.cpp index 8ab838e191314ab25469631626c0b0564d7fffda..0cf0a92bf4bd8f9b8eba2016b2377d9dfb18c70a 100644 --- a/paddle/gserver/layers/DetectionOutputLayer.cpp +++ b/paddle/gserver/layers/DetectionOutputLayer.cpp @@ -139,7 +139,13 @@ void DetectionOutputLayer::forward(PassType passType) { allDecodedBBoxes, &allIndices); - resetOutput(numKept, 7); + if (numKept > 0) { + resetOutput(numKept, 7); + } else { + MatrixPtr outV = getOutputValue(); + outV = NULL; + return; + } MatrixPtr outV = getOutputValue(); getDetectionOutput(confBuffer_->getData(), numKept, diff --git a/paddle/gserver/layers/DetectionUtil.cpp b/paddle/gserver/layers/DetectionUtil.cpp index 3e61adc66e60c54250e4f323452aa13045310879..d83674f45a70212a8adc94a31ff58eb0e01baa00 100644 --- a/paddle/gserver/layers/DetectionUtil.cpp +++ b/paddle/gserver/layers/DetectionUtil.cpp @@ -469,7 +469,7 @@ size_t getDetectionIndices( const size_t numClasses, const size_t backgroundId, const size_t batchSize, - const size_t confThreshold, + const real confThreshold, const size_t nmsTopK, const real nmsThreshold, const size_t keepTopK, diff --git a/paddle/gserver/layers/DetectionUtil.h b/paddle/gserver/layers/DetectionUtil.h index fe4f9f075e4cf011c97f68f49598a828d62327b3..641ed873b4c8645b6455e5ef5e63593e3005b770 100644 --- a/paddle/gserver/layers/DetectionUtil.h +++ b/paddle/gserver/layers/DetectionUtil.h @@ -275,7 +275,7 @@ size_t getDetectionIndices( const size_t numClasses, const size_t backgroundId, const size_t batchSize, - const size_t confThreshold, + const real confThreshold, const size_t nmsTopK, const real nmsThreshold, const size_t keepTopK, diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index edef36194aabdb9c122ec3423deb036169a34d7c..4002a3d0747a86ab7b495ffe52247521831b71b8 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -49,6 +49,12 @@ struct LayerState { }; typedef std::shared_ptr LayerStatePtr; +/// Paddle device ID, MKLDNN is -2, CPU is -1 +enum PADDLE_DEVICE_ID { + MKLDNN_DEVICE = -2, + CPU_DEVICE = -1, +}; + /** * @brief Base class for layer. * Define necessary variables and functions for every layer. @@ -59,11 +65,6 @@ protected: LayerConfig config_; /// whether to use GPU bool useGpu_; - /// Paddle device ID, MKLDNN is -2, CPU is -1 - enum PADDLE_DEVICE_ID { - MKLDNN_DEVICE = -2, - CPU_DEVICE = -1, - }; /// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ... int deviceId_; /// Input layers diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 8318c8c519a4cec1610eadd28320ee5ce0b4147d..f70343251ad4fbb99f9614618f6d1bff1174f15e 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -14,7 +14,6 @@ limitations under the License. */ #include "MKLDNNFcLayer.h" #include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" using namespace mkldnn; // NOLINT typedef memory::format format; @@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap, oc_ = getSize(); oh_ = 1; ow_ = 1; + ih_ = 1; + iw_ = 1; // input size can not change in FC iLayerSize_ = inputLayers_[0]->getSize(); @@ -77,111 +78,86 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim); } -void MKLDNNFcLayer::convertOutputToOtherDevice() { - copyOutputInfoToOtherDevice(); - // find other cpu device and reorder output to cpu device - int cnt = 0; - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { - // fc cpu output value do not need convert - // just share point - outputOtherDevice_[i].value = output_.value; - ++cnt; - } - } - - if (cnt > 1) { - LOG(WARNING) << "should not have more than one CPU devie"; - } -} +void MKLDNNFcLayer::reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + reshapeInput(bs, ih, iw); -void MKLDNNFcLayer::reshape() { - const Argument& input = getInput(0, getPrev(0)->getDeviceId()); - int batchSize = input.getBatchSize(); - if (bs_ == batchSize) { - return; - } - bs_ = batchSize; - ih_ = input.getFrameHeight(); - iw_ = input.getFrameWidth(); - if (ih_ == 0) { - ih_ = 1; - } - if (iw_ == 0) { - iw_ = 1; - } CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); - ic_ = iLayerSize_ / (ih_ * iw_); - CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; - CHECK_EQ(size_t(oc_), getSize()); - printSizeInfo(); + ic = iLayerSize_ / (ih * iw); + CHECK_EQ(size_t(ic * ih * iw), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc), getSize()); - // reset output - output_.setFrameHeight(oh_); - output_.setFrameWidth(ow_); - resetOutput(bs_, oc_); + reshapeOutput(oh, ow); + resizeOutput(bs, oc); - // reset mkldnn forward - resetFwd(); - needResetBwd_ = true; - - convertWeightsFromPaddle(); + printSizeInfo(); } -void MKLDNNFcLayer::resetFwd() { +void MKLDNNFcLayer::resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& wgt = weight_->getW(); - const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr; - const MatrixPtr& out = output_.value; + const MatrixPtr& wgtVal = weight_->getW(); + const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr; + const MatrixPtr& outVal = output_.value; if (inputIsOnlyMKLDNN()) { - const MatrixPtr& in = getInputValue(0); - inVal_ = std::dynamic_pointer_cast(in); - CHECK(inVal_) << "Input should be MKLDNNMatrix"; + const MatrixPtr& inVal = getInputValue(0); + in = std::dynamic_pointer_cast(inVal); + CHECK(in) << "Input should be MKLDNNMatrix"; } else { CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; - const MatrixPtr& in = getInputValue(0, CPU_DEVICE); - inVal_ = MKLDNNMatrix::create( - in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); - } - inVal_->downSpatial(); - wgtVal_ = MKLDNNMatrix::create( - wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); - wgtVal_->downSpatial(); - biasVal_ = - hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr; - outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_); + const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE); + in = MKLDNNMatrix::create( + inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); + } + in->downSpatial(); + wgt = MKLDNNMatrix::create( + wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + wgt->downSpatial(); + bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_) + : nullptr; + out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_); // change original output value to mkldnn output value - output_.value = std::dynamic_pointer_cast(outVal_); + output_.value = std::dynamic_pointer_cast(out); if (!outputIsOnlyMKLDNN()) { - convertOutputToOtherDevice(); + // fc cpu output value do not need create convert + // just share point + getOutput(CPU_DEVICE).value->setData(output_.value->getData()); } // create forward handle prop_kind pk = prop_kind::forward; fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, - inVal_->getMemoryDesc(), - wgtVal_->getMemoryDesc(), - biasVal_->getMemoryDesc(), - outVal_->getMemoryDesc()) + in->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) : fc_fwd::desc(pk, - inVal_->getMemoryDesc(), - wgtVal_->getMemoryDesc(), - outVal_->getMemoryDesc()); + in->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); if (hasBias) { - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out)); } else { - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out)); } printValueFormatFlow(); - pipelineFwd_.clear(); - pipelineFwd_.push_back(*fwd_); + pipeline.push_back(*fwd_); } -void MKLDNNFcLayer::resetBwd() { +void MKLDNNFcLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); if (!needResetBwd_) { return; } @@ -190,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() { /// backward weight CHECK(inVal_) << "Should have input value"; - const MatrixPtr& wgt = weight_->getWGrad(); - const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr; + const MatrixPtr& wgtGrad = weight_->getWGrad(); + const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr; // TODO(TJ): merge outgrad int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; @@ -202,101 +178,66 @@ void MKLDNNFcLayer::resetBwd() { // for CPU device: // fc do not need to convert from cpu device since output is always nc format // only need create from cpu device - const MatrixPtr& out = getOutput(device).grad; - outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc()); - wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc()); - biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc()) - : nullptr; + const MatrixPtr& outGrad = getOutput(device).grad; + out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc()); + wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc()); + bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc()) + : nullptr; // create memory primitive desc fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()); + wgt->getMemoryDesc(), + out->getMemoryDesc()); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); fc_bwdWgt::desc bwdWgtDesc = hasBias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - biasGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()) + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()); + wgt->getMemoryDesc(), + out->getMemoryDesc()); fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); if (hasBias) { - bwdWgt_.reset( - new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias)); } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt)); } - pipelineBwd_.clear(); - pipelineBwd_.push_back(*bwdWgt_); + pipeline.push_back(*bwdWgt_); /// backward data - device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; - const MatrixPtr& in = getInputGrad(0, device); - if (in == nullptr) { + const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; + if (inGrad == nullptr) { return; } - if (getInput(0, device).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways when merge outgrad done + if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done } else { - inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc()); + in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); } - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(), - wgtGrad_->getMemoryDesc(), - outGrad_->getMemoryDesc()); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( + inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc()); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); CHECK(wgtVal_) << "Should have weight memory"; - bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in)); printGradFormatFlow(); - pipelineBwd_.push_back(*bwdData_); + pipeline.push_back(*bwdData_); } -void MKLDNNFcLayer::forward(PassType passType) { - Layer::forward(passType); - reshape(); - - { - REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - syncInputValue(); - - // just submit forward pipeline - stream_->submit(pipelineFwd_); - } - - /* activation */ { - REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); - forwardActivation(); - } +void MKLDNNFcLayer::updateInputData() { + inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); } -void MKLDNNFcLayer::backward(const UpdateCallback& callback) { - /* Do derivation */ { - REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); - backwardActivation(); - } - - { - REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); - resetBwd(); - - syncOutputGrad(); - // just sumbmit backward pipeline - stream_->submit(pipelineBwd_); - } - - { - REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); - } +void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); } } } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index e138a6faf181c412949218458e7ecf800a0d6a07..3119f863496df092da13c08bf733f13c42e53780 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -45,35 +45,28 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void convertWeightsFromPaddle() override; + void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; - void convertWeightsToPaddle() override; + void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; - void forward(PassType passType) override; + void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) override; - void backward(const UpdateCallback& callback) override; + void updateInputData() override; -protected: - /** - * reshape the input image sizes - * and reset output buffer size - * and reset mkldnn forward - */ - void reshape(); - - /** - * reset the forward primitve and memory - * only would be called when input size changes - */ - void resetFwd(); - - /** - * reset the backward primitve and memory for mkldnn fc - * only would be called when needed - */ - void resetBwd(); - - void convertOutputToOtherDevice() override; + void updateWeights(const UpdateCallback& callback) override; + + void convertWeightsFromPaddle() override; + + void convertWeightsToPaddle() override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index b983b833d510b823c5d4cff0b9390173e4cefc89..169679c8297542cac4a43f5a8e1af311ad9282df 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -19,6 +19,7 @@ limitations under the License. */ #include "MKLDNNBase.h" #include "mkldnn.hpp" #include "paddle/math/MKLDNNMatrix.h" +#include "paddle/utils/Stat.h" DECLARE_bool(use_mkldnn); @@ -33,6 +34,8 @@ typedef std::shared_ptr MKLDNNLayerPtr; */ class MKLDNNLayer : public Layer { protected: + // input value element count + size_t inputElemenCnt_; // batch size int bs_; // input image channel, height and width @@ -52,7 +55,7 @@ protected: std::vector pipelineFwd_; std::vector pipelineBwd_; - // MKLDNNMatrixPtr + // MKLDNNMatrixPtr with internal format MKLDNNMatrixPtr inVal_; MKLDNNMatrixPtr inGrad_; MKLDNNMatrixPtr outVal_; @@ -65,6 +68,7 @@ protected: public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), + inputElemenCnt_(0), bs_(0), ic_(0), ih_(0), @@ -95,12 +99,104 @@ public: if (!Layer::init(layerMap, parameterMap)) { return false; } + checkCPUOutputsNumber(); stream_.reset(new MKLDNNStream()); engine_ = CPUEngine::Instance().getEngine(); return true; } + void forward(PassType passType) override { + passType_ = passType; + + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + CHECK(!inputLayers_.empty()); + copySeqInfoToOutputs(); + size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt(); + if (inputElemenCnt_ != elemenCnt) { + // reset when input total sizes changed, not only the batchsize + inputElemenCnt_ = elemenCnt; + reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); + resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); + convertWeightsFromPaddle(); + needResetBwd_ = true; + } + + if (inputLayers_[0]->getType() == "data") { + updateInputData(); + } + + stream_->submit(pipelineFwd_); + } + + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } + } + + void backward(const UpdateCallback& callback) override { + /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + if (needResetBwd_) { + resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); + needResetBwd_ = false; + } + + stream_->submit(pipelineBwd_); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + updateWeights(callback); + } + } + + /** + * reshape the input image sizes + * and reset output image and buffer size + * output channel can not be changed + */ + virtual void reshape( + int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0; + + /** + * reset the mkldnn forward primitve and memory + * only would be called when input size changes + */ + virtual void resetFwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) = 0; + + /** + * reset the mkldnn backward primitve and memory for mkldnn fc + * only would be called when needed + */ + virtual void resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) = 0; + + /** + * Update input value data when input layer is "data" type. + * Since the input value data address might be changed. + */ + virtual void updateInputData() {} + + /** + * Update weights and biases if necessary. + */ + virtual void updateWeights(const UpdateCallback& callback) {} + /** * convert weight from paddle format to mkldnn format * weight_ will be override @@ -114,10 +210,38 @@ public: virtual void convertWeightsToPaddle() {} /** - * convert MKLDNN output to other device. - * only support CPU device yet + * add this interface as public for unit test + */ + void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } + +protected: + /** + * reshape the input image sizes and input batchsize */ - virtual void convertOutputToOtherDevice() {} + virtual void reshapeInput(int& batchsize, int& height, int& width) { + const Argument& input = inputLayers_[0]->getOutput(); + batchsize = input.getBatchSize(); + int h = input.getFrameHeight(); + int w = input.getFrameWidth(); + if (h != 0) { + height = h; + } + if (w != 0) { + width = w; + } + } + + /** + * reshape output image sizes + */ + virtual void reshapeOutput(size_t height, size_t width) { + output_.setFrameHeight(height); + output_.setFrameWidth(width); + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + outputOtherDevice_[i].setFrameHeight(height); + outputOtherDevice_[i].setFrameWidth(width); + } + } /** * print info about sizes @@ -133,8 +257,8 @@ public: */ virtual void printValueFormatFlow() { if (inVal_ && outVal_) { - VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat() - << " >>> " << outVal_->getFormat(); + VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> " + << outVal_->getFormat(); } } @@ -143,29 +267,12 @@ public: */ virtual void printGradFormatFlow() { if (inGrad_ && outGrad_) { - VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat() - << " <<< " << outGrad_->getFormat(); + VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< " + << outGrad_->getFormat(); } } protected: - /** - * copy image size and sequence info to other device - * @note: can not directly use Layer::copyOutputToOtherDevice since here only - * copy base info and do not copy data value - */ - void copyOutputInfoToOtherDevice() { - for (size_t i = 0; i < outputOtherDevice_.size(); i++) { - outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight()); - outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth()); - outputOtherDevice_[i].sequenceStartPositions = - output_.sequenceStartPositions; - outputOtherDevice_[i].subSequenceStartPositions = - output_.subSequenceStartPositions; - outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; - } - } - /** * If input only has MKLDNN device. * Otherwise, only support the previous layer using CPU device. @@ -193,37 +300,12 @@ protected: return outputOtherDevice_.size() == 0; } - /** - * Sync input value data - */ - void syncInputValue() { - if (inputIsOnlyMKLDNN()) { - return; - } - real* iData = getInputValue(0, CPU_DEVICE)->getData(); - // update input data - // since it might be changed if this is after data layer - inVal_->updateData(iData); - } - - /** - * Sync output grad data - */ - void syncOutputGrad() { - if (outputIsOnlyMKLDNN()) { - return; - } - - // update diff - real* oDiff = getOutput(CPU_DEVICE).grad->getData(); - outGrad_->updateData(oDiff); - } - /** * Set deviceId of this layer. */ void setDevice(int id) { deviceId_ = id; } +private: /** * Set deviceId of the params used in this layer. */ @@ -247,6 +329,42 @@ protected: parameter->setDevice(id); } } + + /** + * Check the cpu device number of outputOtherDevice_. + * should have only one at most. + */ + void checkCPUOutputsNumber(int max = 1) { + int cnt = 0; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + if (outputOtherDevice_[i].deviceId == CPU_DEVICE) { + ++cnt; + } + } + CHECK_LE(cnt, max) << "too much CPU devies"; + } + + /** + * copy SeqInfo from input layer to this output and other output devices. + * @note: do not use getInput(0) since it used this deviceId_, + * use "inputLayers_[0]->getOutput()" instead. + */ + void copySeqInfoToOutputs() { + if (inputLayers_.empty() || !needSequenceInfo_) { + return; + } + const Argument& input = inputLayers_[0]->getOutput(); + output_.sequenceStartPositions = input.sequenceStartPositions; + output_.subSequenceStartPositions = input.subSequenceStartPositions; + output_.cpuSequenceDims = input.cpuSequenceDims; + for (size_t i = 0; i < outputOtherDevice_.size(); i++) { + outputOtherDevice_[i].sequenceStartPositions = + output_.sequenceStartPositions; + outputOtherDevice_[i].subSequenceStartPositions = + output_.subSequenceStartPositions; + outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; + } + } }; } // namespace paddle diff --git a/paddle/gserver/layers/SwitchOrderLayer.cpp b/paddle/gserver/layers/SwitchOrderLayer.cpp index 92cd61cdd515d5c693df086c9575a5f197c00cee..d7eee6eaf078dab8d48adc4c7ee758a433672ac6 100644 --- a/paddle/gserver/layers/SwitchOrderLayer.cpp +++ b/paddle/gserver/layers/SwitchOrderLayer.cpp @@ -24,10 +24,12 @@ bool SwitchOrderLayer::init(const LayerMap& layerMap, /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); auto& img_conf = config_.inputs(0).image_conf(); + size_t inD = img_conf.img_size_z(); size_t inH = img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size(); size_t inW = img_conf.img_size(); size_t inC = img_conf.channels(); + inH = inH * inD; inDims_ = TensorShape({0, inC, inH, inW}); outDims_ = TensorShape(4); @@ -64,9 +66,10 @@ void SwitchOrderLayer::setInDims() { MatrixPtr input = inputLayers_[0]->getOutputValue(); size_t batchSize = input->getHeight(); inDims_.setDim(0, batchSize); - + int d = inputLayers_[0]->getOutput().getFrameDepth(); + d = (d == 0 ? 1 : d); int h = inputLayers_[0]->getOutput().getFrameHeight(); - if (h != 0) inDims_.setDim(2, h); + if (h != 0) inDims_.setDim(2, h * d); int w = inputLayers_[0]->getOutput().getFrameWidth(); if (w != 0) inDims_.setDim(3, w); int totalCount = input->getElementCnt(); diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index de1635be2af37cd0ba49010199a417090865b0e4..2f48e5b2d3ffc9337ed1314f6db6549e56263fdd 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -63,8 +63,12 @@ void MKLDNNTester::reset(const TestConfig& dnn, initTestLayer( configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); } - dnnLayer_ = testLayers_[DNN]; refLayer_ = testLayers_[REF]; + dnnLayer_ = std::dynamic_pointer_cast(testLayers_[DNN]); + CHECK(dnnLayer_); + // for comparison with Paddle reference results, + // need manually add cpu device output for test + dnnLayer_->addOutputArgument(CPU_DEVICE); EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); @@ -109,20 +113,22 @@ void MKLDNNTester::randomBotDatas() { void MKLDNNTester::randomTopDiffs() { refLayer_->getOutputGrad()->randomizeUniform(); - dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); - VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; + dnnLayer_->getOutput(CPU_DEVICE) + .grad->copyFrom(*(refLayer_->getOutputGrad())); + VLOG(lvl_) << "Random Backward Input, TopDiff: "; printMatrix(refLayer_->getOutputGrad()); } void MKLDNNTester::checkForward() { - printTopDatas(); - double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), - testLayers_[REF]->getOutputValue()); VLOG(MKLDNN_ALL) << "Check Forward"; + printTopDatas(); + double delta = compareMatrix(dnnLayer_->getOutput(-1).value, + refLayer_->getOutputValue()); EXPECT_LE(fabs(delta), eps_); } void MKLDNNTester::checkBackwardData() { + VLOG(MKLDNN_ALL) << "Check Backward Data"; // TODO(TJ): uncomment me when batch norm ready // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { @@ -144,14 +150,12 @@ void MKLDNNTester::checkBackwardData() { } void MKLDNNTester::checkBackwardWgts() { + VLOG(MKLDNN_ALL) << "Check Backward Weight"; CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); - const MKLDNNLayerPtr dnnlayer = - std::dynamic_pointer_cast(dnnLayer_); - CHECK(dnnlayer); - dnnlayer->convertWeightsToPaddle(); + dnnLayer_->convertWeightsToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); @@ -189,38 +193,38 @@ void MKLDNNTester::restoreWgt(const vector& from, } // clear parameters grad -void MKLDNNTester::clearWgtDiffs() { +void MKLDNNTester::clearWgtDiffs(size_t id) { + CHECK_LE(id, parameters_.size()); for (size_t n = 0; n < parameters_.size(); ++n) { - for (size_t i = 0; i < parameters_[n].size(); ++i) { - const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); - if (grad) { - grad->zeroMem(); + if (id == n || id == parameters_.size()) { + for (size_t i = 0; i < parameters_[n].size(); ++i) { + const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); + if (grad) { + grad->zeroMem(); + } } } } } -void MKLDNNTester::clearBotDiffs() { - // dnn and ref +void MKLDNNTester::clearBotDiffs(size_t id) { + CHECK_LE(id, dataLayers_.size()); for (size_t n = 0; n < dataLayers_.size(); ++n) { - // all inputs layers - for (size_t i = 0; i < dataLayers_[n].size(); ++i) { - dataLayers_[n][i]->getOutputGrad()->zeroMem(); + if (id == n || id == dataLayers_.size()) { + // clear inputs layers of this specific layer + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } } } } -void MKLDNNTester::clearBotDiffs(int n) { - CHECK_LT(n, NUM); - // all inputs layers - for (size_t i = 0; i < dataLayers_[n].size(); ++i) { - dataLayers_[n][i]->getOutputGrad()->zeroMem(); - } -} - -void MKLDNNTester::clearTopDatas() { +void MKLDNNTester::clearTopDatas(size_t id) { + CHECK_LE(id, testLayers_.size()); for (size_t i = 0; i < testLayers_.size(); ++i) { - testLayers_[i]->getOutputValue()->zeroMem(); + if (id == i || id == testLayers_.size()) { + testLayers_[i]->getOutputValue()->zeroMem(); + } } } @@ -300,16 +304,24 @@ void MKLDNNTester::runOnce() { checkForward(); // test backward + // simple updater + UpdateCallback updateCallback = [](Parameter* para) { + auto& grad = para->getBuf(PARAMETER_GRADIENT); + auto& value = para->getBuf(PARAMETER_VALUE); + real lr = 1e-3; + value->add(*grad, lr); + }; randomTopDiffs(); - dnnLayer_->backward(nullptr); - refLayer_->backward(nullptr); + dnnLayer_->backward(updateCallback); + refLayer_->backward(updateCallback); checkBackwardData(); checkBackwardWgts(); // clear buffers // ref code will addto the diff, dnn code will writeto it - // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers + // and clearTopDatas(REF) should be coverd by ref layers clearBotDiffs(REF); + clearWgtDiffs(REF); } void MKLDNNTester::run(const TestConfig& dnn, diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index e55e4493ffdfe45b8cfdee423febd1878b8b3d8a..5ac885638cde7693a0c847733e7a6149c1b7e6c2 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include "LayerGradUtil.h" #include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/gserver/layers/MKLDNNLayer.h" namespace paddle { @@ -40,7 +41,8 @@ protected: vector layerMaps_; vector> parameters_; vector testLayers_; - LayerPtr dnnLayer_, refLayer_; + LayerPtr refLayer_; + MKLDNNLayerPtr dnnLayer_; /// run some iterations, all the result should pass size_t iter_; @@ -88,10 +90,10 @@ private: void checkBackwardData(); void checkBackwardWgts(); - void clearWgtDiffs(); - void clearBotDiffs(); - void clearBotDiffs(int n); // clear specific layer - void clearTopDatas(); + // clear specific layer, clear all when id equals NUM + void clearWgtDiffs(size_t id = NUM); + void clearBotDiffs(size_t id = NUM); + void clearTopDatas(size_t id = NUM); void printTopDatas(); void printMatrix(const MatrixPtr& m); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index d1f3bc241fa621cb0070125980996e8627e40fd6..090bde7b203652e3ffb1662b8f5b8937885d2608 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1703,6 +1703,55 @@ TEST(Layer, BatchNormalizationLayer) { #endif } +void testBatchNorm3DLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + const int CHANNELS = 10; + const int IMG_SIZE = 16; + const int IMG_SIZE_Y = 8; + const int IMG_SIZE_Z = 8; + size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y * IMG_SIZE_Z; + config.layerConfig.set_type(type); + config.layerConfig.set_size(size); + config.layerConfig.set_active_type("sigmoid"); + config.biasSize = CHANNELS; + config.inputDefs.push_back({INPUT_DATA, + "layer_0", + /* dim= */ size, + /* paraSize= */ CHANNELS}); + + config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); + config.inputDefs.back().isStatic = true; + config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS}); + config.inputDefs.back().isStatic = true; + + LayerInputConfig* input = config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + ImageConfig* img_conf = input->mutable_image_conf(); + img_conf->set_channels(CHANNELS); + img_conf->set_img_size(IMG_SIZE); + img_conf->set_img_size_y(IMG_SIZE_Y); + img_conf->set_img_size_z(IMG_SIZE_Z); + + testLayerGrad(config, + "batch_norm", + 64, + /* trans= */ trans, + useGpu, + /* useWeight */ true); +} + +TEST(Layer, testBatchNorm3DLayer) { + testBatchNorm3DLayer("batch_norm", false, false); +#ifndef PADDLE_ONLY_CPU + testBatchNorm3DLayer("batch_norm", false, true); + if (hl_get_cudnn_lib_version() >= int(4000)) { + testBatchNorm3DLayer("cudnn_batch_norm", false, true); + } +#endif +} + void testConvOperator(bool isDeconv) { TestConfig config; const int NUM_FILTERS = 16; @@ -2253,26 +2302,27 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) { conv->set_stride(2); conv->set_stride_y(2); conv->set_stride_z(2); - conv->set_img_size(IMAGE_SIZE); - conv->set_img_size_y(IMAGE_SIZE_Y); - conv->set_img_size_z(IMAGE_SIZE_Z); - conv->set_output_x(imageSize(conv->img_size(), + conv->set_output_x(IMAGE_SIZE); + conv->set_output_y(IMAGE_SIZE_Y); + conv->set_output_z(IMAGE_SIZE_Z); + + conv->set_img_size(imageSize(conv->output_x(), conv->filter_size(), conv->padding(), conv->stride(), true)); - conv->set_output_y(imageSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - true)); - conv->set_output_z(imageSize(conv->img_size_z(), - conv->filter_size_z(), - conv->padding_z(), - conv->stride_z(), - true)); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - conv->output_z() * NUM_FILTERS); + conv->set_img_size_y(imageSize(conv->output_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + true)); + conv->set_img_size_z(imageSize(conv->output_z(), + conv->filter_size_z(), + conv->padding_z(), + conv->stride_z(), + true)); + config.layerConfig.set_size(conv->img_size() * conv->img_size_y() * + conv->img_size_z() * NUM_FILTERS); conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); config.inputDefs.push_back( diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp index 0a355e2644cce572ce90ecf5c9d2a5b7b395bc61..c4063e5069854242d9f93886b66580385557ca73 100644 --- a/paddle/math/MKLDNNMatrix.cpp +++ b/paddle/math/MKLDNNMatrix.cpp @@ -33,14 +33,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) { size_t width = cnts / dims[0]; m = Matrix::create(height, width, false, false); } - CHECK(m) << " Matrix should not be empty"; + CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast(m); CHECK(cpuMatrix) << "Only support create from CPU matrix yet"; - - CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match"; - return std::make_shared( - m->getData(), m->getHeight(), m->getWidth(), pd); + CHECK_EQ(cpuMatrix->getElementCnt(), cnts) << "Count size does not match"; + return std::make_shared(cpuMatrix, pd); } MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, @@ -138,7 +136,7 @@ void MKLDNNMatrix::downSpatial() { mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr), "could not create a memory primitive"); reset(result); - set_data_handle(getData()); + set_data_handle(data_); } } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index e50f698b495713e6f15ab7a12a7ee7487662040f..eef3b429e6fa0087aeac3f5aed9dff983b06e826 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -30,11 +30,10 @@ typedef std::shared_ptr MKLDNNMatrixPtr; */ class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory { public: - MKLDNNMatrix(real* data, - size_t height, - size_t width, - mkldnn::memory::primitive_desc pd) - : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {} + MKLDNNMatrix(CpuMatrixPtr m, mkldnn::memory::primitive_desc pd) + : CpuMatrix(m->getData(), m->getHeight(), m->getWidth(), false), + mkldnn::memory(pd, m->getData()), + m_(m) {} ~MKLDNNMatrix() {} @@ -81,11 +80,29 @@ public: void downSpatial(); /** - * Update the memory data handle. + * set the memory data handle. * Caution: This will not check the buffer size of the data, * it should be coverd by user. */ - void updateData(void* data) { set_data_handle(data); } + void setData(real* data) { + set_data_handle(data); + CpuMatrix::setData(data); + m_.reset(); + } + + /** + * override Matrix::getData + * check data before return + */ + real* getData() override { + CHECK_EQ((void*)data_, get_data_handle()); + return data_; + } + + const real* getData() const override { + CHECK_EQ((void*)data_, get_data_handle()); + return data_; + } /** * Get primitive descriptor. @@ -143,6 +160,10 @@ protected: memory::format srcFmt, memory::format dstFmt, memory::dims dm); + +private: + // save the CpuMatrixPtr in case the buffer released outside + CpuMatrixPtr m_; }; } // namespace paddle diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..0ebefbab26ec8fdf316f852fbb7f6d9f3bbc48eb --- /dev/null +++ b/paddle/operators/concat_op.cc @@ -0,0 +1,79 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/concat_op.h" +#include + +namespace paddle { +namespace operators { +using framework::Tensor; + +class ConcatOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto ins = ctx.MultiInput("X"); + auto *out = ctx.Output("Out"); + size_t axis = static_cast(ctx.Attr("axis")); + size_t n = ins.size(); + + PADDLE_ENFORCE_GT(n, 1, "Input tensors count should > 1."); + + auto out_dims = ins[0]->dims(); + size_t in_zero_dims_size = out_dims.size(); + for (size_t i = 1; i < n; i++) { + for (size_t j = 0; j < in_zero_dims_size; j++) { + if (j == axis) { + out_dims[axis] += ins[i]->dims()[j]; + continue; + } + PADDLE_ENFORCE_EQ(out_dims[j], ins[i]->dims()[j], + "Input tensors should have the same " + "elements except the specify axis.") + } + } + out->Resize(out_dims); + } +}; + +class ConcatOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "the input tensors of concat operator.").AsDuplicable(); + AddOutput("Out", "the output tensor of concat operator."); + AddComment(R"DOC( + Join the input tensors along with the axis. + Examples: + Input[0] = [[1,2],[3,4]] + Input[1] = [[5,6]] + axis = 0 + Output = [[1,2], + [3,4], + [5,6]] + )DOC"); + AddAttr("axis", "The axis which the inputs will be joined with.") + .SetDefault(0); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(concat, ops::ConcatOp, ops::ConcatOpMaker) +REGISTER_OP_CPU_KERNEL(concat, + ops::ConcatKernel) diff --git a/paddle/operators/concat_op.cu b/paddle/operators/concat_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..38fee7473dbb2ba97fe95b6632db7a1749cf3bbe --- /dev/null +++ b/paddle/operators/concat_op.cu @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/concat_op.h" + +namespace ops = paddle::operators; +// TODO(Yancey1989) Add GPU kernel diff --git a/paddle/operators/concat_op.h b/paddle/operators/concat_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f977054fdf8aa0164db726b94a21c57f770dd674 --- /dev/null +++ b/paddle/operators/concat_op.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class ConcatKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto ins = ctx.MultiInput("X"); + auto* out = ctx.Output("Out"); + int64_t axis = static_cast(ctx.Attr("axis")); + size_t n = ins.size(); + size_t output_axis_dim = 0; + size_t before = 1, after = 1; + for (size_t i = 0; i < n; i++) { + output_axis_dim += ins[i]->dims()[axis]; + } + auto& input_zero = ins[0]; + for (int64_t i = 0; i < input_zero->dims().size(); i++) { + if (i == axis) { + continue; + } + if (i < axis) { + before *= input_zero->dims()[i]; + } else { + after *= input_zero->dims()[i]; + } + } + size_t output_offset = 0; + for (size_t i = 0; i < n; i++) { + auto& in = ins[i]; + auto axis_dim = in->dims()[axis]; + for (size_t j = 0; j < before; j++) { + size_t len = axis_dim * after * sizeof(T); + const T* src = in->data() + axis_dim * after * j; + T* out_data = out->mutable_data(platform::CPUPlace()); + T* dest = out_data + output_offset + output_axis_dim * after * j; + memcpy(dest, src, len); + } + output_offset += axis_dim * after; + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 9e2bcebe3b5432c157fac895a9bbab5164193dbb..0dc509952578497671a128374f77ce616a520909 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -42,7 +42,7 @@ class CosSimKernel : public framework::OpKernel { output_y_norm->mutable_data(context.GetPlace()); auto dims = input_x->dims(); - int size = static_cast(framework::product(dims)); + int64_t size = input_x->numel(); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto x = EigenMatrix::From(*input_x, new_dims); auto y = EigenMatrix::From(*input_y, new_dims); @@ -72,7 +72,7 @@ class CosSimGradKernel : public framework::OpKernel { auto* input_grad_z = context.Input(framework::GradVarName("Out")); auto dims = input_x->dims(); - int size = static_cast(framework::product(dims)); + int64_t size = input_x->numel(); auto new_dims = framework::make_ddim({dims[0], size / dims[0]}); auto x = EigenMatrix::From(*input_x, new_dims); auto y = EigenMatrix::From(*input_y, new_dims); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 6574880c0eb6324b2dd175e39a364d2ef46e735e..3d76516405960c502a46997108049b2db5cab6bf 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -31,7 +31,7 @@ class CPUGaussianRandomKernel : public framework::OpKernel { } engine.seed(seed); std::normal_distribution dist(mean, std); - int64_t size = framework::product(tensor->dims()); + int64_t size = tensor->numel(); for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); } diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index d9dbc1dcfe6a6676938d64be93c879ea69148018..2d63b3049988cfc3135a87a57dad56b970df3eab 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -50,8 +50,8 @@ class GPUGaussianRandomKernel : public framework::OpKernel { T mean = static_cast(context.Attr("mean")); T std = static_cast(context.Attr("std")); thrust::counting_iterator index_sequence_begin(0); - ssize_t N = framework::product(tensor->dims()); - thrust::transform(index_sequence_begin, index_sequence_begin + N, + int64_t size = tensor->numel(); + thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(data), GaussianGenerator(mean, std, seed)); } diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index 27eee3436af8107cef2aa3577ea238be49edf1af..708344046760691aa2da562eb1ee3d8b130c5f18 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -70,7 +70,7 @@ class LookupTableCUDAKernel : public framework::OpKernel { size_t N = table_t->dims()[0]; size_t D = table_t->dims()[1]; - size_t K = product(ids_t->dims()); + size_t K = ids_t->numel(); auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); @@ -91,7 +91,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; - int K = product(ids_t->dims()); + int K = ids_t->numel(); const int32_t* ids = ids_t->data(); const T* d_output = d_output_t->data(); T* d_table = d_table_t->mutable_data(context.GetPlace()); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h index 877b36cef4ea9cdaaaf37c97d5e5bfce55b91436..a1298906dd4b4209644fe06584f70169519de01c 100644 --- a/paddle/operators/lookup_table_op.h +++ b/paddle/operators/lookup_table_op.h @@ -35,7 +35,7 @@ class LookupTableKernel : public framework::OpKernel { auto ids = ids_t->data(); auto table = table_t->data(); auto output = output_t->mutable_data(context.GetPlace()); - for (ssize_t i = 0; i < product(ids_t->dims()); ++i) { + for (int64_t i = 0; i < ids_t->numel(); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); @@ -61,7 +61,7 @@ class LookupTableGradKernel : public framework::OpKernel { t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); - for (ssize_t i = 0; i < product(ids_t->dims()); ++i) { + for (int64_t i = 0; i < ids_t->numel(); ++i) { PADDLE_ENFORCE_LT(ids[i], N); PADDLE_ENFORCE_GE(ids[i], 0); for (int j = 0; j < D; ++j) { diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 186a33edcec88bd5e51091a524a778eeb27ad526..4f380388b108dc173d847f027ba5c9db387a87f8 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -119,4 +119,4 @@ TEST(math, im2col) { #ifndef PADDLE_ONLY_CPU testIm2col(); #endif -} \ No newline at end of file +} diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index 9848af280b62729bef9243052ceae0b7d8f4c6f5..ce31e178d8e375dc59be80a6c05133201308da70 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -49,12 +49,11 @@ class MeanGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto OG = context.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE(framework::product(OG->dims()) == 1, - "Mean Gradient should be scalar"); + PADDLE_ENFORCE(OG->numel() == 1, "Mean Gradient should be scalar"); auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); - T ig_size = (T)framework::product(IG->dims()); + T ig_size = static_cast(IG->numel()); Eigen::DSizes bcast(ig_size); EigenVector::Flatten(*IG).device(context.GetEigenDevice()) = diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index 069fb5e1abc657aa02a50fde352ce88d078c36e1..a4876feb2edf77bd422fa2a7687b0fa7d55dae47 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -31,8 +31,7 @@ class MinusOp : public framework::OperatorWithKernel { auto *right_tensor = ctx.Input("Y"); PADDLE_ENFORCE_EQ( - framework::product(left_tensor->dims()), - framework::product(right_tensor->dims()), + left_tensor->numel(), right_tensor->numel(), "Minus operator must take two tensor with same num of elements"); ctx.Output("Out")->Resize(left_tensor->dims()); } diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 28a47cdff2e9b7a965ff9f99e787bb8315010823..710a56a0e8e2d17162d7d000df226f1537104eb9 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -25,18 +25,27 @@ class MulOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto dim0 = ctx.Input("X")->dims(); - auto dim1 = ctx.Input("Y")->dims(); - PADDLE_ENFORCE_EQ(dim0.size(), 2, - "input X(%s) should be a tensor with 2 dims, a matrix", - ctx.op().Input("X")); - PADDLE_ENFORCE_EQ(dim1.size(), 2, - "input Y(%s) should be a tensor with 2 dims, a matrix", - ctx.op().Input("Y")); + auto x_dims = ctx.Input("X")->dims(); + auto y_dims = ctx.Input("Y")->dims(); + int x_num_col_dims = Attr("x_num_col_dims"); + int y_num_col_dims = Attr("y_num_col_dims"); + + PADDLE_ENFORCE(x_dims.size() > x_num_col_dims, + "The rank of input tensor X(%s) should be larger than " + "`mul_op`'s `x_num_col_dims`.", + ctx.op().Input("X")); + PADDLE_ENFORCE(y_dims.size() > y_num_col_dims, + "The rank of input tensor Y(%s) should be larger than " + "`mul_op`'s `y_num_col_dims`.", + ctx.op().Input("Y")); + + auto x_mat_dims = framework::flatten_to_2d(x_dims, x_num_col_dims); + auto y_mat_dims = framework::flatten_to_2d(y_dims, y_num_col_dims); + PADDLE_ENFORCE_EQ( - dim0[1], dim1[0], + x_mat_dims[1], y_mat_dims[0], "First matrix's width must be equal with second matrix's height."); - ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + ctx.Output("Out")->Resize({x_mat_dims[0], y_mat_dims[1]}); } }; @@ -47,6 +56,23 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "The first input of mul op"); AddInput("Y", "The second input of mul op"); AddOutput("Out", "The output of mul op"); + AddAttr( + "x_num_col_dims", + R"DOC(mul_op can take tensors with more than two dimensions as input `X`, + in that case, tensors will be reshaped to a matrix. The matrix's first + dimension(column length) will be the product of tensor's last + `num_col_dims` dimensions, and the matrix's second dimension(row length) + will be the product of tensor's first `rank - num_col_dims` dimensions. + )DOC") + .SetDefault(1) + .EqualGreaterThan(1); + AddAttr( + "y_num_col_dims", + R"DOC(mul_op can take tensors with more than two dimensions as input `Y`, + in that case, tensors will be reshaped to a matrix. Just like input `X`. + )DOC") + .SetDefault(1) + .EqualGreaterThan(1); AddComment(R"DOC( Two Element Mul Operator. @@ -70,10 +96,20 @@ class MulOpGrad : public framework::OperatorWithKernel { auto out_dims = ctx.Input(framework::GradVarName("Out"))->dims(); auto *x_grad = ctx.Output(framework::GradVarName("X")); auto *y_grad = ctx.Output(framework::GradVarName("Y")); - PADDLE_ENFORCE(x_dims[0] == out_dims[0], - "Out@GRAD M X N must equal to X dims 0, M "); - PADDLE_ENFORCE(y_dims[1] == out_dims[1], - "Out@GRAD M X N must equal to Y dims 1, N "); + + auto x_mat_dims = + framework::flatten_to_2d(x_dims, Attr("x_num_col_dims")); + auto y_mat_dims = + framework::flatten_to_2d(y_dims, Attr("y_num_col_dims")); + + PADDLE_ENFORCE_EQ( + x_mat_dims[0], out_dims[0], + "The first dimension of Out@GRAD must equal to the first dimension of " + "the first operand."); + PADDLE_ENFORCE_EQ( + y_mat_dims[1], out_dims[1], + "The second dimension of Out@GRAD must equal to the second " + "dimension of the second operand."); if (x_grad) x_grad->Resize(x_dims); if (y_grad) y_grad->Resize(y_dims); diff --git a/paddle/operators/mul_op.h b/paddle/operators/mul_op.h index 05a79e13b3470e39a5ebd0394ba05629553a5075..3c01f868bda8cba488b3403df456d63d6b082fa6 100644 --- a/paddle/operators/mul_op.h +++ b/paddle/operators/mul_op.h @@ -1,7 +1,7 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. + You may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 @@ -31,13 +31,25 @@ template class MulKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* z = context.Output("Out"); + const Tensor* x = context.Input("X"); + const Tensor* y = context.Input("Y"); + Tensor* z = context.Output("Out"); + const Tensor x_matrix = + x->dims().size() > 2 + ? framework::ReshapeToMatrix( + *x, context.template Attr("x_num_col_dims")) + : *x; + const Tensor y_matrix = + y->dims().size() > 2 + ? framework::ReshapeToMatrix( + *y, context.template Attr("y_num_col_dims")) + : *y; + z->mutable_data(context.GetPlace()); auto* device_context = const_cast(context.device_context_); - math::matmul(*x, false, *y, false, 1, z, 0, device_context); + math::matmul(x_matrix, false, y_matrix, false, 1, z, 0, + device_context); } }; @@ -45,23 +57,39 @@ template class MulGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); + int x_num_col_dims = ctx.template Attr("x_num_col_dims"); + int y_num_col_dims = ctx.template Attr("y_num_col_dims"); + const Tensor* x = ctx.Input("X"); + const Tensor* y = ctx.Input("Y"); + const Tensor x_matrix = + x->dims().size() > 2 ? framework::ReshapeToMatrix(*x, x_num_col_dims) + : *x; + const Tensor y_matrix = + y->dims().size() > 2 ? framework::ReshapeToMatrix(*y, y_num_col_dims) + : *y; + const Tensor* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + Tensor* dx = ctx.Output(framework::GradVarName("X")); + Tensor* dy = ctx.Output(framework::GradVarName("Y")); auto* device_context = const_cast(ctx.device_context_); if (dx) { dx->mutable_data(ctx.GetPlace()); + Tensor dx_matrix = dx->dims().size() > 2 ? framework::ReshapeToMatrix( + *dx, x_num_col_dims) + : *dx; // dx = dout * y'. dx: M x K, dout : M x N, y : K x N - math::matmul(*dout, false, *y, true, 1, dx, 0, device_context); + math::matmul(*dout, false, y_matrix, true, 1, &dx_matrix, 0, + device_context); } if (dy) { dy->mutable_data(ctx.GetPlace()); + Tensor dy_matrix = dy->dims().size() > 2 ? framework::ReshapeToMatrix( + *dy, y_num_col_dims) + : *dy; // dy = x' * dout. dy K x N, dout : M x N, x : M x K - math::matmul(*x, true, *dout, false, 1, dy, 0, device_context); + math::matmul(x_matrix, true, *dout, false, 1, &dy_matrix, 0, + device_context); } } }; diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..b7061153d2bf13982f14f233e87a87daeeebf5fd --- /dev/null +++ b/paddle/operators/reshape_op.cc @@ -0,0 +1,107 @@ + +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/reshape_op.h" + +namespace paddle { +namespace operators { + +class ReshapeOp : public framework::OperatorWithKernel { + public: + ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + // input check + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) shouldn't be null"); + auto shape = ctx.Attr>("shape"); + PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty."); + for (auto dim : shape) { + PADDLE_ENFORCE(dim > 0, "Each dimension of shape must be positive."); + } + // capacity check + int64_t capacity = + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + auto *in = ctx.Input("X"); + int64_t in_size = framework::product(in->dims()); + PADDLE_ENFORCE_EQ(capacity, in_size, + "The size of Input(X) mismatches with Attr(shape)."); + // resize output + std::vector shape_int64(shape.size(), 0); + std::transform(shape.begin(), shape.end(), shape_int64.begin(), + [](int a) { return static_cast(a); }); + auto out_dims = framework::make_ddim(shape_int64); + ctx.Output("Out")->Resize(out_dims); + } +}; + +class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ReshapeOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of reshape operator."); + AddOutput("Out", "The output tensor of reshape operator."); + AddAttr>("shape", "Target shape of reshape operator."); + AddComment(R"DOC(Reshape operator + +Reshape Input(X) into the shape specified by Attr(shape). + +An example: +Given a 2-D tensor X with 2 rows and 2 columns + + [[1, 2], [3, 4]] + +with target shape = [1, 4], the reshape operator will transform +the tensor X into a 1-D tensor: + + [1, 2, 3, 4] + +)DOC"); + } +}; + +class ReshapeGradOp : public framework::OperatorWithKernel { + public: + ReshapeGradOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) shouldn't be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), + "Input(Out@GRAD) shouldn't be null."); + auto dims = ctx.Input("X")->dims(); + auto *d_in = ctx.Output(framework::GradVarName("X")); + d_in->Resize(dims); + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OP(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, reshape_grad, + ops::ReshapeGradOp); +REGISTER_OP_CPU_KERNEL(reshape, + ops::ReshapeKernel); +REGISTER_OP_CPU_KERNEL( + reshape_grad, ops::ReshapeGradKernel); diff --git a/paddle/operators/reshape_op.cu b/paddle/operators/reshape_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..23dbe089d3b37aabedf9ef166f7bbfbf67da7e0a --- /dev/null +++ b/paddle/operators/reshape_op.cu @@ -0,0 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/reshape_op.h" + +REGISTER_OP_GPU_KERNEL( + reshape, + paddle::operators::ReshapeKernel); +REGISTER_OP_GPU_KERNEL( + reshape_grad, + paddle::operators::ReshapeGradKernel); diff --git a/paddle/operators/reshape_op.h b/paddle/operators/reshape_op.h new file mode 100644 index 0000000000000000000000000000000000000000..873acf30782d390cdca5e7e864c76e1f743f9a7c --- /dev/null +++ b/paddle/operators/reshape_op.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class ReshapeKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + out->mutable_data(ctx.GetPlace()); + + auto shape = ctx.Attr>("shape"); + std::vector shape_int64(shape.size(), 0); + std::transform(shape.begin(), shape.end(), shape_int64.begin(), + [](int a) { return static_cast(a); }); + auto out_dims = framework::make_ddim(shape_int64); + out->CopyFrom(*in, ctx.GetPlace()); + out->Resize(out_dims); + } +}; + +template +class ReshapeGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + d_x->mutable_data(ctx.GetPlace()); + + auto in_dims = d_x->dims(); + d_x->CopyFrom(*d_out, ctx.GetPlace()); + d_x->Resize(in_dims); + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index 30b4b404315a9f041e21d79b75fd06307e33f7f9..fa8f0ff1a858143af427b51025279c726f1628e0 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -25,14 +25,19 @@ class RowwiseAddOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - auto dim0 = ctx.Input("X")->dims(); - auto dim1 = ctx.Input("b")->dims(); - - PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix"); - PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector"); - PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same"); - PADDLE_ENFORCE(ctx.OutputSize("Out") == 1, "The output size must be 1"); - ctx.Output("Out")->Resize(ctx.Input("X")->dims()); + auto x_dims = ctx.Input("X")->dims(); + auto b_dims = ctx.Input("b")->dims(); + PADDLE_ENFORCE_GT( + x_dims.size(), b_dims.size(), + "The rank of input `X` must be larger than the one of input `b`."); + + int num_col_dims = x_dims.size() - b_dims.size(); + + PADDLE_ENFORCE_EQ( + framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims, + "The width of two operands must be same"); + PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1"); + ctx.Output("Out")->Resize(x_dims); } }; @@ -61,13 +66,20 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("b"), "b should not be null"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")), "Input(Out@GRAD) should not be null"); - auto dims0 = ctx.Input("X")->dims(); - auto dims1 = ctx.Input("b")->dims(); - PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1") + auto x_dims = ctx.Input("X")->dims(); + auto b_dims = ctx.Input("b")->dims(); + PADDLE_ENFORCE_GT( + x_dims.size(), b_dims.size(), + "The rank of input `X` must be larger than the one of input `b`."); + + int num_col_dims = x_dims.size() - b_dims.size(); + PADDLE_ENFORCE_EQ( + framework::slice_ddim(x_dims, num_col_dims, x_dims.size()), b_dims, + "The width of two operands must be same"); auto *dx = ctx.Output(framework::GradVarName("X")); auto *db = ctx.Output(framework::GradVarName("b")); - if (dx) dx->Resize(dims0); - if (db) db->Resize(dims1); + if (dx) dx->Resize(x_dims); + if (db) db->Resize(b_dims); } }; diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 4e926d9f2947f37b71e81c0fa592b0c66b19c640..35774b940926f77167b8f19597027e74d3477e5b 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -33,10 +33,12 @@ class RowwiseAddKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto out = context.Output("Out"); out->mutable_data(context.GetPlace()); - - auto input = EigenMatrix::From(*context.Input("X")); - auto bias = EigenVector::From(*context.Input("b")); - auto output = EigenMatrix::From(*out); + int num_col_dims = context.Input("X")->dims().size() - + context.Input("b")->dims().size(); + auto input = + EigenMatrix::Reshape(*context.Input("X"), num_col_dims); + auto bias = EigenVector::Flatten(*context.Input("b")); + auto output = EigenMatrix::Reshape(*out, num_col_dims); const int bias_size = bias.dimension(0); const int rest_size = input.size() / bias_size; @@ -54,12 +56,15 @@ class RowwiseAddGradKernel : public framework::OpKernel { auto* dout = context.Input(framework::GradVarName("Out")); auto* dx = context.Output(framework::GradVarName("X")); auto* db = context.Output(framework::GradVarName("b")); + int num_col_dims = context.Input("X")->dims().size() - + context.Input("b")->dims().size(); - auto out_grad = EigenMatrix::From(*dout); + auto out_grad = EigenMatrix::Reshape(*dout, num_col_dims); auto place = context.GetEigenDevice(); + if (dx) { dx->mutable_data(context.GetPlace()); - EigenMatrix::From(*dx).device(place) = out_grad; + EigenMatrix::Reshape(*dx, num_col_dims).device(place) = out_grad; } if (db) { diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index dc30644a5e7e33d4289e48cac093aa5fde7e75e7..9f51d3efa8ecba894a1023b9de2df451ca85916c 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -41,8 +41,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { int rank = framework::arity(x_dims); PADDLE_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2."); - PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0], - framework::product(y_dims) / y_dims[0], + PADDLE_ENFORCE_EQ(x->numel() / x_dims[0], y->numel() / y_dims[0], "Product of dimensions expcet the first dimension of " "input and target must be equal."); PADDLE_ENFORCE(y_dims[0] == 1 || y_dims[0] == x_dims[0], @@ -50,8 +49,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { "or to 1."); ctx.Output("sub_result") - ->Resize({static_cast(x_dims[0]), - static_cast(framework::product(x_dims) / x_dims[0])}); + ->Resize({x_dims[0], x->numel() / x_dims[0]}); ctx.Output("Out")->Resize({x_dims[0], 1}); } }; diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h index ad3347a0b35f3385c5adbcd7ceaa94fe134105e3..097ac04fc09a10b3b624f491a847e281e41a802c 100644 --- a/paddle/operators/squared_l2_distance_op.h +++ b/paddle/operators/squared_l2_distance_op.h @@ -39,7 +39,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel { auto in0_dims = in0->dims(); auto in1_dims = in1->dims(); - int cols = framework::product(in0_dims) / in0_dims[0]; + int cols = in0->numel() / in0_dims[0]; // reduce dimensions except the first auto x = EigenMatrix::From(*in0, framework::make_ddim({in0_dims[0], cols})); @@ -82,7 +82,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { auto x_dims = x_g->dims(); auto y_dims = y_g->dims(); - int cols = framework::product(x_dims) / x_dims[0]; + int cols = x_g->numel() / x_dims[0]; // calculate gradient auto grad_mat = 2 * (out_grad.broadcast(Eigen::array({{1, cols}}))) * diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5805826ee8a555ca6dfc1ca81feaadffea9e1012 --- /dev/null +++ b/paddle/operators/sum_op.cc @@ -0,0 +1,73 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sum_op.h" +#include + +namespace paddle { +namespace operators { +using framework::Tensor; + +class SumOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto ins = ctx.MultiInput("X"); + auto *out = ctx.Output("Out"); + int N = ins.size(); + + auto in_dim = ins[0]->dims(); + + PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1."); + for (int i = 1; i < N; i++) { + auto dim = ins[i]->dims(); + PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); + } + out->Resize(in_dim); + } +}; + +class SumOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SumOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "the input tensors of sum operator.").AsDuplicable(); + AddOutput("Out", "the output tensor of sum operator."); + AddComment(R"DOC( + Sum the input tensors. + )DOC"); + } +}; + +class SumGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto outputs = ctx.MultiOutput(framework::GradVarName("X")); + auto dims = ctx.Input(framework::GradVarName("Out"))->dims(); + for (auto output : outputs) { + output->Resize(dims); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(sum, ops::SumOp, ops::SumOpMaker, sum_grad, ops::SumGradOp); +REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel); +REGISTER_OP_CPU_KERNEL(sum_grad, + ops::SumGradKernel); diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..a465cf3659ba7c51338abadfc62962fb6755a39d --- /dev/null +++ b/paddle/operators/sum_op.cu @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/sum_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel); +REGISTER_OP_GPU_KERNEL(sum_grad, + ops::SumGradKernel); diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h new file mode 100644 index 0000000000000000000000000000000000000000..0b1e9ebaa38d455fb5e3ce8c1a39cbbcdad9a940 --- /dev/null +++ b/paddle/operators/sum_op.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + +template +class SumKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ins = context.MultiInput("X"); + auto* out = context.Output("Out"); + out->mutable_data(context.GetPlace()); + + auto place = context.GetEigenDevice(); + auto result = EigenVector::Flatten(*out); + + int N = ins.size(); + auto in = EigenVector::Flatten(*(ins[0])); + result.device(place) = in; + for (int i = 1; i < N; i++) { + auto in = EigenVector::Flatten(*(ins[i])); + result.device(place) = result + in; + } + } +}; + +template +class SumGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input(framework::GradVarName("Out")); + auto outs = context.MultiOutput(framework::GradVarName("X")); + for (auto out : outs) { + out->mutable_data(context.GetPlace()); + } + + auto place = context.GetEigenDevice(); + auto in = EigenVector::Flatten(*input); + for (auto out : outs) { + auto result = EigenVector::Flatten(*out); + result.device(place) = in; + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index f2aeef6c310df8535e67fa3906301a87f8ec4694..b8fbc9b52aecdb5c8d985b5de9bcd7cb85835b60 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -35,7 +35,7 @@ class CPUUniformRandomKernel : public framework::OpKernel { std::uniform_real_distribution dist( static_cast(context.Attr("min")), static_cast(context.Attr("max"))); - int64_t size = framework::product(tensor->dims()); + int64_t size = tensor->numel(); for (int64_t i = 0; i < size; ++i) { data[i] = dist(engine); } diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index c2c041b144b6ca1f019f972e1301b756ec1c9301..6614b53b3f990d10c82633f3c1f079acea0cd827 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -53,8 +53,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { T min = static_cast(context.Attr("min")); T max = static_cast(context.Attr("max")); thrust::counting_iterator index_sequence_begin(0); - ssize_t N = framework::product(tensor->dims()); - thrust::transform(index_sequence_begin, index_sequence_begin + N, + int64_t size = tensor->numel(); + thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(data), UniformGenerator(min, max, seed)); } diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index 81448897e95eb05f4ce7de8683d98e05bade77cb..64fcbd93b6c4d5d9b36f2636c3ef4f7327f08d25 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -25,10 +25,6 @@ limitations under the License. */ #include "paddle/string/printf.h" #include "paddle/string/to_string.h" -#ifdef __GNUC__ -#include // for __cxa_demangle -#endif - #ifndef PADDLE_ONLY_CPU #include "paddle/platform/dynload/cublas.h" @@ -46,19 +42,6 @@ limitations under the License. */ namespace paddle { namespace platform { -namespace { -#ifdef __GNUC__ -inline std::string demangle(std::string name) { - int status = -4; // some arbitrary value to eliminate the compiler warning - std::unique_ptr res{ - abi::__cxa_demangle(name.c_str(), NULL, NULL, &status), std::free}; - return (status == 0) ? res.get() : name; -} -#else -inline std::string demangle(std::string name) { return name; } -#endif -} - struct EnforceNotMet : public std::exception { std::exception_ptr exp_; std::string err_str_; @@ -79,7 +62,7 @@ struct EnforceNotMet : public std::exception { Dl_info info; for (int i = 0; i < size; ++i) { if (dladdr(call_stack[i], &info)) { - auto demangled = demangle(info.dli_sname); + auto demangled = info.dli_sname; auto addr_offset = static_cast(call_stack[i]) - static_cast(info.dli_saddr); sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 53985933ed143c7faba6f7e2a704445697c1f58e..16a2368aae5fff7445654161db4fd6a97d5bebfc 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include "paddle/framework/backward.h" +#include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" @@ -49,13 +50,18 @@ USE_OP(minus); USE_OP(cos_sim); USE_CPU_ONLY_OP(gather); USE_CPU_ONLY_OP(scatter); +USE_CPU_ONLY_OP(concat); USE_OP(top_k); USE_OP(squared_l2_distance); +USE_OP(sum); +USE_OP(reshape); namespace paddle { namespace framework { using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using LoD = framework::LoD; static size_t UniqueIntegerGenerator() { static std::atomic generator; @@ -115,6 +121,60 @@ PYBIND11_PLUGIN(core) { return self.data()[offset]; }); + py::class_(m, "LoDTensor", R"DOC(LoD(Leval of Ddetails) Tensor. + +The tensor and LoD info should be created before creating the LoDTensor, then +call the set_tensor and set_lod functions to set them. + +)DOC") + .def("__init__", + [](LoDTensor &instance, + const std::vector> &lod, + Tensor *t) { +#ifdef PADDLE_ONLY_CPU + new (&instance) LoDTensor(lod, t); +#else + paddle::framework::LoD new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + new (&instance) LoDTensor(new_lod, t); +#endif + }) + .def("set_tensor", + [](LoDTensor &self, Tensor *tensor) { self.set_tensor(tensor); }) + .def("set_lod", + [](LoDTensor &self, const std::vector> &lod) { +#ifdef PADDLE_ONLY_CPU + self.set_lod(lod); +#else + paddle::framework::LoD new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + self.set_lod(new_lod); +#endif + }) + .def("tensor", + [](LoDTensor &self) -> Tensor & { return self.tensor(); }, + py::return_value_policy::reference) + .def("lod", [](LoDTensor &self) -> std::vector> { +#ifdef PADDLE_ONLY_CPU + return self.lod(); +#else + auto lod = self.lod(); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::transform(lod.begin(), lod.end(), std::back_inserter(new_lod), + [](paddle::framework::Vector item) -> + std::vector { + std::vector v; + v.reserve(item.size()); + std::copy(item.begin(), item.end(), std::back_inserter(v)); + return v; + }); + return new_lod; +#endif + }); + py::class_(m, "Variable", R"DOC(Variable Class. All parameter, weight, gradient are variables in Paddle. @@ -126,6 +186,11 @@ All parameter, weight, gradient are variables in Paddle. .def("get_tensor", [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) + .def("get_lod_tensor", + [](Variable &self) -> LoDTensor * { + return self.GetMutable(); + }, + py::return_value_policy::reference) .def("get_net", [](Variable &self) -> operators::NetOp * { return self.GetMutable(); @@ -216,7 +281,10 @@ All parameter, weight, gradient are variables in Paddle. -> std::map> { return op.Outputs(); }) + .def("output_vars", + [](const OperatorBase &op) { return op.OutputVars(true); }) .def("inputs", [](const OperatorBase &op) { return op.Inputs(); }) + .def("input_vars", [](const OperatorBase &op) { return op.InputVars(); }) .def("__str__", &OperatorBase::DebugString) .def("no_intermediate_outputs", [](const OperatorBase &op) { return op.OutputVars(false); }) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index e57f793ac42b19037e9ca43a5e4a3ac5447dc34c..2ac455d771bf78377ce4ee7d921393d3b3958e3c 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -30,6 +30,8 @@ Configuring cmake in /paddle/build ... -DCMAKE_BUILD_TYPE=Release -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} + -DWITH_MKLDNN=${WITH_MKLDNN:-ON} + -DWITH_MKLML=${WITH_MKLML:-ON} -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_SWIG_PY=ON @@ -50,6 +52,8 @@ cmake .. \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ + -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \ + -DWITH_MKLML=${WITH_MKLML:-ON} \ -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index b18b73e06a6c39c3bf9717280bc6323917c80efb..2755fdd9cd1c2509cad996557c6fb24363d42d8a 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -320,6 +320,9 @@ void loadFileList(const std::string& fileListFileName, } double getMemoryUsage() { +#if defined(__ANDROID__) + return 0.0; +#else FILE* fp = fopen("/proc/meminfo", "r"); CHECK(fp) << "failed to fopen /proc/meminfo"; size_t bufsize = 256 * sizeof(char); @@ -357,6 +360,7 @@ double getMemoryUsage() { delete[] buf; double usedMem = 1.0 - 1.0 * (freeMem + bufMem + cacheMem) / totalMem; return usedMem; +#endif } SyncThreadPool* getGlobalSyncThreadPool() { diff --git a/paddle/utils/Util.h b/paddle/utils/Util.h index 613844669d2495ada7b8f7a841f47b821b7fdeba..22ce2534d3468ded36221810aa61c15b37f13f3d 100644 --- a/paddle/utils/Util.h +++ b/paddle/utils/Util.h @@ -33,6 +33,13 @@ limitations under the License. */ #include "Flags.h" #include "hl_gpu.h" +#if defined(__ANDROID__) && (__ANDROID_API__ < 21) +inline int rand_r(unsigned int* seedp) { + (void)seedp; + return rand(); +} +#endif + /** * Loop over the elements in a container * TODO(yuyang18): It's this foreach useful? Why not use C++ 11 foreach, diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 7d7fc23a4691646dfce4c162a445864c748501d9..ebf0911d6ea0b39d51447859ae2aef485b50b0e6 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -271,6 +271,7 @@ message ImageConfig { // The size of input feature map. required uint32 img_size = 8; optional uint32 img_size_y = 9; + optional uint32 img_size_z = 10 [ default = 1 ]; } message PriorBoxConfig { @@ -519,6 +520,7 @@ message LayerConfig { // for HuberRegressionLoss optional double delta = 57 [ default = 1.0 ]; + // for 3D data optional uint64 depth = 58 [ default = 1 ]; // for switch order layer diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 11dc84ae20679bb73735f9119739fca5ea7fa673..4f68a8953446ffa0510df65c5b214d09b913cff8 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1332,6 +1332,12 @@ def parse_image(image, input_layer_name, image_conf): get_img_size(input_layer_name, image_conf.channels) +def parse_image3d(image, input_layer_name, image_conf): + image_conf.channels = image.channels + image_conf.img_size, image_conf.img_size_y, image_conf.img_size_z = \ + get_img3d_size(input_layer_name, image_conf.channels) + + def parse_norm(norm, input_layer_name, norm_conf): norm_conf.norm_type = norm.norm_type config_assert( @@ -2028,6 +2034,7 @@ class ParameterReluLayer(LayerBase): config_assert(input_layer.size % partial_sum == 0, "a wrong setting for partial_sum") self.set_layer_size(input_layer.size) + self.config.partial_sum = partial_sum self.create_input_parameter(0, input_layer.size / partial_sum) @@ -2365,9 +2372,11 @@ class BatchNormLayer(LayerBase): name, inputs, bias=True, + img3D=False, use_global_stats=True, moving_average_fraction=0.9, batch_norm_type=None, + mean_var_names=None, **xargs): if inputs is None: inputs = [] @@ -2409,24 +2418,69 @@ class BatchNormLayer(LayerBase): input_layer = self.get_input_layer(0) image_conf = self.config.inputs[0].image_conf - parse_image(self.inputs[0].image, input_layer.name, image_conf) - - # Only pass the width and height of input to batch_norm layer - # when either of it is non-zero. - if input_layer.width != 0 or input_layer.height != 0: - self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size, - image_conf.channels, False) + if img3D: + parse_image3d(self.inputs[0].image, input_layer.name, image_conf) + # Only pass the width and height of input to batch_norm layer + # when either of it is non-zero. + if input_layer.width != 0 or input_layer.height != 0: + self.set_cnn_layer( + input_layer_name=name, + depth=image_conf.img_size_z, + height=image_conf.img_size_y, + width=image_conf.img_size, + channels=image_conf.channels, + is_print=True) + else: + self.set_layer_size(input_layer.size) else: - self.set_layer_size(input_layer.size) + parse_image(self.inputs[0].image, input_layer.name, image_conf) + # Only pass the width and height of input to batch_norm layer + # when either of it is non-zero. + if input_layer.width != 0 or input_layer.height != 0: + self.set_cnn_layer( + input_layer_name=name, + height=image_conf.img_size_y, + width=image_conf.img_size, + channels=image_conf.channels, + is_print=True) + else: + self.set_layer_size(input_layer.size) psize = self.calc_parameter_size(image_conf) dims = [1, psize] + if mean_var_names is not None: + assert len(mean_var_names) == 2 + self.inputs[1].parameter_name = mean_var_names[0] + self.inputs[2].parameter_name = mean_var_names[1] + self.create_input_parameter(0, psize) self.create_input_parameter(1, psize, dims) self.create_input_parameter(2, psize, dims) self.create_bias_parameter(bias, psize) + def set_cnn_layer(self, + input_layer_name, + depth=None, + height=None, + width=None, + channels=None, + is_print=True): + depthIsNone = False + if depth is None: + depth = 1 + depthIsNone = True + size = depth * height * width * channels + self.set_layer_size(size) + self.set_layer_height_width(height, width) + self.set_layer_depth(depth) + if is_print and depthIsNone: + print("output for %s: c = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, height, width, size)) + elif is_print: + print("output for %s: c = %d, d = %d, h = %d, w = %d, size = %d" % + (input_layer_name, channels, depth, height, width, size)) + def calc_parameter_size(self, image_conf): return image_conf.channels @@ -2688,9 +2742,20 @@ class AddToLayer(LayerBase): super(AddToLayer, self).__init__( name, 'addto', 0, inputs=inputs, **xargs) config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer') - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - self.set_layer_size(input_layer.size) + + if len(self.inputs) > 1: + for input_index in xrange(len(self.inputs)): + assert self.get_input_layer(0).height == self.get_input_layer( + input_index).height + assert self.get_input_layer(0).width == self.get_input_layer( + input_index).width + assert self.get_input_layer(0).depth == self.get_input_layer( + input_index).depth + + self.set_layer_size(self.get_input_layer(0).size) + self.set_layer_height_width(self.get_input_layer(0).height, \ + self.get_input_layer(0).width) + self.set_layer_depth(self.get_input_layer(0).depth) self.create_bias_parameter(bias, self.config.size) @@ -3370,11 +3435,20 @@ class ConcatenateLayer(LayerBase): name, 'concat', 0, inputs=inputs, **xargs) size = 0 for input_index in xrange(len(self.inputs)): + assert self.get_input_layer(0).height == self.get_input_layer( + input_index).height + assert self.get_input_layer(0).width == self.get_input_layer( + input_index).width + assert self.get_input_layer(0).depth == self.get_input_layer( + input_index).depth input_layer = self.get_input_layer(input_index) input = self.inputs[input_index] if self.config.size == 0: size += input_layer.size + self.set_layer_height_width(self.get_input_layer(0).height, \ + self.get_input_layer(0).width) + self.set_layer_depth(self.get_input_layer(0).depth) self.set_layer_size(size) @@ -3675,8 +3749,8 @@ class SwitchOrderLayer(LayerBase): def __init__(self, name, inputs, reshape, **xargs): super(SwitchOrderLayer, self).__init__( name, 'switch_order', 0, inputs=inputs, **xargs) - self.config.reshape_conf.heightAxis.extend(reshape['height']) - self.config.reshape_conf.widthAxis.extend(reshape['width']) + self.config.reshape_conf.height_axis.extend(reshape['height']) + self.config.reshape_conf.width_axis.extend(reshape['width']) # Deprecated, use a new layer specific class instead diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index cba45bd3afa178ab4dd3a50f0947b144e7466e53..4b1d80d3db924bfa2ad0e081f785d8f5dd719fce 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -354,6 +354,10 @@ class LayerOutput(object): def height(self): return cp.g_layer_map[self.full_name].height + @property + def depth(self): + return cp.g_layer_map[self.full_name].depth + def set_input(self, input): """ Set the input for a memory layer. Can only be used for memory layer @@ -943,7 +947,7 @@ def data_layer(name, size, depth=None, height=None, width=None, if height is not None and width is not None: num_filters = size / (width * height * depth) assert num_filters * width * height * depth == size, \ - "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) + "size=%s width=%s height=%s depth=%s" % (size, width, height, depth) return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters) @@ -1219,7 +1223,8 @@ def detection_output_layer(input_loc, name=None): """ Apply the NMS to the output of network and compute the predict bounding - box location. + box location. The output of this layer could be None if there is no valid + bounding box. :param name: The Layer Name. :type name: basestring @@ -2953,13 +2958,15 @@ def img_cmrnorm_layer(input, def batch_norm_layer(input, act=None, name=None, + img3D=False, num_channels=None, bias_attr=None, param_attr=None, layer_attr=None, batch_norm_type=None, moving_average_fraction=0.9, - use_global_stats=None): + use_global_stats=None, + mean_var_names=None): """ Batch Normalization Layer. The notation of this layer as follow. @@ -3026,6 +3033,8 @@ def batch_norm_layer(input, :math:`runningMean = newMean*(1-factor) + runningMean*factor` :type moving_average_fraction: float. + :param mean_var_names: [mean name, variance name] + :type mean_var_names: string list :return: LayerOutput object. :rtype: LayerOutput """ @@ -3039,6 +3048,7 @@ def batch_norm_layer(input, (batch_norm_type == "cudnn_batch_norm") l = Layer( name=name, + img3D=img3D, inputs=Input( input.name, image=Image(channels=num_channels), **param_attr.attr), active_type=act.name, @@ -3047,6 +3057,7 @@ def batch_norm_layer(input, bias=ParamAttr.to_bias(bias_attr), moving_average_fraction=moving_average_fraction, use_global_stats=use_global_stats, + mean_var_names=mean_var_names, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( @@ -6410,7 +6421,7 @@ def gated_unit_layer(input, @wrap_name_default('switch_order') def switch_order_layer(input, name=None, - reshape=None, + reshape_axis=None, act=None, layer_attr=None): """ @@ -6421,8 +6432,9 @@ def switch_order_layer(input, The example usage is: .. code-block:: python + reshape_axis = 3 + switch = switch_order(input=layer, name='switch', reshape_axis=reshape_axis) reshape = {'height':[ 0, 1, 2], 'width':[3]} - switch = switch_order(input=layer, name='switch', reshape=reshape) :param input: The input layer. :type input: LayerOutput @@ -6434,6 +6446,11 @@ def switch_order_layer(input, :rtype: LayerOutput """ assert isinstance(input, LayerOutput) + assert reshape_axis != None and (reshape_axis > 0 and reshape_axis < 4) + height = [ele for ele in xrange(reshape_axis)] + width = [ele for ele in range(reshape_axis, 4)] + reshape = {'height': height, 'width': width} + l = Layer( name=name, inputs=input.name, @@ -6444,6 +6461,7 @@ def switch_order_layer(input, return LayerOutput( name=name, layer_type=LayerType.SWITCH_ORDER_LAYER, + activation=act, parents=input, size=l.config.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index df872a90ff388f0d96cef44763dbd076bc768ab9..8a204a96f3ef57673cef65306d0bf8e8c3409751 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -10,6 +10,6 @@ test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_la test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer -test_conv3d_layer test_deconv3d_layer) +test_conv3d_layer test_deconv3d_layer test_BatchNorm3D) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index 1a577b8d9b1e1915236ba6afcfa97040d70c707a..5ddf6052df021b055390a42c25ce6c0d650e4aee 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -62,6 +62,7 @@ layers { moving_average_fraction: 0.9 height: 227 width: 227 + depth: 1 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index 2818389b16cca75f5030b75fc4de8c89c06c5e02..c0252b945b4c7fd6b4dad8770e3e1dccb88df28a 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -62,6 +62,7 @@ layers { moving_average_fraction: 0.9 height: 256 width: 256 + depth: 1 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr new file mode 100644 index 0000000000000000000000000000000000000000..832ed24a31dd2bedba9a4fce77d7a088d1796fdb --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr @@ -0,0 +1,92 @@ +type: "nn" +layers { + name: "data3D" + type: "data" + size: 360 + active_type: "" + height: 6 + width: 20 + depth: 3 +} +layers { + name: "__batch_norm_0__" + type: "batch_norm" + size: 360 + active_type: "relu" + inputs { + input_layer_name: "data3D" + input_parameter_name: "___batch_norm_0__.w0" + image_conf { + channels: 1 + img_size: 20 + img_size_y: 6 + img_size_z: 3 + } + } + inputs { + input_layer_name: "data3D" + input_parameter_name: "___batch_norm_0__.w1" + } + inputs { + input_layer_name: "data3D" + input_parameter_name: "___batch_norm_0__.w2" + } + bias_parameter_name: "___batch_norm_0__.wbias" + moving_average_fraction: 0.9 + height: 6 + width: 20 + depth: 3 +} +parameters { + name: "___batch_norm_0__.w0" + size: 1 + initial_mean: 1.0 + initial_std: 0.0 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___batch_norm_0__.w1" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.w2" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false + is_static: true + is_shared: true +} +parameters { + name: "___batch_norm_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data3D" +output_layer_names: "__batch_norm_0__" +sub_models { + name: "root" + layer_names: "data3D" + layer_names: "__batch_norm_0__" + input_layer_names: "data3D" + output_layer_names: "__batch_norm_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr index b110e91498ce7d112987714bd769868179141c54..8a1399efad0ff339e35f69400ac654a4787a6018 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_bi_grumemory.protostr @@ -74,6 +74,9 @@ layers { inputs { input_layer_name: "__bidirectional_gru_0___bw" } + height: 0 + width: 0 + depth: 1 } parameters { name: "___bidirectional_gru_0___fw_transform.w0" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr index 64d227565f2b21ff43d4391c682ca90c0f47908e..94ad56cab063df9e6a11bb1c293727fb9dec810f 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr @@ -14,6 +14,29 @@ layers { input_layer_name: "input" input_parameter_name: "___prelu_layer_0__.w0" } + partial_sum: 1 +} +layers { + name: "__prelu_layer_1__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_1__.w0" + } + partial_sum: 1 +} +layers { + name: "__prelu_layer_2__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_2__.w0" + } + partial_sum: 5 } parameters { name: "___prelu_layer_0__.w0" @@ -23,14 +46,32 @@ parameters { initial_strategy: 0 initial_smart: true } +parameters { + name: "___prelu_layer_1__.w0" + size: 300 + initial_mean: 0.0 + initial_std: 0.057735026919 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___prelu_layer_2__.w0" + size: 60 + initial_mean: 0.0 + initial_std: 0.129099444874 + initial_strategy: 0 + initial_smart: true +} input_layer_names: "input" -output_layer_names: "__prelu_layer_0__" +output_layer_names: "__prelu_layer_2__" sub_models { name: "root" layer_names: "input" layer_names: "__prelu_layer_0__" + layer_names: "__prelu_layer_1__" + layer_names: "__prelu_layer_2__" input_layer_names: "input" - output_layer_names: "__prelu_layer_0__" + output_layer_names: "__prelu_layer_2__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr index 8133aa9c8d3e7c6843d1b27b70e87d394a1e0e47..046037936a6d85f54095c65f206e468aa69065d7 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_recursive_topology.protostr @@ -16,6 +16,9 @@ layers { inputs { input_layer_name: "data" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_1__" @@ -28,6 +31,9 @@ layers { inputs { input_layer_name: "__addto_0__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_2__" @@ -40,6 +46,9 @@ layers { inputs { input_layer_name: "__addto_1__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_3__" @@ -52,6 +61,9 @@ layers { inputs { input_layer_name: "__addto_2__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_4__" @@ -64,6 +76,9 @@ layers { inputs { input_layer_name: "__addto_3__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_5__" @@ -76,6 +91,9 @@ layers { inputs { input_layer_name: "__addto_4__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_6__" @@ -88,6 +106,9 @@ layers { inputs { input_layer_name: "__addto_5__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_7__" @@ -100,6 +121,9 @@ layers { inputs { input_layer_name: "__addto_6__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_8__" @@ -112,6 +136,9 @@ layers { inputs { input_layer_name: "__addto_7__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_9__" @@ -124,6 +151,9 @@ layers { inputs { input_layer_name: "__addto_8__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_10__" @@ -136,6 +166,9 @@ layers { inputs { input_layer_name: "__addto_9__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_11__" @@ -148,6 +181,9 @@ layers { inputs { input_layer_name: "__addto_10__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_12__" @@ -160,6 +196,9 @@ layers { inputs { input_layer_name: "__addto_11__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_13__" @@ -172,6 +211,9 @@ layers { inputs { input_layer_name: "__addto_12__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_14__" @@ -184,6 +226,9 @@ layers { inputs { input_layer_name: "__addto_13__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_15__" @@ -196,6 +241,9 @@ layers { inputs { input_layer_name: "__addto_14__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_16__" @@ -208,6 +256,9 @@ layers { inputs { input_layer_name: "__addto_15__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_17__" @@ -220,6 +271,9 @@ layers { inputs { input_layer_name: "__addto_16__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_18__" @@ -232,6 +286,9 @@ layers { inputs { input_layer_name: "__addto_17__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_19__" @@ -244,6 +301,9 @@ layers { inputs { input_layer_name: "__addto_18__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_20__" @@ -256,6 +316,9 @@ layers { inputs { input_layer_name: "__addto_19__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_21__" @@ -268,6 +331,9 @@ layers { inputs { input_layer_name: "__addto_20__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_22__" @@ -280,6 +346,9 @@ layers { inputs { input_layer_name: "__addto_21__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_23__" @@ -292,6 +361,9 @@ layers { inputs { input_layer_name: "__addto_22__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_24__" @@ -304,6 +376,9 @@ layers { inputs { input_layer_name: "__addto_23__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_25__" @@ -316,6 +391,9 @@ layers { inputs { input_layer_name: "__addto_24__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_26__" @@ -328,6 +406,9 @@ layers { inputs { input_layer_name: "__addto_25__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_27__" @@ -340,6 +421,9 @@ layers { inputs { input_layer_name: "__addto_26__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_28__" @@ -352,6 +436,9 @@ layers { inputs { input_layer_name: "__addto_27__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_29__" @@ -364,6 +451,9 @@ layers { inputs { input_layer_name: "__addto_28__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_30__" @@ -376,6 +466,9 @@ layers { inputs { input_layer_name: "__addto_29__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__addto_31__" @@ -388,6 +481,9 @@ layers { inputs { input_layer_name: "__addto_30__" } + height: 0 + width: 0 + depth: 1 } layers { name: "__fc_layer_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr index d0ad388165007b8f96f059e5b003c52f756383e5..7a2f3eab38808a031c27cf7ab9d6273952e389eb 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/util_layers.protostr @@ -22,6 +22,9 @@ layers { inputs { input_layer_name: "b" } + height: 0 + width: 0 + depth: 1 } layers { name: "__concat_0__" @@ -34,6 +37,9 @@ layers { inputs { input_layer_name: "b" } + height: 0 + width: 0 + depth: 1 } layers { name: "__concat_1__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py new file mode 100644 index 0000000000000000000000000000000000000000..a991b22252ba10eed895efd931108c2d8b0e52f1 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py @@ -0,0 +1,11 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-4) + +#data = data_layer(name='data', size=180, width=30, height=6) +#batchNorm = batch_norm_layer(data, num_channels=1) +#outputs(batchNorm) + +data3D = data_layer(name='data3D', size=120 * 3, width=20, height=6, depth=3) +batchNorm3D = batch_norm_layer(data3D, num_channels=1, img3D=True) +outputs(batchNorm3D) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py index 2e3057f323db22ffc3911cce30ec2e8bb95e3dbe..aae90fab32db78a70c2169ed8fafb930433f4136 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py @@ -2,5 +2,7 @@ from paddle.trainer_config_helpers import * data = data_layer(name='input', size=300) prelu = prelu_layer(input=data) +prelu = prelu_layer(input=data, partial_sum=1) +prelu = prelu_layer(input=data, partial_sum=5) outputs(prelu) diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py index 7589cc9917f26375d595e200245d5ba099bc38d7..e66bf67d7949057486eb54c46f39128fad5dae55 100644 --- a/python/paddle/v2/event.py +++ b/python/paddle/v2/event.py @@ -53,10 +53,13 @@ class BeginPass(object): class EndPass(WithMetric): """ Event On One Pass Training Complete. + To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')" + in your event_handler call back """ - def __init__(self, pass_id, evaluator): + def __init__(self, pass_id, evaluator, gm): self.pass_id = pass_id + self.gm = gm WithMetric.__init__(self, evaluator) @@ -73,10 +76,13 @@ class BeginIteration(object): class EndIteration(WithMetric): """ Event On One Batch Training Complete. + To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')" + in your event_handler call back """ - def __init__(self, pass_id, batch_id, cost, evaluator): + def __init__(self, pass_id, batch_id, cost, evaluator, gm): self.pass_id = pass_id self.batch_id = batch_id self.cost = cost + self.gm = gm WithMetric.__init__(self, evaluator) diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index c1585bcffcceb75292853018179066c9f614261e..9e665adad2d3ad91d183c6815fbd7135ac4e8965 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -43,7 +43,6 @@ class OpDescCreationMethod(object): if len(args) != 0: raise ValueError("Only keyword arguments are supported.") op_desc = framework_pb2.OpDesc() - for input_parameter in self.__op_proto__.inputs: input_arguments = kwargs.get(input_parameter.name, []) if is_str(input_arguments): @@ -142,8 +141,8 @@ def create_op_creation_method(op_proto): return OpInfo( method=__impl__, name=op_proto.type, - inputs=[var.name for var in op_proto.inputs], - outputs=[var.name for var in op_proto.outputs], + inputs=[(var.name, var.duplicable) for var in op_proto.inputs], + outputs=[(var.name, var.duplicable) for var in op_proto.outputs], attrs=[attr.name for attr in op_proto.attrs]) @@ -180,9 +179,15 @@ class OperatorFactory(object): return self.op_methods.get(t) def get_op_input_names(self, type): + return map(lambda x: x[0], self.get_op_info(type).inputs) + + def get_op_inputs(self, type): return self.get_op_info(type).inputs def get_op_output_names(self, type): + return map(lambda x: x[0], self.get_op_info(type).outputs) + + def get_op_outputs(self, type): return self.get_op_info(type).outputs def get_op_attr_names(self, type): diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index ef910f939be0b9d3cb5e6d49e69a00daa191b1c6..6b22c0008210b492d00dee42e967ca14d0948b20 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -19,8 +19,6 @@ py_test(test_scatter_op SRCS test_scatter_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(test_top_k_op SRCS test_top_k_op.py) -py_test(gradient_checker SRCS gradient_checker.py) - py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) @@ -33,5 +31,8 @@ py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) py_test(test_lookup_table SRCS test_lookup_table.py) py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py) +py_test(test_sum_op SRCS test_sum_op.py) py_test(mnist SRCS mnist.py) +py_test(test_concat_op SRCS test_concat_op.py) py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py) +py_test(test_reshape_op SRCS test_reshape_op.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py deleted file mode 100644 index fdb06b7988935ebbe53f72f4eba89d75ac2502d4..0000000000000000000000000000000000000000 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ /dev/null @@ -1,311 +0,0 @@ -import unittest - -import numpy -import itertools -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator - -__all__ = ['get_numeric_gradient'] - - -def create_op(op_type): - # TODO need to set attrs - kwargs = dict() - for in_name in Operator.get_op_input_names(op_type): - kwargs[in_name] = in_name - for out_name in Operator.get_op_output_names(op_type): - kwargs[out_name] = out_name - - return Operator(op_type, **kwargs) - - -def grad_var_name(var_name): - return var_name + "@GRAD" - - -def empty_var_name(): - return "@EMPTY@" - - -def get_numeric_gradient(op, - input_values, - output_name, - input_to_check, - delta=0.005, - local_scope=None, - in_place=False): - """ - Get Numeric Gradient for an operator's input. - - :param op: C++ operator instance, could be an network - :param input_values: The input variables. Should be an dictionary, key is - variable name. Value is numpy array. - :param output_name: The final output variable name. - :param input_to_check: The input variable need to get gradient. - :param delta: The perturbation value for numeric gradient method. The - smaller delta is, the more accurate result will get. But if that delta is - too small, it could occur numerical stability problem. - :param local_scope: The local scope used for get_numeric_gradient. - :return: The gradient array in numpy format. - """ - if local_scope is None: - local_scope = core.Scope() - - # Create all input variable in local_scope - for var_name in input_values: - var = local_scope.new_var(var_name) - tensor = var.get_tensor() - tensor.set_dims(input_values[var_name].shape) - tensor.alloc_float(core.CPUPlace()) - tensor.set(input_values[var_name], core.CPUPlace()) - - # Create all output variable in local_scope - opts = op.outputs() - for key in opts: - for output in opts[key]: - if local_scope.find_var(output) is None: - local_scope.new_var(output).get_tensor() - op.infer_shape(local_scope) - - # allocate output memory - for key in opts: - for output in opts[key]: - local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace( - )) - - cpu_ctx = core.DeviceContext.create(core.CPUPlace()) - - def get_output(): - op.run(local_scope, cpu_ctx) - return numpy.array(local_scope.find_var(output_name).get_tensor()).sum() - - def product(dim): - return reduce(lambda a, b: a * b, dim, 1) - - def restore_inputs(): - for var_name in input_values: - tensor_ = local_scope.find_var(var_name).get_tensor() - tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace()) - - # get the input tensor that we want to get it's numeric gradient. - tensor_to_check = local_scope.find_var(input_to_check).get_tensor() - tensor_size = product(tensor_to_check.get_dims()) - # prepare a numpy array to store the gradient. - gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32') - - # we only compute gradient of one element each time. - # we use a for loop to compute the gradient of every element. - for i in xrange(tensor_size): - if in_place: - restore_inputs() - # get one input element throw it's index i. - origin = tensor_to_check.get_float_element(i) - - # add delta to it, run op and then get the sum of the result tensor. - x_pos = origin + delta - tensor_to_check.set_float_element(i, x_pos) - y_pos = get_output() - - # plus delta to this element, run op and get the sum of the result tensor. - if in_place: - restore_inputs() - x_neg = origin - delta - tensor_to_check.set_float_element(i, x_neg) - y_neg = get_output() - - # restore old value - tensor_to_check.set_float_element(i, origin) - - # compute the gradient of this element and store it into a numpy array. - gradient_flat[i] = (y_pos - y_neg) / delta / 2 - - # reshape the gradient result to the shape of the source tensor. - return gradient_flat.reshape(tensor_to_check.get_dims()) - - -class GradientChecker(unittest.TestCase): - def __get_gradient(self, forward_op, backward_op, input_value, grad_names, - place): - """Get the input gradients after running forward and backward operators - on the given places. - - :param forward_op: forward operator - :type forward_op: Operator - :param backward_op: backward operator - :type backward_op: Operator - :param input_value: input values. - :type input_value: dict{string:numpy.array} - :param grad_names: the names of returned input gradients. - :type input_value: a list of string - :param place: the device type. - :type place: CPUPlace or GPUPlace - :return: the input grdients of given grad_names. - :rtype: a list of numpy.array - """ - scope = core.Scope() - ctx = core.DeviceContext.create(place) - - inputs = forward_op.inputs() - in_names = [item for k in inputs for item in inputs[k]] - outputs = forward_op.outputs() - out_names = [item for k in outputs for item in outputs[k]] - - # create input var and set value - for name, value in input_value.iteritems(): - if name not in in_names: - raise ValueError(name + "does not exist in Op's inputs.") - var = scope.new_var(name).get_tensor() - var.set_dims(value.shape) - var.set(value, place) - - # run forward op - for out_name in out_names: - scope.new_var(out_name) - forward_op.infer_shape(scope) - forward_op.run(scope, ctx) - - # set output var's shape - # set output grad to ones - for name in out_names: - out_tensor = scope.find_var(name).get_tensor() - grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() - grad_tensor.set_dims(out_tensor.shape()) - data = numpy.ones(out_tensor.shape(), dtype=numpy.float32) - grad_tensor.set(data, place) - - # run backward op - backward_outs = backward_op.outputs() - backward_names = [ - item for key in backward_outs for item in backward_outs[key] - ] - for name in backward_names: - scope.new_var(name) - - backward_op.infer_shape(scope) - backward_op.run(scope, ctx) - - outs = [ - numpy.array(scope.find_var(name).get_tensor()) - for name in grad_names - ] - return outs - - def compare_grad(self, forward_op, input_value, no_grad_set=None): - """ Compare the input gradients between CPU and GPU for the given forward - operator. - - :param forward_op: forward operator - :type forward_op: Operator - :param input_value: input values. - :type input_value: dict{string:numpy.array} - :param no_grad_set: the set of variables names without gradients. - :type no_grad_set: a set of string - :raises: AssertionError, there is different gradient value. - """ - if no_grad_set is None: - no_grad_set = set() - backward_op = core.Operator.backward(forward_op, no_grad_set) - # return if not compile with GPU or not implementing GPU kernel - if not (core.is_compile_gpu() and backward_op.support_gpu()): - return - - outputs = backward_op.outputs() - out_names = [item for k in outputs for item in outputs[k]] - out_names = filter(lambda x: x != empty_var_name(), out_names) - cpu_grads = self.__get_gradient(forward_op, backward_op, input_value, - out_names, core.CPUPlace()) - gpu_grads = self.__get_gradient(forward_op, backward_op, input_value, - out_names, core.GPUPlace(0)) - - for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads, - out_names): - self.assertTrue( - numpy.allclose( - c_grad, g_grad, atol=1e-4), - "output name: " + name + " has diff") - - def __assert_is_close(self, numeric_grads, analytic_grads, names, - max_relative_error, msg_prefix): - """Use relative error for the comparison. - - :param numeric_grads: the numerical graidents. - :type numeric_grads: a list of numpy.array - :param analytic_grads: the analytical graidents. - :type analytic_grads: a list of numpy.array - :param name: the names of gradients, used to print for debug. - :type names: a list of string - :param msg_prefix: string info, used to print for debug. - :type msf_prefix: string - """ - for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): - abs_a = numpy.abs(a) - # if abs_a is nearly zero, then use abs error for a, not relative - # error. - abs_a[abs_a < 1e-3] = 1 - - diff_mat = numpy.abs(a - b) / abs_a - max_diff = numpy.max(diff_mat) - - def err_msg(): - offset = numpy.argmax(diff_mat > max_relative_error) - return "%s Variable %s max gradient diff %f over limit %f, the first " \ - "error element is %d" % ( - msg_prefix, name, max_diff, max_relative_error, offset) - - self.assertLessEqual(max_diff, max_relative_error, err_msg()) - - def check_grad(self, - forward_op, - input_vars, - inputs_to_check, - output_name, - no_grad_set=None, - only_cpu=False, - in_place=False, - max_relative_error=0.005): - """ - :param forward_op: used to create backward_op - :param input_vars: numpy value of input variable. The following - computation will use these variables. - :param inputs_to_check: inputs var names that should check gradient. - :param output_name: the output variable name of forward network. - :param max_relative_error: The relative tolerance parameter. - :param no_grad_set: used when create backward ops - :param only_cpu: only compute and check gradient on cpu kernel. - :return: - """ - if no_grad_set is None: - no_grad_set = set() - - no_tmp_out = forward_op.no_intermediate_outputs() - if len(no_tmp_out) != 1: - raise ValueError("non temp out_names should be 1") - - inputs = forward_op.inputs() - in_names = [item for k in inputs for item in inputs[k]] - for no_grad in no_grad_set: - if no_grad not in in_names: - raise ValueError("no_grad should be in in_names") - if no_grad in inputs_to_check: - raise ValueError("no_grad should not be in inputs_to_check") - - backward_op = core.Operator.backward(forward_op, no_grad_set) - - places = [core.CPUPlace()] - if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu(): - places.append(core.GPUPlace(0)) - - # get numerical gradients - numeric_grads = [ - get_numeric_gradient( - forward_op, input_vars, output_name, name, in_place=in_place) - for name in inputs_to_check - ] - - check_names = [grad_var_name(name) for name in inputs_to_check] - for place in places: - analytic_grads = self.__get_gradient(forward_op, backward_op, - input_vars, check_names, place) - self.__assert_is_close(numeric_grads, analytic_grads, check_names, - max_relative_error, - "Gradient Check On %s" % str(place)) diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py new file mode 100644 index 0000000000000000000000000000000000000000..4fec4c9109bf247abb2068177583acb47a8ebd97 --- /dev/null +++ b/python/paddle/v2/framework/tests/op_test.py @@ -0,0 +1,281 @@ +import unittest +import numpy as np +import itertools +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + +def create_op(scope, op_type, inputs, outputs, attrs): + kwargs = dict() + + for in_name, in_dup in Operator.get_op_inputs(op_type): + if in_name in inputs: + kwargs[in_name] = [] + if in_dup: + sub_in = inputs[in_name] + for sub_in_name, _ in sub_in: + var = scope.new_var(sub_in_name) + kwargs[in_name].append(sub_in_name) + else: + var = scope.new_var(in_name) + kwargs[in_name].append(in_name) + + for out_name, out_dup in Operator.get_op_outputs(op_type): + if out_name in outputs: + kwargs[out_name] = [] + if out_dup: + sub_in = outputs[out_name] + for sub_in_name, _ in sub_in: + var = scope.new_var(sub_in_name) + kwargs[out_name].append(sub_in_name) + else: + var = scope.new_var(out_name) + kwargs[out_name].append(out_name) + + for attr_name in Operator.get_op_attr_names(op_type): + if attr_name in attrs: + kwargs[attr_name] = attrs[attr_name] + return Operator(op_type, **kwargs) + + +def set_input(scope, op, inputs, place): + for in_name, in_dup in Operator.get_op_inputs(op.type()): + if in_name in inputs: + if in_dup: + sub_in = inputs[in_name] + for sub_in_name, sub_in_array in sub_in: + var = scope.find_var(sub_in_name) + tensor = var.get_tensor() + tensor.set_dims(sub_in_array.shape) + tensor.set(sub_in_array, place) + else: + var = scope.find_var(in_name) + tensor = var.get_tensor() + arr = inputs[in_name] + tensor.set_dims(arr.shape) + tensor.set(arr, place) + + +def set_output_grad(scope, op, outputs, place): + for out_name, out_dup in Operator.get_op_outputs(op.type()): + if out_name in outputs: + if out_dup: + sub_out = outputs[out_name] + for sub_out_name, _ in sub_out: + out_tensor = scope.find_var(sub_out_name).get_tensor() + grad_tensor = scope.new_var(grad_var_name( + sub_out_name)).get_tensor() + grad_tensor.set_dims(out_tensor.shape()) + data = np.ones(out_tensor.shape(), dtype=np.float32) + grad_tensor.set(data, place) + else: + out_tensor = scope.find_var(out_name).get_tensor() + grad_tensor = scope.new_var(grad_var_name(out_name)).get_tensor( + ) + grad_tensor.set_dims(out_tensor.shape()) + data = np.ones(out_tensor.shape(), dtype=np.float32) + grad_tensor.set(data, place) + + +def get_numeric_gradient(scope, + op, + inputs, + input_to_check, + output_name, + delta=0.005, + in_place=False): + + set_input(scope, op, inputs, core.CPUPlace()) + op.infer_shape(scope) + + tensor_to_check = scope.find_var(input_to_check).get_tensor() + + def product(dim): + return reduce(lambda a, b: a * b, dim, 1) + + ctx = core.DeviceContext.create(core.CPUPlace()) + + def get_output(): + op.run(scope, ctx) + return np.array(scope.find_var(output_name).get_tensor()).sum() + + tensor_to_check = scope.find_var(input_to_check).get_tensor() + tensor_size = product(tensor_to_check.get_dims()) + gradient_flat = np.zeros(shape=(tensor_size, ), dtype='float32') + # we only compute gradient of one element each time. + # we use a for loop to compute the gradient of every element. + for i in xrange(tensor_size): + if in_place: + set_input(scope, op, inputs, core.CPUPlace()) + + # get one input element throw it's index i. + origin = tensor_to_check.get_float_element(i) + # add delta to it, run op and then get the sum of the result tensor. + x_pos = origin + delta + tensor_to_check.set_float_element(i, x_pos) + y_pos = get_output() + + if in_place: + set_input(scope, op, inputs, core.CPUPlace()) + + x_neg = origin - delta + tensor_to_check.set_float_element(i, x_neg) + y_neg = get_output() + + tensor_to_check.set_float_element(i, origin) + gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + return gradient_flat.reshape(tensor_to_check.get_dims()) + + +def get_backward_op(scope, op, no_grad_set): + backward_op = core.Operator.backward(op, no_grad_set) + for input in backward_op.input_vars(): + var = scope.new_var(input) + var.get_tensor() + for output in backward_op.output_vars(): + var = scope.new_var(output) + var.get_tensor() + return backward_op + + +def get_gradient(scope, op, inputs, outputs, grad_name, place, + no_grad_set=None): + ctx = core.DeviceContext.create(place) + + set_input(scope, op, inputs, place) + + op.infer_shape(scope) + op.run(scope, ctx) + + if no_grad_set is None: + no_grad_set = set() + + backward_op = get_backward_op(scope, op, no_grad_set) + set_output_grad(scope, op, outputs, place) + + backward_op.infer_shape(scope) + backward_op.run(scope, ctx) + + out = np.array(scope.find_var(grad_name).get_tensor()) + return out + + +class OpTest(unittest.TestCase): + def check_output_with_place(self, place): + self.scope = core.Scope() + op_inputs = self.inputs if hasattr(self, "inputs") else dict() + op_attrs = self.attrs if hasattr(self, "attrs") else dict() + self.op = create_op(self.scope, self.op_type, op_inputs, self.outputs, + op_attrs) + if isinstance(place, core.GPUPlace) and not self.op.support_gpu(): + return + set_input(self.scope, self.op, self.inputs, place) + self.op.infer_shape(self.scope) + ctx = core.DeviceContext.create(place) + self.op.run(self.scope, ctx) + + for out_name, out_dup in Operator.get_op_outputs(self.op.type()): + if out_dup: + sub_out = self.outputs[out_name] + for sub_out_name in sub_out: + actual = np.array( + self.scope.find_var(sub_out_name).get_tensor()) + expect = sub_out[sub_out_name] + self.assertTrue( + np.allclose( + actual, expect, atol=1e-05), + "output name: " + out_name + "has diff") + else: + actual = np.array(self.scope.find_var(out_name).get_tensor()) + expect = self.outputs[out_name] + self.assertTrue( + np.allclose( + actual, expect, atol=1e-05), + "output name: " + out_name + "has diff") + + def check_output(self): + places = [core.CPUPlace()] + if core.is_compile_gpu(): + places.append(core.GPUPlace(0)) + for place in places: + self.check_output_with_place(place) + + def __assert_is_close(self, numeric_grads, analytic_grads, names, + max_relative_error, msg_prefix): + + for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): + abs_a = np.abs(a) + abs_a[abs_a < 1e-3] = 1 + + diff_mat = np.abs(a - b) / abs_a + max_diff = np.max(diff_mat) + + def err_msg(): + offset = np.argmax(diff_mat > max_relative_error) + return "%s Variable %s max gradient diff %f over limit %f, the first " \ + "error element is %d" % ( + msg_prefix, name, max_diff, max_relative_error, offset) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def check_grad(self, + inputs_to_check, + output_name, + no_grad_set=None, + in_place=False, + max_relative_error=0.005): + self.scope = core.Scope() + op_inputs = self.inputs if hasattr(self, "inputs") else dict() + op_attrs = self.attrs if hasattr(self, "attrs") else dict() + self.op = create_op(self.scope, self.op_type, op_inputs, self.outputs, + op_attrs) + if no_grad_set is None: + no_grad_set = set() + + numeric_grads = [ + get_numeric_gradient( + self.scope, + self.op, + self.inputs, + input_to_check, + output_name, + in_place=in_place) for input_to_check in inputs_to_check + ] + grad_names = [ + grad_var_name(input_to_check) for input_to_check in inputs_to_check + ] + + cpu_place = core.CPUPlace() + cpu_analytic_grads = [ + get_gradient(self.scope, self.op, self.inputs, self.outputs, + grad_name, cpu_place, no_grad_set) + for grad_name in grad_names + ] + + self.__assert_is_close(numeric_grads, cpu_analytic_grads, grad_names, + max_relative_error, + "Gradient Check On %s" % str(cpu_place)) + + if core.is_compile_gpu() and self.op.support_gpu(): + gpu_place = core.GPUPlace(0) + gpu_analytic_grads = [ + get_gradient(self.scope, self.op, self.inputs, self.outputs, + grad_name, gpu_place, no_grad_set) + for grad_name in grad_names + ] + + self.__assert_is_close(numeric_grads, gpu_analytic_grads, + grad_names, max_relative_error, + "Gradient Check On %s" % str(gpu_place)) + + for c_grad, g_grad, name in itertools.izip( + cpu_analytic_grads, gpu_analytic_grads, grad_names): + self.assertTrue( + np.allclose( + c_grad, g_grad, atol=1e-4), + "output name: " + name + " has diff") diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py deleted file mode 100644 index 370f27eaf658dadbf7e82262c118140a10d15c41..0000000000000000000000000000000000000000 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ /dev/null @@ -1,72 +0,0 @@ -import numpy -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator - - -class OpTestMeta(type): - """ - Operator Test ClassMeta. - - It injects `test_all` method into user's OperatorTest class, to make Python - unittest module run that method. - - The `test_all` read what value is stored in `self`. It use self's values to - create and run a operator, and check whether that op is OK or not. - - See `test_add_two_op` for example usage. - """ - - def __new__(cls, name, bases, attrs): - obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs) - - def test_all(self): - scope = core.Scope() - kwargs = dict() - places = [core.CPUPlace()] - if core.is_compile_gpu(): - places.append(core.GPUPlace(0)) - - for place in places: - for in_name in Operator.get_op_input_names(self.type): - if hasattr(self, "inputs") and in_name in self.inputs: - kwargs[in_name] = in_name - var = scope.new_var(in_name).get_tensor() - arr = self.inputs[in_name] - var.set_dims(arr.shape) - var.set(arr, place) - else: - kwargs[in_name] = "@EMPTY@" - - for out_name in Operator.get_op_output_names(self.type): - if not hasattr(self, "outputs"): - raise ValueError( - "The test op must set self.outputs dict.") - if out_name not in self.outputs: - raise ValueError("The %s is not in self.outputs dict." % - (out_name)) - kwargs[out_name] = out_name - scope.new_var(out_name).get_tensor() - - for attr_name in Operator.get_op_attr_names(self.type): - if hasattr(self, "attrs") and attr_name in self.attrs: - kwargs[attr_name] = self.attrs[attr_name] - - op = Operator(self.type, **kwargs) - if isinstance(place, core.GPUPlace) and not op.support_gpu(): - return - - op.infer_shape(scope) - - ctx = core.DeviceContext.create(place) - op.run(scope, ctx) - - for out_name in Operator.get_op_output_names(self.type): - actual = numpy.array(scope.find_var(out_name).get_tensor()) - expect = self.outputs[out_name] - self.assertTrue( - numpy.allclose( - actual, expect, atol=1e-05), - "output name: " + out_name + " has diff") - - obj.test_all = test_all - return obj diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index a578e74eca9a3c4327a4881f853028e2347c98ad..3ca34d9b9fc2b7b54cc25ca0e0d1a08a71e37c52 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -1,23 +1,20 @@ import unittest +import numpy as np +from op_test import OpTest -import numpy -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator - -from op_test_util import OpTestMeta - - -class TestAddOp(unittest.TestCase): - __metaclass__ = OpTestMeta +class TestAddOp(OpTest): def setUp(self): - self.type = "add" + self.op_type = "add" self.inputs = { - 'X': numpy.random.random((102, 105)).astype("float32"), - 'Y': numpy.random.random((102, 105)).astype("float32") + 'X': np.random.random((102, 105)).astype("float32"), + 'Y': np.random.random((102, 105)).astype("float32") } self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']} + def test_check_output(self): + self.check_output() + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_concat_op.py b/python/paddle/v2/framework/tests/test_concat_op.py new file mode 100644 index 0000000000000000000000000000000000000000..656563f96e52df30951ec0ec7042ad9c530e90b2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_concat_op.py @@ -0,0 +1,22 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestConcatOp(OpTest): + def setUp(self): + self.op_type = "concat" + x0 = np.random.random((2, 3, 2, 5)).astype('float32') + x1 = np.random.random((2, 3, 3, 5)).astype('float32') + x2 = np.random.random((2, 3, 4, 5)).astype('float32') + axis = 2 + self.inputs = {'X': [('x0', x0), ('x1', x1), ('x2', x2)]} + self.attrs = {'axis': axis} + self.outputs = {'Out': np.concatenate((x0, x1, x2), axis=axis)} + + def test_check_output(self): + self.check_output() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py index 32013a7999a4be42e5974b9ac751d5d911730994..797cbd8cc5cf7f73d58ca713d02667731d5c8a0e 100644 --- a/python/paddle/v2/framework/tests/test_cos_sim_op.py +++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py @@ -1,17 +1,14 @@ import unittest import numpy as np -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +from op_test import OpTest -class TestCosSimOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestCosSimOp(OpTest): def setUp(self): - self.type = "cos_sim" + self.op_type = "cos_sim" self.inputs = { - 'X': np.random.random((32, 64)).astype("float32"), - 'Y': np.random.random((32, 64)).astype("float32") + 'X': np.random.random((10, 5)).astype("float32"), + 'Y': np.random.random((10, 5)).astype("float32") } expect_x_norm = np.linalg.norm(self.inputs['X'], axis=1) expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=1) @@ -23,38 +20,20 @@ class TestCosSimOp(unittest.TestCase): 'Out': np.expand_dims(expect_out, 1) } + def test_check_output(self): + self.check_output() -class TestCosSimGradOp(GradientChecker): - def setUp(self): - self.op = create_op("cos_sim") - self.inputs = { - 'X': np.random.random((10, 5)).astype("float32"), - 'Y': np.random.random((10, 5)).astype("float32") - } - - def test_cpu_gpu_compare(self): - self.compare_grad(self.op, self.inputs) - - def test_normal(self): - self.check_grad( - self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.05) + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05) - def test_ignore_x(self): + def test_check_grad_ingore_x(self): self.check_grad( - self.op, - self.inputs, ["Y"], - "Out", - max_relative_error=0.05, - no_grad_set={"X"}) + ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set('X')) - def test_ignore_y(self): + def test_check_grad_ignore_y(self): self.check_grad( - self.op, - self.inputs, ["X"], - "Out", - max_relative_error=0.05, - no_grad_set={"Y"}) + ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y')) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py index d4277f2a42ce2e66e37405ccd3b2ee444d403d1a..c2fc102a8b8de82da5c3fc5fee273790325908f8 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py @@ -1,36 +1,27 @@ import unittest import numpy -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op +from op_test import OpTest -class TestCrossEntropy(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestCrossEntropy(OpTest): def setUp(self): - self.type = "onehot_cross_entropy" + self.op_type = "onehot_cross_entropy" batch_size = 30 class_num = 10 - X = numpy.random.random((batch_size, class_num)).astype("float32") - label = 5 * numpy.ones(batch_size).astype("int32") + X = numpy.random.uniform(0.1, 1.0, + [batch_size, class_num]).astype("float32") + label = (class_num / 2) * numpy.ones(batch_size).astype("int32") self.inputs = {'X': X, 'label': label} Y = [] for i in range(0, batch_size): Y.append(-numpy.log(X[i][label[i]])) self.outputs = {'Y': numpy.array(Y).astype("float32")} + def test_check_output(self): + self.check_output() -class CrossEntropyGradOpTest(GradientChecker): def test_check_grad(self): - op = create_op("onehot_cross_entropy") - batch_size = 30 - class_num = 10 - inputs = { - "X": numpy.random.uniform( - 0.1, 1.0, [batch_size, class_num]).astype("float32"), - "label": (class_num / 2) * numpy.ones(batch_size).astype("int32") - } - self.check_grad(op, inputs, set("X"), "Y") + self.check_grad(['X'], 'Y') if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py index e5c862605fb11a5ea1426cf8f9054589dc377ff1..2473daaba24438819f3f55ccc40fe1c64ee59960 100644 --- a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py +++ b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py @@ -1,16 +1,17 @@ import unittest -from op_test_util import OpTestMeta -import numpy +import numpy as np +from op_test import OpTest -class TestFillZerosLikeOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestFillZerosLikeOp(OpTest): def setUp(self): - self.type = "fill_zeros_like" - self.inputs = {'Src': numpy.random.random((219, 232)).astype("float32")} - self.outputs = {'Dst': numpy.zeros_like(self.inputs['Src'])} + self.op_type = "fill_zeros_like" + self.inputs = {'Src': np.random.random((219, 232)).astype("float32")} + self.outputs = {'Dst': np.zeros_like(self.inputs["Src"])} + + def test_check_output(self): + self.check_output() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py index e3de3fd0a1dddb3edb0de5987bd71d8a176d97ef..b0ab429ef1b53640dfb696f6ea2f7b745564b874 100644 --- a/python/paddle/v2/framework/tests/test_gather_op.py +++ b/python/paddle/v2/framework/tests/test_gather_op.py @@ -1,30 +1,20 @@ import unittest -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op -import numpy -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import numpy as np +from op_test import OpTest -class TestGatherOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestGatherOp(OpTest): def setUp(self): - self.type = "gather" - xnp = numpy.random.random((10, 20)).astype("float32") - self.inputs = { - 'X': xnp, - 'Index': numpy.array([1, 3, 5]).astype("int32") - } - self.outputs = {'Out': self.inputs['X'][self.inputs['Index']]} + self.op_type = "gather" + xnp = np.random.random((10, 20)).astype("float32") + self.inputs = {'X': xnp, 'Index': np.array([1, 3, 5]).astype("int32")} + self.outputs = {'Out': self.inputs["X"][self.inputs["Index"]]} + def test_check_output(self): + self.check_output() -class TestGatherGradOp(GradientChecker): - def test_gather_grad(self): - op = create_op("gather") - xnp = numpy.random.random((10, 20)).astype("float32") - inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")} - self.check_grad(op, inputs, set("X"), "Out") + def test_check_grad(self): + self.check_grad(['X'], 'Out') if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index f95ed70b58d611b3233a21d3f2a34c864ae4d1b3..1f9e4db783c9907a22db72c8a6ff06c7ca0735da 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -14,11 +14,11 @@ class GaussianRandomTest(unittest.TestCase): def gaussian_random_test(self, place): scope = core.Scope() - scope.new_var("Out").get_tensor() + scope.new_var('Out').get_tensor() op = Operator( "gaussian_random", - Out="Out", + Out='Out', dims=[1000, 784], mean=.0, std=1., @@ -27,10 +27,10 @@ class GaussianRandomTest(unittest.TestCase): op.infer_shape(scope) context = core.DeviceContext.create(place) op.run(scope, context) - tensor = numpy.array(scope.find_var("Out").get_tensor()) + tensor = numpy.array(scope.find_var('Out').get_tensor()) self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py index e8a7f848dffa0529c8cb0d6599286ce0e228d180..abeb01cb34158a43b5dcce5e39efc0e21e9fe638 100644 --- a/python/paddle/v2/framework/tests/test_gradient_checker.py +++ b/python/paddle/v2/framework/tests/test_gradient_checker.py @@ -1,42 +1,44 @@ import unittest -import numpy -from paddle.v2.framework.op import Operator -from gradient_checker import GradientChecker -from gradient_checker import get_numeric_gradient +import numpy as np +import paddle.v2.framework.core as core +from op_test import get_numeric_gradient +from op_test import create_op class GetNumericGradientTest(unittest.TestCase): def test_add_op(self): - add_op = Operator("add", X="X", Y="Y", Out="Z") - x = numpy.random.random((10, 1)).astype("float32") - y = numpy.random.random((10, 1)).astype("float32") - - arr = get_numeric_gradient(add_op, {"X": x, "Y": y}, "Z", "X") + x = np.random.random((10, 1)).astype("float32") + y = np.random.random((10, 1)).astype("float32") + z = x + y + scope = core.Scope() + add_op = create_op(scope, "add", {'X': x, 'Y': y}, {'Out': z}, dict()) + arr = get_numeric_gradient(scope, add_op, {'X': x, 'Y': y}, 'X', 'Out') self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4) def test_softmax_op(self): def stable_softmax(x): """Compute the softmax of vector x in a numerically stable way.""" - shiftx = x - numpy.max(x) - exps = numpy.exp(shiftx) - return exps / numpy.sum(exps) + shiftx = x - np.max(x) + exps = np.exp(shiftx) + return exps / np.sum(exps) def label_softmax_grad(Y, dY): dX = Y * 0.0 for i in range(Y.shape[0]): - d = numpy.dot(Y[i, :], dY[i, :]) + d = np.dot(Y[i, :], dY[i, :]) dX[i, :] = Y[i, :] * (dY[i, :] - d) return dX - softmax_op = Operator("softmax", X="X", Y="Y") - - X = numpy.random.random((2, 2)).astype("float32") - Y = numpy.apply_along_axis(stable_softmax, 1, X) - dY = numpy.ones(Y.shape) + X = np.random.random((2, 2)).astype("float32") + Y = np.apply_along_axis(stable_softmax, 1, X) + dY = np.ones(Y.shape) dX = label_softmax_grad(Y, dY) - arr = get_numeric_gradient(softmax_op, {"X": X}, "Y", "X") - numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2) + scope = core.Scope() + softmax_op = create_op(scope, "softmax", {"X": X}, {"Y": Y}, dict()) + + arr = get_numeric_gradient(scope, softmax_op, {"X": X}, "X", "Y") + np.testing.assert_almost_equal(arr, dX, decimal=1e-2) if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py index 19eb464baa555fb67a994f3cfb4d3ed628367c73..b259bb67e832adcb31b0ab4e992738be2b85f884 100644 --- a/python/paddle/v2/framework/tests/test_lookup_table.py +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -1,31 +1,22 @@ import unittest import numpy as np -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op +from op_test import OpTest -class TestSigmoidOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestLookupTableOp(OpTest): def setUp(self): - self.type = 'lookup_table' - table = np.random.random((17, 31)).astype('float32') - ids = np.random.randint(0, 17, 4).astype('int32') + self.op_type = "lookup_table" + table = np.random.random((17, 31)).astype("float32") + ids = np.random.randint(0, 17, 4).astype("int32") self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids]} + def test_check_output(self): + self.check_output() -class TestSigmoidGradOp(GradientChecker): - def test_grad(self): - op = create_op('lookup_table') - table = np.random.random((17, 31)).astype('float32') - ids = np.random.randint(0, 17, 4).astype('int32') - inputs = {'W': table, 'Ids': ids} - # comapre gradients - self.compare_grad(op, inputs, set(['Ids'])) - # check gradients - self.check_grad(op, inputs, set('W'), 'Out') + def test_check_grad(self): + self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_mean_op.py b/python/paddle/v2/framework/tests/test_mean_op.py index f32b3160d651a290823223c46c45bb3b6950a505..7823abd8f813aad6462c98a9ace9a13dc286a157 100644 --- a/python/paddle/v2/framework/tests/test_mean_op.py +++ b/python/paddle/v2/framework/tests/test_mean_op.py @@ -1,24 +1,20 @@ import unittest -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op import numpy as np +from op_test import OpTest -class TestMeanOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestMeanOp(OpTest): def setUp(self): - self.type = "mean" - self.inputs = {'X': np.random.random((32, 784)).astype("float32")} - self.outputs = {'Out': np.mean(self.inputs['X'])} + self.op_type = "mean" + self.inputs = {'X': np.random.random((10, 10)).astype("float32")} + self.outputs = {'Out': np.mean(self.inputs["X"])} + def test_check_output(self): + self.check_output() -class MeanGradOpTest(GradientChecker): - def test_normal(self): - op = create_op("mean") - inputs = {"X": np.random.random((10, 10)).astype("float32")} - self.check_grad(op, inputs, set("X"), "Out") + def test_checkout_grad(self): + self.check_grad(['X'], 'Out') -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_minus_op.py b/python/paddle/v2/framework/tests/test_minus_op.py index 5abdd4a69bf3faa2f3341f338e195815389a7cef..dea797a1fea34265d0a32e097f413f421abf2521 100644 --- a/python/paddle/v2/framework/tests/test_minus_op.py +++ b/python/paddle/v2/framework/tests/test_minus_op.py @@ -1,30 +1,23 @@ import unittest import numpy as np -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +from op_test import OpTest -class MinusOpTest(unittest.TestCase): - __metaclass__ = OpTestMeta - +class MinusOpTest(OpTest): def setUp(self): - self.type = "minus" + self.op_type = "minus" self.inputs = { 'X': np.random.random((32, 84)).astype("float32"), 'Y': np.random.random((32, 84)).astype("float32") } self.outputs = {'Out': (self.inputs['X'] - self.inputs['Y'])} + def test_check_output(self): + self.check_output() -class MinusGradTest(GradientChecker): - def test_left(self): - op = create_op("minus") - inputs = { - "X": np.random.random((10, 10)).astype("float32"), - "Y": np.random.random((10, 10)).astype("float32") - } - self.check_grad(op, inputs, ["X", 'Y'], "Out") + def test_check_grad(self): + self.check_grad(['X', 'Y'], 'Out') -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_mul_op.py b/python/paddle/v2/framework/tests/test_mul_op.py index b58e4266d1588a4b6151f5f896537ded6ddd3896..b3d95a56b88e510734da54f36ff21ccd7e1baabb 100644 --- a/python/paddle/v2/framework/tests/test_mul_op.py +++ b/python/paddle/v2/framework/tests/test_mul_op.py @@ -1,55 +1,59 @@ import unittest import numpy as np -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +from op_test import OpTest -class TestMulOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestMulOp(OpTest): def setUp(self): - self.type = "mul" + self.op_type = "mul" self.inputs = { 'X': np.random.random((32, 84)).astype("float32"), 'Y': np.random.random((84, 100)).astype("float32") } self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5) + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + -class TestMulGradOp(GradientChecker): +class TestMulOp2(OpTest): def setUp(self): - self.op = create_op("mul") + self.op_type = "mul" self.inputs = { - 'X': np.random.random((32, 84)).astype("float32"), - 'Y': np.random.random((84, 100)).astype("float32") + 'X': np.random.random((15, 4, 12, 10)).astype("float32"), + 'Y': np.random.random((4, 30, 8, 2, 9)).astype("float32") + } + self.attrs = {'x_num_col_dims': 2, 'y_num_col_dims': 2} + self.outputs = { + 'Out': np.dot(self.inputs['X'].reshape(15 * 4, 12 * 10), + self.inputs['Y'].reshape(4 * 30, 8 * 2 * 9)) } - def test_cpu_gpu_compare(self): - self.compare_grad(self.op, self.inputs) + def test_check_output(self): + self.check_output() - def test_normal(self): - # mul op will enlarge the relative error - self.check_grad( - self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5) + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5) - def test_ignore_x(self): + def test_check_grad_ingore_x(self): self.check_grad( - self.op, - self.inputs, ["Y"], - "Out", - max_relative_error=0.5, - no_grad_set={"X"}) + ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X')) - def test_ignore_y(self): + def test_check_grad_ignore_y(self): self.check_grad( - self.op, - self.inputs, ["X"], - "Out", - max_relative_error=0.5, - no_grad_set={"Y"}) - + ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) -# TODO(dzh,qijun) : mulgrad test case need transpose feature of blas library -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index e4b7cd480cb36249bb64ba3cab9a4b220d812346..50cfb855f2b01d8fd32342855d46716da7e07856 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -35,5 +35,5 @@ Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]} self.assertEqual(expected, "\n" + str(net)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_reshape_op.py b/python/paddle/v2/framework/tests/test_reshape_op.py new file mode 100644 index 0000000000000000000000000000000000000000..16bb6bb2af67f7d32a2fafc1cb37412084ec0829 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_reshape_op.py @@ -0,0 +1,21 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestReshapeOp(OpTest): + def setUp(self): + self.op_type = "reshape" + self.inputs = {'X': np.random.random((10, 20)).astype("float32")} + self.attrs = {'shape': [10 * 20]} + self.outputs = {'Out': self.inputs['X'].reshape(self.attrs['shape'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_rowwise_add_op.py b/python/paddle/v2/framework/tests/test_rowwise_add_op.py index 2ddb85e2e7a98a08bd1d6e24e6f812f6021142e8..336645bd993ff743cbe20bb5cae5cd278db57ce7 100644 --- a/python/paddle/v2/framework/tests/test_rowwise_add_op.py +++ b/python/paddle/v2/framework/tests/test_rowwise_add_op.py @@ -1,38 +1,51 @@ import unittest import numpy as np -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op +from op_test import OpTest -class TestRowwiseAddOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestRowwiseAddOp(OpTest): def setUp(self): - self.type = "rowwise_add" + self.op_type = "rowwise_add" self.inputs = { - 'X': np.random.random((32, 84)).astype("float32"), - 'b': np.random.random(84).astype("float32") + 'X': np.random.uniform(0.1, 1, [5, 10]).astype("float32"), + 'b': np.random.uniform(0.1, 1, [10]).astype("float32") } self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])} + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'b'], 'Out') + + def test_check_grad_ingore_b(self): + self.check_grad(['X'], 'Out', no_grad_set=set('b')) + + def test_check_grad_ingore_x(self): + self.check_grad(['b'], 'Out', no_grad_set=set('X')) -class TestRowwiseAddGradOp(GradientChecker): + +class TestRowwiseAddOp2(OpTest): def setUp(self): - self.op = create_op("rowwise_add") + self.op_type = "rowwise_add" self.inputs = { - "X": np.random.uniform(0.1, 1, [5, 10]).astype("float32"), - "b": np.random.uniform(0.1, 1, [10]).astype("float32") + 'X': np.random.uniform(0.1, 1, [2, 3, 2, 5]).astype("float32"), + 'b': np.random.uniform(0.1, 1, [2, 5]).astype("float32") } + self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])} + + def test_check_output(self): + self.check_output() - def test_normal(self): - self.check_grad(self.op, self.inputs, ["X", "b"], "Out") + def test_check_grad_normal(self): + self.check_grad(['X', 'b'], 'Out') - def test_ignore_b(self): - self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"}) + def test_check_grad_ignore_b(self): + self.check_grad(['X'], 'Out', no_grad_set=set('b')) - def test_ignore_x(self): - self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"}) + def test_check_grad_ignore_x(self): + self.check_grad(['b'], 'Out', no_grad_set=set('X')) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py index 69b301c376ee7a4ebb2e2dadc645c7d10f823a08..05d76d428299c8176d1a6adf6da15a203fa7502a 100644 --- a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py +++ b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py @@ -1,43 +1,34 @@ import unittest -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op import numpy as np -from paddle.v2.framework.op import Operator +from op_test import OpTest -class IdentityTest(unittest.TestCase): - __metaclass__ = OpTestMeta - +class IdentityTest(OpTest): def setUp(self): - self.type = "identity" - self.inputs = {'X': np.random.random((32, 784)).astype("float32")} + self.op_type = "identity" + self.inputs = {'X': np.random.random((10, 10)).astype("float32")} self.outputs = {'Out': self.inputs['X']} + def test_check_output(self): + self.check_output() -class IdentityGradOpTest(GradientChecker): - def test_normal(self): - op = create_op("identity") - inputs = {"X": np.random.random((10, 10)).astype("float32")} - self.check_grad(op, inputs, set("X"), "Out") - + def test_check_grad(self): + self.check_grad(['X'], 'Out') -class ScaleTest(unittest.TestCase): - __metaclass__ = OpTestMeta +class ScaleTest(OpTest): def setUp(self): - self.type = "scale" - self.inputs = {'X': np.random.random((32, 784)).astype("float32")} + self.op_type = "scale" + self.inputs = {'X': np.random.random((10, 10)).astype("float32")} self.attrs = {'scale': -2.3} self.outputs = {'Out': self.inputs['X'] * self.attrs['scale']} + def test_check_output(self): + self.check_output() -class ScaleGradTest(GradientChecker): - def test_normal(self): - op = Operator("scale", X="X", Out="Out", scale=3.2) - self.check_grad(op, - {"X": np.random.random((10, 10)).astype("float32")}, - set("X"), "Out") + def test_check_grad(self): + self.check_grad(['X'], 'Out') -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py index c1f9444889372104e39ded78fc7207a59b80a293..33c73c52631a09ea0fefdeb9467991ae9c04321c 100644 --- a/python/paddle/v2/framework/tests/test_scatter_op.py +++ b/python/paddle/v2/framework/tests/test_scatter_op.py @@ -1,37 +1,24 @@ import unittest -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op -import numpy -import paddle.v2.framework.core as core -from paddle.v2.framework.op import Operator +import numpy as np +from op_test import OpTest -class TestScatterOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestScatterOp(OpTest): def setUp(self): - self.type = "scatter" - ref_np = numpy.ones((3, 3)).astype("float32") - index_np = numpy.array([1, 2]).astype("int32") - updates_np = numpy.random.random((2, 3)).astype("float32") - output_np = numpy.copy(ref_np) + self.op_type = "scatter" + ref_np = np.ones((3, 3)).astype("float32") + index_np = np.array([1, 2]).astype("int32") + updates_np = np.random.random((2, 3)).astype("float32") + output_np = np.copy(ref_np) output_np[index_np] += updates_np self.inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} self.outputs = {'Out': output_np} + def test_check_output(self): + self.check_output() -class TestScatterGradOp(GradientChecker): - def test_scatter_grad(self): - op = create_op("scatter") - # test data setup - ref_np = numpy.ones((3, 10)).astype("float32") - index_np = numpy.array([1, 2]).astype("int32") - updates_np = numpy.random.random((2, 10)).astype("float32") - output_np = numpy.copy(ref_np) - output_np[index_np] += updates_np - inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np} - self.check_grad( - op, inputs, set(["Updates", "Ref"]), "Out", in_place=True) + def test_check_grad(self): + self.check_grad(['Updates', 'Ref'], 'Out', in_place=True) if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py index e5f9ef865e84f1a78e28884ad7e2e758f9ca8054..557cf15ace63e336462c7dcdbbc10f30aeedc6f4 100644 --- a/python/paddle/v2/framework/tests/test_sgd_op.py +++ b/python/paddle/v2/framework/tests/test_sgd_op.py @@ -1,21 +1,22 @@ import unittest -import numpy -from op_test_util import OpTestMeta +import numpy as np +from op_test import OpTest -class TestSGD(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestSGD(OpTest): def setUp(self): - self.type = "sgd" - w = numpy.random.random((102, 105)).astype("float32") - g = numpy.random.random((102, 105)).astype("float32") + self.op_type = "sgd" + w = np.random.random((102, 105)).astype("float32") + g = np.random.random((102, 105)).astype("float32") lr = 0.1 self.inputs = {'param': w, 'grad': g} self.attrs = {'learning_rate': lr} self.outputs = {'param_out': w - lr * g} + def test_check_output(self): + self.check_output() + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py index 273c2e5ab1a84d12621fe9568c4cf22073b6aed4..2316e49eff7bb1cdb53acb3889a6ef05060b59f3 100644 --- a/python/paddle/v2/framework/tests/test_sigmoid_op.py +++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py @@ -1,27 +1,21 @@ import unittest import numpy as np -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op +from op_test import OpTest -class TestSigmoidOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestSigmoid(OpTest): def setUp(self): - self.type = "sigmoid" - self.inputs = {'X': np.random.random((15, 31)).astype("float32")} + self.op_type = "sigmoid" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32") + } self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))} + def test_check_output(self): + self.check_output() -class TestSigmoidGradOp(GradientChecker): - def test_grad(self): - op = create_op("sigmoid") - inputs = {"X": np.random.uniform(0.1, 1, [11, 17]).astype("float32")} - # compare gpu and cpu results for backward op. - # this test will be skiped if only compiling CPU version. - self.compare_grad(op, inputs) - # check gradients - self.check_grad(op, inputs, set("X"), "Y", max_relative_error=0.007) + def test_check_grad(self): + self.check_grad(["X"], "Y", max_relative_error=0.007) if __name__ == '__main__': diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index 0d590fa7065bdd2df0e3f2aea5464f0524d70670..1b948f252fa631e9886840b377de2996e110dc91 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,9 +1,6 @@ import unittest - import numpy as np - -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +from op_test import OpTest def stable_softmax(x): @@ -13,26 +10,21 @@ def stable_softmax(x): return exps / np.sum(exps) -class TestSoftmaxOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestSoftmaxOp(OpTest): def setUp(self): - self.type = "softmax" - self.inputs = {"X": np.random.random((10, 10)).astype("float32")} + self.op_type = "softmax" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [10, 10]).astype("float32") + } self.outputs = { - "Y": np.apply_along_axis(stable_softmax, 1, self.inputs["X"]) + 'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X']) } + def test_check_output(self): + self.check_output() -class TestSoftmaxGradOp(GradientChecker): - def setUp(self): - self.op = create_op("softmax") - self.inputs = { - "X": np.random.uniform(0.1, 1, [10, 10]).astype("float32") - } - - def test_softmax_grad(self): - self.check_grad(self.op, self.inputs, ["X"], "Y") + def test_check_grad(self): + self.check_grad(['X'], 'Y') if __name__ == "__main__": diff --git a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py index 2bcdf37df434c9a089d75438d876114156261a5c..dc6ebf5d30369231b4918a168bbdf25c7096c808 100644 --- a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py +++ b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py @@ -1,17 +1,14 @@ import unittest -from op_test_util import OpTestMeta -from gradient_checker import GradientChecker, create_op import numpy as np +from op_test import OpTest -class TestSquaredL2DistanceOp_f0(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestSquaredL2DistanceOp_f0(OpTest): def setUp(self): - self.type = 'squared_l2_distance' + self.op_type = "squared_l2_distance" self.inputs = { - 'X': np.random.uniform(0.1, 1., (32, 64)).astype('float32'), - 'Y': np.random.uniform(0.1, 1., (32, 64)).astype('float32') + 'X': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32"), + 'Y': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32") } sub_res = self.inputs['X'] - self.inputs['Y'] output = sub_res * sub_res @@ -20,15 +17,19 @@ class TestSquaredL2DistanceOp_f0(unittest.TestCase): 'Out': np.expand_dims(output.sum(1), 1) } + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X', 'Y'], 'Out') -class TestSquaredL2DistanceOp_f1(unittest.TestCase): - __metaclass__ = OpTestMeta +class TestSquaredL2DistanceOp_f1(OpTest): def setUp(self): - self.type = 'squared_l2_distance' + self.op_type = "squared_l2_distance" self.inputs = { - 'X': np.random.uniform(0.1, 1., (32, 64)).astype('float32'), - 'Y': np.random.uniform(0.1, 1., (1, 64)).astype('float32') + 'X': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32"), + 'Y': np.random.uniform(0.1, 0.6, (1, 3)).astype("float32") } sub_res = self.inputs['X'] - self.inputs['Y'] output = sub_res * sub_res @@ -37,53 +38,34 @@ class TestSquaredL2DistanceOp_f1(unittest.TestCase): 'Out': np.expand_dims(output.sum(1), 1) } + def test_check_output(self): + self.check_output() -class TestSquaredL2DistanceOp_f2(unittest.TestCase): - __metaclass__ = OpTestMeta + def test_check_grad(self): + self.check_grad(['X', 'Y'], 'Out') + +class TestSquaredL2DistanceOp_f2(OpTest): def setUp(self): - self.type = 'squared_l2_distance' + self.op_type = "squared_l2_distance" self.inputs = { - 'X': np.random.uniform(0.1, 1., (32, 64, 128)).astype('float32'), - 'Y': np.random.uniform(0.1, 1., (1, 64, 128)).astype('float32') + 'X': np.random.uniform(0.1, 0.6, (2, 3, 4)).astype("float32"), + 'Y': np.random.uniform(0.1, 0.6, (1, 3, 4)).astype("float32") } sub_res = self.inputs['X'] - self.inputs['Y'] - sub_res = sub_res.reshape((32, 64 * 128)) + sub_res = sub_res.reshape((2, 3 * 4)) output = sub_res * sub_res self.outputs = { 'sub_result': sub_res, 'Out': np.expand_dims(output.sum(1), 1) } + def test_check_output(self): + self.check_output() -class TestSquaredL2DistanceGradOp(GradientChecker): - def test_squared_l2_distance_b0(self): - op = create_op("squared_l2_distance") - inputs = { - 'X': np.random.uniform(0.1, .6, (2, 3)).astype('float32'), - 'Y': np.random.uniform(0.1, .6, (2, 3)).astype('float32') - } - self.compare_grad(op, inputs) - self.check_grad(op, inputs, set(["X", "Y"]), "Out") - - def test_squared_l2_distance_b1(self): - op = create_op("squared_l2_distance") - inputs = { - 'X': np.random.uniform(0.1, .6, (2, 3)).astype('float32'), - 'Y': np.random.uniform(0.1, .6, (1, 3)).astype('float32') - } - self.compare_grad(op, inputs) - self.check_grad(op, inputs, set(["X", "Y"]), "Out") - - def test_squared_l2_distance_b2(self): - op = create_op("squared_l2_distance") - inputs = { - 'X': np.random.uniform(0.1, .6, (2, 3, 4)).astype('float32'), - 'Y': np.random.uniform(0.1, .6, (1, 3, 4)).astype('float32') - } - self.compare_grad(op, inputs) - self.check_grad(op, inputs, set(["X", "Y"]), "Out") + def test_check_grad(self): + self.check_grad(['X', 'Y'], 'Out') -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_sum_op.py b/python/paddle/v2/framework/tests/test_sum_op.py new file mode 100644 index 0000000000000000000000000000000000000000..60254291e2ab9215e2bc37c12d5e2e1ca6d33d5d --- /dev/null +++ b/python/paddle/v2/framework/tests/test_sum_op.py @@ -0,0 +1,24 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestSumOp(OpTest): + def setUp(self): + self.op_type = "sum" + x0 = np.random.random((3, 4)).astype('float32') + x1 = np.random.random((3, 4)).astype('float32') + x2 = np.random.random((3, 4)).astype('float32') + self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]} + y = x0 + x1 + x2 + self.outputs = {'Out': y} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['x0'], 'Out') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py index 1af39818a305215b45219b8c5f0a10630fd64279..f26ed4964c521be1cd839b39d7244f96c653cb1a 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -3,7 +3,7 @@ import unittest import numpy -class TestScope(unittest.TestCase): +class TestTensor(unittest.TestCase): def test_int_tensor(self): scope = core.Scope() var = scope.new_var("test_tensor") @@ -20,8 +20,8 @@ class TestScope(unittest.TestCase): tensor.set(tensor_array, place) tensor_array_2 = numpy.array(tensor) - self.assertEqual(1.0, tensor_array_2[3, 9]) - self.assertEqual(2.0, tensor_array_2[19, 11]) + self.assertEqual(1, tensor_array_2[3, 9]) + self.assertEqual(2, tensor_array_2[19, 11]) def test_float_tensor(self): scope = core.Scope() @@ -43,6 +43,84 @@ class TestScope(unittest.TestCase): self.assertAlmostEqual(1.0, tensor_array_2[3, 9]) self.assertAlmostEqual(2.0, tensor_array_2[19, 11]) + def test_int_lod_tensor(self): + places = [core.CPUPlace(), core.GPUPlace(0)] + for place in places: + scope = core.Scope() + var = scope.new_var("test_tensor") + var_lod = scope.new_var("test_lod_tensor") + + tensor = var.get_tensor() + lod_tensor = var_lod.get_lod_tensor() + + tensor.set_dims([4, 4, 6]) + tensor.alloc_int(place) + array = numpy.array(tensor) + array[0, 0, 0] = 3 + array[3, 3, 5] = 10 + tensor.set(array, place) + + lod_tensor.set_tensor(tensor) + lod_tensor.set_lod([[0, 2, 4]]) + + lod_v = numpy.array(lod_tensor.tensor()) + self.assertTrue(numpy.alltrue(array == lod_v)) + + lod = lod_tensor.lod() + self.assertEqual(0, lod[0][0]) + self.assertEqual(2, lod[0][1]) + self.assertEqual(4, lod[0][2]) + + def test_float_lod_tensor(self): + places = [core.CPUPlace(), core.GPUPlace(0)] + for place in places: + scope = core.Scope() + var = scope.new_var("test_tensor") + var_lod = scope.new_var("test_lod_tensor") + + tensor = var.get_tensor() + lod_tensor = var_lod.get_lod_tensor() + + tensor.set_dims([5, 2, 3, 4]) + tensor.alloc_float(place) + + tensor_array = numpy.array(tensor) + self.assertEqual((5, 2, 3, 4), tensor_array.shape) + tensor_array[0, 0, 0, 0] = 1.0 + tensor_array[0, 0, 0, 1] = 2.0 + tensor.set(tensor_array, place) + + lod_tensor.set_tensor(tensor) + + lod_v = numpy.array(lod_tensor.tensor()) + self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) + self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) + self.assertEqual(len(lod_tensor.lod()), 0) + + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor.set_lod(lod_py) + lod = lod_tensor.lod() + self.assertListEqual(lod_py, lod) + + def test_lod_tensor_init(self): + scope = core.Scope() + var = scope.new_var("test_tensor") + place = core.CPUPlace() + tensor = var.get_tensor() + tensor.set_dims([5, 2, 3, 4]) + tensor.alloc_float(place) + tensor_array = numpy.array(tensor) + tensor_array[0, 0, 0, 0] = 1.0 + tensor_array[0, 0, 0, 1] = 2.0 + tensor.set(tensor_array, place) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + + lod_tensor = core.LoDTensor(lod_py, tensor) + lod_v = numpy.array(lod_tensor.tensor()) + self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) + self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) + self.assertListEqual(lod_py, lod_tensor.lod()) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_top_k_op.py b/python/paddle/v2/framework/tests/test_top_k_op.py index e841d96d26bba13b2780c41ea7a209fd470cad3b..cab799256d791889c295aa7f9048080f5caaf2dc 100644 --- a/python/paddle/v2/framework/tests/test_top_k_op.py +++ b/python/paddle/v2/framework/tests/test_top_k_op.py @@ -1,14 +1,11 @@ import unittest import numpy as np -from gradient_checker import GradientChecker, create_op -from op_test_util import OpTestMeta +from op_test import OpTest -class TestTopkOp(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestTopkOp(OpTest): def setUp(self): - self.type = "top_k" + self.op_type = "top_k" k = 1 input = np.random.random((32, 84)).astype("float32") output = np.ndarray((32, k)) @@ -25,11 +22,9 @@ class TestTopkOp(unittest.TestCase): self.outputs = {'Out': output, 'Indices': indices} -class TestTopkOp3d(unittest.TestCase): - __metaclass__ = OpTestMeta - +class TestTopkOp3d(OpTest): def setUp(self): - self.type = "top_k" + self.op_type = "top_k" k = 1 input = np.random.random((32, 2, 84)).astype("float32") input_flat_2d = input.reshape(64, 84) @@ -48,5 +43,5 @@ class TestTopkOp3d(unittest.TestCase): self.outputs = {'Out': output, 'Indices': indices} -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py index c3d2bb44da3977c0899b2609a8efe15b7e1789f2..76a5e36e56ab08230bdc2597d209fcf5d1d2acb0 100644 --- a/python/paddle/v2/framework/tests/test_uniform_random_op.py +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -14,11 +14,11 @@ class UniformRandomTest(unittest.TestCase): def uniform_random_test(self, place): scope = core.Scope() - scope.new_var("X").get_tensor() + scope.new_var('X').get_tensor() op = Operator( "uniform_random", - Out="X", + Out='X', dims=[1000, 784], min=-5.0, max=10.0, @@ -27,9 +27,9 @@ class UniformRandomTest(unittest.TestCase): op.infer_shape(scope) ctx = core.DeviceContext.create(place) op.run(scope, ctx) - tensor = numpy.array(scope.find_var("X").get_tensor()) + tensor = numpy.array(scope.find_var('X').get_tensor()) self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 8acea6155c588f2e8e5ad009cd8f0a0c09afb92b..e80456d9bbeb3c34ac9eab873a84dbf8f06e34df 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -2,6 +2,7 @@ import numpy import collections import topology import minibatch +import cPickle __all__ = ['infer', 'Inference'] @@ -25,11 +26,23 @@ class Inference(object): :type parameters: paddle.v2.parameters.Parameters """ - def __init__(self, output_layer, parameters): + def __init__(self, parameters, output_layer=None, fileobj=None): import py_paddle.swig_paddle as api - topo = topology.Topology(output_layer) - gm = api.GradientMachine.createFromConfigProto( - topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) + + if output_layer is not None: + topo = topology.Topology(output_layer) + gm = api.GradientMachine.createFromConfigProto( + topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) + self.__data_types__ = topo.data_type() + elif fileobj is not None: + tmp = cPickle.load(fileobj) + gm = api.GradientMachine.createByConfigProtoStr( + tmp['protobin'], api.CREATE_MODE_TESTING, + [api.PARAMETER_VALUE]) + self.__data_types__ = tmp['data_type'] + else: + raise ValueError("Either output_layer or fileobj must be set") + for param in gm.getParameters(): val = param.getBuf(api.PARAMETER_VALUE) name = param.getName() @@ -43,7 +56,6 @@ class Inference(object): # called here, but it's better to call this function in one place. param.setValueUpdated() self.__gradient_machine__ = gm - self.__data_types__ = topo.data_type() def iter_infer(self, input, feeding=None): from data_feeder import DataFeeder diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py index a20e878d0817d0a75e9c47a44f8765deca99225c..2db66be2505dde38a501edf45984e1f36beb351d 100644 --- a/python/paddle/v2/topology.py +++ b/python/paddle/v2/topology.py @@ -18,6 +18,7 @@ from paddle.proto.ModelConfig_pb2 import ModelConfig import paddle.trainer_config_helpers as conf_helps import layer as v2_layer import config_base +import cPickle __all__ = ['Topology'] @@ -100,6 +101,14 @@ class Topology(object): return layer return None + def serialize_for_inference(self, stream): + protobin = self.proto().SerializeToString() + data_type = self.data_type() + cPickle.dump({ + 'protobin': protobin, + 'data_type': data_type + }, stream, cPickle.HIGHEST_PROTOCOL) + def __check_layer_type__(layer): if not isinstance(layer, config_base.Layer): diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index 0654a301049dcb347b79879076a869a0c14a07ae..ca95ef13bd440ac0ba3d46f6e4680d4d7aa94c42 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -174,13 +174,18 @@ class SGD(object): pass_id=pass_id, batch_id=batch_id, cost=cost, - evaluator=batch_evaluator)) + evaluator=batch_evaluator, + gm=self.__gradient_machine__)) self.__parameter_updater__.finishBatch(cost) batch_evaluator.finish() self.__parameter_updater__.finishPass() pass_evaluator.finish() - event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) + event_handler( + v2_event.EndPass( + pass_id, + evaluator=pass_evaluator, + gm=self.__gradient_machine__)) self.__gradient_machine__.finish() def test(self, reader, feeding=None):