diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst
index 36e5d420c986fc8d88eefee4aa221dba0a0480f2..731a63f945c29ba78538b3d71289b234e569354d 100644
--- a/doc/howto/dev/write_docs_cn.rst
+++ b/doc/howto/dev/write_docs_cn.rst
@@ -5,15 +5,13 @@
PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。
-如何构建PaddlePaddle的文档
-==========================
+如何构建文档
+============
-PaddlePaddle的文档构建有直接构建和基于Docker构建两种方式,我们提供了一个构建脚本build_docs.sh来进行构建。
-PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使用基于Docker来构建PaddlePaddle的文档。
+PaddlePaddle的文档构建有两种方式。
-
-使用Docker构建PaddlePaddle的文档
---------------------------------
+使用Docker构建
+--------------
使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
@@ -21,58 +19,46 @@ PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使
cd TO_YOUR_PADDLE_CLONE_PATH
cd paddle/scripts/tools/build_docs
- bash build_docs.sh with_docker
-
-编译完成后,会在当前目录生成两个子目录\:
-
-* doc 英文文档目录
-* doc_cn 中文文档目录
+ sh build_docs.sh
+编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。
打开浏览器访问对应目录下的index.html即可访问本地文档。
-
-
-直接构建PaddlePaddle的文档
---------------------------
-
-因为PaddlePaddle的v2 api文档生成过程依赖于py_paddle Python包,用户需要首先确认py_paddle包已经安装。
-
-.. code-block:: bash
-
- python -c "import py_paddle"
-
-如果提示错误,那么用户需要在本地编译安装PaddlePaddle,请参考 `源码编译文档 `_ 。
-注意,用户在首次编译安装PaddlePaddle时,请将WITH_DOC选项关闭。在编译安装正确之后,请再次确认py_paddle包已经安装,即可进行下一步操作。
+直接构建
+--------
如果提示正确,可以执行以下命令编译生成文档,即
.. code-block:: bash
cd TO_YOUR_PADDLE_CLONE_PATH
- cd paddle/scripts/tools/build_docs
- bash build_docs.sh local
-
-编译完成之后,会在当前目录生成两个子目录\:
-
-* doc 英文文档目录
-* doc_cn 中文文档目录
+ mkdir -p build
+ cd build
+ cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON
+ make gen_proto_py
+ make paddle_docs paddle_docs_cn
+编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。
打开浏览器访问对应目录下的index.html即可访问本地文档。
-如何书写PaddlePaddle的文档
-==========================
+如何书写文档
+============
PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程进行书写。
-如何更新www.paddlepaddle.org文档
-================================
+如何更新文档主题
+================
+
+PaddlePaddle文档主题在 ``TO_YOUR_PADDLE_CLONE_PATH/doc_theme`` 文件夹下,包含所有和前端网页设计相关的文件。
-开发者给PaddlePaddle代码增加的注释以PR的形式提交到github中,提交方式可参见 `贡献文档 `_ 。
+如何更新doc.paddlepaddle.org
+============================
+
+更新的文档以PR的形式提交到github中,提交方式参见 `贡献文档 `_ 。
目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 `_ 和
`英文文档 `_ 。
-
.. _cmake: https://cmake.org/
.. _sphinx: http://www.sphinx-doc.org/en/1.4.8/
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index c0838d9b759110fd706577386d2c81bda6876223..3371962c635c3731f00a6af2a6e287ece33397cd 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -9,6 +9,7 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)
+nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc)
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 154068fef69bc96edbd85b731fe8091b3b1ff823..568f4e89819c8345d8908634f6fa56f09483a763 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -18,8 +18,10 @@
#ifndef PADDLE_ONLY_CPU
#include
#include
+#include
#endif
+#include
#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/enforce.h"
@@ -32,7 +34,8 @@ template
using Vector = std::vector;
#else
template
-using Vector = thrust::host_vector;
+using Vector = thrust::host_vector<
+ T, thrust::system::cuda::experimental::pinned_allocator>;
#endif
using LoD = std::vector>;
diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu
new file mode 100644
index 0000000000000000000000000000000000000000..1079a36a2e7b24f6f8a5bcbb296355567305a765
--- /dev/null
+++ b/paddle/framework/lod_tensor_test.cu
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include
+#include
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/platform/assert.h"
+
+#include
+
+__global__ void test(size_t* a, int size) {
+  // Doubles a[0..size): every thread strides by the total launch width.
+  const int first = blockIdx.x * blockDim.x + threadIdx.x;
+  const int step = blockDim.x * gridDim.x;
+  for (int idx = first; idx < size; idx += step) a[idx] *= 2;
+}
+
+TEST(LoDTensor, LoDInGPU) {  // runs the `test` kernel over one LoD level
+ paddle::framework::Tensor tensor;
+ paddle::framework::LoDTensor lod_tensor;
+ paddle::platform::GPUPlace place(0);
+
+ paddle::framework::LoD src_lod;
+ src_lod.push_back(std::vector{0, 2, 4, 6, 8, 10, 12, 14});
+
+ tensor.Resize({14, 16});
+ tensor.mutable_data(place);
+
+ lod_tensor.set_lod(src_lod);
+ lod_tensor.set_tensor(&tensor);
+ CHECK_EQ(lod_tensor.lod_element(0, 2), 4);  // matches src_lod[0][2]
+ CHECK_EQ(lod_tensor.lod_element(0, 4), 8);  // matches src_lod[0][4]
+
+ auto lod = lod_tensor.lod();  // a separate object, not a reference
+ // NOTE(review): assumes lod[0].data() is device-accessible - confirm.
+ test<<<1, 8>>>(lod[0].data(), lod[0].size());  // doubles all 8 offsets
+ cudaDeviceSynchronize();  // finish the kernel before the host reads
+
+ for (size_t i = 0; i < src_lod[0].size(); ++i) {
+ CHECK_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2);  // expect 2x source
+ }
+}
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 790cfc4746b1d34da413fa3c29a266f962c6dde6..e1e122091f7759b1a68f1f982bc2a35e8241f9f0 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -123,6 +123,15 @@ OperatorBase::OperatorBase(const std::string& type,
CheckAllInputOutputSet();
}
+std::vector OperatorBase::InputVars() const {  // all input variable names
+  std::vector ret_val;
+  for (auto& in : inputs_) {  // walk the input slots, not outputs_
+    ret_val.reserve(ret_val.size() + in.second.size());
+    ret_val.insert(ret_val.end(), in.second.begin(), in.second.end());
+  }
+  return ret_val;
+}
+
std::vector OperatorBase::OutputVars(bool has_intermediate) const {
std::vector ret_val;
if (has_intermediate) {
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index 9a98d4d3be0d1cb875d614b263f1e4365ede4113..4600b06009bcef7d0774d25b816aac4733f30795 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -94,11 +94,14 @@ class OperatorBase {
const VariableNameMap& Inputs() const { return inputs_; }
const VariableNameMap& Outputs() const { return outputs_; }
+
//! Get a input with argument's name described in `op_proto`
std::string Input(const std::string& name) const;
//! Get a input which has multiple variables.
const std::vector& Inputs(const std::string& name) const;
+ std::vector InputVars() const;
+
//! Get a output with argument's name described in `op_proto`
std::string Output(const std::string& name) const;
//! Get an output which has multiple variables.
@@ -311,9 +314,9 @@ class InferShapeContext {
}
template
- std::vector MultiOutput(const std::string& name) const {
+ std::vector MultiOutput(const std::string& name) const {
auto names = op_.Outputs(name);
- std::vector res;
+ std::vector res;
res.reserve(names.size());
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h
index ce938b21437195fed8c1adad4329fd139f3f96ab..4b5a2ae523f2f7fde5445f0534cd99969ad9d59e 100644
--- a/paddle/framework/tensor.h
+++ b/paddle/framework/tensor.h
@@ -81,6 +81,9 @@ class Tensor {
/*! Return the dimensions of the memory block. */
inline const DDim& dims() const;
+ /*! Return the numel of the memory block. */
+ inline int64_t numel() const;
+
/*! Resize the dimensions of the memory block. */
inline Tensor& Resize(const DDim& dims);
@@ -162,6 +165,12 @@ class Tensor {
/*! points to dimensions of memory block. */
DDim dims_;
+  /**
+   * A cache of the number of elements in a tensor, refreshed by Resize().
+   * Initialized to 0 so numel() is well defined before the first Resize().
+   */
+  int64_t numel_{0};
+
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h
index 637f04ae0037bd402d855b8bcde8087bfe8328d1..642b53efc7095d25712ca324638f5fe9b8316c0c 100644
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@@ -24,7 +24,7 @@ inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE_GE(
- holder_->size(), product(dims_) * sizeof(T) + offset_,
+ holder_->size(), numel() * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.\n"
"or maybe the required data-type mismatches the data already stored.");
@@ -54,11 +54,11 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
template
inline T* Tensor::mutable_data(platform::Place place) {
static_assert(std::is_pod::value, "T must be POD");
- PADDLE_ENFORCE_GT(product(dims_), 0,
+ PADDLE_ENFORCE_GT(numel(), 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
/* some versions of boost::variant don't have operator!= */
- int64_t size = product(dims_) * sizeof(T);
+ int64_t size = numel() * sizeof(T);
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
@@ -97,7 +97,7 @@ inline void Tensor::CopyFrom(const Tensor& src,
auto dst_ptr = static_cast(mutable_data(dst_place));
- auto size = product(src.dims_) * sizeof(T);
+ auto size = src.numel() * sizeof(T);
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get(dst_place), dst_ptr,
@@ -131,7 +131,7 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
PADDLE_ENFORCE_LT(begin_idx, end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1.");
- size_t base = product(dims_) / dims_[0];
+ size_t base = numel() / dims_[0];
Tensor dst;
dst.holder_ = holder_;
DDim dst_dims = dims_;
@@ -143,11 +143,14 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
inline Tensor& Tensor::Resize(const DDim& dims) {
dims_ = dims;
+ numel_ = product(dims_);
return *this;
}
inline const DDim& Tensor::dims() const { return dims_; }
+inline int64_t Tensor::numel() const { return numel_; }
+
template
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
Tensor res;
diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/gserver/layers/DeConv3DLayer.cpp
index 1b59ed60c57fe3bbfa814befa8a63408a2621715..3eea638649e8ebfdd7efa18615977a9e1344c695 100644
--- a/paddle/gserver/layers/DeConv3DLayer.cpp
+++ b/paddle/gserver/layers/DeConv3DLayer.cpp
@@ -53,27 +53,27 @@ bool DeConv3DLayer::init(const LayerMap &layerMap,
size_t DeConv3DLayer::getSize() {
CHECK_NE(inputLayers_.size(), 0UL);
- outputH_.clear();
- outputW_.clear();
- outputD_.clear();
+ imgSizeW_.clear();
+ imgSizeH_.clear();
+ imgSizeD_.clear();
N_.clear();
NOut_.clear();
size_t layerSize = 0;
for (size_t i = 0; i < inputLayers_.size(); ++i) {
- outputW_.push_back(
- imageSize(imgSizeW_[i], filterSize_[i], padding_[i], stride_[i], true));
- outputH_.push_back(imageSize(
- imgSizeH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true));
- outputD_.push_back(imageSize(
- imgSizeD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true));
- NOut_.push_back(outputD_[i] * outputH_[i] * outputW_[i]);
- N_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]);
+ imgSizeW_.push_back(
+ imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i], true));
+ imgSizeH_.push_back(imageSize(
+ outputH_[i], filterSizeY_[i], paddingY_[i], strideY_[i], true));
+ imgSizeD_.push_back(imageSize(
+ outputD_[i], filterSizeZ_[i], paddingZ_[i], strideZ_[i], true));
+ NOut_.push_back(imgSizeD_[i] * imgSizeH_[i] * imgSizeW_[i]);
+ N_.push_back(outputD_[i] * outputH_[i] * outputW_[i]);
CHECK(layerSize == 0 || N_[i] * size_t(numFilters_) == layerSize);
layerSize += NOut_[i] * numFilters_;
}
- getOutput().setFrameHeight(outputH_[0]);
- getOutput().setFrameWidth(outputW_[0]);
- getOutput().setFrameDepth(outputD_[0]);
+ getOutput().setFrameHeight(imgSizeH_[0]);
+ getOutput().setFrameWidth(imgSizeW_[0]);
+ getOutput().setFrameDepth(imgSizeD_[0]);
return layerSize;
}
@@ -103,9 +103,9 @@ void DeConv3DLayer::forward(PassType passType) {
}
colBuf_->col2Vol(outMat->getData() + n * outMat->getStride(),
numFilters_,
- outputD_[i],
- outputH_[i],
- outputW_[i],
+ imgSizeD_[i],
+ imgSizeH_[i],
+ imgSizeW_[i],
filterSizeZ_[i],
filterSizeY_[i],
filterSize_[i],
@@ -144,9 +144,9 @@ void DeConv3DLayer::backward(const UpdateCallback &callback) {
colBuf_->vol2Col(
getOutputGrad()->getData() + n * getOutputGrad()->getStride(),
numFilters_,
- outputD_[i],
- outputH_[i],
- outputW_[i],
+ imgSizeD_[i],
+ imgSizeH_[i],
+ imgSizeW_[i],
filterSizeZ_[i],
filterSizeY_[i],
filterSize_[i],
diff --git a/paddle/gserver/layers/DetectionOutputLayer.cpp b/paddle/gserver/layers/DetectionOutputLayer.cpp
index 8ab838e191314ab25469631626c0b0564d7fffda..0cf0a92bf4bd8f9b8eba2016b2377d9dfb18c70a 100644
--- a/paddle/gserver/layers/DetectionOutputLayer.cpp
+++ b/paddle/gserver/layers/DetectionOutputLayer.cpp
@@ -139,7 +139,13 @@ void DetectionOutputLayer::forward(PassType passType) {
allDecodedBBoxes,
&allIndices);
- resetOutput(numKept, 7);
+ if (numKept > 0) {
+ resetOutput(numKept, 7);
+ } else {  // nothing was kept: return before getDetectionOutput
+ MatrixPtr outV = getOutputValue();
+ outV = NULL;  // NOTE(review): resets only this local; output looks untouched
+ return;
+ }
MatrixPtr outV = getOutputValue();
getDetectionOutput(confBuffer_->getData(),
numKept,
diff --git a/paddle/gserver/layers/DetectionUtil.cpp b/paddle/gserver/layers/DetectionUtil.cpp
index 3e61adc66e60c54250e4f323452aa13045310879..d83674f45a70212a8adc94a31ff58eb0e01baa00 100644
--- a/paddle/gserver/layers/DetectionUtil.cpp
+++ b/paddle/gserver/layers/DetectionUtil.cpp
@@ -469,7 +469,7 @@ size_t getDetectionIndices(
const size_t numClasses,
const size_t backgroundId,
const size_t batchSize,
- const size_t confThreshold,
+ const real confThreshold,
const size_t nmsTopK,
const real nmsThreshold,
const size_t keepTopK,
diff --git a/paddle/gserver/layers/DetectionUtil.h b/paddle/gserver/layers/DetectionUtil.h
index fe4f9f075e4cf011c97f68f49598a828d62327b3..641ed873b4c8645b6455e5ef5e63593e3005b770 100644
--- a/paddle/gserver/layers/DetectionUtil.h
+++ b/paddle/gserver/layers/DetectionUtil.h
@@ -275,7 +275,7 @@ size_t getDetectionIndices(
const size_t numClasses,
const size_t backgroundId,
const size_t batchSize,
- const size_t confThreshold,
+ const real confThreshold,
const size_t nmsTopK,
const real nmsThreshold,
const size_t keepTopK,
diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h
index edef36194aabdb9c122ec3423deb036169a34d7c..4002a3d0747a86ab7b495ffe52247521831b71b8 100644
--- a/paddle/gserver/layers/Layer.h
+++ b/paddle/gserver/layers/Layer.h
@@ -49,6 +49,12 @@ struct LayerState {
};
typedef std::shared_ptr LayerStatePtr;
+/// Non-GPU Paddle device IDs: MKLDNN is -2, CPU is -1 (GPUs are 0, 1, 2 ...)
+enum PADDLE_DEVICE_ID {
+ MKLDNN_DEVICE = -2,
+ CPU_DEVICE = -1,
+};
+
/**
* @brief Base class for layer.
* Define necessary variables and functions for every layer.
@@ -59,11 +65,6 @@ protected:
LayerConfig config_;
/// whether to use GPU
bool useGpu_;
- /// Paddle device ID, MKLDNN is -2, CPU is -1
- enum PADDLE_DEVICE_ID {
- MKLDNN_DEVICE = -2,
- CPU_DEVICE = -1,
- };
/// Device Id. MKLDNN is -2, CPU is -1, and GPU is 0, 1, 2 ...
int deviceId_;
/// Input layers
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp
index 8318c8c519a4cec1610eadd28320ee5ce0b4147d..f70343251ad4fbb99f9614618f6d1bff1174f15e 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp
@@ -14,7 +14,6 @@ limitations under the License. */
#include "MKLDNNFcLayer.h"
#include "paddle/utils/Logging.h"
-#include "paddle/utils/Stat.h"
using namespace mkldnn; // NOLINT
typedef memory::format format;
@@ -40,6 +39,8 @@ bool MKLDNNFcLayer::init(const LayerMap& layerMap,
oc_ = getSize();
oh_ = 1;
ow_ = 1;
+ ih_ = 1;
+ iw_ = 1;
// input size can not change in FC
iLayerSize_ = inputLayers_[0]->getSize();
@@ -77,111 +78,86 @@ void MKLDNNFcLayer::convertWeightsToPaddle() {
wgtVal_->reorderDataTo(wgtVal_, dstFmt, targetDim);
}
-void MKLDNNFcLayer::convertOutputToOtherDevice() {
- copyOutputInfoToOtherDevice();
- // find other cpu device and reorder output to cpu device
- int cnt = 0;
- for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
- if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
- // fc cpu output value do not need convert
- // just share point
- outputOtherDevice_[i].value = output_.value;
- ++cnt;
- }
- }
-
- if (cnt > 1) {
- LOG(WARNING) << "should not have more than one CPU devie";
- }
-}
+void MKLDNNFcLayer::reshape(
+ int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) {
+ reshapeInput(bs, ih, iw);
-void MKLDNNFcLayer::reshape() {
- const Argument& input = getInput(0, getPrev(0)->getDeviceId());
- int batchSize = input.getBatchSize();
- if (bs_ == batchSize) {
- return;
- }
- bs_ = batchSize;
- ih_ = input.getFrameHeight();
- iw_ = input.getFrameWidth();
- if (ih_ == 0) {
- ih_ = 1;
- }
- if (iw_ == 0) {
- iw_ = 1;
- }
CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize());
- ic_ = iLayerSize_ / (ih_ * iw_);
- CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible";
- CHECK_EQ(size_t(oc_), getSize());
- printSizeInfo();
+ ic = iLayerSize_ / (ih * iw);
+ CHECK_EQ(size_t(ic * ih * iw), iLayerSize_) << "not divisible";
+ CHECK_EQ(size_t(oc), getSize());
- // reset output
- output_.setFrameHeight(oh_);
- output_.setFrameWidth(ow_);
- resetOutput(bs_, oc_);
+ reshapeOutput(oh, ow);
+ resizeOutput(bs, oc);
- // reset mkldnn forward
- resetFwd();
- needResetBwd_ = true;
-
- convertWeightsFromPaddle();
+ printSizeInfo();
}
-void MKLDNNFcLayer::resetFwd() {
+void MKLDNNFcLayer::resetFwd(std::vector& pipeline,
+ MKLDNNMatrixPtr& in,
+ MKLDNNMatrixPtr& wgt,
+ MKLDNNMatrixPtr& bias,
+ MKLDNNMatrixPtr& out) {
+ pipeline.clear();
bool hasBias = biases_ && biases_->getW();
- const MatrixPtr& wgt = weight_->getW();
- const MatrixPtr& bias = hasBias ? biases_->getW() : nullptr;
- const MatrixPtr& out = output_.value;
+ const MatrixPtr& wgtVal = weight_->getW();
+ const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr;
+ const MatrixPtr& outVal = output_.value;
if (inputIsOnlyMKLDNN()) {
- const MatrixPtr& in = getInputValue(0);
- inVal_ = std::dynamic_pointer_cast(in);
- CHECK(inVal_) << "Input should be MKLDNNMatrix";
+ const MatrixPtr& inVal = getInputValue(0);
+ in = std::dynamic_pointer_cast(inVal);
+ CHECK(in) << "Input should be MKLDNNMatrix";
} else {
CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet";
- const MatrixPtr& in = getInputValue(0, CPU_DEVICE);
- inVal_ = MKLDNNMatrix::create(
- in, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_);
- }
- inVal_->downSpatial();
- wgtVal_ = MKLDNNMatrix::create(
- wgt, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_);
- wgtVal_->downSpatial();
- biasVal_ =
- hasBias ? MKLDNNMatrix::create(bias, {oc_}, format::x, engine_) : nullptr;
- outVal_ = MKLDNNMatrix::create(out, {bs_, oc_}, format::nc, engine_);
+ const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE);
+ in = MKLDNNMatrix::create(
+ inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_);
+ }
+ in->downSpatial();
+ wgt = MKLDNNMatrix::create(
+ wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_);
+ wgt->downSpatial();
+ bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_)
+ : nullptr;
+ out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_);
// change original output value to mkldnn output value
- output_.value = std::dynamic_pointer_cast(outVal_);
+ output_.value = std::dynamic_pointer_cast(out);
if (!outputIsOnlyMKLDNN()) {
- convertOutputToOtherDevice();
+ // the fc output for the CPU device needs no conversion;
+ // just share the data pointer with it
+ getOutput(CPU_DEVICE).value->setData(output_.value->getData());
}
// create forward handle
prop_kind pk = prop_kind::forward;
fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk,
- inVal_->getMemoryDesc(),
- wgtVal_->getMemoryDesc(),
- biasVal_->getMemoryDesc(),
- outVal_->getMemoryDesc())
+ in->getMemoryDesc(),
+ wgt->getMemoryDesc(),
+ bias->getMemoryDesc(),
+ out->getMemoryDesc())
: fc_fwd::desc(pk,
- inVal_->getMemoryDesc(),
- wgtVal_->getMemoryDesc(),
- outVal_->getMemoryDesc());
+ in->getMemoryDesc(),
+ wgt->getMemoryDesc(),
+ out->getMemoryDesc());
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
if (hasBias) {
- fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_));
+ fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out));
} else {
- fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_));
+ fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out));
}
printValueFormatFlow();
- pipelineFwd_.clear();
- pipelineFwd_.push_back(*fwd_);
+ pipeline.push_back(*fwd_);
}
-void MKLDNNFcLayer::resetBwd() {
+void MKLDNNFcLayer::resetBwd(std::vector& pipeline,
+ MKLDNNMatrixPtr& in,
+ MKLDNNMatrixPtr& wgt,
+ MKLDNNMatrixPtr& bias,
+ MKLDNNMatrixPtr& out) {
+ pipeline.clear();
if (!needResetBwd_) {
return;
}
@@ -190,8 +166,8 @@ void MKLDNNFcLayer::resetBwd() {
/// backward weight
CHECK(inVal_) << "Should have input value";
- const MatrixPtr& wgt = weight_->getWGrad();
- const MatrixPtr& bias = hasBias ? biases_->getWGrad() : nullptr;
+ const MatrixPtr& wgtGrad = weight_->getWGrad();
+ const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr;
// TODO(TJ): merge outgrad
int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
@@ -202,101 +178,66 @@ void MKLDNNFcLayer::resetBwd() {
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
- const MatrixPtr& out = getOutput(device).grad;
- outGrad_ = MKLDNNMatrix::create(out, outVal_->getPrimitiveDesc());
- wgtGrad_ = MKLDNNMatrix::create(wgt, wgtVal_->getPrimitiveDesc());
- biasGrad_ = hasBias ? MKLDNNMatrix::create(bias, biasVal_->getPrimitiveDesc())
- : nullptr;
+ const MatrixPtr& outGrad = getOutput(device).grad;
+ out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc());
+ wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc());
+ bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc())
+ : nullptr;
// create memory primitive desc
fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward,
inVal_->getMemoryDesc(),
- wgtGrad_->getMemoryDesc(),
- outGrad_->getMemoryDesc());
+ wgt->getMemoryDesc(),
+ out->getMemoryDesc());
fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_);
fc_bwdWgt::desc bwdWgtDesc = hasBias
? fc_bwdWgt::desc(inVal_->getMemoryDesc(),
- wgtGrad_->getMemoryDesc(),
- biasGrad_->getMemoryDesc(),
- outGrad_->getMemoryDesc())
+ wgt->getMemoryDesc(),
+ bias->getMemoryDesc(),
+ out->getMemoryDesc())
: fc_bwdWgt::desc(inVal_->getMemoryDesc(),
- wgtGrad_->getMemoryDesc(),
- outGrad_->getMemoryDesc());
+ wgt->getMemoryDesc(),
+ out->getMemoryDesc());
fc_bwdWgt::primitive_desc bwdWgtPD =
fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD);
if (hasBias) {
- bwdWgt_.reset(
- new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_));
+ bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias));
} else {
- bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_));
+ bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt));
}
- pipelineBwd_.clear();
- pipelineBwd_.push_back(*bwdWgt_);
+ pipeline.push_back(*bwdWgt_);
/// backward data
- device = inputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
- const MatrixPtr& in = getInputGrad(0, device);
- if (in == nullptr) {
+ const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad;
+ if (inGrad == nullptr) {
return;
}
- if (getInput(0, device).getAllCount() > 1) {
- // TODO(TJ): use outputMaps_ ways when merge outgrad done
+ if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) {
+ // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
} else {
- inGrad_ = MKLDNNMatrix::create(in, inVal_->getPrimitiveDesc());
+ in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc());
}
- fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(inVal_->getMemoryDesc(),
- wgtGrad_->getMemoryDesc(),
- outGrad_->getMemoryDesc());
+ fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(
+ inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc());
fc_bwdData::primitive_desc bwdDataPD =
fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD);
CHECK(wgtVal_) << "Should have weight memory";
- bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_));
+ bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in));
printGradFormatFlow();
- pipelineBwd_.push_back(*bwdData_);
+ pipeline.push_back(*bwdData_);
}
-void MKLDNNFcLayer::forward(PassType passType) {
- Layer::forward(passType);
- reshape();
-
- {
- REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
- syncInputValue();
-
- // just submit forward pipeline
- stream_->submit(pipelineFwd_);
- }
-
- /* activation */ {
- REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
- forwardActivation();
- }
+void MKLDNNFcLayer::updateInputData() {
+ inVal_->setData(getInputValue(0, CPU_DEVICE)->getData());
}
-void MKLDNNFcLayer::backward(const UpdateCallback& callback) {
- /* Do derivation */ {
- REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
- backwardActivation();
- }
-
- {
- REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
- resetBwd();
-
- syncOutputGrad();
- // just sumbmit backward pipeline
- stream_->submit(pipelineBwd_);
- }
-
- {
- REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
- weight_->getParameterPtr()->incUpdate(callback);
- if (biases_ && biases_->getWGrad()) {
- biases_->getParameterPtr()->incUpdate(callback);
- }
+void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) {
+ weight_->getParameterPtr()->incUpdate(callback);
+ if (biases_ && biases_->getWGrad()) {
+ biases_->getParameterPtr()->incUpdate(callback);
}
}
} // namespace paddle
diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h
index e138a6faf181c412949218458e7ecf800a0d6a07..3119f863496df092da13c08bf733f13c42e53780 100644
--- a/paddle/gserver/layers/MKLDNNFcLayer.h
+++ b/paddle/gserver/layers/MKLDNNFcLayer.h
@@ -45,35 +45,28 @@ public:
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
- void convertWeightsFromPaddle() override;
+ void reshape(
+ int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override;
- void convertWeightsToPaddle() override;
+ void resetFwd(std::vector& pipeline,
+ MKLDNNMatrixPtr& in,
+ MKLDNNMatrixPtr& wgt,
+ MKLDNNMatrixPtr& bias,
+ MKLDNNMatrixPtr& out) override;
- void forward(PassType passType) override;
+ void resetBwd(std::vector& pipeline,
+ MKLDNNMatrixPtr& in,
+ MKLDNNMatrixPtr& wgt,
+ MKLDNNMatrixPtr& bias,
+ MKLDNNMatrixPtr& out) override;
- void backward(const UpdateCallback& callback) override;
+ void updateInputData() override;
-protected:
- /**
- * reshape the input image sizes
- * and reset output buffer size
- * and reset mkldnn forward
- */
- void reshape();
-
- /**
- * reset the forward primitve and memory
- * only would be called when input size changes
- */
- void resetFwd();
-
- /**
- * reset the backward primitve and memory for mkldnn fc
- * only would be called when needed
- */
- void resetBwd();
-
- void convertOutputToOtherDevice() override;
+ void updateWeights(const UpdateCallback& callback) override;
+
+ void convertWeightsFromPaddle() override;
+
+ void convertWeightsToPaddle() override;
};
} // namespace paddle
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index b983b833d510b823c5d4cff0b9390173e4cefc89..169679c8297542cac4a43f5a8e1af311ad9282df 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -19,6 +19,7 @@ limitations under the License. */
#include "MKLDNNBase.h"
#include "mkldnn.hpp"
#include "paddle/math/MKLDNNMatrix.h"
+#include "paddle/utils/Stat.h"
DECLARE_bool(use_mkldnn);
@@ -33,6 +34,8 @@ typedef std::shared_ptr MKLDNNLayerPtr;
*/
class MKLDNNLayer : public Layer {
protected:
+ // input value element count
+ size_t inputElemenCnt_;
// batch size
int bs_;
// input image channel, height and width
@@ -52,7 +55,7 @@ protected:
std::vector pipelineFwd_;
std::vector pipelineBwd_;
- // MKLDNNMatrixPtr
+ // MKLDNNMatrixPtr with internal format
MKLDNNMatrixPtr inVal_;
MKLDNNMatrixPtr inGrad_;
MKLDNNMatrixPtr outVal_;
@@ -65,6 +68,7 @@ protected:
public:
explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config),
+ inputElemenCnt_(0),
bs_(0),
ic_(0),
ih_(0),
@@ -95,12 +99,104 @@ public:
if (!Layer::init(layerMap, parameterMap)) {
return false;
}
+ checkCPUOutputsNumber();
stream_.reset(new MKLDNNStream());
engine_ = CPUEngine::Instance().getEngine();
return true;
}
+ void forward(PassType passType) override {
+ passType_ = passType;
+ // Common forward driver: subclasses only provide reshape()/resetFwd().
+ {
+ REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str());
+ CHECK(!inputLayers_.empty());
+ copySeqInfoToOutputs();
+ size_t elemenCnt = inputLayers_[0]->getOutput().value->getElementCnt();
+ if (inputElemenCnt_ != elemenCnt) {
+ // rebuild when the total input element count changed, not only batchsize
+ inputElemenCnt_ = elemenCnt;
+ reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
+ resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
+ convertWeightsFromPaddle();
+ needResetBwd_ = true;  // backward() rebuilds its primitives on next call
+ }
+
+ if (inputLayers_[0]->getType() == "data") {
+ updateInputData();  // the input data address might have changed
+ }
+
+ stream_->submit(pipelineFwd_);
+ }
+
+ /* activation */ {
+ REGISTER_TIMER_INFO("FwActTimer", getName().c_str());
+ forwardActivation();
+ }
+ }
+
+ void backward(const UpdateCallback& callback) override {
+ /* Do derivation */ {
+ REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
+ backwardActivation();
+ }
+
+ {
+ REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str());
+ if (needResetBwd_) {  // set by forward() after it rebuilt the forward pass
+ resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
+ needResetBwd_ = false;
+ }
+
+ stream_->submit(pipelineBwd_);
+ }
+
+ {
+ REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
+ updateWeights(callback);  // no-op unless the subclass overrides it
+ }
+ }
+
+ /**
+ * reshape the input image sizes
+ * and reset output image and buffer size
+ * output channel can not be changed
+ */
+ virtual void reshape(
+ int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0;
+
+ /**
+ * reset the mkldnn forward primitive and memory
+ * only called when the input size changes
+ */
+ virtual void resetFwd(std::vector& pipeline,
+ MKLDNNMatrixPtr& in,
+ MKLDNNMatrixPtr& wgt,
+ MKLDNNMatrixPtr& bias,
+ MKLDNNMatrixPtr& out) = 0;
+
+ /**
+ * reset the mkldnn backward primitive and memory for the mkldnn layer
+ * only called when needed
+ */
+ virtual void resetBwd(std::vector& pipeline,
+ MKLDNNMatrixPtr& in,
+ MKLDNNMatrixPtr& wgt,
+ MKLDNNMatrixPtr& bias,
+ MKLDNNMatrixPtr& out) = 0;
+
+ /**
+ * Update input value data when input layer is "data" type.
+ * Since the input value data address might be changed.
+ */
+ virtual void updateInputData() {}
+
+ /**
+ * Update weights and biases if necessary.
+ */
+ virtual void updateWeights(const UpdateCallback& callback) {}
+
/**
* convert weight from paddle format to mkldnn format
* weight_ will be override
@@ -114,10 +210,38 @@ public:
virtual void convertWeightsToPaddle() {}
/**
- * convert MKLDNN output to other device.
- * only support CPU device yet
+   * Re-expose Layer::addOutputArgument as a public method so that unit tests can call it.
+ */
+ void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); }
+
+protected:
+ /**
+ * reshape the input image sizes and input batchsize
*/
- virtual void convertOutputToOtherDevice() {}
+  virtual void reshapeInput(int& batchsize, int& height, int& width) {
+    // Read the sizes from the output of the first input layer.
+    const Argument& in = inputLayers_[0]->getOutput();
+    batchsize = in.getBatchSize();
+    const int frameHeight = in.getFrameHeight();
+    const int frameWidth = in.getFrameWidth();
+    // Only overwrite a size when the input reports a non-zero frame size;
+    // otherwise the caller's value is kept.
+    height = (frameHeight != 0) ? frameHeight : height;
+    width = (frameWidth != 0) ? frameWidth : width;
+  }
+
+  /**
+   * Set the frame height and width on output_ and on every entry of
+   * outputOtherDevice_.
+   */
+  virtual void reshapeOutput(size_t height, size_t width) {
+    output_.setFrameHeight(height);
+    output_.setFrameWidth(width);
+    for (auto& other : outputOtherDevice_) {
+      other.setFrameHeight(height);
+      other.setFrameWidth(width);
+    }
+  }
/**
* print info about sizes
@@ -133,8 +257,8 @@ public:
*/
virtual void printValueFormatFlow() {
if (inVal_ && outVal_) {
- VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat()
- << " >>> " << outVal_->getFormat();
+ VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>> "
+ << outVal_->getFormat();
}
}
@@ -143,29 +267,12 @@ public:
*/
virtual void printGradFormatFlow() {
if (inGrad_ && outGrad_) {
- VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat()
- << " <<< " << outGrad_->getFormat();
+ VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<< "
+ << outGrad_->getFormat();
}
}
protected:
- /**
- * copy image size and sequence info to other device
- * @note: can not directly use Layer::copyOutputToOtherDevice since here only
- * copy base info and do not copy data value
- */
- void copyOutputInfoToOtherDevice() {
- for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
- outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
- outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
- outputOtherDevice_[i].sequenceStartPositions =
- output_.sequenceStartPositions;
- outputOtherDevice_[i].subSequenceStartPositions =
- output_.subSequenceStartPositions;
- outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
- }
- }
-
/**
* If input only has MKLDNN device.
* Otherwise, only support the previous layer using CPU device.
@@ -193,37 +300,12 @@ protected:
return outputOtherDevice_.size() == 0;
}
- /**
- * Sync input value data
- */
- void syncInputValue() {
- if (inputIsOnlyMKLDNN()) {
- return;
- }
- real* iData = getInputValue(0, CPU_DEVICE)->getData();
- // update input data
- // since it might be changed if this is after data layer
- inVal_->updateData(iData);
- }
-
- /**
- * Sync output grad data
- */
- void syncOutputGrad() {
- if (outputIsOnlyMKLDNN()) {
- return;
- }
-
- // update diff
- real* oDiff = getOutput(CPU_DEVICE).grad->getData();
- outGrad_->updateData(oDiff);
- }
-
/**
* Set deviceId of this layer.
*/
void setDevice(int id) { deviceId_ = id; }
+private:
/**
* Set deviceId of the params used in this layer.
*/
@@ -247,6 +329,42 @@ protected:
parameter->setDevice(id);
}
}
+
+  /**
+   * Check the number of CPU-device entries in outputOtherDevice_.
+   * The check fails when there are more than `max` of them; by default at
+   * most one is allowed.
+   */
+  void checkCPUOutputsNumber(int max = 1) {
+    int cnt = 0;
+    for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
+      if (outputOtherDevice_[i].deviceId == CPU_DEVICE) {
+        ++cnt;
+      }
+    }
+    CHECK_LE(cnt, max) << "too many CPU devices";
+  }
+
+  /**
+   * Propagate the sequence info of the first input layer to output_, and
+   * from there to every entry of outputOtherDevice_.
+   * @note: the info is read from "inputLayers_[0]->getOutput()" rather than
+   *        getInput(0), because the latter uses this deviceId_.
+   */
+  void copySeqInfoToOutputs() {
+    const bool nothingToCopy = inputLayers_.empty() || !needSequenceInfo_;
+    if (nothingToCopy) {
+      return;
+    }
+    const Argument& src = inputLayers_[0]->getOutput();
+    output_.sequenceStartPositions = src.sequenceStartPositions;
+    output_.subSequenceStartPositions = src.subSequenceStartPositions;
+    output_.cpuSequenceDims = src.cpuSequenceDims;
+    for (auto& other : outputOtherDevice_) {
+      other.sequenceStartPositions = output_.sequenceStartPositions;
+      other.subSequenceStartPositions = output_.subSequenceStartPositions;
+      other.cpuSequenceDims = output_.cpuSequenceDims;
+    }
+  }
};
} // namespace paddle
diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp
index de1635be2af37cd0ba49010199a417090865b0e4..2f48e5b2d3ffc9337ed1314f6db6549e56263fdd 100644
--- a/paddle/gserver/tests/MKLDNNTester.cpp
+++ b/paddle/gserver/tests/MKLDNNTester.cpp
@@ -63,8 +63,12 @@ void MKLDNNTester::reset(const TestConfig& dnn,
initTestLayer(
configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i]));
}
- dnnLayer_ = testLayers_[DNN];
refLayer_ = testLayers_[REF];
+ dnnLayer_ = std::dynamic_pointer_cast(testLayers_[DNN]);
+ CHECK(dnnLayer_);
+ // for comparison with Paddle reference results,
+ // need manually add cpu device output for test
+ dnnLayer_->addOutputArgument(CPU_DEVICE);
EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size());
EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size());
@@ -109,20 +113,22 @@ void MKLDNNTester::randomBotDatas() {
void MKLDNNTester::randomTopDiffs() {
refLayer_->getOutputGrad()->randomizeUniform();
- dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad()));
- VLOG(lvl_) << "Random dom Backward Input, TopDiff: ";
+ dnnLayer_->getOutput(CPU_DEVICE)
+ .grad->copyFrom(*(refLayer_->getOutputGrad()));
+ VLOG(lvl_) << "Random Backward Input, TopDiff: ";
printMatrix(refLayer_->getOutputGrad());
}
void MKLDNNTester::checkForward() {
- printTopDatas();
- double delta = compareMatrix(testLayers_[DNN]->getOutputValue(),
- testLayers_[REF]->getOutputValue());
VLOG(MKLDNN_ALL) << "Check Forward";
+ printTopDatas();
+ double delta = compareMatrix(dnnLayer_->getOutput(-1).value,
+ refLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_);
}
void MKLDNNTester::checkBackwardData() {
+ VLOG(MKLDNN_ALL) << "Check Backward Data";
// TODO(TJ): uncomment me when batch norm ready
// const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm";
for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) {
@@ -144,14 +150,12 @@ void MKLDNNTester::checkBackwardData() {
}
void MKLDNNTester::checkBackwardWgts() {
+ VLOG(MKLDNN_ALL) << "Check Backward Weight";
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size());
vector dnnWgts; // used to temply save mkldnn weights
saveWgt(parameters_[DNN], dnnWgts);
- const MKLDNNLayerPtr dnnlayer =
- std::dynamic_pointer_cast(dnnLayer_);
- CHECK(dnnlayer);
- dnnlayer->convertWeightsToPaddle();
+ dnnLayer_->convertWeightsToPaddle();
for (size_t i = 0; i < parameters_[DNN].size(); ++i) {
const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE);
const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE);
@@ -189,38 +193,38 @@ void MKLDNNTester::restoreWgt(const vector& from,
}
// clear parameters grad
-void MKLDNNTester::clearWgtDiffs() {
+void MKLDNNTester::clearWgtDiffs(size_t id) {
+ CHECK_LE(id, parameters_.size());
for (size_t n = 0; n < parameters_.size(); ++n) {
- for (size_t i = 0; i < parameters_[n].size(); ++i) {
- const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT);
- if (grad) {
- grad->zeroMem();
+ if (id == n || id == parameters_.size()) {
+ for (size_t i = 0; i < parameters_[n].size(); ++i) {
+ const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT);
+ if (grad) {
+ grad->zeroMem();
+ }
}
}
}
}
-void MKLDNNTester::clearBotDiffs() {
- // dnn and ref
+void MKLDNNTester::clearBotDiffs(size_t id) {
+ CHECK_LE(id, dataLayers_.size());
for (size_t n = 0; n < dataLayers_.size(); ++n) {
- // all inputs layers
- for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
- dataLayers_[n][i]->getOutputGrad()->zeroMem();
+ if (id == n || id == dataLayers_.size()) {
+ // clear inputs layers of this specific layer
+ for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
+ dataLayers_[n][i]->getOutputGrad()->zeroMem();
+ }
}
}
}
-void MKLDNNTester::clearBotDiffs(int n) {
- CHECK_LT(n, NUM);
- // all inputs layers
- for (size_t i = 0; i < dataLayers_[n].size(); ++i) {
- dataLayers_[n][i]->getOutputGrad()->zeroMem();
- }
-}
-
-void MKLDNNTester::clearTopDatas() {
+void MKLDNNTester::clearTopDatas(size_t id) {
+ CHECK_LE(id, testLayers_.size());
for (size_t i = 0; i < testLayers_.size(); ++i) {
- testLayers_[i]->getOutputValue()->zeroMem();
+ if (id == i || id == testLayers_.size()) {
+ testLayers_[i]->getOutputValue()->zeroMem();
+ }
}
}
@@ -300,16 +304,24 @@ void MKLDNNTester::runOnce() {
checkForward();
// test backward
+ // simple updater
+ UpdateCallback updateCallback = [](Parameter* para) {
+ auto& grad = para->getBuf(PARAMETER_GRADIENT);
+ auto& value = para->getBuf(PARAMETER_VALUE);
+ real lr = 1e-3;
+ value->add(*grad, lr);
+ };
randomTopDiffs();
- dnnLayer_->backward(nullptr);
- refLayer_->backward(nullptr);
+ dnnLayer_->backward(updateCallback);
+ refLayer_->backward(updateCallback);
checkBackwardData();
checkBackwardWgts();
// clear buffers
// ref code will addto the diff, dnn code will writeto it
- // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers
+  // and clearTopDatas(REF) should be covered by ref layers
clearBotDiffs(REF);
+ clearWgtDiffs(REF);
}
void MKLDNNTester::run(const TestConfig& dnn,
diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h
index e55e4493ffdfe45b8cfdee423febd1878b8b3d8a..5ac885638cde7693a0c847733e7a6149c1b7e6c2 100644
--- a/paddle/gserver/tests/MKLDNNTester.h
+++ b/paddle/gserver/tests/MKLDNNTester.h
@@ -18,6 +18,7 @@ limitations under the License. */
#include
#include "LayerGradUtil.h"
#include "paddle/gserver/layers/MKLDNNBase.h"
+#include "paddle/gserver/layers/MKLDNNLayer.h"
namespace paddle {
@@ -40,7 +41,8 @@ protected:
vector layerMaps_;
vector> parameters_;
vector testLayers_;
- LayerPtr dnnLayer_, refLayer_;
+ LayerPtr refLayer_;
+ MKLDNNLayerPtr dnnLayer_;
/// run some iterations, all the result should pass
size_t iter_;
@@ -88,10 +90,10 @@ private:
void checkBackwardData();
void checkBackwardWgts();
- void clearWgtDiffs();
- void clearBotDiffs();
- void clearBotDiffs(int n); // clear specific layer
- void clearTopDatas();
+ // clear specific layer, clear all when id equals NUM
+ void clearWgtDiffs(size_t id = NUM);
+ void clearBotDiffs(size_t id = NUM);
+ void clearTopDatas(size_t id = NUM);
void printTopDatas();
void printMatrix(const MatrixPtr& m);
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 0e6be2df9ef5f0fae8ed2b0c65ac6c032fe45ab1..090bde7b203652e3ffb1662b8f5b8937885d2608 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -2302,26 +2302,27 @@ void test3DDeConvLayer(const string& type, bool trans, bool useGpu) {
conv->set_stride(2);
conv->set_stride_y(2);
conv->set_stride_z(2);
- conv->set_img_size(IMAGE_SIZE);
- conv->set_img_size_y(IMAGE_SIZE_Y);
- conv->set_img_size_z(IMAGE_SIZE_Z);
- conv->set_output_x(imageSize(conv->img_size(),
+ conv->set_output_x(IMAGE_SIZE);
+ conv->set_output_y(IMAGE_SIZE_Y);
+ conv->set_output_z(IMAGE_SIZE_Z);
+
+ conv->set_img_size(imageSize(conv->output_x(),
conv->filter_size(),
conv->padding(),
conv->stride(),
true));
- conv->set_output_y(imageSize(conv->img_size_y(),
- conv->filter_size_y(),
- conv->padding_y(),
- conv->stride_y(),
- true));
- conv->set_output_z(imageSize(conv->img_size_z(),
- conv->filter_size_z(),
- conv->padding_z(),
- conv->stride_z(),
- true));
- config.layerConfig.set_size(conv->output_x() * conv->output_y() *
- conv->output_z() * NUM_FILTERS);
+ conv->set_img_size_y(imageSize(conv->output_y(),
+ conv->filter_size_y(),
+ conv->padding_y(),
+ conv->stride_y(),
+ true));
+ conv->set_img_size_z(imageSize(conv->output_z(),
+ conv->filter_size_z(),
+ conv->padding_z(),
+ conv->stride_z(),
+ true));
+ config.layerConfig.set_size(conv->img_size() * conv->img_size_y() *
+ conv->img_size_z() * NUM_FILTERS);
conv->set_groups(1);
conv->set_filter_channels(conv->channels() / conv->groups());
config.inputDefs.push_back(
diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/math/MKLDNNMatrix.cpp
index 0a355e2644cce572ce90ecf5c9d2a5b7b395bc61..c4063e5069854242d9f93886b66580385557ca73 100644
--- a/paddle/math/MKLDNNMatrix.cpp
+++ b/paddle/math/MKLDNNMatrix.cpp
@@ -33,14 +33,12 @@ MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m, memory::primitive_desc pd) {
size_t width = cnts / dims[0];
m = Matrix::create(height, width, false, false);
}
-
CHECK(m) << " Matrix should not be empty";
+
CpuMatrixPtr cpuMatrix = std::dynamic_pointer_cast(m);
CHECK(cpuMatrix) << "Only support create from CPU matrix yet";
-
- CHECK_EQ(cnts, m->getElementCnt()) << "Count size does not match";
- return std::make_shared(
- m->getData(), m->getHeight(), m->getWidth(), pd);
+ CHECK_EQ(cpuMatrix->getElementCnt(), cnts) << "Count size does not match";
+ return std::make_shared(cpuMatrix, pd);
}
MKLDNNMatrixPtr MKLDNNMatrix::create(MatrixPtr m,
@@ -138,7 +136,7 @@ void MKLDNNMatrix::downSpatial() {
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
- set_data_handle(getData());
+ set_data_handle(data_);
}
} // namespace paddle
diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h
index e50f698b495713e6f15ab7a12a7ee7487662040f..eef3b429e6fa0087aeac3f5aed9dff983b06e826 100644
--- a/paddle/math/MKLDNNMatrix.h
+++ b/paddle/math/MKLDNNMatrix.h
@@ -30,11 +30,10 @@ typedef std::shared_ptr MKLDNNMatrixPtr;
*/
class MKLDNNMatrix : public CpuMatrix, public mkldnn::memory {
public:
- MKLDNNMatrix(real* data,
- size_t height,
- size_t width,
- mkldnn::memory::primitive_desc pd)
- : CpuMatrix(data, height, width, false), mkldnn::memory(pd, data) {}
+  /**
+   * Wrap the buffer of an existing CpuMatrix as mkldnn memory described by
+   * pd. The matrix pointer is also saved in m_, so the buffer is not
+   * released outside while this object still points at it.
+   */
+  MKLDNNMatrix(CpuMatrixPtr m, mkldnn::memory::primitive_desc pd)
+      : CpuMatrix(m->getData(), m->getHeight(), m->getWidth(), false),
+        mkldnn::memory(pd, m->getData()),
+        m_(m) {}
~MKLDNNMatrix() {}
@@ -81,11 +80,29 @@ public:
void downSpatial();
/**
- * Update the memory data handle.
+ * set the memory data handle.
* Caution: This will not check the buffer size of the data,
* it should be coverd by user.
*/
- void updateData(void* data) { set_data_handle(data); }
+  void setData(real* data) {
+    // keep the mkldnn data handle and the CpuMatrix data pointer in sync
+    set_data_handle(data);
+    CpuMatrix::setData(data);
+    // this object no longer points at the buffer of the saved matrix,
+    // so drop the reference to it
+    m_.reset();
+  }
+
+  /**
+   * Override Matrix::getData.
+   * Verifies that data_ and the mkldnn data handle are the same address
+   * before returning data_.
+   */
+  real* getData() override {
+    CHECK_EQ(static_cast<void*>(data_), get_data_handle());
+    return data_;
+  }
+
+  // const overload: same address check against the mkldnn data handle
+  const real* getData() const override {
+    CHECK_EQ(static_cast<void*>(data_), get_data_handle());
+    return data_;
+  }
/**
* Get primitive descriptor.
@@ -143,6 +160,10 @@ protected:
memory::format srcFmt,
memory::format dstFmt,
memory::dims dm);
+
+private:
+ // save the CpuMatrixPtr in case the buffer released outside
+ CpuMatrixPtr m_;
};
} // namespace paddle
diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0ebefbab26ec8fdf316f852fbb7f6d9f3bbc48eb
--- /dev/null
+++ b/paddle/operators/concat_op.cc
@@ -0,0 +1,79 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/concat_op.h"
+#include
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+
+// Operator that joins its "X" input tensors along attribute "axis".
+class ConcatOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  // Infer the shape of "Out": the dims of the first input, except that the
+  // extent along `axis` is the sum of that extent over all inputs. Every
+  // other dimension must agree across the inputs.
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto ins = ctx.MultiInput("X");
+    auto *out = ctx.Output("Out");
+    size_t axis = static_cast(ctx.Attr("axis"));
+    size_t n = ins.size();
+
+    PADDLE_ENFORCE_GT(n, 1, "Input tensors count should > 1.");
+
+    auto out_dims = ins[0]->dims();
+    size_t in_zero_dims_size = out_dims.size();
+    // axis is unsigned here, so a negative attribute shows up as a huge
+    // value; this single check rejects both negative and too-large axes
+    // instead of silently leaving the concat dimension unchanged.
+    PADDLE_ENFORCE_LT(axis, in_zero_dims_size,
+                      "The axis should be less than the rank of inputs.");
+    for (size_t i = 1; i < n; i++) {
+      for (size_t j = 0; j < in_zero_dims_size; j++) {
+        if (j == axis) {
+          out_dims[axis] += ins[i]->dims()[j];
+          continue;
+        }
+        PADDLE_ENFORCE_EQ(out_dims[j], ins[i]->dims()[j],
+                          "Input tensors should have the same "
+                          "elements except the specify axis.");
+      }
+    }
+    out->Resize(out_dims);
+  }
+};
+
+// Declares the proto of the concat operator: a duplicable input "X", an
+// output "Out", the operator comment, and the "axis" attribute, which
+// defaults to 0.
+class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "the input tensors of concat operator.").AsDuplicable();
+    AddOutput("Out", "the output tensor of concat operator.");
+    AddComment(R"DOC(
+ Join the input tensors along with the axis.
+ Examples:
+ Input[0] = [[1,2],[3,4]]
+ Input[1] = [[5,6]]
+ axis = 0
+ Output = [[1,2],
+ [3,4],
+ [5,6]]
+ )DOC");
+    AddAttr("axis", "The axis which the inputs will be joined with.")
+        .SetDefault(0);
+  }
+};
+
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(concat, ops::ConcatOp, ops::ConcatOpMaker)
+REGISTER_OP_CPU_KERNEL(concat,
+ ops::ConcatKernel)
diff --git a/paddle/operators/concat_op.cu b/paddle/operators/concat_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..38fee7473dbb2ba97fe95b6632db7a1749cf3bbe
--- /dev/null
+++ b/paddle/operators/concat_op.cu
@@ -0,0 +1,19 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/concat_op.h"
+
+namespace ops = paddle::operators;
+// TODO(Yancey1989) Add GPU kernel
diff --git a/paddle/operators/concat_op.h b/paddle/operators/concat_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..f977054fdf8aa0164db726b94a21c57f770dd674
--- /dev/null
+++ b/paddle/operators/concat_op.h
@@ -0,0 +1,64 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// CPU kernel of the concat operator: copies every "X" input into its slot
+// of "Out" along attribute "axis", one contiguous chunk at a time.
+template
+class ConcatKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto ins = ctx.MultiInput("X");
+    auto* out = ctx.Output("Out");
+    int64_t axis = static_cast(ctx.Attr("axis"));
+    size_t n = ins.size();
+    size_t output_axis_dim = 0;
+    size_t before = 1, after = 1;
+    // extent of the output along the concat axis
+    for (size_t i = 0; i < n; i++) {
+      output_axis_dim += ins[i]->dims()[axis];
+    }
+    // before / after: products of the first input's extents on either side
+    // of the concat axis
+    auto& input_zero = ins[0];
+    for (int64_t i = 0; i < input_zero->dims().size(); i++) {
+      if (i == axis) {
+        continue;
+      }
+      if (i < axis) {
+        before *= input_zero->dims()[i];
+      } else {
+        after *= input_zero->dims()[i];
+      }
+    }
+    // The output buffer does not depend on i or j, so fetch it once here
+    // rather than in the innermost loop.
+    T* out_data = out->mutable_data(platform::CPUPlace());
+    // offset (in elements) of the current input inside one output chunk
+    size_t output_offset = 0;
+    for (size_t i = 0; i < n; i++) {
+      auto& in = ins[i];
+      auto axis_dim = in->dims()[axis];
+      for (size_t j = 0; j < before; j++) {
+        size_t len = axis_dim * after * sizeof(T);
+        const T* src = in->data() + axis_dim * after * j;
+        T* dest = out_data + output_offset + output_axis_dim * after * j;
+        memcpy(dest, src, len);
+      }
+      output_offset += axis_dim * after;
+    }
+  }
+};
+
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h
index 9e2bcebe3b5432c157fac895a9bbab5164193dbb..0dc509952578497671a128374f77ce616a520909 100644
--- a/paddle/operators/cos_sim_op.h
+++ b/paddle/operators/cos_sim_op.h
@@ -42,7 +42,7 @@ class CosSimKernel : public framework::OpKernel {
output_y_norm->mutable_data(context.GetPlace());
auto dims = input_x->dims();
- int size = static_cast(framework::product(dims));
+ int64_t size = input_x->numel();
auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
auto x = EigenMatrix::From(*input_x, new_dims);
auto y = EigenMatrix::From(*input_y, new_dims);
@@ -72,7 +72,7 @@ class CosSimGradKernel : public framework::OpKernel {
auto* input_grad_z = context.Input(framework::GradVarName("Out"));
auto dims = input_x->dims();
- int size = static_cast(framework::product(dims));
+ int64_t size = input_x->numel();
auto new_dims = framework::make_ddim({dims[0], size / dims[0]});
auto x = EigenMatrix::From(*input_x, new_dims);
auto y = EigenMatrix::From(*input_y, new_dims);
diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc
index 6574880c0eb6324b2dd175e39a364d2ef46e735e..3d76516405960c502a46997108049b2db5cab6bf 100644
--- a/paddle/operators/gaussian_random_op.cc
+++ b/paddle/operators/gaussian_random_op.cc
@@ -31,7 +31,7 @@ class CPUGaussianRandomKernel : public framework::OpKernel {
}
engine.seed(seed);
std::normal_distribution dist(mean, std);
- int64_t size = framework::product(tensor->dims());
+ int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu
index d9dbc1dcfe6a6676938d64be93c879ea69148018..2d63b3049988cfc3135a87a57dad56b970df3eab 100644
--- a/paddle/operators/gaussian_random_op.cu
+++ b/paddle/operators/gaussian_random_op.cu
@@ -50,8 +50,8 @@ class GPUGaussianRandomKernel : public framework::OpKernel {
T mean = static_cast(context.Attr("mean"));
T std = static_cast(context.Attr("std"));
thrust::counting_iterator index_sequence_begin(0);
- ssize_t N = framework::product(tensor->dims());
- thrust::transform(index_sequence_begin, index_sequence_begin + N,
+ int64_t size = tensor->numel();
+ thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr(data),
GaussianGenerator(mean, std, seed));
}
diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu
index 27eee3436af8107cef2aa3577ea238be49edf1af..708344046760691aa2da562eb1ee3d8b130c5f18 100644
--- a/paddle/operators/lookup_table_op.cu
+++ b/paddle/operators/lookup_table_op.cu
@@ -70,7 +70,7 @@ class LookupTableCUDAKernel : public framework::OpKernel {
size_t N = table_t->dims()[0];
size_t D = table_t->dims()[1];
- size_t K = product(ids_t->dims());
+ size_t K = ids_t->numel();
auto ids = ids_t->data();
auto table = table_t->data();
auto output = output_t->mutable_data(context.GetPlace());
@@ -91,7 +91,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel {
int N = d_table_t->dims()[0];
int D = d_table_t->dims()[1];
- int K = product(ids_t->dims());
+ int K = ids_t->numel();
const int32_t* ids = ids_t->data();
const T* d_output = d_output_t->data();
T* d_table = d_table_t->mutable_data(context.GetPlace());
diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h
index 877b36cef4ea9cdaaaf37c97d5e5bfce55b91436..a1298906dd4b4209644fe06584f70169519de01c 100644
--- a/paddle/operators/lookup_table_op.h
+++ b/paddle/operators/lookup_table_op.h
@@ -35,7 +35,7 @@ class LookupTableKernel : public framework::OpKernel {
auto ids = ids_t->data();
auto table = table_t->data();
auto output = output_t->mutable_data(context.GetPlace());
- for (ssize_t i = 0; i < product(ids_t->dims()); ++i) {
+ for (int64_t i = 0; i < ids_t->numel(); ++i) {
PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
memcpy(output + i * D, table + ids[i] * D, D * sizeof(T));
@@ -61,7 +61,7 @@ class LookupTableGradKernel : public framework::OpKernel {
t.device(context.GetEigenDevice()) =
t.constant(static_cast(0));
- for (ssize_t i = 0; i < product(ids_t->dims()); ++i) {
+ for (int64_t i = 0; i < ids_t->numel(); ++i) {
PADDLE_ENFORCE_LT(ids[i], N);
PADDLE_ENFORCE_GE(ids[i], 0);
for (int j = 0; j < D; ++j) {
diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc
index 186a33edcec88bd5e51091a524a778eeb27ad526..4f380388b108dc173d847f027ba5c9db387a87f8 100644
--- a/paddle/operators/math/im2col_test.cc
+++ b/paddle/operators/math/im2col_test.cc
@@ -119,4 +119,4 @@ TEST(math, im2col) {
#ifndef PADDLE_ONLY_CPU
testIm2col();
#endif
-}
\ No newline at end of file
+}
diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h
index 9848af280b62729bef9243052ceae0b7d8f4c6f5..ce31e178d8e375dc59be80a6c05133201308da70 100644
--- a/paddle/operators/mean_op.h
+++ b/paddle/operators/mean_op.h
@@ -49,12 +49,11 @@ class MeanGradKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto OG = context.Input(framework::GradVarName("Out"));
- PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
- "Mean Gradient should be scalar");
+ PADDLE_ENFORCE(OG->numel() == 1, "Mean Gradient should be scalar");
auto IG = context.Output(framework::GradVarName("X"));
IG->mutable_data(context.GetPlace());
- T ig_size = (T)framework::product(IG->dims());
+ T ig_size = static_cast(IG->numel());
Eigen::DSizes bcast(ig_size);
EigenVector::Flatten(*IG).device(context.GetEigenDevice()) =
diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc
index 069fb5e1abc657aa02a50fde352ce88d078c36e1..a4876feb2edf77bd422fa2a7687b0fa7d55dae47 100644
--- a/paddle/operators/minus_op.cc
+++ b/paddle/operators/minus_op.cc
@@ -31,8 +31,7 @@ class MinusOp : public framework::OperatorWithKernel {
auto *right_tensor = ctx.Input("Y");
PADDLE_ENFORCE_EQ(
- framework::product(left_tensor->dims()),
- framework::product(right_tensor->dims()),
+ left_tensor->numel(), right_tensor->numel(),
"Minus operator must take two tensor with same num of elements");
ctx.Output("Out")->Resize(left_tensor->dims());
}
diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b7061153d2bf13982f14f233e87a87daeeebf5fd
--- /dev/null
+++ b/paddle/operators/reshape_op.cc
@@ -0,0 +1,107 @@
+
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include "paddle/operators/reshape_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Operator that gives Input(X) the shape specified by Attr(shape).
+class ReshapeOp : public framework::OperatorWithKernel {
+ public:
+  ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
+            const framework::VariableNameMap &outputs,
+            const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+ protected:
+  // Validate Attr(shape) against Input(X) and resize Output(Out) to it.
+  // Fails when X is missing, when shape is empty or has a non-positive
+  // entry, or when the element counts of X and shape differ.
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    // input check
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) shouldn't be null");
+    auto shape = ctx.Attr>("shape");
+    PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty.");
+    for (auto dim : shape) {
+      PADDLE_ENFORCE(dim > 0, "Each dimension of shape must be positive.");
+    }
+    // capacity check
+    // Seed the product with a 64-bit one: std::accumulate computes in the
+    // type of its initial value, so a plain `1` would multiply in int and
+    // could overflow before the result is widened.
+    int64_t capacity = std::accumulate(shape.begin(), shape.end(),
+                                       static_cast<int64_t>(1),
+                                       std::multiplies<int64_t>());
+    auto *in = ctx.Input("X");
+    int64_t in_size = in->numel();
+    PADDLE_ENFORCE_EQ(capacity, in_size,
+                      "The size of Input(X) mismatches with Attr(shape).");
+    // resize output
+    std::vector shape_int64(shape.size(), 0);
+    std::transform(shape.begin(), shape.end(), shape_int64.begin(),
+                   [](int a) { return static_cast(a); });
+    auto out_dims = framework::make_ddim(shape_int64);
+    ctx.Output("Out")->Resize(out_dims);
+  }
+};
+
+// Declares the proto of the reshape operator: input "X", output "Out", the
+// "shape" attribute and the operator comment.
+class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ReshapeOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The input tensor of reshape operator.");
+    AddOutput("Out", "The output tensor of reshape operator.");
+    AddAttr>("shape", "Target shape of reshape operator.");
+    // The example below uses target shape [1, 4], which is a 2-D shape, so
+    // the result is written as a 2-D tensor with one row.
+    AddComment(R"DOC(Reshape operator
+
+Reshape Input(X) into the shape specified by Attr(shape).
+
+An example:
+Given a 2-D tensor X with 2 rows and 2 columns
+
+ [[1, 2], [3, 4]]
+
+with target shape = [1, 4], the reshape operator will transform
+the tensor X into a 2-D tensor with 1 row and 4 columns:
+
+ [[1, 2, 3, 4]]
+
+)DOC");
+  }
+};
+
+// Gradient operator of reshape; it only performs shape inference.
+class ReshapeGradOp : public framework::OperatorWithKernel {
+ public:
+  ReshapeGradOp(const std::string &type,
+                const framework::VariableNameMap &inputs,
+                const framework::VariableNameMap &outputs,
+                const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+ protected:
+  // Both Input(X) and the gradient of Out must be present; the gradient of
+  // X is then resized to the dims of X.
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) shouldn't be null.");
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
+                            "Input(Out@GRAD) shouldn't be null.");
+    const auto x_dims = ctx.Input("X")->dims();
+    auto *x_grad = ctx.Output(framework::GradVarName("X"));
+    x_grad->Resize(x_dims);
+  }
+};
+
+} // namespace operators
+} // namespace paddle
+namespace ops = paddle::operators;
+
+REGISTER_OP(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, reshape_grad,
+ ops::ReshapeGradOp);
+REGISTER_OP_CPU_KERNEL(reshape,
+ ops::ReshapeKernel);
+REGISTER_OP_CPU_KERNEL(
+ reshape_grad, ops::ReshapeGradKernel);
diff --git a/paddle/operators/reshape_op.cu b/paddle/operators/reshape_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..23dbe089d3b37aabedf9ef166f7bbfbf67da7e0a
--- /dev/null
+++ b/paddle/operators/reshape_op.cu
@@ -0,0 +1,22 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include "paddle/operators/reshape_op.h"
+
+REGISTER_OP_GPU_KERNEL(
+ reshape,
+ paddle::operators::ReshapeKernel);
+REGISTER_OP_GPU_KERNEL(
+ reshape_grad,
+ paddle::operators::ReshapeGradKernel);
diff --git a/paddle/operators/reshape_op.h b/paddle/operators/reshape_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..26708e72dc8f80d2cff1c1ee5e8763b959320205
--- /dev/null
+++ b/paddle/operators/reshape_op.h
@@ -0,0 +1,56 @@
+
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template
+class ReshapeKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* out = ctx.Output("Out");
+    auto* in = ctx.Input("X");
+    out->mutable_data(ctx.GetPlace());
+    // Build the target dims from the "shape" attribute, copy X, then reshape.
+    auto shape = ctx.Attr>("shape");
+    std::vector shape_int64(shape.size(), 0);
+    std::transform(shape.begin(), shape.end(), shape_int64.begin(),
+                   [](int a) { return static_cast(a); });
+    auto out_dims = framework::make_ddim(shape_int64);
+    out->CopyFrom(*in, ctx.GetPlace());
+    out->Resize(out_dims);
+  }
+};
+
+template
+class ReshapeGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* d_out = ctx.Input(framework::GradVarName("Out"));
+    auto* d_x = ctx.Output(framework::GradVarName("X"));
+    d_x->mutable_data(ctx.GetPlace());
+    // Save d_x's dims and re-apply them after the copy from d_out.
+    auto in_dims = d_x->dims();
+    d_x->CopyFrom(*d_out, ctx.GetPlace());
+    d_x->Resize(in_dims);
+  }
+};
+}
+}
diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc
index dc30644a5e7e33d4289e48cac093aa5fde7e75e7..9f51d3efa8ecba894a1023b9de2df451ca85916c 100644
--- a/paddle/operators/squared_l2_distance_op.cc
+++ b/paddle/operators/squared_l2_distance_op.cc
@@ -41,8 +41,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
int rank = framework::arity(x_dims);
PADDLE_ENFORCE_GE(rank, 2, "Tensor rank should be at least equal to 2.");
- PADDLE_ENFORCE_EQ(framework::product(x_dims) / x_dims[0],
- framework::product(y_dims) / y_dims[0],
+ PADDLE_ENFORCE_EQ(x->numel() / x_dims[0], y->numel() / y_dims[0],
"Product of dimensions expcet the first dimension of "
"input and target must be equal.");
PADDLE_ENFORCE(y_dims[0] == 1 || y_dims[0] == x_dims[0],
@@ -50,8 +49,7 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
"or to 1.");
ctx.Output("sub_result")
- ->Resize({static_cast(x_dims[0]),
- static_cast(framework::product(x_dims) / x_dims[0])});
+ ->Resize({x_dims[0], x->numel() / x_dims[0]});
ctx.Output("Out")->Resize({x_dims[0], 1});
}
};
diff --git a/paddle/operators/squared_l2_distance_op.h b/paddle/operators/squared_l2_distance_op.h
index ad3347a0b35f3385c5adbcd7ceaa94fe134105e3..097ac04fc09a10b3b624f491a847e281e41a802c 100644
--- a/paddle/operators/squared_l2_distance_op.h
+++ b/paddle/operators/squared_l2_distance_op.h
@@ -39,7 +39,7 @@ class SquaredL2DistanceKernel : public framework::OpKernel {
auto in0_dims = in0->dims();
auto in1_dims = in1->dims();
- int cols = framework::product(in0_dims) / in0_dims[0];
+ int cols = in0->numel() / in0_dims[0];
// reduce dimensions except the first
auto x =
EigenMatrix::From(*in0, framework::make_ddim({in0_dims[0], cols}));
@@ -82,7 +82,7 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel {
auto x_dims = x_g->dims();
auto y_dims = y_g->dims();
- int cols = framework::product(x_dims) / x_dims[0];
+ int cols = x_g->numel() / x_dims[0];
// calculate gradient
auto grad_mat = 2 *
(out_grad.broadcast(Eigen::array({{1, cols}}))) *
diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5805826ee8a555ca6dfc1ca81feaadffea9e1012
--- /dev/null
+++ b/paddle/operators/sum_op.cc
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/sum_op.h"
+#include
+
+namespace paddle {
+namespace operators {
+using framework::Tensor;
+
+class SumOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto ins = ctx.MultiInput("X");
+    auto *out = ctx.Output("Out");
+    int N = ins.size();
+    // Check the count before reading ins[0]; with no inputs that access
+    // would happen before the error could be reported.
+    PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1.");
+    auto in_dim = ins[0]->dims();
+    for (int i = 1; i < N; i++) {
+      auto dim = ins[i]->dims();
+      PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape");
+    }
+    out->Resize(in_dim);
+  }
+};
+
+class SumOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+ SumOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput("X", "the input tensors of sum operator.").AsDuplicable();  // SumOp reads "X" via MultiInput
+ AddOutput("Out", "the output tensor of sum operator.");  // resized by SumOp::InferShape
+ AddComment(R"DOC(
+ Sum the input tensors.
+ )DOC");
+ }
+};
+
+class SumGradOp : public framework::OperatorWithKernel {
+ public:
+ using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+ void InferShape(const framework::InferShapeContext &ctx) const override {
+ auto outputs = ctx.MultiOutput(framework::GradVarName("X"));
+ auto dims = ctx.Input(framework::GradVarName("Out"))->dims();
+ for (auto output : outputs) {
+ output->Resize(dims);  // every X gradient takes the dims of the Out gradient
+ }
+ }
+};
+
+} // namespace operators
+} // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(sum, ops::SumOp, ops::SumOpMaker, sum_grad, ops::SumGradOp);
+REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel);
+REGISTER_OP_CPU_KERNEL(sum_grad,
+ ops::SumGradKernel);
diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..a465cf3659ba7c51338abadfc62962fb6755a39d
--- /dev/null
+++ b/paddle/operators/sum_op.cu
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/sum_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel);
+REGISTER_OP_GPU_KERNEL(sum_grad,
+ ops::SumGradKernel);
diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..0b1e9ebaa38d455fb5e3ce8c1a39cbbcdad9a940
--- /dev/null
+++ b/paddle/operators/sum_op.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+template
+using EigenVector = framework::EigenVector;
+
+template
+class SumKernel : public framework::OpKernel {
+ public:
+ void Compute(const framework::ExecutionContext& context) const override {
+ auto ins = context.MultiInput("X");
+ auto* out = context.Output("Out");
+ out->mutable_data(context.GetPlace());
+ // Work on flattened views so the inputs are accumulated element by element.
+ auto place = context.GetEigenDevice();
+ auto result = EigenVector::Flatten(*out);
+ // Start from the first input, then add each remaining input in turn.
+ int N = ins.size();
+ auto in = EigenVector::Flatten(*(ins[0]));
+ result.device(place) = in;
+ for (int i = 1; i < N; i++) {
+ auto in = EigenVector::Flatten(*(ins[i]));
+ result.device(place) = result + in;
+ }
+ }
+};
+
+template
+class SumGradKernel : public framework::OpKernel {
+ public:
+ void Compute(const framework::ExecutionContext& context) const override {
+ auto* input = context.Input(framework::GradVarName("Out"));
+ auto outs = context.MultiOutput(framework::GradVarName("X"));
+ for (auto out : outs) {
+ out->mutable_data(context.GetPlace());
+ }
+ // Each X gradient receives an unmodified copy of the Out gradient.
+ auto place = context.GetEigenDevice();
+ auto in = EigenVector::Flatten(*input);
+ for (auto out : outs) {
+ auto result = EigenVector::Flatten(*out);
+ result.device(place) = in;
+ }
+ }
+};
+
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc
index f2aeef6c310df8535e67fa3906301a87f8ec4694..b8fbc9b52aecdb5c8d985b5de9bcd7cb85835b60 100644
--- a/paddle/operators/uniform_random_op.cc
+++ b/paddle/operators/uniform_random_op.cc
@@ -35,7 +35,7 @@ class CPUUniformRandomKernel : public framework::OpKernel {
std::uniform_real_distribution dist(
static_cast(context.Attr("min")),
static_cast(context.Attr("max")));
- int64_t size = framework::product(tensor->dims());
+ int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu
index c2c041b144b6ca1f019f972e1301b756ec1c9301..6614b53b3f990d10c82633f3c1f079acea0cd827 100644
--- a/paddle/operators/uniform_random_op.cu
+++ b/paddle/operators/uniform_random_op.cu
@@ -53,8 +53,8 @@ class GPUUniformRandomKernel : public framework::OpKernel {
T min = static_cast(context.Attr("min"));
T max = static_cast(context.Attr("max"));
thrust::counting_iterator index_sequence_begin(0);
- ssize_t N = framework::product(tensor->dims());
- thrust::transform(index_sequence_begin, index_sequence_begin + N,
+ int64_t size = tensor->numel();
+ thrust::transform(index_sequence_begin, index_sequence_begin + size,
thrust::device_ptr(data),
UniformGenerator(min, max, seed));
}
diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h
index 81448897e95eb05f4ce7de8683d98e05bade77cb..64fcbd93b6c4d5d9b36f2636c3ef4f7327f08d25 100644
--- a/paddle/platform/enforce.h
+++ b/paddle/platform/enforce.h
@@ -25,10 +25,6 @@ limitations under the License. */
#include "paddle/string/printf.h"
#include "paddle/string/to_string.h"
-#ifdef __GNUC__
-#include // for __cxa_demangle
-#endif
-
#ifndef PADDLE_ONLY_CPU
#include "paddle/platform/dynload/cublas.h"
@@ -46,19 +42,6 @@ limitations under the License. */
namespace paddle {
namespace platform {
-namespace {
-#ifdef __GNUC__
-inline std::string demangle(std::string name) {
- int status = -4; // some arbitrary value to eliminate the compiler warning
- std::unique_ptr res{
- abi::__cxa_demangle(name.c_str(), NULL, NULL, &status), std::free};
- return (status == 0) ? res.get() : name;
-}
-#else
-inline std::string demangle(std::string name) { return name; }
-#endif
-}
-
struct EnforceNotMet : public std::exception {
std::exception_ptr exp_;
std::string err_str_;
@@ -79,7 +62,7 @@ struct EnforceNotMet : public std::exception {
Dl_info info;
for (int i = 0; i < size; ++i) {
if (dladdr(call_stack[i], &info)) {
- auto demangled = demangle(info.dli_sname);
+ auto demangled = info.dli_sname;
auto addr_offset = static_cast(call_stack[i]) -
static_cast(info.dli_saddr);
sout << string::Sprintf("%-3d %*0p %s + %zd\n", i,
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index a673b7d1a87ed18c24440c5557812ab46071b62a..cc27b87035281b98e0197ee46a4db8f783faac48 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -17,6 +17,7 @@ limitations under the License. */
#include
#include "paddle/framework/backward.h"
+#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h"
@@ -49,14 +50,19 @@ USE_OP(minus);
USE_OP(cos_sim);
USE_CPU_ONLY_OP(gather);
USE_CPU_ONLY_OP(scatter);
+USE_CPU_ONLY_OP(concat);
USE_OP(top_k);
USE_CPU_ONLY_OP(auc);
USE_OP(squared_l2_distance);
+USE_OP(sum);
+USE_OP(reshape);
namespace paddle {
namespace framework {
using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using LoD = framework::LoD;
static size_t UniqueIntegerGenerator() {
static std::atomic generator;
@@ -116,6 +122,60 @@ PYBIND11_PLUGIN(core) {
return self.data()[offset];
});
+ py::class_(m, "LoDTensor", R"DOC(LoD(Level of Details) Tensor.
+
+The tensor and LoD info should be created before creating the LoDTensor, then
+call the set_tensor and set_lod functions to set them.
+
+)DOC")
+ .def("__init__",
+ [](LoDTensor &instance,
+ const std::vector> &lod,
+ Tensor *t) {
+#ifdef PADDLE_ONLY_CPU
+ new (&instance) LoDTensor(lod, t);
+#else
+ paddle::framework::LoD new_lod;
+ new_lod.reserve(lod.size());
+ std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
+ new (&instance) LoDTensor(new_lod, t);
+#endif
+ })
+ .def("set_tensor",
+ [](LoDTensor &self, Tensor *tensor) { self.set_tensor(tensor); })
+ .def("set_lod",
+ [](LoDTensor &self, const std::vector> &lod) {
+#ifdef PADDLE_ONLY_CPU
+ self.set_lod(lod);
+#else
+ paddle::framework::LoD new_lod;
+ new_lod.reserve(lod.size());
+ std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
+ self.set_lod(new_lod);
+#endif
+ })
+ .def("tensor",
+ [](LoDTensor &self) -> Tensor & { return self.tensor(); },
+ py::return_value_policy::reference)
+ .def("lod", [](LoDTensor &self) -> std::vector> {
+#ifdef PADDLE_ONLY_CPU
+ return self.lod();
+#else
+ auto lod = self.lod();
+ std::vector> new_lod;
+ new_lod.reserve(lod.size());
+ std::transform(lod.begin(), lod.end(), std::back_inserter(new_lod),
+ [](paddle::framework::Vector item) ->
+ std::vector {
+ std::vector v;
+ v.reserve(item.size());
+ std::copy(item.begin(), item.end(), std::back_inserter(v));
+ return v;
+ });
+ return new_lod;
+#endif
+ });
+
py::class_(m, "Variable", R"DOC(Variable Class.
All parameter, weight, gradient are variables in Paddle.
@@ -127,6 +187,11 @@ All parameter, weight, gradient are variables in Paddle.
.def("get_tensor",
[](Variable &self) -> Tensor * { return self.GetMutable(); },
py::return_value_policy::reference)
+ .def("get_lod_tensor",
+ [](Variable &self) -> LoDTensor * {
+ return self.GetMutable();
+ },
+ py::return_value_policy::reference)
.def("get_net",
[](Variable &self) -> operators::NetOp * {
return self.GetMutable();
@@ -217,7 +282,10 @@ All parameter, weight, gradient are variables in Paddle.
-> std::map> {
return op.Outputs();
})
+ .def("output_vars",
+ [](const OperatorBase &op) { return op.OutputVars(true); })
.def("inputs", [](const OperatorBase &op) { return op.Inputs(); })
+ .def("input_vars", [](const OperatorBase &op) { return op.InputVars(); })
.def("__str__", &OperatorBase::DebugString)
.def("no_intermediate_outputs",
[](const OperatorBase &op) { return op.OutputVars(false); })
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index e57f793ac42b19037e9ca43a5e4a3ac5447dc34c..2ac455d771bf78377ce4ee7d921393d3b3958e3c 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -30,6 +30,8 @@ Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=Release
-DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF}
+ -DWITH_MKLDNN=${WITH_MKLDNN:-ON}
+ -DWITH_MKLML=${WITH_MKLML:-ON}
-DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-ON}
-DWITH_SWIG_PY=ON
@@ -50,6 +52,8 @@ cmake .. \
-DCMAKE_BUILD_TYPE=Release \
-DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \
+ -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \
+ -DWITH_MKLML=${WITH_MKLML:-ON} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-ON} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 7e9112b43bf851575a3a798886d8b1b17e7c2017..4f68a8953446ffa0510df65c5b214d09b913cff8 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2034,6 +2034,7 @@ class ParameterReluLayer(LayerBase):
config_assert(input_layer.size % partial_sum == 0,
"a wrong setting for partial_sum")
self.set_layer_size(input_layer.size)
+ self.config.partial_sum = partial_sum
self.create_input_parameter(0, input_layer.size / partial_sum)
@@ -3748,8 +3749,8 @@ class SwitchOrderLayer(LayerBase):
def __init__(self, name, inputs, reshape, **xargs):
super(SwitchOrderLayer, self).__init__(
name, 'switch_order', 0, inputs=inputs, **xargs)
- self.config.reshape_conf.heightAxis.extend(reshape['height'])
- self.config.reshape_conf.widthAxis.extend(reshape['width'])
+ self.config.reshape_conf.height_axis.extend(reshape['height'])
+ self.config.reshape_conf.width_axis.extend(reshape['width'])
# Deprecated, use a new layer specific class instead
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index dc68c213da66ac680e6b14266cb5038a5ba73ec2..4b1d80d3db924bfa2ad0e081f785d8f5dd719fce 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1223,7 +1223,8 @@ def detection_output_layer(input_loc,
name=None):
"""
Apply the NMS to the output of network and compute the predict bounding
- box location.
+ box location. The output of this layer could be None if there is no valid
+ bounding box.
:param name: The Layer Name.
:type name: basestring
@@ -6460,6 +6461,7 @@ def switch_order_layer(input,
return LayerOutput(
name=name,
layer_type=LayerType.SWITCH_ORDER_LAYER,
+ activation=act,
parents=input,
size=l.config.size)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
index 64d227565f2b21ff43d4391c682ca90c0f47908e..94ad56cab063df9e6a11bb1c293727fb9dec810f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
@@ -14,6 +14,29 @@ layers {
input_layer_name: "input"
input_parameter_name: "___prelu_layer_0__.w0"
}
+ partial_sum: 1
+}
+layers {
+ name: "__prelu_layer_1__"
+ type: "prelu"
+ size: 300
+ active_type: ""
+ inputs {
+ input_layer_name: "input"
+ input_parameter_name: "___prelu_layer_1__.w0"
+ }
+ partial_sum: 1
+}
+layers {
+ name: "__prelu_layer_2__"
+ type: "prelu"
+ size: 300
+ active_type: ""
+ inputs {
+ input_layer_name: "input"
+ input_parameter_name: "___prelu_layer_2__.w0"
+ }
+ partial_sum: 5
}
parameters {
name: "___prelu_layer_0__.w0"
@@ -23,14 +46,32 @@ parameters {
initial_strategy: 0
initial_smart: true
}
+parameters {
+ name: "___prelu_layer_1__.w0"
+ size: 300
+ initial_mean: 0.0
+ initial_std: 0.057735026919
+ initial_strategy: 0
+ initial_smart: true
+}
+parameters {
+ name: "___prelu_layer_2__.w0"
+ size: 60
+ initial_mean: 0.0
+ initial_std: 0.129099444874
+ initial_strategy: 0
+ initial_smart: true
+}
input_layer_names: "input"
-output_layer_names: "__prelu_layer_0__"
+output_layer_names: "__prelu_layer_2__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__prelu_layer_0__"
+ layer_names: "__prelu_layer_1__"
+ layer_names: "__prelu_layer_2__"
input_layer_names: "input"
- output_layer_names: "__prelu_layer_0__"
+ output_layer_names: "__prelu_layer_2__"
is_recurrent_layer_group: false
}
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
index 2e3057f323db22ffc3911cce30ec2e8bb95e3dbe..aae90fab32db78a70c2169ed8fafb930433f4136 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
@@ -2,5 +2,7 @@ from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
prelu = prelu_layer(input=data)
+prelu = prelu_layer(input=data, partial_sum=1)
+prelu = prelu_layer(input=data, partial_sum=5)
outputs(prelu)
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index 7589cc9917f26375d595e200245d5ba099bc38d7..e66bf67d7949057486eb54c46f39128fad5dae55 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -53,10 +53,13 @@ class BeginPass(object):
class EndPass(WithMetric):
"""
Event On One Pass Training Complete.
+ To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')"
+ in your event_handler callback.
"""
- def __init__(self, pass_id, evaluator):
+ def __init__(self, pass_id, evaluator, gm):
self.pass_id = pass_id
+ self.gm = gm
WithMetric.__init__(self, evaluator)
@@ -73,10 +76,13 @@ class BeginIteration(object):
class EndIteration(WithMetric):
"""
Event On One Batch Training Complete.
+ To get the output of a specific layer, add "event.gm.getLayerOutputs('predict_layer')"
+ in your event_handler callback.
"""
- def __init__(self, pass_id, batch_id, cost, evaluator):
+ def __init__(self, pass_id, batch_id, cost, evaluator, gm):
self.pass_id = pass_id
self.batch_id = batch_id
self.cost = cost
+ self.gm = gm
WithMetric.__init__(self, evaluator)
diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py
index c1585bcffcceb75292853018179066c9f614261e..9e665adad2d3ad91d183c6815fbd7135ac4e8965 100644
--- a/python/paddle/v2/framework/op.py
+++ b/python/paddle/v2/framework/op.py
@@ -43,7 +43,6 @@ class OpDescCreationMethod(object):
if len(args) != 0:
raise ValueError("Only keyword arguments are supported.")
op_desc = framework_pb2.OpDesc()
-
for input_parameter in self.__op_proto__.inputs:
input_arguments = kwargs.get(input_parameter.name, [])
if is_str(input_arguments):
@@ -142,8 +141,8 @@ def create_op_creation_method(op_proto):
return OpInfo(
method=__impl__,
name=op_proto.type,
- inputs=[var.name for var in op_proto.inputs],
- outputs=[var.name for var in op_proto.outputs],
+ inputs=[(var.name, var.duplicable) for var in op_proto.inputs],
+ outputs=[(var.name, var.duplicable) for var in op_proto.outputs],
attrs=[attr.name for attr in op_proto.attrs])
@@ -180,9 +179,15 @@ class OperatorFactory(object):
return self.op_methods.get(t)
def get_op_input_names(self, type):
+ return map(lambda x: x[0], self.get_op_info(type).inputs)
+
+ def get_op_inputs(self, type):
return self.get_op_info(type).inputs
def get_op_output_names(self, type):
+ return map(lambda x: x[0], self.get_op_info(type).outputs)
+
+ def get_op_outputs(self, type):
return self.get_op_info(type).outputs
def get_op_attr_names(self, type):
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index ef910f939be0b9d3cb5e6d49e69a00daa191b1c6..6b22c0008210b492d00dee42e967ca14d0948b20 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -19,8 +19,6 @@ py_test(test_scatter_op SRCS test_scatter_op.py)
py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
py_test(test_top_k_op SRCS test_top_k_op.py)
-py_test(gradient_checker SRCS gradient_checker.py)
-
py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py)
py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py)
@@ -33,5 +31,8 @@ py_test(test_sgd_op SRCS test_sgd_op.py)
py_test(test_gradient_checker SRCS test_gradient_checker.py)
py_test(test_lookup_table SRCS test_lookup_table.py)
py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py)
+py_test(test_sum_op SRCS test_sum_op.py)
py_test(mnist SRCS mnist.py)
+py_test(test_concat_op SRCS test_concat_op.py)
py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py)
+py_test(test_reshape_op SRCS test_reshape_op.py)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
deleted file mode 100644
index fdb06b7988935ebbe53f72f4eba89d75ac2502d4..0000000000000000000000000000000000000000
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ /dev/null
@@ -1,311 +0,0 @@
-import unittest
-
-import numpy
-import itertools
-import paddle.v2.framework.core as core
-from paddle.v2.framework.op import Operator
-
-__all__ = ['get_numeric_gradient']
-
-
-def create_op(op_type):
- # TODO need to set attrs
- kwargs = dict()
- for in_name in Operator.get_op_input_names(op_type):
- kwargs[in_name] = in_name
- for out_name in Operator.get_op_output_names(op_type):
- kwargs[out_name] = out_name
-
- return Operator(op_type, **kwargs)
-
-
-def grad_var_name(var_name):
- return var_name + "@GRAD"
-
-
-def empty_var_name():
- return "@EMPTY@"
-
-
-def get_numeric_gradient(op,
- input_values,
- output_name,
- input_to_check,
- delta=0.005,
- local_scope=None,
- in_place=False):
- """
- Get Numeric Gradient for an operator's input.
-
- :param op: C++ operator instance, could be an network
- :param input_values: The input variables. Should be an dictionary, key is
- variable name. Value is numpy array.
- :param output_name: The final output variable name.
- :param input_to_check: The input variable need to get gradient.
- :param delta: The perturbation value for numeric gradient method. The
- smaller delta is, the more accurate result will get. But if that delta is
- too small, it could occur numerical stability problem.
- :param local_scope: The local scope used for get_numeric_gradient.
- :return: The gradient array in numpy format.
- """
- if local_scope is None:
- local_scope = core.Scope()
-
- # Create all input variable in local_scope
- for var_name in input_values:
- var = local_scope.new_var(var_name)
- tensor = var.get_tensor()
- tensor.set_dims(input_values[var_name].shape)
- tensor.alloc_float(core.CPUPlace())
- tensor.set(input_values[var_name], core.CPUPlace())
-
- # Create all output variable in local_scope
- opts = op.outputs()
- for key in opts:
- for output in opts[key]:
- if local_scope.find_var(output) is None:
- local_scope.new_var(output).get_tensor()
- op.infer_shape(local_scope)
-
- # allocate output memory
- for key in opts:
- for output in opts[key]:
- local_scope.find_var(output).get_tensor().alloc_float(core.CPUPlace(
- ))
-
- cpu_ctx = core.DeviceContext.create(core.CPUPlace())
-
- def get_output():
- op.run(local_scope, cpu_ctx)
- return numpy.array(local_scope.find_var(output_name).get_tensor()).sum()
-
- def product(dim):
- return reduce(lambda a, b: a * b, dim, 1)
-
- def restore_inputs():
- for var_name in input_values:
- tensor_ = local_scope.find_var(var_name).get_tensor()
- tensor_.set(numpy.copy(input_values[var_name]), core.CPUPlace())
-
- # get the input tensor that we want to get it's numeric gradient.
- tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
- tensor_size = product(tensor_to_check.get_dims())
- # prepare a numpy array to store the gradient.
- gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32')
-
- # we only compute gradient of one element each time.
- # we use a for loop to compute the gradient of every element.
- for i in xrange(tensor_size):
- if in_place:
- restore_inputs()
- # get one input element throw it's index i.
- origin = tensor_to_check.get_float_element(i)
-
- # add delta to it, run op and then get the sum of the result tensor.
- x_pos = origin + delta
- tensor_to_check.set_float_element(i, x_pos)
- y_pos = get_output()
-
- # plus delta to this element, run op and get the sum of the result tensor.
- if in_place:
- restore_inputs()
- x_neg = origin - delta
- tensor_to_check.set_float_element(i, x_neg)
- y_neg = get_output()
-
- # restore old value
- tensor_to_check.set_float_element(i, origin)
-
- # compute the gradient of this element and store it into a numpy array.
- gradient_flat[i] = (y_pos - y_neg) / delta / 2
-
- # reshape the gradient result to the shape of the source tensor.
- return gradient_flat.reshape(tensor_to_check.get_dims())
-
-
-class GradientChecker(unittest.TestCase):
- def __get_gradient(self, forward_op, backward_op, input_value, grad_names,
- place):
- """Get the input gradients after running forward and backward operators
- on the given places.
-
- :param forward_op: forward operator
- :type forward_op: Operator
- :param backward_op: backward operator
- :type backward_op: Operator
- :param input_value: input values.
- :type input_value: dict{string:numpy.array}
- :param grad_names: the names of returned input gradients.
- :type input_value: a list of string
- :param place: the device type.
- :type place: CPUPlace or GPUPlace
- :return: the input grdients of given grad_names.
- :rtype: a list of numpy.array
- """
- scope = core.Scope()
- ctx = core.DeviceContext.create(place)
-
- inputs = forward_op.inputs()
- in_names = [item for k in inputs for item in inputs[k]]
- outputs = forward_op.outputs()
- out_names = [item for k in outputs for item in outputs[k]]
-
- # create input var and set value
- for name, value in input_value.iteritems():
- if name not in in_names:
- raise ValueError(name + "does not exist in Op's inputs.")
- var = scope.new_var(name).get_tensor()
- var.set_dims(value.shape)
- var.set(value, place)
-
- # run forward op
- for out_name in out_names:
- scope.new_var(out_name)
- forward_op.infer_shape(scope)
- forward_op.run(scope, ctx)
-
- # set output var's shape
- # set output grad to ones
- for name in out_names:
- out_tensor = scope.find_var(name).get_tensor()
- grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
- grad_tensor.set_dims(out_tensor.shape())
- data = numpy.ones(out_tensor.shape(), dtype=numpy.float32)
- grad_tensor.set(data, place)
-
- # run backward op
- backward_outs = backward_op.outputs()
- backward_names = [
- item for key in backward_outs for item in backward_outs[key]
- ]
- for name in backward_names:
- scope.new_var(name)
-
- backward_op.infer_shape(scope)
- backward_op.run(scope, ctx)
-
- outs = [
- numpy.array(scope.find_var(name).get_tensor())
- for name in grad_names
- ]
- return outs
-
- def compare_grad(self, forward_op, input_value, no_grad_set=None):
- """ Compare the input gradients between CPU and GPU for the given forward
- operator.
-
- :param forward_op: forward operator
- :type forward_op: Operator
- :param input_value: input values.
- :type input_value: dict{string:numpy.array}
- :param no_grad_set: the set of variables names without gradients.
- :type no_grad_set: a set of string
- :raises: AssertionError, there is different gradient value.
- """
- if no_grad_set is None:
- no_grad_set = set()
- backward_op = core.Operator.backward(forward_op, no_grad_set)
- # return if not compile with GPU or not implementing GPU kernel
- if not (core.is_compile_gpu() and backward_op.support_gpu()):
- return
-
- outputs = backward_op.outputs()
- out_names = [item for k in outputs for item in outputs[k]]
- out_names = filter(lambda x: x != empty_var_name(), out_names)
- cpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
- out_names, core.CPUPlace())
- gpu_grads = self.__get_gradient(forward_op, backward_op, input_value,
- out_names, core.GPUPlace(0))
-
- for c_grad, g_grad, name in itertools.izip(cpu_grads, gpu_grads,
- out_names):
- self.assertTrue(
- numpy.allclose(
- c_grad, g_grad, atol=1e-4),
- "output name: " + name + " has diff")
-
- def __assert_is_close(self, numeric_grads, analytic_grads, names,
- max_relative_error, msg_prefix):
- """Use relative error for the comparison.
-
- :param numeric_grads: the numerical graidents.
- :type numeric_grads: a list of numpy.array
- :param analytic_grads: the analytical graidents.
- :type analytic_grads: a list of numpy.array
- :param name: the names of gradients, used to print for debug.
- :type names: a list of string
- :param msg_prefix: string info, used to print for debug.
- :type msf_prefix: string
- """
- for a, b, name in itertools.izip(numeric_grads, analytic_grads, names):
- abs_a = numpy.abs(a)
- # if abs_a is nearly zero, then use abs error for a, not relative
- # error.
- abs_a[abs_a < 1e-3] = 1
-
- diff_mat = numpy.abs(a - b) / abs_a
- max_diff = numpy.max(diff_mat)
-
- def err_msg():
- offset = numpy.argmax(diff_mat > max_relative_error)
- return "%s Variable %s max gradient diff %f over limit %f, the first " \
- "error element is %d" % (
- msg_prefix, name, max_diff, max_relative_error, offset)
-
- self.assertLessEqual(max_diff, max_relative_error, err_msg())
-
- def check_grad(self,
- forward_op,
- input_vars,
- inputs_to_check,
- output_name,
- no_grad_set=None,
- only_cpu=False,
- in_place=False,
- max_relative_error=0.005):
- """
- :param forward_op: used to create backward_op
- :param input_vars: numpy value of input variable. The following
- computation will use these variables.
- :param inputs_to_check: inputs var names that should check gradient.
- :param output_name: the output variable name of forward network.
- :param max_relative_error: The relative tolerance parameter.
- :param no_grad_set: used when create backward ops
- :param only_cpu: only compute and check gradient on cpu kernel.
- :return:
- """
- if no_grad_set is None:
- no_grad_set = set()
-
- no_tmp_out = forward_op.no_intermediate_outputs()
- if len(no_tmp_out) != 1:
- raise ValueError("non temp out_names should be 1")
-
- inputs = forward_op.inputs()
- in_names = [item for k in inputs for item in inputs[k]]
- for no_grad in no_grad_set:
- if no_grad not in in_names:
- raise ValueError("no_grad should be in in_names")
- if no_grad in inputs_to_check:
- raise ValueError("no_grad should not be in inputs_to_check")
-
- backward_op = core.Operator.backward(forward_op, no_grad_set)
-
- places = [core.CPUPlace()]
- if not only_cpu and core.is_compile_gpu() and backward_op.support_gpu():
- places.append(core.GPUPlace(0))
-
- # get numerical gradients
- numeric_grads = [
- get_numeric_gradient(
- forward_op, input_vars, output_name, name, in_place=in_place)
- for name in inputs_to_check
- ]
-
- check_names = [grad_var_name(name) for name in inputs_to_check]
- for place in places:
- analytic_grads = self.__get_gradient(forward_op, backward_op,
- input_vars, check_names, place)
- self.__assert_is_close(numeric_grads, analytic_grads, check_names,
- max_relative_error,
- "Gradient Check On %s" % str(place))
diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fec4c9109bf247abb2068177583acb47a8ebd97
--- /dev/null
+++ b/python/paddle/v2/framework/tests/op_test.py
@@ -0,0 +1,281 @@
+import unittest
+import numpy as np
+import itertools
+import paddle.v2.framework.core as core
+from paddle.v2.framework.op import Operator
+
+
+def grad_var_name(var_name):
+ return var_name + "@GRAD"
+
+
+def create_op(scope, op_type, inputs, outputs, attrs):
+ kwargs = dict()
+
+ for in_name, in_dup in Operator.get_op_inputs(op_type):
+ if in_name in inputs:
+ kwargs[in_name] = []
+ if in_dup:
+ sub_in = inputs[in_name]
+ for sub_in_name, _ in sub_in:
+ var = scope.new_var(sub_in_name)
+ kwargs[in_name].append(sub_in_name)
+ else:
+ var = scope.new_var(in_name)
+ kwargs[in_name].append(in_name)
+
+ for out_name, out_dup in Operator.get_op_outputs(op_type):
+ if out_name in outputs:
+ kwargs[out_name] = []
+ if out_dup:
+ sub_in = outputs[out_name]
+ for sub_in_name, _ in sub_in:
+ var = scope.new_var(sub_in_name)
+ kwargs[out_name].append(sub_in_name)
+ else:
+ var = scope.new_var(out_name)
+ kwargs[out_name].append(out_name)
+
+ for attr_name in Operator.get_op_attr_names(op_type):
+ if attr_name in attrs:
+ kwargs[attr_name] = attrs[attr_name]
+ return Operator(op_type, **kwargs)
+
+
+def set_input(scope, op, inputs, place):
+ for in_name, in_dup in Operator.get_op_inputs(op.type()):
+ if in_name in inputs:
+ if in_dup:
+ sub_in = inputs[in_name]
+ for sub_in_name, sub_in_array in sub_in:
+ var = scope.find_var(sub_in_name)
+ tensor = var.get_tensor()
+ tensor.set_dims(sub_in_array.shape)
+ tensor.set(sub_in_array, place)
+ else:
+ var = scope.find_var(in_name)
+ tensor = var.get_tensor()
+ arr = inputs[in_name]
+ tensor.set_dims(arr.shape)
+ tensor.set(arr, place)
+
+
+def set_output_grad(scope, op, outputs, place):
+ for out_name, out_dup in Operator.get_op_outputs(op.type()):
+ if out_name in outputs:
+ if out_dup:
+ sub_out = outputs[out_name]
+ for sub_out_name, _ in sub_out:
+ out_tensor = scope.find_var(sub_out_name).get_tensor()
+ grad_tensor = scope.new_var(grad_var_name(
+ sub_out_name)).get_tensor()
+ grad_tensor.set_dims(out_tensor.shape())
+ data = np.ones(out_tensor.shape(), dtype=np.float32)
+ grad_tensor.set(data, place)
+ else:
+ out_tensor = scope.find_var(out_name).get_tensor()
+ grad_tensor = scope.new_var(grad_var_name(out_name)).get_tensor(
+ )
+ grad_tensor.set_dims(out_tensor.shape())
+ data = np.ones(out_tensor.shape(), dtype=np.float32)
+ grad_tensor.set(data, place)
+
+
+def get_numeric_gradient(scope,
+                         op,
+                         inputs,
+                         input_to_check,
+                         output_name,
+                         delta=0.005,
+                         in_place=False):
+    """Estimate d(sum(output_name)) / d(input_to_check) numerically on CPU,
+    using central differences of +/- delta on one element at a time; with
+    in_place=True every input is re-fed before each run of the op."""
+    set_input(scope, op, inputs, core.CPUPlace())
+    op.infer_shape(scope)
+
+    def product(dim):
+        return reduce(lambda a, b: a * b, dim, 1)
+
+    ctx = core.DeviceContext.create(core.CPUPlace())
+
+    def get_output():
+        op.run(scope, ctx)
+        return np.array(scope.find_var(output_name).get_tensor()).sum()
+
+    tensor_to_check = scope.find_var(input_to_check).get_tensor()
+    tensor_size = product(tensor_to_check.get_dims())
+    gradient_flat = np.zeros(shape=(tensor_size, ), dtype='float32')
+    # we only compute gradient of one element each time.
+    # we use a for loop to compute the gradient of every element.
+    for i in xrange(tensor_size):
+        if in_place:
+            set_input(scope, op, inputs, core.CPUPlace())
+
+        # get one input element through its index i.
+        origin = tensor_to_check.get_float_element(i)
+        # add delta to it, run op and then get the sum of the result tensor.
+        x_pos = origin + delta
+        tensor_to_check.set_float_element(i, x_pos)
+        y_pos = get_output()
+
+        if in_place:
+            set_input(scope, op, inputs, core.CPUPlace())
+
+        x_neg = origin - delta
+        tensor_to_check.set_float_element(i, x_neg)
+        y_neg = get_output()
+
+        tensor_to_check.set_float_element(i, origin)
+        gradient_flat[i] = (y_pos - y_neg) / delta / 2
+
+    return gradient_flat.reshape(tensor_to_check.get_dims())
+
+
+def get_backward_op(scope, op, no_grad_set):
+ backward_op = core.Operator.backward(op, no_grad_set)
+ for input in backward_op.input_vars():
+ var = scope.new_var(input)
+ var.get_tensor()
+ for output in backward_op.output_vars():
+ var = scope.new_var(output)
+ var.get_tensor()
+ return backward_op
+
+
+def get_gradient(scope, op, inputs, outputs, grad_name, place,
+ no_grad_set=None):
+ ctx = core.DeviceContext.create(place)
+
+ set_input(scope, op, inputs, place)
+
+ op.infer_shape(scope)
+ op.run(scope, ctx)
+
+ if no_grad_set is None:
+ no_grad_set = set()
+
+ backward_op = get_backward_op(scope, op, no_grad_set)
+ set_output_grad(scope, op, outputs, place)
+
+ backward_op.infer_shape(scope)
+ backward_op.run(scope, ctx)
+
+ out = np.array(scope.find_var(grad_name).get_tensor())
+ return out
+
+
+class OpTest(unittest.TestCase):
+    def check_output_with_place(self, place):
+        """Run self.op_type on `place` and compare each output to self.outputs."""
+        self.scope = core.Scope()
+        op_inputs = self.inputs if hasattr(self, "inputs") else dict()
+        op_attrs = self.attrs if hasattr(self, "attrs") else dict()
+        self.op = create_op(self.scope, self.op_type, op_inputs, self.outputs,
+                            op_attrs)
+        if isinstance(place, core.GPUPlace) and not self.op.support_gpu():
+            return
+        # feed op_inputs, not self.inputs: the test may define no inputs at all
+        set_input(self.scope, self.op, op_inputs, place)
+        self.op.infer_shape(self.scope)
+        ctx = core.DeviceContext.create(place)
+        self.op.run(self.scope, ctx)
+
+        for out_name, out_dup in Operator.get_op_outputs(self.op.type()):
+            if out_dup:
+                sub_out = self.outputs[out_name]
+                # duplicable outputs are (name, array) pairs, as in create_op
+                for sub_out_name, expect in sub_out:
+                    actual = np.array(
+                        self.scope.find_var(sub_out_name).get_tensor())
+                    self.assertTrue(
+                        np.allclose(actual, expect, atol=1e-05),
+                        "output name: " + sub_out_name + " has diff")
+            else:
+                actual = np.array(self.scope.find_var(out_name).get_tensor())
+                expect = self.outputs[out_name]
+                self.assertTrue(
+                    np.allclose(actual, expect, atol=1e-05),
+                    "output name: " + out_name + " has diff")
+
+ def check_output(self):
+ places = [core.CPUPlace()]
+ if core.is_compile_gpu():
+ places.append(core.GPUPlace(0))
+ for place in places:
+ self.check_output_with_place(place)
+
+ def __assert_is_close(self, numeric_grads, analytic_grads, names,
+ max_relative_error, msg_prefix):
+
+ for a, b, name in itertools.izip(numeric_grads, analytic_grads, names):
+ abs_a = np.abs(a)
+ abs_a[abs_a < 1e-3] = 1
+
+ diff_mat = np.abs(a - b) / abs_a
+ max_diff = np.max(diff_mat)
+
+ def err_msg():
+ offset = np.argmax(diff_mat > max_relative_error)
+ return "%s Variable %s max gradient diff %f over limit %f, the first " \
+ "error element is %d" % (
+ msg_prefix, name, max_diff, max_relative_error, offset)
+
+ self.assertLessEqual(max_diff, max_relative_error, err_msg())
+
+ def check_grad(self,
+ inputs_to_check,
+ output_name,
+ no_grad_set=None,
+ in_place=False,
+ max_relative_error=0.005):
+ self.scope = core.Scope()
+ op_inputs = self.inputs if hasattr(self, "inputs") else dict()
+ op_attrs = self.attrs if hasattr(self, "attrs") else dict()
+ self.op = create_op(self.scope, self.op_type, op_inputs, self.outputs,
+ op_attrs)
+ if no_grad_set is None:
+ no_grad_set = set()
+
+ numeric_grads = [
+ get_numeric_gradient(
+ self.scope,
+ self.op,
+ self.inputs,
+ input_to_check,
+ output_name,
+ in_place=in_place) for input_to_check in inputs_to_check
+ ]
+ grad_names = [
+ grad_var_name(input_to_check) for input_to_check in inputs_to_check
+ ]
+
+ cpu_place = core.CPUPlace()
+ cpu_analytic_grads = [
+ get_gradient(self.scope, self.op, self.inputs, self.outputs,
+ grad_name, cpu_place, no_grad_set)
+ for grad_name in grad_names
+ ]
+
+ self.__assert_is_close(numeric_grads, cpu_analytic_grads, grad_names,
+ max_relative_error,
+ "Gradient Check On %s" % str(cpu_place))
+
+ if core.is_compile_gpu() and self.op.support_gpu():
+ gpu_place = core.GPUPlace(0)
+ gpu_analytic_grads = [
+ get_gradient(self.scope, self.op, self.inputs, self.outputs,
+ grad_name, gpu_place, no_grad_set)
+ for grad_name in grad_names
+ ]
+
+ self.__assert_is_close(numeric_grads, gpu_analytic_grads,
+ grad_names, max_relative_error,
+ "Gradient Check On %s" % str(gpu_place))
+
+ for c_grad, g_grad, name in itertools.izip(
+ cpu_analytic_grads, gpu_analytic_grads, grad_names):
+ self.assertTrue(
+ np.allclose(
+ c_grad, g_grad, atol=1e-4),
+ "output name: " + name + " has diff")
diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py
deleted file mode 100644
index 370f27eaf658dadbf7e82262c118140a10d15c41..0000000000000000000000000000000000000000
--- a/python/paddle/v2/framework/tests/op_test_util.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import numpy
-import paddle.v2.framework.core as core
-from paddle.v2.framework.op import Operator
-
-
-class OpTestMeta(type):
- """
- Operator Test ClassMeta.
-
- It injects `test_all` method into user's OperatorTest class, to make Python
- unittest module run that method.
-
- The `test_all` read what value is stored in `self`. It use self's values to
- create and run a operator, and check whether that op is OK or not.
-
- See `test_add_two_op` for example usage.
- """
-
- def __new__(cls, name, bases, attrs):
- obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)
-
- def test_all(self):
- scope = core.Scope()
- kwargs = dict()
- places = [core.CPUPlace()]
- if core.is_compile_gpu():
- places.append(core.GPUPlace(0))
-
- for place in places:
- for in_name in Operator.get_op_input_names(self.type):
- if hasattr(self, "inputs") and in_name in self.inputs:
- kwargs[in_name] = in_name
- var = scope.new_var(in_name).get_tensor()
- arr = self.inputs[in_name]
- var.set_dims(arr.shape)
- var.set(arr, place)
- else:
- kwargs[in_name] = "@EMPTY@"
-
- for out_name in Operator.get_op_output_names(self.type):
- if not hasattr(self, "outputs"):
- raise ValueError(
- "The test op must set self.outputs dict.")
- if out_name not in self.outputs:
- raise ValueError("The %s is not in self.outputs dict." %
- (out_name))
- kwargs[out_name] = out_name
- scope.new_var(out_name).get_tensor()
-
- for attr_name in Operator.get_op_attr_names(self.type):
- if hasattr(self, "attrs") and attr_name in self.attrs:
- kwargs[attr_name] = self.attrs[attr_name]
-
- op = Operator(self.type, **kwargs)
- if isinstance(place, core.GPUPlace) and not op.support_gpu():
- return
-
- op.infer_shape(scope)
-
- ctx = core.DeviceContext.create(place)
- op.run(scope, ctx)
-
- for out_name in Operator.get_op_output_names(self.type):
- actual = numpy.array(scope.find_var(out_name).get_tensor())
- expect = self.outputs[out_name]
- self.assertTrue(
- numpy.allclose(
- actual, expect, atol=1e-05),
- "output name: " + out_name + " has diff")
-
- obj.test_all = test_all
- return obj
diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py
index a578e74eca9a3c4327a4881f853028e2347c98ad..3ca34d9b9fc2b7b54cc25ca0e0d1a08a71e37c52 100644
--- a/python/paddle/v2/framework/tests/test_add_two_op.py
+++ b/python/paddle/v2/framework/tests/test_add_two_op.py
@@ -1,23 +1,20 @@
import unittest
+import numpy as np
+from op_test import OpTest
-import numpy
-import paddle.v2.framework.core as core
-from paddle.v2.framework.op import Operator
-
-from op_test_util import OpTestMeta
-
-
-class TestAddOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
+class TestAddOp(OpTest):
def setUp(self):
- self.type = "add"
+ self.op_type = "add"
self.inputs = {
- 'X': numpy.random.random((102, 105)).astype("float32"),
- 'Y': numpy.random.random((102, 105)).astype("float32")
+ 'X': np.random.random((102, 105)).astype("float32"),
+ 'Y': np.random.random((102, 105)).astype("float32")
}
self.outputs = {'Out': self.inputs['X'] + self.inputs['Y']}
+ def test_check_output(self):
+ self.check_output()
+
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_concat_op.py b/python/paddle/v2/framework/tests/test_concat_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..656563f96e52df30951ec0ec7042ad9c530e90b2
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_concat_op.py
@@ -0,0 +1,22 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestConcatOp(OpTest):
+ def setUp(self):
+ self.op_type = "concat"
+ x0 = np.random.random((2, 3, 2, 5)).astype('float32')
+ x1 = np.random.random((2, 3, 3, 5)).astype('float32')
+ x2 = np.random.random((2, 3, 4, 5)).astype('float32')
+ axis = 2
+ self.inputs = {'X': [('x0', x0), ('x1', x1), ('x2', x2)]}
+ self.attrs = {'axis': axis}
+ self.outputs = {'Out': np.concatenate((x0, x1, x2), axis=axis)}
+
+ def test_check_output(self):
+ self.check_output()
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_cos_sim_op.py b/python/paddle/v2/framework/tests/test_cos_sim_op.py
index 32013a7999a4be42e5974b9ac751d5d911730994..797cbd8cc5cf7f73d58ca713d02667731d5c8a0e 100644
--- a/python/paddle/v2/framework/tests/test_cos_sim_op.py
+++ b/python/paddle/v2/framework/tests/test_cos_sim_op.py
@@ -1,17 +1,14 @@
import unittest
import numpy as np
-from gradient_checker import GradientChecker, create_op
-from op_test_util import OpTestMeta
+from op_test import OpTest
-class TestCosSimOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestCosSimOp(OpTest):
def setUp(self):
- self.type = "cos_sim"
+ self.op_type = "cos_sim"
self.inputs = {
- 'X': np.random.random((32, 64)).astype("float32"),
- 'Y': np.random.random((32, 64)).astype("float32")
+ 'X': np.random.random((10, 5)).astype("float32"),
+ 'Y': np.random.random((10, 5)).astype("float32")
}
expect_x_norm = np.linalg.norm(self.inputs['X'], axis=1)
expect_y_norm = np.linalg.norm(self.inputs['Y'], axis=1)
@@ -23,38 +20,20 @@ class TestCosSimOp(unittest.TestCase):
'Out': np.expand_dims(expect_out, 1)
}
+ def test_check_output(self):
+ self.check_output()
-class TestCosSimGradOp(GradientChecker):
- def setUp(self):
- self.op = create_op("cos_sim")
- self.inputs = {
- 'X': np.random.random((10, 5)).astype("float32"),
- 'Y': np.random.random((10, 5)).astype("float32")
- }
-
- def test_cpu_gpu_compare(self):
- self.compare_grad(self.op, self.inputs)
-
- def test_normal(self):
- self.check_grad(
- self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.05)
+ def test_check_grad_normal(self):
+ self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.05)
- def test_ignore_x(self):
+ def test_check_grad_ingore_x(self):
self.check_grad(
- self.op,
- self.inputs, ["Y"],
- "Out",
- max_relative_error=0.05,
- no_grad_set={"X"})
+ ['Y'], 'Out', max_relative_error=0.05, no_grad_set=set('X'))
- def test_ignore_y(self):
+ def test_check_grad_ignore_y(self):
self.check_grad(
- self.op,
- self.inputs, ["X"],
- "Out",
- max_relative_error=0.05,
- no_grad_set={"Y"})
+ ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y'))
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
index d4277f2a42ce2e66e37405ccd3b2ee444d403d1a..c2fc102a8b8de82da5c3fc5fee273790325908f8 100644
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -1,36 +1,27 @@
import unittest
import numpy
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
+from op_test import OpTest
-class TestCrossEntropy(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestCrossEntropy(OpTest):
def setUp(self):
- self.type = "onehot_cross_entropy"
+ self.op_type = "onehot_cross_entropy"
batch_size = 30
class_num = 10
- X = numpy.random.random((batch_size, class_num)).astype("float32")
- label = 5 * numpy.ones(batch_size).astype("int32")
+ X = numpy.random.uniform(0.1, 1.0,
+ [batch_size, class_num]).astype("float32")
+ label = (class_num / 2) * numpy.ones(batch_size).astype("int32")
self.inputs = {'X': X, 'label': label}
Y = []
for i in range(0, batch_size):
Y.append(-numpy.log(X[i][label[i]]))
self.outputs = {'Y': numpy.array(Y).astype("float32")}
+ def test_check_output(self):
+ self.check_output()
-class CrossEntropyGradOpTest(GradientChecker):
def test_check_grad(self):
- op = create_op("onehot_cross_entropy")
- batch_size = 30
- class_num = 10
- inputs = {
- "X": numpy.random.uniform(
- 0.1, 1.0, [batch_size, class_num]).astype("float32"),
- "label": (class_num / 2) * numpy.ones(batch_size).astype("int32")
- }
- self.check_grad(op, inputs, set("X"), "Y")
+ self.check_grad(['X'], 'Y')
if __name__ == "__main__":
diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py
index e5c862605fb11a5ea1426cf8f9054589dc377ff1..2473daaba24438819f3f55ccc40fe1c64ee59960 100644
--- a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py
+++ b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py
@@ -1,16 +1,17 @@
import unittest
-from op_test_util import OpTestMeta
-import numpy
+import numpy as np
+from op_test import OpTest
-class TestFillZerosLikeOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestFillZerosLikeOp(OpTest):
def setUp(self):
- self.type = "fill_zeros_like"
- self.inputs = {'Src': numpy.random.random((219, 232)).astype("float32")}
- self.outputs = {'Dst': numpy.zeros_like(self.inputs['Src'])}
+ self.op_type = "fill_zeros_like"
+ self.inputs = {'Src': np.random.random((219, 232)).astype("float32")}
+ self.outputs = {'Dst': np.zeros_like(self.inputs["Src"])}
+
+ def test_check_output(self):
+ self.check_output()
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py
index e3de3fd0a1dddb3edb0de5987bd71d8a176d97ef..b0ab429ef1b53640dfb696f6ea2f7b745564b874 100644
--- a/python/paddle/v2/framework/tests/test_gather_op.py
+++ b/python/paddle/v2/framework/tests/test_gather_op.py
@@ -1,30 +1,20 @@
import unittest
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
-import numpy
-import paddle.v2.framework.core as core
-from paddle.v2.framework.op import Operator
+import numpy as np
+from op_test import OpTest
-class TestGatherOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestGatherOp(OpTest):
def setUp(self):
- self.type = "gather"
- xnp = numpy.random.random((10, 20)).astype("float32")
- self.inputs = {
- 'X': xnp,
- 'Index': numpy.array([1, 3, 5]).astype("int32")
- }
- self.outputs = {'Out': self.inputs['X'][self.inputs['Index']]}
+ self.op_type = "gather"
+ xnp = np.random.random((10, 20)).astype("float32")
+ self.inputs = {'X': xnp, 'Index': np.array([1, 3, 5]).astype("int32")}
+ self.outputs = {'Out': self.inputs["X"][self.inputs["Index"]]}
+ def test_check_output(self):
+ self.check_output()
-class TestGatherGradOp(GradientChecker):
- def test_gather_grad(self):
- op = create_op("gather")
- xnp = numpy.random.random((10, 20)).astype("float32")
- inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")}
- self.check_grad(op, inputs, set("X"), "Out")
+ def test_check_grad(self):
+ self.check_grad(['X'], 'Out')
if __name__ == "__main__":
diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py
index f95ed70b58d611b3233a21d3f2a34c864ae4d1b3..1f9e4db783c9907a22db72c8a6ff06c7ca0735da 100644
--- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py
+++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py
@@ -14,11 +14,11 @@ class GaussianRandomTest(unittest.TestCase):
def gaussian_random_test(self, place):
scope = core.Scope()
- scope.new_var("Out").get_tensor()
+ scope.new_var('Out').get_tensor()
op = Operator(
"gaussian_random",
- Out="Out",
+ Out='Out',
dims=[1000, 784],
mean=.0,
std=1.,
@@ -27,10 +27,10 @@ class GaussianRandomTest(unittest.TestCase):
op.infer_shape(scope)
context = core.DeviceContext.create(place)
op.run(scope, context)
- tensor = numpy.array(scope.find_var("Out").get_tensor())
+ tensor = numpy.array(scope.find_var('Out').get_tensor())
self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_gradient_checker.py b/python/paddle/v2/framework/tests/test_gradient_checker.py
index e8a7f848dffa0529c8cb0d6599286ce0e228d180..abeb01cb34158a43b5dcce5e39efc0e21e9fe638 100644
--- a/python/paddle/v2/framework/tests/test_gradient_checker.py
+++ b/python/paddle/v2/framework/tests/test_gradient_checker.py
@@ -1,42 +1,44 @@
import unittest
-import numpy
-from paddle.v2.framework.op import Operator
-from gradient_checker import GradientChecker
-from gradient_checker import get_numeric_gradient
+import numpy as np
+import paddle.v2.framework.core as core
+from op_test import get_numeric_gradient
+from op_test import create_op
class GetNumericGradientTest(unittest.TestCase):
def test_add_op(self):
- add_op = Operator("add", X="X", Y="Y", Out="Z")
- x = numpy.random.random((10, 1)).astype("float32")
- y = numpy.random.random((10, 1)).astype("float32")
-
- arr = get_numeric_gradient(add_op, {"X": x, "Y": y}, "Z", "X")
+ x = np.random.random((10, 1)).astype("float32")
+ y = np.random.random((10, 1)).astype("float32")
+ z = x + y
+ scope = core.Scope()
+ add_op = create_op(scope, "add", {'X': x, 'Y': y}, {'Out': z}, dict())
+ arr = get_numeric_gradient(scope, add_op, {'X': x, 'Y': y}, 'X', 'Out')
self.assertAlmostEqual(arr.mean(), 1.0, delta=1e-4)
def test_softmax_op(self):
def stable_softmax(x):
"""Compute the softmax of vector x in a numerically stable way."""
- shiftx = x - numpy.max(x)
- exps = numpy.exp(shiftx)
- return exps / numpy.sum(exps)
+ shiftx = x - np.max(x)
+ exps = np.exp(shiftx)
+ return exps / np.sum(exps)
def label_softmax_grad(Y, dY):
dX = Y * 0.0
for i in range(Y.shape[0]):
- d = numpy.dot(Y[i, :], dY[i, :])
+ d = np.dot(Y[i, :], dY[i, :])
dX[i, :] = Y[i, :] * (dY[i, :] - d)
return dX
- softmax_op = Operator("softmax", X="X", Y="Y")
-
- X = numpy.random.random((2, 2)).astype("float32")
- Y = numpy.apply_along_axis(stable_softmax, 1, X)
- dY = numpy.ones(Y.shape)
+ X = np.random.random((2, 2)).astype("float32")
+ Y = np.apply_along_axis(stable_softmax, 1, X)
+ dY = np.ones(Y.shape)
dX = label_softmax_grad(Y, dY)
- arr = get_numeric_gradient(softmax_op, {"X": X}, "Y", "X")
- numpy.testing.assert_almost_equal(arr, dX, decimal=1e-2)
+ scope = core.Scope()
+ softmax_op = create_op(scope, "softmax", {"X": X}, {"Y": Y}, dict())
+
+ arr = get_numeric_gradient(scope, softmax_op, {"X": X}, "X", "Y")
+ np.testing.assert_almost_equal(arr, dX, decimal=1e-2)
if __name__ == "__main__":
diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py
index 4b7ce92c0f0492a73c158378299933a0b329948b..b259bb67e832adcb31b0ab4e992738be2b85f884 100644
--- a/python/paddle/v2/framework/tests/test_lookup_table.py
+++ b/python/paddle/v2/framework/tests/test_lookup_table.py
@@ -1,31 +1,22 @@
import unittest
import numpy as np
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
+from op_test import OpTest
-class TestLookupTableOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestLookupTableOp(OpTest):
def setUp(self):
- self.type = 'lookup_table'
- table = np.random.random((17, 31)).astype('float32')
- ids = np.random.randint(0, 17, 4).astype('int32')
+ self.op_type = "lookup_table"
+ table = np.random.random((17, 31)).astype("float32")
+ ids = np.random.randint(0, 17, 4).astype("int32")
self.inputs = {'W': table, 'Ids': ids}
self.outputs = {'Out': table[ids]}
+ def test_check_output(self):
+ self.check_output()
-class TestLookupTableGradOp(GradientChecker):
- def test_grad(self):
- op = create_op('lookup_table')
- table = np.random.random((17, 31)).astype('float32')
- ids = np.random.randint(0, 17, 4).astype('int32')
- inputs = {'W': table, 'Ids': ids}
- # comapre gradients
- self.compare_grad(op, inputs, set(['Ids']))
- # check gradients
- self.check_grad(op, inputs, set('W'), 'Out')
+    def test_check_grad(self):
+        self.check_grad(['W'], 'Out', no_grad_set=set(['Ids']))  # set('Ids') is {'I', 'd', 's'}
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_mean_op.py b/python/paddle/v2/framework/tests/test_mean_op.py
index f32b3160d651a290823223c46c45bb3b6950a505..7823abd8f813aad6462c98a9ace9a13dc286a157 100644
--- a/python/paddle/v2/framework/tests/test_mean_op.py
+++ b/python/paddle/v2/framework/tests/test_mean_op.py
@@ -1,24 +1,20 @@
import unittest
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
import numpy as np
+from op_test import OpTest
-class TestMeanOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestMeanOp(OpTest):
def setUp(self):
- self.type = "mean"
- self.inputs = {'X': np.random.random((32, 784)).astype("float32")}
- self.outputs = {'Out': np.mean(self.inputs['X'])}
+ self.op_type = "mean"
+ self.inputs = {'X': np.random.random((10, 10)).astype("float32")}
+ self.outputs = {'Out': np.mean(self.inputs["X"])}
+ def test_check_output(self):
+ self.check_output()
-class MeanGradOpTest(GradientChecker):
- def test_normal(self):
- op = create_op("mean")
- inputs = {"X": np.random.random((10, 10)).astype("float32")}
- self.check_grad(op, inputs, set("X"), "Out")
+ def test_checkout_grad(self):
+ self.check_grad(['X'], 'Out')
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_minus_op.py b/python/paddle/v2/framework/tests/test_minus_op.py
index 5abdd4a69bf3faa2f3341f338e195815389a7cef..dea797a1fea34265d0a32e097f413f421abf2521 100644
--- a/python/paddle/v2/framework/tests/test_minus_op.py
+++ b/python/paddle/v2/framework/tests/test_minus_op.py
@@ -1,30 +1,23 @@
import unittest
import numpy as np
-from gradient_checker import GradientChecker, create_op
-from op_test_util import OpTestMeta
+from op_test import OpTest
-class MinusOpTest(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class MinusOpTest(OpTest):
def setUp(self):
- self.type = "minus"
+ self.op_type = "minus"
self.inputs = {
'X': np.random.random((32, 84)).astype("float32"),
'Y': np.random.random((32, 84)).astype("float32")
}
self.outputs = {'Out': (self.inputs['X'] - self.inputs['Y'])}
+ def test_check_output(self):
+ self.check_output()
-class MinusGradTest(GradientChecker):
- def test_left(self):
- op = create_op("minus")
- inputs = {
- "X": np.random.random((10, 10)).astype("float32"),
- "Y": np.random.random((10, 10)).astype("float32")
- }
- self.check_grad(op, inputs, ["X", 'Y'], "Out")
+ def test_check_grad(self):
+ self.check_grad(['X', 'Y'], 'Out')
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_mul_op.py b/python/paddle/v2/framework/tests/test_mul_op.py
index 8c827e242e866b267e0fc4b73c31bafa0ccc7c48..b3d95a56b88e510734da54f36ff21ccd7e1baabb 100644
--- a/python/paddle/v2/framework/tests/test_mul_op.py
+++ b/python/paddle/v2/framework/tests/test_mul_op.py
@@ -1,27 +1,35 @@
import unittest
import numpy as np
-from gradient_checker import GradientChecker, create_op
-from op_test_util import OpTestMeta
-from paddle.v2.framework.op import Operator
+from op_test import OpTest
-class TestMulOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestMulOp(OpTest):
def setUp(self):
- self.type = "mul"
+ self.op_type = "mul"
self.inputs = {
'X': np.random.random((32, 84)).astype("float32"),
'Y': np.random.random((84, 100)).astype("float32")
}
self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}
+ def test_check_output(self):
+ self.check_output()
+
+ def test_check_grad_normal(self):
+ self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)
-class TestMulOp2(unittest.TestCase):
- __metaclass__ = OpTestMeta
+ def test_check_grad_ingore_x(self):
+ self.check_grad(
+ ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
+ def test_check_grad_ingore_y(self):
+ self.check_grad(
+ ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
+
+
+class TestMulOp2(OpTest):
def setUp(self):
- self.type = "mul"
+ self.op_type = "mul"
self.inputs = {
'X': np.random.random((15, 4, 12, 10)).astype("float32"),
'Y': np.random.random((4, 30, 8, 2, 9)).astype("float32")
@@ -32,72 +40,20 @@ class TestMulOp2(unittest.TestCase):
self.inputs['Y'].reshape(4 * 30, 8 * 2 * 9))
}
+ def test_check_output(self):
+ self.check_output()
-class TestMulGradOp(GradientChecker):
- def setUp(self):
- self.op = create_op("mul")
- self.inputs = {
- 'X': np.random.random((32, 84)).astype("float32"),
- 'Y': np.random.random((84, 100)).astype("float32")
- }
-
- def test_cpu_gpu_compare(self):
- self.compare_grad(self.op, self.inputs)
-
- def test_normal(self):
- # mul op will enlarge the relative error
- self.check_grad(
- self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)
-
- def test_ignore_x(self):
- self.check_grad(
- self.op,
- self.inputs, ["Y"],
- "Out",
- max_relative_error=0.5,
- no_grad_set={"X"})
-
- def test_ignore_y(self):
- self.check_grad(
- self.op,
- self.inputs, ["X"],
- "Out",
- max_relative_error=0.5,
- no_grad_set={"Y"})
-
-
-class TestMulGradTest2(GradientChecker):
- def setUp(self):
- self.op = Operator(
- "mul", X="X", Y="Y", Out="Out", x_num_col_dims=2, y_num_col_dims=2)
- self.inputs = {
- "X": np.random.random((15, 4, 12, 10)).astype("float32"),
- "Y": np.random.random((4, 30, 8, 2, 9)).astype("float32")
- }
-
- def test_cpu_gpu_compare(self):
- self.compare_grad(self.op, self.inputs)
-
- def test_normal(self):
- self.check_grad(
- self.op, self.inputs, ["X", "Y"], "Out", max_relative_error=0.5)
+ def test_check_grad_normal(self):
+ self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)
- def test_ignore_x(self):
+ def test_check_grad_ingore_x(self):
self.check_grad(
- self.op,
- self.inputs, ["Y"],
- "Out",
- max_relative_error=0.5,
- no_grad_set={"X"})
+ ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X'))
- def test_ignore_y(self):
+ def test_check_grad_ignore_y(self):
self.check_grad(
- self.op,
- self.inputs, ["X"],
- "Out",
- max_relative_error=0.5,
- no_grad_set={"Y"})
+ ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py
index e4b7cd480cb36249bb64ba3cab9a4b220d812346..50cfb855f2b01d8fd32342855d46716da7e07856 100644
--- a/python/paddle/v2/framework/tests/test_net.py
+++ b/python/paddle/v2/framework/tests/test_net.py
@@ -35,5 +35,5 @@ Op(plain_net), inputs:{all[W, X, Y]}, outputs:{all[Out, fc.out, pre_activation]}
self.assertEqual(expected, "\n" + str(net))
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_reshape_op.py b/python/paddle/v2/framework/tests/test_reshape_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..16bb6bb2af67f7d32a2fafc1cb37412084ec0829
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_reshape_op.py
@@ -0,0 +1,21 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestReshapeOp(OpTest):
+ def setUp(self):
+ self.op_type = "reshape"
+ self.inputs = {'X': np.random.random((10, 20)).astype("float32")}
+ self.attrs = {'shape': [10 * 20]}
+ self.outputs = {'Out': self.inputs['X'].reshape(self.attrs['shape'])}
+
+ def test_check_output(self):
+ self.check_output()
+
+ def test_check_grad(self):
+ self.check_grad(["X"], "Out")
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_rowwise_add_op.py b/python/paddle/v2/framework/tests/test_rowwise_add_op.py
index 8378c1cd21c21fd31da9b82d2cdaaff332f291d7..336645bd993ff743cbe20bb5cae5cd278db57ce7 100644
--- a/python/paddle/v2/framework/tests/test_rowwise_add_op.py
+++ b/python/paddle/v2/framework/tests/test_rowwise_add_op.py
@@ -1,68 +1,51 @@
import unittest
import numpy as np
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
+from op_test import OpTest
-class TestRowwiseAddOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
- def setUp(self):
- self.type = "rowwise_add"
- self.inputs = {
- 'X': np.random.random((32, 84)).astype("float32"),
- 'b': np.random.random(84).astype("float32")
- }
- self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
-
-
-class TestRowwiseAddOp2(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestRowwiseAddOp(OpTest):
def setUp(self):
- self.type = "rowwise_add"
+ self.op_type = "rowwise_add"
self.inputs = {
- 'X': np.random.random((13, 6, 7, 8)).astype("float32"),
- 'b': np.random.random((7, 8)).astype("float32")
+ 'X': np.random.uniform(0.1, 1, [5, 10]).astype("float32"),
+ 'b': np.random.uniform(0.1, 1, [10]).astype("float32")
}
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
+ def test_check_output(self):
+ self.check_output()
-class TestRowwiseAddGradOp(GradientChecker):
- def setUp(self):
- self.op = create_op("rowwise_add")
- self.inputs = {
- "X": np.random.uniform(0.1, 1, [5, 10]).astype("float32"),
- "b": np.random.uniform(0.1, 1, [10]).astype("float32")
- }
+ def test_check_grad_normal(self):
+ self.check_grad(['X', 'b'], 'Out')
- def test_normal(self):
- self.check_grad(self.op, self.inputs, ["X", "b"], "Out")
+ def test_check_grad_ignore_b(self):
+ self.check_grad(['X'], 'Out', no_grad_set=set('b'))
- def test_ignore_b(self):
- self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})
+ def test_check_grad_ignore_x(self):
+ self.check_grad(['b'], 'Out', no_grad_set=set('X'))
- def test_ignore_x(self):
- self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
-
-class TestRowwiseAddGradOp2(GradientChecker):
+class TestRowwiseAddOp2(OpTest):
def setUp(self):
- self.op = create_op("rowwise_add")
+ self.op_type = "rowwise_add"
self.inputs = {
- "X": np.random.uniform(0.1, 1, [2, 3, 2, 5]).astype("float32"),
- "b": np.random.uniform(0.1, 1, [2, 5]).astype("float32")
+ 'X': np.random.uniform(0.1, 1, [2, 3, 2, 5]).astype("float32"),
+ 'b': np.random.uniform(0.1, 1, [2, 5]).astype("float32")
}
+ self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
+
+ def test_check_output(self):
+ self.check_output()
- def test_normal(self):
- self.check_grad(self.op, self.inputs, ["X", "b"], "Out")
+ def test_check_grad_normal(self):
+ self.check_grad(['X', 'b'], 'Out')
- def test_ignore_b(self):
- self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})
+ def test_check_grad_ignore_b(self):
+ self.check_grad(['X'], 'Out', no_grad_set=set('b'))
- def test_ignore_x(self):
- self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
+ def test_check_grad_ignore_x(self):
+ self.check_grad(['b'], 'Out', no_grad_set=set('X'))
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py
index 69b301c376ee7a4ebb2e2dadc645c7d10f823a08..05d76d428299c8176d1a6adf6da15a203fa7502a 100644
--- a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py
+++ b/python/paddle/v2/framework/tests/test_scale_and_identity_op.py
@@ -1,43 +1,34 @@
import unittest
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
import numpy as np
-from paddle.v2.framework.op import Operator
+from op_test import OpTest
-class IdentityTest(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class IdentityTest(OpTest):
def setUp(self):
- self.type = "identity"
- self.inputs = {'X': np.random.random((32, 784)).astype("float32")}
+ self.op_type = "identity"
+ self.inputs = {'X': np.random.random((10, 10)).astype("float32")}
self.outputs = {'Out': self.inputs['X']}
+ def test_check_output(self):
+ self.check_output()
-class IdentityGradOpTest(GradientChecker):
- def test_normal(self):
- op = create_op("identity")
- inputs = {"X": np.random.random((10, 10)).astype("float32")}
- self.check_grad(op, inputs, set("X"), "Out")
-
+ def test_check_grad(self):
+ self.check_grad(['X'], 'Out')
-class ScaleTest(unittest.TestCase):
- __metaclass__ = OpTestMeta
+class ScaleTest(OpTest):
def setUp(self):
- self.type = "scale"
- self.inputs = {'X': np.random.random((32, 784)).astype("float32")}
+ self.op_type = "scale"
+ self.inputs = {'X': np.random.random((10, 10)).astype("float32")}
self.attrs = {'scale': -2.3}
self.outputs = {'Out': self.inputs['X'] * self.attrs['scale']}
+ def test_check_output(self):
+ self.check_output()
-class ScaleGradTest(GradientChecker):
- def test_normal(self):
- op = Operator("scale", X="X", Out="Out", scale=3.2)
- self.check_grad(op,
- {"X": np.random.random((10, 10)).astype("float32")},
- set("X"), "Out")
+ def test_check_grad(self):
+ self.check_grad(['X'], 'Out')
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_scatter_op.py b/python/paddle/v2/framework/tests/test_scatter_op.py
index c1f9444889372104e39ded78fc7207a59b80a293..33c73c52631a09ea0fefdeb9467991ae9c04321c 100644
--- a/python/paddle/v2/framework/tests/test_scatter_op.py
+++ b/python/paddle/v2/framework/tests/test_scatter_op.py
@@ -1,37 +1,24 @@
import unittest
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
-import numpy
-import paddle.v2.framework.core as core
-from paddle.v2.framework.op import Operator
+import numpy as np
+from op_test import OpTest
-class TestScatterOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestScatterOp(OpTest):
def setUp(self):
- self.type = "scatter"
- ref_np = numpy.ones((3, 3)).astype("float32")
- index_np = numpy.array([1, 2]).astype("int32")
- updates_np = numpy.random.random((2, 3)).astype("float32")
- output_np = numpy.copy(ref_np)
+ self.op_type = "scatter"
+ ref_np = np.ones((3, 3)).astype("float32")
+ index_np = np.array([1, 2]).astype("int32")
+ updates_np = np.random.random((2, 3)).astype("float32")
+ output_np = np.copy(ref_np)
output_np[index_np] += updates_np
self.inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np}
self.outputs = {'Out': output_np}
+ def test_check_output(self):
+ self.check_output()
-class TestScatterGradOp(GradientChecker):
- def test_scatter_grad(self):
- op = create_op("scatter")
- # test data setup
- ref_np = numpy.ones((3, 10)).astype("float32")
- index_np = numpy.array([1, 2]).astype("int32")
- updates_np = numpy.random.random((2, 10)).astype("float32")
- output_np = numpy.copy(ref_np)
- output_np[index_np] += updates_np
- inputs = {'Ref': ref_np, 'Index': index_np, 'Updates': updates_np}
- self.check_grad(
- op, inputs, set(["Updates", "Ref"]), "Out", in_place=True)
+ def test_check_grad(self):
+ self.check_grad(['Updates', 'Ref'], 'Out', in_place=True)
if __name__ == "__main__":
diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index e5f9ef865e84f1a78e28884ad7e2e758f9ca8054..557cf15ace63e336462c7dcdbbc10f30aeedc6f4 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -1,21 +1,22 @@
import unittest
-import numpy
-from op_test_util import OpTestMeta
+import numpy as np
+from op_test import OpTest
-class TestSGD(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestSGD(OpTest):
def setUp(self):
- self.type = "sgd"
- w = numpy.random.random((102, 105)).astype("float32")
- g = numpy.random.random((102, 105)).astype("float32")
+ self.op_type = "sgd"
+ w = np.random.random((102, 105)).astype("float32")
+ g = np.random.random((102, 105)).astype("float32")
lr = 0.1
self.inputs = {'param': w, 'grad': g}
self.attrs = {'learning_rate': lr}
self.outputs = {'param_out': w - lr * g}
+ def test_check_output(self):
+ self.check_output()
+
if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py
index 273c2e5ab1a84d12621fe9568c4cf22073b6aed4..2316e49eff7bb1cdb53acb3889a6ef05060b59f3 100644
--- a/python/paddle/v2/framework/tests/test_sigmoid_op.py
+++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py
@@ -1,27 +1,21 @@
import unittest
import numpy as np
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
+from op_test import OpTest
-class TestSigmoidOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestSigmoid(OpTest):
def setUp(self):
- self.type = "sigmoid"
- self.inputs = {'X': np.random.random((15, 31)).astype("float32")}
+ self.op_type = "sigmoid"
+ self.inputs = {
+ 'X': np.random.uniform(0.1, 1, [11, 17]).astype("float32")
+ }
self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
+ def test_check_output(self):
+ self.check_output()
-class TestSigmoidGradOp(GradientChecker):
- def test_grad(self):
- op = create_op("sigmoid")
- inputs = {"X": np.random.uniform(0.1, 1, [11, 17]).astype("float32")}
- # compare gpu and cpu results for backward op.
- # this test will be skiped if only compiling CPU version.
- self.compare_grad(op, inputs)
- # check gradients
- self.check_grad(op, inputs, set("X"), "Y", max_relative_error=0.007)
+ def test_check_grad(self):
+ self.check_grad(["X"], "Y", max_relative_error=0.007)
if __name__ == '__main__':
diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py
index 0d590fa7065bdd2df0e3f2aea5464f0524d70670..1b948f252fa631e9886840b377de2996e110dc91 100644
--- a/python/paddle/v2/framework/tests/test_softmax_op.py
+++ b/python/paddle/v2/framework/tests/test_softmax_op.py
@@ -1,9 +1,6 @@
import unittest
-
import numpy as np
-
-from gradient_checker import GradientChecker, create_op
-from op_test_util import OpTestMeta
+from op_test import OpTest
def stable_softmax(x):
@@ -13,26 +10,21 @@ def stable_softmax(x):
return exps / np.sum(exps)
-class TestSoftmaxOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestSoftmaxOp(OpTest):
def setUp(self):
- self.type = "softmax"
- self.inputs = {"X": np.random.random((10, 10)).astype("float32")}
+ self.op_type = "softmax"
+ self.inputs = {
+ 'X': np.random.uniform(0.1, 1, [10, 10]).astype("float32")
+ }
self.outputs = {
- "Y": np.apply_along_axis(stable_softmax, 1, self.inputs["X"])
+ 'Y': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
}
+ def test_check_output(self):
+ self.check_output()
-class TestSoftmaxGradOp(GradientChecker):
- def setUp(self):
- self.op = create_op("softmax")
- self.inputs = {
- "X": np.random.uniform(0.1, 1, [10, 10]).astype("float32")
- }
-
- def test_softmax_grad(self):
- self.check_grad(self.op, self.inputs, ["X"], "Y")
+ def test_check_grad(self):
+ self.check_grad(['X'], 'Y')
if __name__ == "__main__":
diff --git a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py
index 2bcdf37df434c9a089d75438d876114156261a5c..dc6ebf5d30369231b4918a168bbdf25c7096c808 100644
--- a/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py
+++ b/python/paddle/v2/framework/tests/test_squared_l2_distance_op.py
@@ -1,17 +1,14 @@
import unittest
-from op_test_util import OpTestMeta
-from gradient_checker import GradientChecker, create_op
import numpy as np
+from op_test import OpTest
-class TestSquaredL2DistanceOp_f0(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestSquaredL2DistanceOp_f0(OpTest):
def setUp(self):
- self.type = 'squared_l2_distance'
+ self.op_type = "squared_l2_distance"
self.inputs = {
- 'X': np.random.uniform(0.1, 1., (32, 64)).astype('float32'),
- 'Y': np.random.uniform(0.1, 1., (32, 64)).astype('float32')
+ 'X': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32"),
+ 'Y': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32")
}
sub_res = self.inputs['X'] - self.inputs['Y']
output = sub_res * sub_res
@@ -20,15 +17,19 @@ class TestSquaredL2DistanceOp_f0(unittest.TestCase):
'Out': np.expand_dims(output.sum(1), 1)
}
+ def test_check_output(self):
+ self.check_output()
+
+ def test_check_grad(self):
+ self.check_grad(['X', 'Y'], 'Out')
-class TestSquaredL2DistanceOp_f1(unittest.TestCase):
- __metaclass__ = OpTestMeta
+class TestSquaredL2DistanceOp_f1(OpTest):
def setUp(self):
- self.type = 'squared_l2_distance'
+ self.op_type = "squared_l2_distance"
self.inputs = {
- 'X': np.random.uniform(0.1, 1., (32, 64)).astype('float32'),
- 'Y': np.random.uniform(0.1, 1., (1, 64)).astype('float32')
+ 'X': np.random.uniform(0.1, 0.6, (2, 3)).astype("float32"),
+ 'Y': np.random.uniform(0.1, 0.6, (1, 3)).astype("float32")
}
sub_res = self.inputs['X'] - self.inputs['Y']
output = sub_res * sub_res
@@ -37,53 +38,34 @@ class TestSquaredL2DistanceOp_f1(unittest.TestCase):
'Out': np.expand_dims(output.sum(1), 1)
}
+ def test_check_output(self):
+ self.check_output()
-class TestSquaredL2DistanceOp_f2(unittest.TestCase):
- __metaclass__ = OpTestMeta
+ def test_check_grad(self):
+ self.check_grad(['X', 'Y'], 'Out')
+
+class TestSquaredL2DistanceOp_f2(OpTest):
def setUp(self):
- self.type = 'squared_l2_distance'
+ self.op_type = "squared_l2_distance"
self.inputs = {
- 'X': np.random.uniform(0.1, 1., (32, 64, 128)).astype('float32'),
- 'Y': np.random.uniform(0.1, 1., (1, 64, 128)).astype('float32')
+ 'X': np.random.uniform(0.1, 0.6, (2, 3, 4)).astype("float32"),
+ 'Y': np.random.uniform(0.1, 0.6, (1, 3, 4)).astype("float32")
}
sub_res = self.inputs['X'] - self.inputs['Y']
- sub_res = sub_res.reshape((32, 64 * 128))
+ sub_res = sub_res.reshape((2, 3 * 4))
output = sub_res * sub_res
self.outputs = {
'sub_result': sub_res,
'Out': np.expand_dims(output.sum(1), 1)
}
+ def test_check_output(self):
+ self.check_output()
-class TestSquaredL2DistanceGradOp(GradientChecker):
- def test_squared_l2_distance_b0(self):
- op = create_op("squared_l2_distance")
- inputs = {
- 'X': np.random.uniform(0.1, .6, (2, 3)).astype('float32'),
- 'Y': np.random.uniform(0.1, .6, (2, 3)).astype('float32')
- }
- self.compare_grad(op, inputs)
- self.check_grad(op, inputs, set(["X", "Y"]), "Out")
-
- def test_squared_l2_distance_b1(self):
- op = create_op("squared_l2_distance")
- inputs = {
- 'X': np.random.uniform(0.1, .6, (2, 3)).astype('float32'),
- 'Y': np.random.uniform(0.1, .6, (1, 3)).astype('float32')
- }
- self.compare_grad(op, inputs)
- self.check_grad(op, inputs, set(["X", "Y"]), "Out")
-
- def test_squared_l2_distance_b2(self):
- op = create_op("squared_l2_distance")
- inputs = {
- 'X': np.random.uniform(0.1, .6, (2, 3, 4)).astype('float32'),
- 'Y': np.random.uniform(0.1, .6, (1, 3, 4)).astype('float32')
- }
- self.compare_grad(op, inputs)
- self.check_grad(op, inputs, set(["X", "Y"]), "Out")
+ def test_check_grad(self):
+ self.check_grad(['X', 'Y'], 'Out')
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_sum_op.py b/python/paddle/v2/framework/tests/test_sum_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..60254291e2ab9215e2bc37c12d5e2e1ca6d33d5d
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_sum_op.py
@@ -0,0 +1,24 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestSumOp(OpTest):
+ def setUp(self):
+ self.op_type = "sum"
+ x0 = np.random.random((3, 4)).astype('float32')
+ x1 = np.random.random((3, 4)).astype('float32')
+ x2 = np.random.random((3, 4)).astype('float32')
+ self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
+ y = x0 + x1 + x2
+ self.outputs = {'Out': y}
+
+ def test_check_output(self):
+ self.check_output()
+
+ def test_check_grad(self):
+ self.check_grad(['x0'], 'Out')
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py
index 1af39818a305215b45219b8c5f0a10630fd64279..f26ed4964c521be1cd839b39d7244f96c653cb1a 100644
--- a/python/paddle/v2/framework/tests/test_tensor.py
+++ b/python/paddle/v2/framework/tests/test_tensor.py
@@ -3,7 +3,7 @@ import unittest
import numpy
-class TestScope(unittest.TestCase):
+class TestTensor(unittest.TestCase):
def test_int_tensor(self):
scope = core.Scope()
var = scope.new_var("test_tensor")
@@ -20,8 +20,8 @@ class TestScope(unittest.TestCase):
tensor.set(tensor_array, place)
tensor_array_2 = numpy.array(tensor)
- self.assertEqual(1.0, tensor_array_2[3, 9])
- self.assertEqual(2.0, tensor_array_2[19, 11])
+ self.assertEqual(1, tensor_array_2[3, 9])
+ self.assertEqual(2, tensor_array_2[19, 11])
def test_float_tensor(self):
scope = core.Scope()
@@ -43,6 +43,84 @@ class TestScope(unittest.TestCase):
self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
self.assertAlmostEqual(2.0, tensor_array_2[19, 11])
+ def test_int_lod_tensor(self):
+ places = [core.CPUPlace(), core.GPUPlace(0)]
+ for place in places:
+ scope = core.Scope()
+ var = scope.new_var("test_tensor")
+ var_lod = scope.new_var("test_lod_tensor")
+
+ tensor = var.get_tensor()
+ lod_tensor = var_lod.get_lod_tensor()
+
+ tensor.set_dims([4, 4, 6])
+ tensor.alloc_int(place)
+ array = numpy.array(tensor)
+ array[0, 0, 0] = 3
+ array[3, 3, 5] = 10
+ tensor.set(array, place)
+
+ lod_tensor.set_tensor(tensor)
+ lod_tensor.set_lod([[0, 2, 4]])
+
+ lod_v = numpy.array(lod_tensor.tensor())
+ self.assertTrue(numpy.alltrue(array == lod_v))
+
+ lod = lod_tensor.lod()
+ self.assertEqual(0, lod[0][0])
+ self.assertEqual(2, lod[0][1])
+ self.assertEqual(4, lod[0][2])
+
+ def test_float_lod_tensor(self):
+ places = [core.CPUPlace(), core.GPUPlace(0)]
+ for place in places:
+ scope = core.Scope()
+ var = scope.new_var("test_tensor")
+ var_lod = scope.new_var("test_lod_tensor")
+
+ tensor = var.get_tensor()
+ lod_tensor = var_lod.get_lod_tensor()
+
+ tensor.set_dims([5, 2, 3, 4])
+ tensor.alloc_float(place)
+
+ tensor_array = numpy.array(tensor)
+ self.assertEqual((5, 2, 3, 4), tensor_array.shape)
+ tensor_array[0, 0, 0, 0] = 1.0
+ tensor_array[0, 0, 0, 1] = 2.0
+ tensor.set(tensor_array, place)
+
+ lod_tensor.set_tensor(tensor)
+
+ lod_v = numpy.array(lod_tensor.tensor())
+ self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
+ self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
+ self.assertEqual(len(lod_tensor.lod()), 0)
+
+ lod_py = [[0, 2, 5], [0, 2, 4, 5]]
+ lod_tensor.set_lod(lod_py)
+ lod = lod_tensor.lod()
+ self.assertListEqual(lod_py, lod)
+
+ def test_lod_tensor_init(self):
+ scope = core.Scope()
+ var = scope.new_var("test_tensor")
+ place = core.CPUPlace()
+ tensor = var.get_tensor()
+ tensor.set_dims([5, 2, 3, 4])
+ tensor.alloc_float(place)
+ tensor_array = numpy.array(tensor)
+ tensor_array[0, 0, 0, 0] = 1.0
+ tensor_array[0, 0, 0, 1] = 2.0
+ tensor.set(tensor_array, place)
+ lod_py = [[0, 2, 5], [0, 2, 4, 5]]
+
+ lod_tensor = core.LoDTensor(lod_py, tensor)
+ lod_v = numpy.array(lod_tensor.tensor())
+ self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
+ self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
+ self.assertListEqual(lod_py, lod_tensor.lod())
+
if __name__ == '__main__':
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_top_k_op.py b/python/paddle/v2/framework/tests/test_top_k_op.py
index e841d96d26bba13b2780c41ea7a209fd470cad3b..cab799256d791889c295aa7f9048080f5caaf2dc 100644
--- a/python/paddle/v2/framework/tests/test_top_k_op.py
+++ b/python/paddle/v2/framework/tests/test_top_k_op.py
@@ -1,14 +1,11 @@
import unittest
import numpy as np
-from gradient_checker import GradientChecker, create_op
-from op_test_util import OpTestMeta
+from op_test import OpTest
-class TestTopkOp(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestTopkOp(OpTest):
def setUp(self):
- self.type = "top_k"
+ self.op_type = "top_k"
k = 1
input = np.random.random((32, 84)).astype("float32")
output = np.ndarray((32, k))
@@ -25,11 +22,9 @@ class TestTopkOp(unittest.TestCase):
self.outputs = {'Out': output, 'Indices': indices}
-class TestTopkOp3d(unittest.TestCase):
- __metaclass__ = OpTestMeta
-
+class TestTopkOp3d(OpTest):
def setUp(self):
- self.type = "top_k"
+ self.op_type = "top_k"
k = 1
input = np.random.random((32, 2, 84)).astype("float32")
input_flat_2d = input.reshape(64, 84)
@@ -48,5 +43,5 @@ class TestTopkOp3d(unittest.TestCase):
self.outputs = {'Out': output, 'Indices': indices}
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py
index c3d2bb44da3977c0899b2609a8efe15b7e1789f2..76a5e36e56ab08230bdc2597d209fcf5d1d2acb0 100644
--- a/python/paddle/v2/framework/tests/test_uniform_random_op.py
+++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py
@@ -14,11 +14,11 @@ class UniformRandomTest(unittest.TestCase):
def uniform_random_test(self, place):
scope = core.Scope()
- scope.new_var("X").get_tensor()
+ scope.new_var('X').get_tensor()
op = Operator(
"uniform_random",
- Out="X",
+ Out='X',
dims=[1000, 784],
min=-5.0,
max=10.0,
@@ -27,9 +27,9 @@ class UniformRandomTest(unittest.TestCase):
op.infer_shape(scope)
ctx = core.DeviceContext.create(place)
op.run(scope, ctx)
- tensor = numpy.array(scope.find_var("X").get_tensor())
+ tensor = numpy.array(scope.find_var('X').get_tensor())
self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 8acea6155c588f2e8e5ad009cd8f0a0c09afb92b..e80456d9bbeb3c34ac9eab873a84dbf8f06e34df 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -2,6 +2,7 @@ import numpy
import collections
import topology
import minibatch
+import cPickle
__all__ = ['infer', 'Inference']
@@ -25,11 +26,23 @@ class Inference(object):
:type parameters: paddle.v2.parameters.Parameters
"""
- def __init__(self, output_layer, parameters):
+ def __init__(self, parameters, output_layer=None, fileobj=None):
import py_paddle.swig_paddle as api
- topo = topology.Topology(output_layer)
- gm = api.GradientMachine.createFromConfigProto(
- topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
+
+ if output_layer is not None:
+ topo = topology.Topology(output_layer)
+ gm = api.GradientMachine.createFromConfigProto(
+ topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
+ self.__data_types__ = topo.data_type()
+ elif fileobj is not None:
+ tmp = cPickle.load(fileobj)
+ gm = api.GradientMachine.createByConfigProtoStr(
+ tmp['protobin'], api.CREATE_MODE_TESTING,
+ [api.PARAMETER_VALUE])
+ self.__data_types__ = tmp['data_type']
+ else:
+ raise ValueError("Either output_layer or fileobj must be set")
+
for param in gm.getParameters():
val = param.getBuf(api.PARAMETER_VALUE)
name = param.getName()
@@ -43,7 +56,6 @@ class Inference(object):
# called here, but it's better to call this function in one place.
param.setValueUpdated()
self.__gradient_machine__ = gm
- self.__data_types__ = topo.data_type()
def iter_infer(self, input, feeding=None):
from data_feeder import DataFeeder
diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py
index a20e878d0817d0a75e9c47a44f8765deca99225c..2db66be2505dde38a501edf45984e1f36beb351d 100644
--- a/python/paddle/v2/topology.py
+++ b/python/paddle/v2/topology.py
@@ -18,6 +18,7 @@ from paddle.proto.ModelConfig_pb2 import ModelConfig
import paddle.trainer_config_helpers as conf_helps
import layer as v2_layer
import config_base
+import cPickle
__all__ = ['Topology']
@@ -100,6 +101,14 @@ class Topology(object):
return layer
return None
+ def serialize_for_inference(self, stream):
+ protobin = self.proto().SerializeToString()
+ data_type = self.data_type()
+ cPickle.dump({
+ 'protobin': protobin,
+ 'data_type': data_type
+ }, stream, cPickle.HIGHEST_PROTOCOL)
+
def __check_layer_type__(layer):
if not isinstance(layer, config_base.Layer):
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index 0654a301049dcb347b79879076a869a0c14a07ae..ca95ef13bd440ac0ba3d46f6e4680d4d7aa94c42 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -174,13 +174,18 @@ class SGD(object):
pass_id=pass_id,
batch_id=batch_id,
cost=cost,
- evaluator=batch_evaluator))
+ evaluator=batch_evaluator,
+ gm=self.__gradient_machine__))
self.__parameter_updater__.finishBatch(cost)
batch_evaluator.finish()
self.__parameter_updater__.finishPass()
pass_evaluator.finish()
- event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
+ event_handler(
+ v2_event.EndPass(
+ pass_id,
+ evaluator=pass_evaluator,
+ gm=self.__gradient_machine__))
self.__gradient_machine__.finish()
def test(self, reader, feeding=None):