提交 3ef776ef 编写于 作者: W wanghaox

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into my_maxout_op

...@@ -21,11 +21,10 @@ third_party/ ...@@ -21,11 +21,10 @@ third_party/
cmake-build-* cmake-build-*
# generated while compiling # generated while compiling
python/paddle/v2/framework/core.so python/paddle/v2/fluid/core.so
paddle/pybind/pybind.h paddle/pybind/pybind.h
CMakeFiles CMakeFiles
cmake_install.cmake cmake_install.cmake
paddle/.timestamp paddle/.timestamp
python/paddlepaddle.egg-info/ python/paddlepaddle.egg-info/
paddle/pybind/pybind.h paddle/pybind/pybind.h
python/paddle/v2/framework/tests/tmp/*
...@@ -98,7 +98,7 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -98,7 +98,7 @@ IF(NOT ${CBLAS_FOUND})
ENDIF() ENDIF()
INSTALL(CODE "execute_process( INSTALL(CODE "execute_process(
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib
destination ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR} ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}
)" )"
) )
INSTALL(CODE "MESSAGE(STATUS \"Installing: \" INSTALL(CODE "MESSAGE(STATUS \"Installing: \"
......
## Evaluator Design
### The Problem
During training or serving, we provide the evaluation function to measure the model performance, e.g., accuracy, precision. In the operator based framework design, the data go through the network pipeline batch by batch. As a result, inside the operator, we only can calculate one minibatch metrics. We need to provide a mechanism to calculate the metrics for each N pass/batch the user wanted.
### Evaluator Design
Currently, every operation is expressed in the graph. we divide the evaluator process into three steps.
1. Initialize the metric state and add it into the block.
2. Calculate the statistic of the metric state in every mini-batch. The single operator is only responsible for calculating necessary statistics for one mini-batch. For example, accuracy operator only calculate a minibatch data if run once.
3. Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. When it comes to distributed training/Multi-GPU training, aggregate the value from different devices.
### Implementation
This design is shown in python API.
Each metric operator need to caculate the metric statistic and return the batch aware states, Python side responsible for accumulate the states for each pass.
```python
class Evaluator(object):
"""
Evaluator Base class.
"""
def __init__(self, name, **kwargs):
"""
Different evaluator may has different metric states. E.g, Accuracy need two variables, total and right sample counts.
Auc need four variables, `true_positives`,
`true_negatives`, `false_positives` and `false_negatives`. So every evaluator should create its needed variables and append to main_program
The initialization of Evaluator should be responsible for:
create metric states and append to the main_program
"""
pass
def _update_ops(self, input, label, **kwargs)
"""
Add mini-batch evaluator caculate operators to the main_program.
Add increment operator to accumulate the metric states.
"""
def reset(self, executor, reset_program=None):
"""
Reset metric states at the begin of each pass/user specified batch number.
Execute the reset_program to reset the states.
"""
def eval(self, executor, eval_program=None):
"""
Merge the mini-batch statistics to form the evaluation result for multiple mini-batches.
Execute the eval_program and return the result.
"""
return eval_result
```
...@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat, ...@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat,
paddle_matrix paddle_matrix_create_sparse( paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) { uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) {
#ifndef PADDLE_MOBILE_INFERENCE
auto ptr = new paddle::capi::CMatrix(); auto ptr = new paddle::capi::CMatrix();
ptr->mat = paddle::Matrix::createSparseMatrix( ptr->mat = paddle::Matrix::createSparseMatrix(
height, height,
...@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse( ...@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse(
false, false,
useGpu); useGpu);
return ptr; return ptr;
#else
return nullptr;
#endif
} }
paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
...@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, ...@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
uint64_t colSize, uint64_t colSize,
float* valueArray, float* valueArray,
uint64_t valueSize) { uint64_t valueSize) {
#ifndef PADDLE_MOBILE_INFERENCE
if (mat == nullptr) return kPD_NULLPTR; if (mat == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat); auto ptr = cast(mat);
if (rowArray == nullptr || colArray == nullptr || if (rowArray == nullptr || colArray == nullptr ||
...@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, ...@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
} else { } else {
return kPD_NOT_SUPPORTED; return kPD_NOT_SUPPORTED;
} }
#else
return kPD_NOT_SUPPORTED;
#endif
} }
...@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height, ...@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height,
* @param isBinary is binary (either 1 or 0 in matrix) or not. * @param isBinary is binary (either 1 or 0 in matrix) or not.
* @param useGpu is using GPU or not. * @param useGpu is using GPU or not.
* @return paddle_matrix. * @return paddle_matrix.
* @note Mobile inference does not support this interface.
*/ */
PD_API paddle_matrix paddle_matrix_create_sparse( PD_API paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu); uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
...@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat, ...@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat,
* NULL if the matrix is binary. * NULL if the matrix is binary.
* @param [in] valueSize length of value array. Zero if the matrix is binary. * @param [in] valueSize length of value array. Zero if the matrix is binary.
* @return paddle_error * @return paddle_error
* @note Mobile inference does not support this interface.
*/ */
PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
int* rowArray, int* rowArray,
......
...@@ -27,7 +27,9 @@ if(WITH_GPU) ...@@ -27,7 +27,9 @@ if(WITH_GPU)
set_source_files_properties(${CUDA_CXX_SOURCES} set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__") PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else() else()
if (NOT MOBILE_INFERENCE)
set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc) set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
endif()
endif() endif()
set(CUDA_CU_SOURCES set(CUDA_CU_SOURCES
......
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
#include "hl_base.h" #include "hl_base.h"
/** /**
* @brief Maximum pool forward. * @brief Maximum pool forward with Mask output.
* *
* @param[in] frameCnt batch size of input image. * @param[in] frameCnt batch size of input image.
* @param[in] inputData input data. * @param[in] inputData input data.
...@@ -35,7 +35,7 @@ limitations under the License. */ ...@@ -35,7 +35,7 @@ limitations under the License. */
* @param[in] paddingW padding width. * @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples. * @param[in] tgtStride stride between output data samples.
* * @param[out] maskData the location indices of select max data.
*/ */
extern void hl_maxpool_forward(const int frameCnt, extern void hl_maxpool_forward(const int frameCnt,
const real* inputData, const real* inputData,
...@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt, ...@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt,
const int paddingH, const int paddingH,
const int paddingW, const int paddingW,
real* tgtData, real* tgtData,
const int tgtStride); const int tgtStride,
real* maskData = NULL);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
......
...@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt, ...@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt,
const int paddingH, const int paddingH,
const int paddingW, const int paddingW,
real* tgtData, real* tgtData,
const int tgtStride) {} const int tgtStride,
real* MaskData) {}
inline void hl_maxpool_backward(const int frameCnt, inline void hl_maxpool_backward(const int frameCnt,
const real* inputData, const real* inputData,
......
...@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads, ...@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads,
const int offsetH, const int offsetH,
const int offsetW, const int offsetW,
real* tgtData, real* tgtData,
const int tgtStride) { const int tgtStride,
real* maskData) {
int index = blockIdx.x * blockDim.x + threadIdx.x; int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) { if (index < nthreads) {
int pw = index % pooledW; int pw = index % pooledW;
...@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads, ...@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads,
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
real maxval = -FLT_MAX; real maxval = -FLT_MAX;
int max_index = -1;
inputData += (frameNum * channels + c) * height * width; inputData += (frameNum * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
if (maxval < inputData[h * width + w]) if (maxval < inputData[h * width + w]) {
maxval = inputData[h * width + w]; max_index = h * width + w;
maxval = inputData[max_index];
}
} }
} }
int tgtIndex = int tgtIndex =
index % (pooledW * pooledH * channels) + frameNum * tgtStride; index % (pooledW * pooledH * channels) + frameNum * tgtStride;
tgtData[tgtIndex] = maxval; tgtData[tgtIndex] = maxval;
if (maskData != NULL) {
maskData[tgtIndex] = max_index;
}
} }
} }
...@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt, ...@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt,
const int paddingH, const int paddingH,
const int paddingW, const int paddingW,
real* tgtData, real* tgtData,
const int tgtStride) { const int tgtStride,
real* maskData) {
int num_kernels = pooledH * pooledW * channels * frameCnt; int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024; int blocks = (num_kernels + 1024 - 1) / 1024;
dim3 threads(1024, 1); dim3 threads(1024, 1);
...@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt, ...@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt,
paddingH, paddingH,
paddingW, paddingW,
tgtData, tgtData,
tgtStride); tgtStride,
maskData);
CHECK_SYNC("hl_maxpool_forward failed"); CHECK_SYNC("hl_maxpool_forward failed");
} }
......
...@@ -38,9 +38,9 @@ py_proto_compile(framework_py_proto SRCS framework.proto) ...@@ -38,9 +38,9 @@ py_proto_compile(framework_py_proto SRCS framework.proto)
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init) add_dependencies(framework_py_proto framework_py_proto_init)
add_custom_command(TARGET framework_py_proto POST_BUILD add_custom_command(TARGET framework_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/proto
COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/proto/ COMMAND cp *.py ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/proto/
COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." COMMENT "Copy generated python proto into directory paddle/v2/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
......
...@@ -61,6 +61,7 @@ public: ...@@ -61,6 +61,7 @@ public:
// function arguments // function arguments
strides_ = config.get<std::vector<size_t>>("strides"); strides_ = config.get<std::vector<size_t>>("strides");
paddings_ = config.get<std::vector<size_t>>("paddings"); paddings_ = config.get<std::vector<size_t>>("paddings");
dilations_ = config.get<std::vector<size_t>>("dilations");
groups_ = config.get<size_t>("groups"); groups_ = config.get<size_t>("groups");
// number of inputs and outputs // number of inputs and outputs
...@@ -118,6 +119,7 @@ protected: ...@@ -118,6 +119,7 @@ protected:
std::vector<size_t> strides_; std::vector<size_t> strides_;
std::vector<size_t> paddings_; std::vector<size_t> paddings_;
std::vector<size_t> dilations_;
/// Group size, refer to grouped convolution in /// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the /// Alex Krizhevsky's paper: when group=2, the first half of the
...@@ -133,6 +135,10 @@ protected: ...@@ -133,6 +135,10 @@ protected:
inline int paddingW() const { return paddings_[1]; } inline int paddingW() const { return paddings_[1]; }
inline int dilationH() const { return dilations_[0]; }
inline int dilationW() const { return dilations_[1]; }
// A temporary memory in convolution calculation. // A temporary memory in convolution calculation.
MemoryHandlePtr memory_; MemoryHandlePtr memory_;
......
...@@ -79,15 +79,26 @@ void Convolution(const std::string& conv1, ...@@ -79,15 +79,26 @@ void Convolution(const std::string& conv1,
if (outputChannels < inputChannels) continue; if (outputChannels < inputChannels) continue;
for (size_t stride : {1, 2}) { for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) { for (size_t padding : {0, 1}) {
for (size_t dilation : {1, 3}) {
if (padding >= filterSize) break; if (padding >= filterSize) break;
size_t filterS = (filterSize - 1) * dilation + 1;
if (inputSize + 2 * padding < filterS) break;
if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" ||
conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
dilation > 1)
break;
// NNPACK only supports stride = 1 if batchSize > 1 // NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") && if ((conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1) batchSize > 1 && stride > 1)
break; break;
size_t outputSize = size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride; (inputSize - filterS + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels << " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize << " inputHeight=" << inputSize
...@@ -96,17 +107,19 @@ void Convolution(const std::string& conv1, ...@@ -96,17 +107,19 @@ void Convolution(const std::string& conv1,
<< " filterHeight=" << filterSize << " filterHeight=" << filterSize
<< " filterWidth=" << filterSize << " filterWidth=" << filterSize
<< " outputHeight=" << outputSize << " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride << " outputWidth=" << outputSize
<< " padding=" << padding; << " stride=" << stride << " padding=" << padding;
std::vector<size_t> paddings = {padding, padding}; std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride}; std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test( Compare2Function<DType1, DType2> test(
conv1, conv1,
conv2, conv2,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)1) .set("groups", (size_t)1)
.set("algo", (std::string) "auto")); .set("algo", (std::string) "auto"));
...@@ -125,6 +138,7 @@ void Convolution(const std::string& conv1, ...@@ -125,6 +138,7 @@ void Convolution(const std::string& conv1,
} }
} }
} }
}
} }
/** /**
...@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1, ...@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1,
for (size_t outputChannels : {7}) { for (size_t outputChannels : {7}) {
size_t stride = 1; size_t stride = 1;
size_t padding = 0; size_t padding = 0;
size_t dilation = 1;
size_t outputHeight = size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) / (inputHeight - filterHeight + 2 * padding + stride) /
stride; stride;
...@@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1, ...@@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding}; std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride}; std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test( Compare2Function<DType1, DType2> test(
conv1, conv1,
conv2, conv2,
...@@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1, ...@@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1,
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)1) .set("groups", (size_t)1)
.set("dilations", dilations)
.set("algo", (std::string) "auto")); .set("algo", (std::string) "auto"));
TensorShape input{ TensorShape input{
...@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1, ...@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding}; std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride}; std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {1, 1};
size_t groups = inputChannels; size_t groups = inputChannels;
Compare2Function<DType1, DType2> test( Compare2Function<DType1, DType2> test(
conv1, conv1,
...@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1, ...@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1,
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", groups) .set("groups", groups)
.set("dilations", dilations)
.set("algo", (std::string) "auto")); .set("algo", (std::string) "auto"));
TensorShape input{ TensorShape input{
......
...@@ -100,7 +100,9 @@ public: ...@@ -100,7 +100,9 @@ public:
strideH(), strideH(),
strideW(), strideW(),
paddingH(), paddingH(),
paddingW()); paddingW(),
dilationH(),
dilationW());
} else { } else {
colData = inputData + g * inputOffset; colData = inputData + g * inputOffset;
} }
...@@ -223,7 +225,9 @@ public: ...@@ -223,7 +225,9 @@ public:
strideH(), strideH(),
strideW(), strideW(),
paddingH(), paddingH(),
paddingW()); paddingW(),
dilationH(),
dilationW());
} }
} }
inputGrad += inputChannels * inputHeight * inputWidth; inputGrad += inputChannels * inputHeight * inputWidth;
...@@ -310,7 +314,9 @@ public: ...@@ -310,7 +314,9 @@ public:
strideH(), strideH(),
strideW(), strideW(),
paddingH(), paddingH(),
paddingW()); paddingW(),
dilationH(),
dilationW());
} else { } else {
colData = inputData + g * inputOffset; colData = inputData + g * inputOffset;
} }
......
...@@ -78,7 +78,9 @@ public: ...@@ -78,7 +78,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth); int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
}; };
template <ColFormat Format, DeviceType Device, class T> template <ColFormat Format, DeviceType Device, class T>
...@@ -91,7 +93,9 @@ public: ...@@ -91,7 +93,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth); int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
}; };
} // namespace paddle } // namespace paddle
...@@ -31,7 +31,9 @@ public: ...@@ -31,7 +31,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -47,8 +49,8 @@ public: ...@@ -47,8 +49,8 @@ public:
int c_im = c / filterWidth / filterHeight; int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) { for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) { for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset; int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset; int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) < 0 || if ((imRowIdx - paddingHeight) < 0 ||
(imRowIdx - paddingHeight) >= inputHeight || (imRowIdx - paddingHeight) >= inputHeight ||
(imColIdx - paddingWidth) < 0 || (imColIdx - paddingWidth) < 0 ||
...@@ -81,7 +83,9 @@ public: ...@@ -81,7 +83,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -97,8 +101,8 @@ public: ...@@ -97,8 +101,8 @@ public:
int c_im = c / filterWidth / filterHeight; int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) { for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) { for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset; int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset; int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) >= 0 && if ((imRowIdx - paddingHeight) >= 0 &&
(imRowIdx - paddingHeight) < inputHeight && (imRowIdx - paddingHeight) < inputHeight &&
(imColIdx - paddingWidth) >= 0 && (imColIdx - paddingWidth) >= 0 &&
...@@ -134,7 +138,9 @@ public: ...@@ -134,7 +138,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -147,9 +153,10 @@ public: ...@@ -147,9 +153,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) { for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) { for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset = int imRowOffset = outputH * strideHeight +
outputH * strideHeight + filterH - paddingHeight; filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth; int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset = int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels + (((outputH * outputWidth + outputW) * inputChannels +
channel) * channel) *
...@@ -189,7 +196,9 @@ public: ...@@ -189,7 +196,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -202,9 +211,10 @@ public: ...@@ -202,9 +211,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) { for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) { for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset = int imRowOffset = outputH * strideHeight +
outputH * strideHeight + filterH - paddingHeight; filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth; int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset = int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels + (((outputH * outputWidth + outputW) * inputChannels +
channel) * channel) *
......
...@@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im, ...@@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im,
int strideW, int strideW,
int paddingH, int paddingH,
int paddingW, int paddingW,
int dilationH,
int dilationW,
int height_col, int height_col,
int width_col, int width_col,
T* data_col) { T* data_col) {
...@@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im, ...@@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im,
data_col += (channel_out * height_col + h_out) * width_col + w_out; data_col += (channel_out * height_col + h_out) * width_col + w_out;
for (int i = 0; i < blockH; ++i) { for (int i = 0; i < blockH; ++i) {
for (int j = 0; j < blockW; ++j) { for (int j = 0; j < blockW; ++j) {
int rIdx = int(h_in + i); int rIdx = int(h_in + i * dilationH);
int cIdx = int(w_in + j); int cIdx = int(w_in + j * dilationW);
if ((rIdx - (int)paddingH) >= (int)height || if ((rIdx - (int)paddingH) >= (int)height ||
(rIdx - (int)paddingH) < 0 || (rIdx - (int)paddingH) < 0 ||
(cIdx - (int)paddingW) >= (int)width || (cIdx - (int)paddingW) >= (int)width ||
...@@ -77,7 +79,9 @@ public: ...@@ -77,7 +79,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -102,6 +106,8 @@ public: ...@@ -102,6 +106,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth, outputWidth,
colData); colData);
...@@ -121,6 +127,8 @@ __global__ void col2im(size_t n, ...@@ -121,6 +127,8 @@ __global__ void col2im(size_t n,
size_t strideW, size_t strideW,
size_t paddingH, size_t paddingH,
size_t paddingW, size_t paddingW,
size_t dilationH,
size_t dilationW,
size_t height_col, size_t height_col,
size_t width_col, size_t width_col,
T* data_im) { T* data_im) {
...@@ -131,23 +139,34 @@ __global__ void col2im(size_t n, ...@@ -131,23 +139,34 @@ __global__ void col2im(size_t n,
int w = int(index % width); int w = int(index % width);
int h = int((index / width) % height); int h = int((index / width) % height);
int c = int(index / (width * height)); int c = int(index / (width * height));
int filterH = (blockH - 1) * dilationH + 1;
int filterW = (blockW - 1) * dilationW + 1;
if ((w - (int)paddingW) >= 0 && if ((w - (int)paddingW) >= 0 &&
(w - (int)paddingW) < (width - 2 * paddingW) && (w - (int)paddingW) < (width - 2 * paddingW) &&
(h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) {
// compute the start and end of the output // compute the start and end of the output
int w_col_start = int w_col_start =
(w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; (w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1;
int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col));
int h_col_start = int h_col_start =
(h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1; (h < (int)filterH) ? 0 : (h - (int)filterH) / (int)strideH + 1;
int h_col_end = min(int(h / strideH + 1), int(height_col)); int h_col_end = min(int(h / strideH + 1), int(height_col));
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// the col location: [c * width * height + h_out, w_out] // the col location: [c * width * height + h_out, w_out]
int c_col = int(c * blockH * blockW) + int h_k = (h - h_col * strideH);
(h - h_col * (int)strideH) * (int)blockW + int w_k = (w - w_col * strideW);
(w - w_col * (int)strideW); if (h_k % dilationH == 0 && w_k % dilationW == 0) {
val += data_col[(c_col * height_col + h_col) * width_col + w_col]; h_k /= dilationH;
w_k /= dilationW;
int c_col =
(((c * blockH + h_k) * blockW + w_k) * height_col + h_col) *
width_col +
w_col;
val += data_col[c_col];
}
} }
} }
h -= paddingH; h -= paddingH;
...@@ -173,7 +192,9 @@ public: ...@@ -173,7 +192,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -205,6 +226,8 @@ public: ...@@ -205,6 +226,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth, outputWidth,
imData); imData);
...@@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData, ...@@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth, int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight, int outputHeight,
int outputWidth) { int outputWidth) {
int swId = blockIdx.x; int swId = blockIdx.x;
...@@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData, ...@@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData,
channelId += blockDim.z) { channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth; int widthOffset =
int heightOffset = idy + shId * strideHeight - paddingHeight; idx * dilationHeight + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationWidth + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth + int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth; channelId * inputHeight * inputWidth;
...@@ -273,7 +300,9 @@ public: ...@@ -273,7 +300,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -312,6 +341,8 @@ public: ...@@ -312,6 +341,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth); outputWidth);
CHECK_SYNC("Im2ColFunctor GPU failed"); CHECK_SYNC("Im2ColFunctor GPU failed");
...@@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData, ...@@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth, int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight, int outputHeight,
int outputWidth) { int outputWidth) {
int swId = blockIdx.x; int swId = blockIdx.x;
...@@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData, ...@@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData,
channelId += blockDim.z) { channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth; int widthOffset =
int heightOffset = idy + shId * strideHeight - paddingHeight; idx * dilationWidth + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationHeight + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth + int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth; channelId * inputHeight * inputWidth;
...@@ -372,7 +407,9 @@ public: ...@@ -372,7 +407,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -411,6 +448,8 @@ public: ...@@ -411,6 +448,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth); outputWidth);
CHECK_SYNC("Col2ImFunctor GPU failed"); CHECK_SYNC("Col2ImFunctor GPU failed");
......
...@@ -29,14 +29,17 @@ void TestIm2ColFunctor() { ...@@ -29,14 +29,17 @@ void TestIm2ColFunctor() {
for (size_t filterWidth : {3, 7}) { for (size_t filterWidth : {3, 7}) {
for (size_t stride : {1, 2}) { for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) { for (size_t padding : {0, 1}) {
if (inputHeight <= filterHeight || inputWidth <= filterWidth) for (size_t dilation : {1, 3}) {
size_t filterSizeH = (filterHeight - 1) * dilation + 1;
size_t filterSizeW = (filterWidth - 1) * dilation + 1;
if (inputHeight + 2 * padding < filterSizeH ||
inputWidth + 2 * padding < filterSizeW)
break; break;
if (padding >= filterHeight || padding >= filterWidth) break; if (padding >= filterSizeH || padding >= filterSizeW) break;
size_t outputHeight = size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) / (inputHeight - filterSizeH + 2 * padding) / stride + 1;
stride;
size_t outputWidth = size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) / stride; (inputWidth - filterSizeW + 2 * padding) / stride + 1;
TensorShape imShape = TensorShape imShape =
TensorShape({channels, inputHeight, inputWidth}); TensorShape({channels, inputHeight, inputWidth});
...@@ -53,10 +56,14 @@ void TestIm2ColFunctor() { ...@@ -53,10 +56,14 @@ void TestIm2ColFunctor() {
size_t height = channels * filterHeight * filterWidth; size_t height = channels * filterHeight * filterWidth;
size_t width = outputHeight * outputWidth; size_t width = outputHeight * outputWidth;
VectorPtr input1 = Vector::create(imShape.getElements(), false); VectorPtr input1 =
VectorPtr input2 = Vector::create(imShape.getElements(), false); Vector::create(imShape.getElements(), false);
MatrixPtr output1 = Matrix::create(height, width, false, false); VectorPtr input2 =
MatrixPtr output2 = Matrix::create(width, height, false, false); Vector::create(imShape.getElements(), false);
MatrixPtr output1 =
Matrix::create(height, width, false, false);
MatrixPtr output2 =
Matrix::create(width, height, false, false);
input1->uniform(0.001, 1); input1->uniform(0.001, 1);
input2->copyFrom(*input1); input2->copyFrom(*input1);
...@@ -69,7 +76,9 @@ void TestIm2ColFunctor() { ...@@ -69,7 +76,9 @@ void TestIm2ColFunctor() {
stride, stride,
stride, stride,
padding, padding,
padding); padding,
dilation,
dilation);
im2Col2(input2->getData(), im2Col2(input2->getData(),
imShape, imShape,
output2->getData(), output2->getData(),
...@@ -77,7 +86,9 @@ void TestIm2ColFunctor() { ...@@ -77,7 +86,9 @@ void TestIm2ColFunctor() {
stride, stride,
stride, stride,
padding, padding,
padding); padding,
dilation,
dilation);
// The transposition of the result of ColFormat == kCFO // The transposition of the result of ColFormat == kCFO
// is equal to the result of ColFormat == kOCF. // is equal to the result of ColFormat == kOCF.
...@@ -87,6 +98,7 @@ void TestIm2ColFunctor() { ...@@ -87,6 +98,7 @@ void TestIm2ColFunctor() {
Col2ImFunctor<kCFO, Device, T> col2Im1; Col2ImFunctor<kCFO, Device, T> col2Im1;
Col2ImFunctor<kOCF, Device, T> col2Im2; Col2ImFunctor<kOCF, Device, T> col2Im2;
col2Im1(input1->getData(), col2Im1(input1->getData(),
imShape, imShape,
output1->getData(), output1->getData(),
...@@ -94,7 +106,9 @@ void TestIm2ColFunctor() { ...@@ -94,7 +106,9 @@ void TestIm2ColFunctor() {
stride, stride,
stride, stride,
padding, padding,
padding); padding,
dilation,
dilation);
col2Im2(input2->getData(), col2Im2(input2->getData(),
imShape, imShape,
output2->getData(), output2->getData(),
...@@ -102,8 +116,9 @@ void TestIm2ColFunctor() { ...@@ -102,8 +116,9 @@ void TestIm2ColFunctor() {
stride, stride,
stride, stride,
padding, padding,
padding); padding,
dilation,
dilation);
autotest::TensorCheckErr(*input1, *input2); autotest::TensorCheckErr(*input1, *input2);
} }
} }
...@@ -112,6 +127,7 @@ void TestIm2ColFunctor() { ...@@ -112,6 +127,7 @@ void TestIm2ColFunctor() {
} }
} }
} }
}
} }
TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor<DEVICE_TYPE_CPU, float>(); } TEST(Im2ColFunctor, CPU) { TestIm2ColFunctor<DEVICE_TYPE_CPU, float>(); }
......
...@@ -85,9 +85,49 @@ if(MOBILE_INFERENCE) ...@@ -85,9 +85,49 @@ if(MOBILE_INFERENCE)
gradientmachines/GradientMachineMode.cpp gradientmachines/GradientMachineMode.cpp
gradientmachines/MultiGradientMachine.cpp) gradientmachines/MultiGradientMachine.cpp)
# Remove useless layers # Remove layers that used in training
list(REMOVE_ITEM GSERVER_SOURCES list(REMOVE_ITEM GSERVER_SOURCES
layers/RecurrentLayerGroup.cpp) layers/RecurrentLayerGroup.cpp
layers/CostLayer.cpp
layers/MultiBoxLossLayer.cpp
layers/WarpCTCLayer.cpp
layers/CTCLayer.cpp
layers/LinearChainCTC.cpp
layers/PrintLayer.cpp)
list(REMOVE_ITEM GSERVER_SOURCES
layers/OuterProdLayer.cpp
layers/SumToOneNormLayer.cpp
layers/ConvShiftLayer.cpp
layers/InterpolationLayer.cpp
layers/AgentLayer.cpp
layers/DotMulOperator.cpp
layers/GruStepLayer.cpp
layers/LstmStepLayer.cpp
layers/ConvexCombinationLayer.cpp
layers/Conv3DLayer.cpp
layers/DeConv3DLayer.cpp
layers/CropLayer.cpp
layers/CrossEntropyOverBeam.cpp
layers/DataNormLayer.cpp
layers/FeatureMapExpandLayer.cpp
layers/HierarchicalSigmoidLayer.cpp
layers/MultinomialSampler.cpp
layers/NCELayer.cpp
layers/KmaxSeqScoreLayer.cpp
layers/MDLstmLayer.cpp
layers/MultiplexLayer.cpp
layers/PadLayer.cpp
layers/Pool3DLayer.cpp
layers/ResizeLayer.cpp
layers/RotateLayer.cpp
layers/RowConvLayer.cpp
layers/RowL2NormLayer.cpp
layers/SamplingIdLayer.cpp
layers/ScaleShiftLayer.cpp
layers/SelectiveFullyConnectedLayer.cpp
layers/SpatialPyramidPoolLayer.cpp
layers/BilinearInterpLayer.cpp
layers/ClipLayer.cpp)
endif() endif()
if(WITH_GPU) if(WITH_GPU)
......
...@@ -16,7 +16,6 @@ limitations under the License. */ ...@@ -16,7 +16,6 @@ limitations under the License. */
#include "NeuralNetwork.h" #include "NeuralNetwork.h"
#include "hl_gpu.h" #include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/CustomStackTrace.h" #include "paddle/utils/CustomStackTrace.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
...@@ -28,6 +27,7 @@ limitations under the License. */ ...@@ -28,6 +27,7 @@ limitations under the License. */
#ifndef PADDLE_MOBILE_INFERENCE #ifndef PADDLE_MOBILE_INFERENCE
#include "MultiNetwork.h" #include "MultiNetwork.h"
#include "RecurrentGradientMachine.h" #include "RecurrentGradientMachine.h"
#include "paddle/gserver/layers/AgentLayer.h"
#endif #endif
namespace paddle { namespace paddle {
...@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config, ...@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config,
void NeuralNetwork::connect(LayerPtr agentLayer, void NeuralNetwork::connect(LayerPtr agentLayer,
LayerPtr realLayer, LayerPtr realLayer,
int height) { int height) {
#ifndef PADDLE_MOBILE_INFERENCE
AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get()); AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get());
CHECK_NOTNULL(agent); CHECK_NOTNULL(agent);
agent->setRealLayer(realLayer, height); agent->setRealLayer(realLayer, height);
#endif
} }
void NeuralNetwork::connect(std::string agentLayerName, void NeuralNetwork::connect(std::string agentLayerName,
......
...@@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
for (int i = 0; i < config_.inputs_size(); i++) { for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
std::vector<size_t> dilations = {(size_t)dilationY_[i],
(size_t)dilation_[i]};
bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1);
// Convolution Layer uses the GemmConv function by default. // Convolution Layer uses the GemmConv function by default.
convType = "GemmConv"; convType = "GemmConv";
...@@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
#if defined(__ARM_NEON__) || defined(__ARM_NEON) #if defined(__ARM_NEON__) || defined(__ARM_NEON)
if ((filterSize_[i] == filterSizeY_[i]) && if ((filterSize_[i] == filterSizeY_[i]) &&
(filterSize_[i] == 3 || filterSize_[i] == 4) && (filterSize_[i] == 3 || filterSize_[i] == 4) &&
(stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) { (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) &&
!useDilation) {
convType = "NeonDepthwiseConv"; convType = "NeonDepthwiseConv";
} }
#endif #endif
} }
if (FLAGS_use_nnpack && !isDeconv_) { if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) {
createFunction(forward_, createFunction(forward_,
"NNPACKConv", "NNPACKConv",
FuncConfig() FuncConfig()
...@@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
createFunction(backward_, createFunction(backward_,
...@@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
createFunction(backward_, createFunction(backward_,
...@@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
} }
} }
......
...@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_; ...@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_;
LayerPtr Layer::create(const LayerConfig& config) { LayerPtr Layer::create(const LayerConfig& config) {
std::string type = config.type(); std::string type = config.type();
#ifndef PADDLE_MOBILE_INFERENCE
// NOTE: As following types have illegal character '-', // NOTE: As following types have illegal character '-',
// they can not use REGISTER_LAYER to registrar. // they can not use REGISTER_LAYER to registrar.
// Besides, to fit with old training models, // Besides, to fit with old training models,
...@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) { ...@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) {
return LayerPtr(new MultiClassCrossEntropy(config)); return LayerPtr(new MultiClassCrossEntropy(config));
else if (type == "rank-cost") else if (type == "rank-cost")
return LayerPtr(new RankingCost(config)); return LayerPtr(new RankingCost(config));
#ifndef PADDLE_MOBILE_INFERENCE
else if (type == "auc-validation") else if (type == "auc-validation")
return LayerPtr(new AucValidation(config)); return LayerPtr(new AucValidation(config));
else if (type == "pnpair-validation") else if (type == "pnpair-validation")
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MaxPoolWithMaskLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
bool MaxPoolWithMaskLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
PoolLayer::init(layerMap, parameterMap);
setOutput("mask", &mask_);
return true;
}
size_t MaxPoolWithMaskLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
outputY_ = outputSize(imgSizeY_,
sizeY_,
confPaddingY_,
strideY_,
/* caffeMode */ false);
outputX_ = outputSize(imgSize_,
sizeX_,
confPadding_,
stride_,
/* caffeMode */ false);
layerSize = outputX_ * outputY_ * channels_;
getOutput().setFrameHeight(outputY_);
getOutput().setFrameWidth(outputX_);
return layerSize;
}
void MaxPoolWithMaskLayer::forward(PassType passType) {
size_t size = getSize();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
int batchSize = inputV->getHeight();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
CHECK_EQ(size, outV->getWidth());
resetSpecifyOutput(mask_,
batchSize,
size,
/* isValueClean */ false,
/* isGradClean */ true);
MatrixPtr maskV = mask_.value;
outV->maxPoolForward(*inputV,
imgSizeY_,
imgSize_,
channels_,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
confPaddingY_,
confPadding_,
maskV);
}
void MaxPoolWithMaskLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
MatrixPtr outGrad = getOutputGrad();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
MatrixPtr outV = getOutputValue();
MatrixPtr inputGrad = inputLayers_[0]->getOutputGrad();
inputGrad->maxPoolBackward(*inputV,
imgSizeY_,
imgSize_,
*outGrad,
*outV,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
1,
1,
confPaddingY_,
confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "PoolLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief Basic parent layer of different kinds of pooling
*/
class MaxPoolWithMaskLayer : public PoolLayer {
protected:
Argument mask_;
public:
explicit MaxPoolWithMaskLayer(const LayerConfig& config)
: PoolLayer(config) {}
size_t getSize();
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
};
} // namespace paddle
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PoolLayer.h" #include "PoolLayer.h"
#include "MaxPoolWithMaskLayer.h"
#include "PoolProjectionLayer.h" #include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap, ...@@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride();
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
return true; return true;
} }
...@@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) { ...@@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) {
} else if (CudnnPoolLayer::typeCheck(pool)) { } else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config); return new CudnnPoolLayer(config);
#endif #endif
} else if (pool == "max-pool-with-mask") {
return new MaxPoolWithMaskLayer(config);
} else { } else {
LOG(FATAL) << "Unknown pool type: " << pool; LOG(FATAL) << "Unknown pool type: " << pool;
return nullptr; return nullptr;
......
...@@ -100,8 +100,9 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -100,8 +100,9 @@ void ROIPoolLayer::forward(PassType passType) {
size_t roiEndH = round(bottomROIs[4] * spatialScale_); size_t roiEndH = round(bottomROIs[4] * spatialScale_);
CHECK_GE(roiBatchIdx, 0UL); CHECK_GE(roiBatchIdx, 0UL);
CHECK_LT(roiBatchIdx, batchSize); CHECK_LT(roiBatchIdx, batchSize);
size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); size_t roiHeight =
size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); std::max(roiEndH - roiStartH + 1, static_cast<size_t>(1));
size_t roiWidth = std::max(roiEndW - roiStartW + 1, static_cast<size_t>(1));
real binSizeH = real binSizeH =
static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_); static_cast<real>(roiHeight) / static_cast<real>(pooledHeight_);
real binSizeW = real binSizeW =
...@@ -114,10 +115,14 @@ void ROIPoolLayer::forward(PassType passType) { ...@@ -114,10 +115,14 @@ void ROIPoolLayer::forward(PassType passType) {
size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW)); size_t wstart = static_cast<size_t>(std::floor(pw * binSizeW));
size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH)); size_t hend = static_cast<size_t>(std::ceil((ph + 1) * binSizeH));
size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW)); size_t wend = static_cast<size_t>(std::ceil((pw + 1) * binSizeW));
hstart = std::min(std::max(hstart + roiStartH, 0UL), height_); hstart = std::min(
wstart = std::min(std::max(wstart + roiStartW, 0UL), width_); std::max(hstart + roiStartH, static_cast<size_t>(0)), height_);
hend = std::min(std::max(hend + roiStartH, 0UL), height_); wstart = std::min(
wend = std::min(std::max(wend + roiStartW, 0UL), width_); std::max(wstart + roiStartW, static_cast<size_t>(0)), width_);
hend = std::min(std::max(hend + roiStartH, static_cast<size_t>(0)),
height_);
wend = std::min(std::max(wend + roiStartW, static_cast<size_t>(0)),
width_);
bool isEmpty = (hend <= hstart) || (wend <= wstart); bool isEmpty = (hend <= hstart) || (wend <= wstart);
size_t poolIndex = ph * pooledWidth_ + pw; size_t poolIndex = ph * pooledWidth_ + pw;
......
# gserver pacakge unittests # gserver pacakge unittests
add_simple_unittest(test_LinearChainCRF) add_simple_unittest(test_LinearChainCRF)
add_simple_unittest(test_MultinomialSampler)
add_simple_unittest(test_RecurrentLayer) add_simple_unittest(test_RecurrentLayer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_MultinomialSampler)
endif()
function(gserver_test TARGET) function(gserver_test TARGET)
add_unittest_without_exec(${TARGET} add_unittest_without_exec(${TARGET}
${TARGET}.cpp ${TARGET}.cpp
...@@ -24,6 +27,7 @@ gserver_test(test_ConvUnify) ...@@ -24,6 +27,7 @@ gserver_test(test_ConvUnify)
gserver_test(test_BatchNorm) gserver_test(test_BatchNorm)
gserver_test(test_KmaxSeqScore) gserver_test(test_KmaxSeqScore)
gserver_test(test_Expand) gserver_test(test_Expand)
gserver_test(test_MaxPoolingWithMaskOutput)
########## test_Mkldnn layers and activations ########## ########## test_Mkldnn layers and activations ##########
if(WITH_MKLDNN) if(WITH_MKLDNN)
...@@ -48,7 +52,7 @@ if(WITH_PYTHON) ...@@ -48,7 +52,7 @@ if(WITH_PYTHON)
endif() endif()
############### test_WarpCTCLayer ####################### ############### test_WarpCTCLayer #######################
if(NOT WITH_DOUBLE) if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
add_unittest_without_exec(test_WarpCTCLayer add_unittest_without_exec(test_WarpCTCLayer
test_WarpCTCLayer.cpp) test_WarpCTCLayer.cpp)
......
...@@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { ...@@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1); config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true); config.layerConfig.set_shared_biases(true);
int dilation = 1; int dilation = 2;
if (type == "cudnn_conv") { if (type == "cudnn_conv") {
#if CUDNN_VERSION >= 6000 #if CUDNN_VERSION >= 6000
dilation = 2; dilation = 2;
...@@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { ...@@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
TEST(Layer, PoolLayer) { TEST(Layer, PoolLayer) {
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
...@@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) { ...@@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true);
#endif #endif
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle;
void setPoolConfig(TestConfig* config,
PoolConfig* pool,
const string& poolType) {
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(1);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(1);
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow);
pool->set_output_y(oh);
}
void doOneMaxPoolingWithMaskOutputTest(MatrixPtr& inputMat,
const string& poolType,
bool use_gpu,
MatrixPtr& maskMat) {
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 25, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
pool->set_img_size(5);
pool->set_img_size_y(5);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
config.layerConfig.set_name("MaxPoolWithMask");
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(config,
&dataLayers,
&datas,
&layerMap,
"MaxPoolWithMask",
1,
false,
use_gpu);
dataLayers[0]->getOutputValue()->copyFrom(*inputMat);
FLAGS_use_gpu = use_gpu;
std::vector<ParameterPtr> parameters;
LayerPtr maxPoolingWithMaskOutputLayer;
initTestLayer(config, &layerMap, &parameters, &maxPoolingWithMaskOutputLayer);
maxPoolingWithMaskOutputLayer->forward(PASS_GC);
checkMatrixEqual(maxPoolingWithMaskOutputLayer->getOutput("mask").value,
maskMat);
}
TEST(Layer, maxPoolingWithMaskOutputLayerFwd) {
bool useGpu = false;
MatrixPtr inputMat;
MatrixPtr maskMat;
real inputData[] = {0.1, 0.1, 0.5, 0.5, 1.1, 0.2, 0.2, 0.6, 0.1,
0.1, 0.3, 0.3, 0.7, 0.1, 0.1, 0.4, 0.4, 0.8,
0.8, 0.1, 1.0, 2.0, 3.0, 0.0, 9.0};
real maskData[] = {12, 4, 22, 24};
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->setData(inputData);
maskMat->setData(maskData);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#ifdef PADDLE_WITH_CUDA
useGpu = true;
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->copyFrom(inputData, 25);
maskMat->copyFrom(maskData, 4);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#endif
}
...@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b, ...@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b,
} }
template class BaseMatrixT<real>; template class BaseMatrixT<real>;
#ifndef PADDLE_MOBILE_INFERENCE
template class BaseMatrixT<int>; template class BaseMatrixT<int>;
#else
template <>
void BaseMatrixT<int>::zero() {
applyUnary(unary::Zero<int>());
}
template <>
void BaseMatrixT<int>::assign(int p) {
applyUnary(unary::Assign<int>(p));
}
template <>
void BaseMatrixT<int>::isEqualTo(BaseMatrixT& b, int value) {
applyBinary(binary::IsEqual<int>(value), b);
}
template <>
void BaseMatrixT<int>::neg() {
applyUnary(unary::Neg<int>());
}
template <>
void BaseMatrixT<int>::abs2() {
applyUnary(unary::Abs<int>());
}
template <>
void BaseMatrixT<int>::add(int p) {
applyUnary(unary::Add<int>(p));
}
template <>
void BaseMatrixT<int>::add(int p1, int p2) {
applyUnary(unary::Add2<int>(p1, p2));
}
template <>
void BaseMatrixT<int>::applyL1(int learningRate, int decayRate) {
applyUnary(unary::ApplyL1<int>(learningRate * decayRate));
}
#endif
} // namespace paddle } // namespace paddle
...@@ -25,6 +25,19 @@ else() ...@@ -25,6 +25,19 @@ else()
message(STATUS "Compile with MKLDNNMatrix") message(STATUS "Compile with MKLDNNMatrix")
endif() endif()
if(MOBILE_INFERENCE)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/SIMDFunctions.cpp)
# Remove sparse
list(REMOVE_ITEM MATH_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.h)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp)
endif()
set(MATH_SOURCES set(MATH_SOURCES
"${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu"
"${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu"
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef> #include <cstddef>
#include "Matrix.h" #include "Matrix.h"
...@@ -309,3 +312,57 @@ private: ...@@ -309,3 +312,57 @@ private:
using Matrix::subMatrix; using Matrix::subMatrix;
}; };
} // namespace paddle } // namespace paddle
#else
#include "Matrix.h"
namespace paddle {
class CpuSparseMatrix : public Matrix {
public:
CpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
CpuSparseMatrix(real* data,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, false) {}
real* getValue() const { return nullptr; }
size_t getColStartIdx(size_t i) const { return 0; }
size_t getRowStartIdx(size_t i) const { return 0; }
size_t getColNum(size_t i) const { return 0; }
int* getRowCols(size_t i) const { return nullptr; }
CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width) {
return nullptr;
}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
...@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) { ...@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) {
} }
void GpuMatrix::collectBias(Matrix& a, real scale) { void GpuMatrix::collectBias(Matrix& a, real scale) {
#ifdef PADDLE_WITH_CUDA
CHECK_EQ(getHeight(), (size_t)1); CHECK_EQ(getHeight(), (size_t)1);
CHECK_EQ(width_, a.getWidth()); CHECK_EQ(width_, a.getWidth());
GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a); GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
...@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) { ...@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get(); hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale); hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale);
} }
#endif
} }
void GpuMatrix::collectSharedBias(Matrix& a, real scale) { void GpuMatrix::collectSharedBias(Matrix& a, real scale) {
...@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, ...@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
const GpuMatrix& b, const GpuMatrix& b,
real scaleAB, real scaleAB,
real scaleT) { real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous()); CHECK(isContiguous());
CHECK(b.isContiguous()); CHECK(b.isContiguous());
CHECK(b.useGpu_ == true) << "Matrix type are not equal"; CHECK(b.useGpu_ == true) << "Matrix type are not equal";
...@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, ...@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
b.height_, b.height_,
scaleAB, scaleAB,
scaleT); scaleT);
#endif
} }
void GpuMatrix::mul(const GpuMatrix& a, void GpuMatrix::mul(const GpuMatrix& a,
const GpuSparseMatrix& b, const GpuSparseMatrix& b,
real scaleAB, real scaleAB,
real scaleT) { real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous()); CHECK(isContiguous());
CHECK(a.isContiguous()); CHECK(a.isContiguous());
CHECK(a.useGpu_ == true) << "Matrix type are not equal"; CHECK(a.useGpu_ == true) << "Matrix type are not equal";
...@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a, ...@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a,
scaleAB, scaleAB,
scaleT); scaleT);
} }
#endif
} }
/* this = a*b */ /* this = a*b */
...@@ -1028,15 +1034,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1028,15 +1034,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW,
MatrixPtr maskMatP) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* maskData = NULL;
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels); CHECK(width_ == outputH * outputW * channels);
if (maskMatP != NULL) {
CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal";
CHECK(outputH * outputW * channels == maskMatP->getWidth());
maskData = maskMatP->getData();
}
hl_maxpool_forward(frameNum, hl_maxpool_forward(frameNum,
inputData, inputData,
channels, channels,
...@@ -1051,7 +1065,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1051,7 +1065,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
paddingH, paddingH,
paddingW, paddingW,
data_, data_,
getStride()); getStride(),
maskData);
} }
void GpuMatrix::maxPoolBackward(Matrix& inputMat, void GpuMatrix::maxPoolBackward(Matrix& inputMat,
...@@ -1548,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out, ...@@ -1548,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
} }
void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output); GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label); auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
...@@ -1563,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { ...@@ -1563,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get(); hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy( hl_matrix_multi_binary_cross_entropy(
output_d, entropy_d, mat_d, height_, outputPtr->width_); output_d, entropy_d, mat_d, height_, outputPtr->width_);
#endif
} }
void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output); GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label); auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
...@@ -1581,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { ...@@ -1581,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get(); hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy_bp( hl_matrix_multi_binary_cross_entropy_bp(
output_d, grad_d, mat_d, height_, width_); output_d, grad_d, mat_d, height_, width_);
#endif
} }
void GpuMatrix::vol2Col(real* dataSrc, void GpuMatrix::vol2Col(real* dataSrc,
...@@ -1973,9 +1992,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1973,9 +1992,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW,
MatrixPtr maskMatP) {
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* outData = data_; real* outData = data_;
real* maskData = NULL;
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t inLength = imgSizeH * imgSizeW; size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW; size_t outLength = outputH * outputW;
...@@ -1984,6 +2005,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1984,6 +2005,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
CHECK_EQ(channels * outLength, this->getWidth()); CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride(); size_t outStride = getStride();
if (maskMatP != NULL) {
maskData = maskMatP->getData();
CHECK_EQ(channels * outLength, maskMatP->getWidth());
}
/* initialize the data_ */ /* initialize the data_ */
for (size_t i = 0; i < height_; i++) { for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) { for (size_t j = 0; j < width_; j++) {
...@@ -2005,17 +2031,30 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -2005,17 +2031,30 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
int wstart = pw * strideW - paddingW; int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW); int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
if (maskData == NULL) {
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max( outData[ph * outputW + pw] = std::max(
outData[ph * outputW + pw], inputData[h * imgSizeW + w]); outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
} }
} }
} else {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (outData[ph * outputW + pw] < inputData[h * imgSizeW + w]) {
outData[ph * outputW + pw] = inputData[h * imgSizeW + w];
maskData[ph * outputW + pw] = h * imgSizeW + w;
}
}
}
}
} }
} }
// compute offset // compute offset
inputData += inLength; inputData += inLength;
outData += outLength; outData += outLength;
if (maskData != NULL) maskData += outLength;
} }
} }
} }
...@@ -3226,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a, ...@@ -3226,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a,
real scaleAB, real scaleAB,
real scaleT); real scaleT);
#ifndef PADDLE_MOBILE_INFERENCE
void SharedCpuMatrix::mul(CpuSparseMatrix* a, void SharedCpuMatrix::mul(CpuSparseMatrix* a,
CpuMatrix* b, CpuMatrix* b,
real scaleAB, real scaleAB,
...@@ -3354,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) { ...@@ -3354,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) {
} }
} }
#endif
/* Add a (column) vector b to matrix a, column by column */ /* Add a (column) vector b to matrix a, column by column */
void CpuMatrix::addColumnVector(const Matrix& b) { void CpuMatrix::addColumnVector(const Matrix& b) {
BaseMatrix::addColVector(const_cast<Matrix&>(b)); BaseMatrix::addColVector(const_cast<Matrix&>(b));
......
...@@ -861,7 +861,8 @@ public: ...@@ -861,7 +861,8 @@ public:
/** /**
* Pooling forward operation, pick out the largest element * Pooling forward operation, pick out the largest element
* in the sizeX of value * in the sizeX of value, if the maskMatP is not NULL, it will
* also caculate the location indices.
*/ */
virtual void maxPoolForward(Matrix& inputMat, virtual void maxPoolForward(Matrix& inputMat,
size_t imgSizeH, size_t imgSizeH,
...@@ -874,7 +875,8 @@ public: ...@@ -874,7 +875,8 @@ public:
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW,
MatrixPtr maskMatP = NULL) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
...@@ -1426,7 +1428,8 @@ public: ...@@ -1426,7 +1428,8 @@ public:
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW); size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image, void maxPoolBackward(Matrix& image,
size_t imgSizeH, size_t imgSizeH,
...@@ -1697,7 +1700,8 @@ public: ...@@ -1697,7 +1700,8 @@ public:
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW); size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image, void maxPoolBackward(Matrix& image,
size_t imgSizeH, size_t imgSizeH,
...@@ -2066,6 +2070,7 @@ public: ...@@ -2066,6 +2070,7 @@ public:
class SharedCpuMatrix : public CpuMatrix { class SharedCpuMatrix : public CpuMatrix {
public: public:
#ifndef PADDLE_MOBILE_INFERENCE
/* blockNum is number of partitions of the matrix */ /* blockNum is number of partitions of the matrix */
SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false) SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false)
: CpuMatrix(height, width, trans) { : CpuMatrix(height, width, trans) {
...@@ -2111,6 +2116,7 @@ private: ...@@ -2111,6 +2116,7 @@ private:
ThreadLocal<CpuMatrixPtr> localBuf_; ThreadLocal<CpuMatrixPtr> localBuf_;
ThreadLocal<std::vector<int>> localBufRows_; ThreadLocal<std::vector<int>> localBufRows_;
ThreadLocal<std::vector<int>> blockSeq_; ThreadLocal<std::vector<int>> blockSeq_;
#endif
}; };
typedef struct { unsigned int col; } sparse_non_value_t; typedef struct { unsigned int col; } sparse_non_value_t;
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef> #include <cstddef>
#include "CpuSparseMatrix.h" #include "CpuSparseMatrix.h"
#include "Matrix.h" #include "Matrix.h"
...@@ -237,3 +240,47 @@ private: ...@@ -237,3 +240,47 @@ private:
}; };
} // namespace paddle } // namespace paddle
#else
#include "CpuSparseMatrix.h"
namespace paddle {
class GpuSparseMatrix : public Matrix {
public:
GpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format_ = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
GpuSparseMatrix(real* value,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, true) {}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <string.h> #include <string.h>
#include <algorithm> #include <algorithm>
...@@ -313,3 +315,27 @@ private: ...@@ -313,3 +315,27 @@ private:
}; };
} // namespace paddle } // namespace paddle
#else
namespace paddle {
class SparseRowCpuMatrix : public CpuMatrix {
public:
void reserveStore() {}
void clearIndices() {}
};
class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix {
public:
void setupIndices() {}
void addRows(MatrixPtr input) {}
void addRows(IVectorPtr ids) {}
};
class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix {};
class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix {};
class SparseRowIdsCpuMatrix : public CpuMatrix {};
} // namespace paddle
#endif
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
add_simple_unittest(test_ExecViaCpu) add_simple_unittest(test_ExecViaCpu)
add_simple_unittest(test_SIMDFunctions) add_simple_unittest(test_SIMDFunctions)
add_simple_unittest(test_TrainingAlgorithm) add_simple_unittest(test_TrainingAlgorithm)
add_simple_unittest(test_SparseMatrix)
add_simple_unittest(test_RowBuffer) add_simple_unittest(test_RowBuffer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_SparseMatrix)
endif()
# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference. # TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
add_unittest(test_matrixCompare add_unittest(test_matrixCompare
......
...@@ -30,6 +30,10 @@ class AccuracyOp : public framework::OperatorWithKernel { ...@@ -30,6 +30,10 @@ class AccuracyOp : public framework::OperatorWithKernel {
"Input (Label) of accuracy op should not be null."); "Input (Label) of accuracy op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Accuracy"), PADDLE_ENFORCE(ctx->HasOutput("Accuracy"),
"Output (Accuracy) of AccuracyOp should not be null."); "Output (Accuracy) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Correct"),
"Output (Correct) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Total"),
"Output (Total) of AccuracyOp should not be null.");
auto inference_dim = ctx->GetInputDim("Out"); auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label"); auto label_dim = ctx->GetInputDim("Label");
...@@ -43,6 +47,8 @@ class AccuracyOp : public framework::OperatorWithKernel { ...@@ -43,6 +47,8 @@ class AccuracyOp : public framework::OperatorWithKernel {
" the same as label."); " the same as label.");
ctx->SetOutputDim("Accuracy", {1}); ctx->SetOutputDim("Accuracy", {1});
ctx->SetOutputDim("Correct", {1});
ctx->SetOutputDim("Total", {1});
ctx->ShareLoD("Out", /*->*/ "Accuracy"); ctx->ShareLoD("Out", /*->*/ "Accuracy");
} }
...@@ -66,6 +72,8 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -66,6 +72,8 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Label", "Label of the training data"); AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ... // TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch"); AddOutput("Accuracy", "The accuracy of current batch");
AddOutput("Correct", "The correct samples count of current batch");
AddOutput("Total", "The samples count of current batch");
AddComment(R"DOC( AddComment(R"DOC(
Accuracy Operator. Accuracy Operator.
......
...@@ -24,7 +24,8 @@ using platform::PADDLE_CUDA_NUM_THREADS; ...@@ -24,7 +24,8 @@ using platform::PADDLE_CUDA_NUM_THREADS;
template <int BlockSize> template <int BlockSize>
__global__ void AccuracyCudaKernel(const int N, const int D, __global__ void AccuracyCudaKernel(const int N, const int D,
const int64_t* Xdata, const int64_t* Xdata,
const int64_t* labeldata, float* accuracy) { const int64_t* labeldata, int* correct_data,
float* accuracy) {
int count = 0; int count = 0;
__shared__ int total[BlockSize]; __shared__ int total[BlockSize];
...@@ -43,6 +44,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D, ...@@ -43,6 +44,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D,
// reduce the count with init value 0, and output accuracy. // reduce the count with init value 0, and output accuracy.
int result = thrust::reduce(thrust::device, total, total + BlockSize, 0); int result = thrust::reduce(thrust::device, total, total + BlockSize, 0);
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
*correct_data = result;
*accuracy = static_cast<float>(result) / static_cast<float>(N); *accuracy = static_cast<float>(result) / static_cast<float>(N);
} }
} }
...@@ -56,31 +58,48 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> { ...@@ -56,31 +58,48 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
auto* inference = ctx.Input<Tensor>("Out"); auto* inference = ctx.Input<Tensor>("Out");
auto* indices = ctx.Input<Tensor>("Indices"); auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label"); auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy"); auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
// FIXME(typhoonzero): only support indices currently // FIXME(typhoonzero): only support indices currently
// if add support for output values, how to detect the data type? // if add support for output values, how to detect the data type?
const int64_t* indices_data = indices->data<int64_t>(); const int64_t* indices_data = indices->data<int64_t>();
const int64_t* label_data = label->data<int64_t>(); const int64_t* label_data = label->data<int64_t>();
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
size_t num_samples = inference->dims()[0]; int num_samples = static_cast<int>(inference->dims()[0]);
size_t infer_width = inference->dims()[1]; size_t infer_width = inference->dims()[1];
PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float))); PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float)));
// cudaMemset((void**)&correct_data, 0, sizeof(float));
if (num_samples == 0) { if (num_samples == 0) {
return; return;
} }
cudaMemcpy(total_data, &num_samples, sizeof(int), cudaMemcpyHostToDevice);
AccuracyCudaKernel<PADDLE_CUDA_NUM_THREADS><<< AccuracyCudaKernel<PADDLE_CUDA_NUM_THREADS><<<
1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>( 1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>(
num_samples, infer_width, indices_data, label_data, accuracy_data); num_samples, infer_width, indices_data, label_data, correct_data,
accuracy_data);
int d_num_samples, d_num_correct;
float d_accuracy;
cudaMemcpy(&d_num_correct, correct_data, sizeof(int),
cudaMemcpyDeviceToHost);
cudaMemcpy(&d_num_samples, total_data, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(&d_accuracy, accuracy_data, sizeof(float),
cudaMemcpyDeviceToHost);
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
// FIXME(typhoonzero): types of T is for infernece data. // FIXME(typhoonzero): types of T is for inference data.
// label data is always int // label data is always int64
REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>, REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>,
paddle::operators::AccuracyOpCUDAKernel<double>); paddle::operators::AccuracyOpCUDAKernel<double>);
...@@ -29,7 +29,11 @@ class AccuracyKernel : public framework::OpKernel<T> { ...@@ -29,7 +29,11 @@ class AccuracyKernel : public framework::OpKernel<T> {
auto* indices = ctx.Input<Tensor>("Indices"); auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label"); auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy"); auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
const int64_t* indices_data = indices->data<int64_t>(); const int64_t* indices_data = indices->data<int64_t>();
...@@ -55,7 +59,8 @@ class AccuracyKernel : public framework::OpKernel<T> { ...@@ -55,7 +59,8 @@ class AccuracyKernel : public framework::OpKernel<T> {
} }
} }
// FIXME(typhoonzero): we don't accumulate the accuracy for now. *correct_data = num_correct;
*total_data = num_samples;
*accuracy_data = *accuracy_data =
static_cast<float>(num_correct) / static_cast<float>(num_samples); static_cast<float>(num_correct) / static_cast<float>(num_samples);
} }
......
...@@ -94,5 +94,13 @@ class CompareOp : public framework::OperatorWithKernel { ...@@ -94,5 +94,13 @@ class CompareOp : public framework::OperatorWithKernel {
REGISTER_LOGICAL_OP(less_than, "Out = X < Y"); REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor); REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
REGISTER_LOGICAL_KERNEL(greater_than, CPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_OP(equal, "Out = X == Y"); REGISTER_LOGICAL_OP(equal, "Out = X == Y");
REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor); REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
...@@ -15,4 +15,9 @@ ...@@ -15,4 +15,9 @@
#include "paddle/operators/compare_op.h" #include "paddle/operators/compare_op.h"
REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor); REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_KERNEL(less_equal, GPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_KERNEL(greater_than, GPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_KERNEL(greater_equal, GPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor); REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor);
...@@ -27,6 +27,24 @@ struct LessThanFunctor { ...@@ -27,6 +27,24 @@ struct LessThanFunctor {
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; } HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; }
}; };
template <typename T>
struct LessEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
};
template <typename T>
struct GreaterThanFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
};
template <typename T>
struct GreaterEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
};
template <typename T> template <typename T>
struct EqualFunctor { struct EqualFunctor {
using ELEM_TYPE = T; using ELEM_TYPE = T;
......
...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker, ...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker,
elementwise_add_grad, ops::ElementwiseOpGrad); elementwise_add_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_add, elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_add_grad, elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -19,7 +19,13 @@ namespace ops = paddle::operators; ...@@ -19,7 +19,13 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_add, elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, float>); ops::ElementwiseAddKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseAddKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_add_grad, elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, float>); ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseAddGradKernel<paddle::platform::GPUPlace, int64_t>);
...@@ -35,7 +35,13 @@ REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker, ...@@ -35,7 +35,13 @@ REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker,
elementwise_div_grad, ops::ElementwiseOpGrad); elementwise_div_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_div, elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_div_grad, elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -19,7 +19,13 @@ namespace ops = paddle::operators; ...@@ -19,7 +19,13 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_div, elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, float>); ops::ElementwiseDivKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseDivKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_div_grad, elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, float>); ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseDivGradKernel<paddle::platform::GPUPlace, int64_t>);
...@@ -37,8 +37,12 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker, ...@@ -37,8 +37,12 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_mul, elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, float>, ops::ElementwiseMulKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, double>); ops::ElementwiseMulKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_mul_grad, elementwise_mul_grad,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, float>, ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, double>); ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -20,8 +20,12 @@ namespace ops = paddle::operators; ...@@ -20,8 +20,12 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_mul, elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, float>, ops::ElementwiseMulKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, double>); ops::ElementwiseMulKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseMulKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_mul_grad, elementwise_mul_grad,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, float>, ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, double>); ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseMulGradKernel<paddle::platform::GPUPlace, int64_t>);
...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker, ...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker,
elementwise_sub_grad, ops::ElementwiseOpGrad); elementwise_sub_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_sub, elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_sub_grad, elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -19,7 +19,13 @@ namespace ops = paddle::operators; ...@@ -19,7 +19,13 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_sub, elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, float>); ops::ElementwiseSubKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseSubKernel<paddle::platform::GPUPlace, int64_t>);
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
elementwise_sub_grad, elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, float>); ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, float>,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, double>,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, int>,
ops::ElementwiseSubGradKernel<paddle::platform::GPUPlace, int64_t>);
...@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T> ...@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> { class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -87,11 +87,12 @@ template <typename PoolProcess, class T> ...@@ -87,11 +87,12 @@ template <typename PoolProcess, class T>
class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> { class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) { PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -154,10 +155,11 @@ template <class T> ...@@ -154,10 +155,11 @@ template <class T>
class MaxPool2dGradFunctor<platform::CPUPlace, T> { class MaxPool2dGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> { ...@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -240,17 +242,17 @@ template <typename PoolProcess, class T> ...@@ -240,17 +242,17 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> { class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_depth * output_height * output_width; const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -315,11 +317,12 @@ template <typename PoolProcess, class T> ...@@ -315,11 +317,12 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> { class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) { PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -398,10 +401,11 @@ template <class T> ...@@ -398,10 +401,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::CPUPlace, T> { class MaxPool3dGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> { ...@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -498,15 +502,15 @@ template <typename T> ...@@ -498,15 +502,15 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -563,13 +567,13 @@ template <typename T> ...@@ -563,13 +567,13 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_height = input_grad.dims()[2]; const int batch_size = input_grad->dims()[0];
const int input_width = input_grad.dims()[3]; const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_channels = output_grad.dims()[1]; const int output_channels = output_grad.dims()[1];
const int output_height = output_grad.dims()[2]; const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3]; const int output_width = output_grad.dims()[3];
...@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -612,17 +616,17 @@ template <typename T> ...@@ -612,17 +616,17 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_depth * output_height * output_width; const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -691,14 +695,14 @@ template <typename T> ...@@ -691,14 +695,14 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_depth = input_grad.dims()[2]; const int batch_size = input_grad->dims()[0];
const int input_height = input_grad.dims()[3]; const int input_depth = input_grad->dims()[2];
const int input_width = input_grad.dims()[4]; const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_channels = output_grad.dims()[1]; const int output_channels = output_grad.dims()[1];
const int output_depth = output_grad.dims()[2]; const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3]; const int output_height = output_grad.dims()[3];
...@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
......
...@@ -21,13 +21,13 @@ namespace math { ...@@ -21,13 +21,13 @@ namespace math {
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool2D(const int nthreads, const T* input_data, __global__ void KernelPool2D(const int nthreads, const T* input_data,
T* output_data, const int channels, const int channels, const int input_height,
const int input_height, const int input_width, const int input_width, const int output_height,
const int output_height, const int output_width, const int output_width, const int ksize_height,
const int ksize_height, const int ksize_width, const int ksize_width, const int stride_height,
const int stride_height, const int stride_width, const int stride_width, const int padding_height,
const int padding_height, const int padding_width, const int padding_width, PoolProcess pool_process,
PoolProcess pool_process) { T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool2DGrad( __global__ void KernelPool2DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_height,
const int input_height, const int input_width, const int output_height, const int input_width, const int output_height, const int output_width,
const int output_width, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_height,
const int stride_height, const int stride_width, const int padding_height, const int stride_width, const int padding_height, const int padding_width,
const int padding_width, PoolProcess pool_process) { PoolProcess pool_process, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width; int offsetW = index % input_width + padding_width;
...@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad( ...@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad(
template <typename T> template <typename T>
__global__ void KernelMaxPool2DGrad( __global__ void KernelMaxPool2DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_height,
const int input_height, const int input_width, const int output_height, const int input_width, const int output_height, const int output_width,
const int output_width, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_height,
const int stride_height, const int stride_width, const int padding_height, const int stride_width, const int padding_height, const int padding_width,
const int padding_width) { T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T> ...@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> { class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
const int padding_width = paddings[1]; const int padding_width = paddings[1];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
PoolProcess, PoolProcess,
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, output_data, input_channels, .stream()>>>(
input_height, input_width, output_height, nthreads, input_data, input_channels, input_height, input_width,
output_width, ksize_height, ksize_width, output_height, output_width, ksize_height, ksize_width, stride_height,
stride_height, stride_width, padding_height, stride_width, padding_height, padding_width, pool_process, output_data);
padding_width, pool_process);
} }
}; };
...@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T> ...@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T>
class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> { class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width; int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_height, input_width, output_height, output_width, input_height, input_width, output_height, output_width, ksize_height,
ksize_height, ksize_width, stride_height, stride_width, padding_height, ksize_width, stride_height, stride_width, padding_height, padding_width,
padding_width, pool_process); pool_process, input_grad_data);
} }
}; };
...@@ -253,10 +252,11 @@ template <typename T> ...@@ -253,10 +252,11 @@ template <typename T>
class MaxPool2dGradFunctor<platform::GPUPlace, T> { class MaxPool2dGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> { ...@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> { ...@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_height, input_width, output_height, output_width, input_height, input_width, output_height, output_width, ksize_height,
ksize_height, ksize_width, stride_height, stride_width, padding_height, ksize_width, stride_height, stride_width, padding_height, padding_width,
padding_width); input_grad_data);
} }
}; };
...@@ -313,14 +313,16 @@ template class Pool2dGradFunctor< ...@@ -313,14 +313,16 @@ template class Pool2dGradFunctor<
platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>; platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>;
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool3D( __global__ void KernelPool3D(const int nthreads, const T* input_data,
const int nthreads, const T* input_data, T* output_data, const int channels, const int channels, const int input_depth,
const int input_depth, const int input_height, const int input_width, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width, const int output_depth, const int output_height,
const int ksize_depth, const int ksize_height, const int ksize_width, const int output_width, const int ksize_depth,
const int stride_depth, const int stride_height, const int stride_width, const int ksize_height, const int ksize_width,
const int padding_depth, const int padding_height, const int padding_width, const int stride_depth, const int stride_height,
PoolProcess pool_process) { const int stride_width, const int padding_depth,
const int padding_height, const int padding_width,
PoolProcess pool_process, T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -358,13 +360,13 @@ __global__ void KernelPool3D( ...@@ -358,13 +360,13 @@ __global__ void KernelPool3D(
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool3DGrad( __global__ void KernelPool3DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_depth,
const int input_depth, const int input_height, const int input_width, const int input_height, const int input_width, const int output_depth,
const int output_depth, const int output_height, const int output_width, const int output_height, const int output_width, const int ksize_depth,
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_depth, const int stride_height, const int stride_width, const int stride_height, const int stride_width, const int padding_depth,
const int padding_depth, const int padding_height, const int padding_width, const int padding_height, const int padding_width, PoolProcess pool_process,
PoolProcess pool_process) { T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width; int offsetW = index % input_width + padding_width;
...@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad( ...@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad(
template <typename T> template <typename T>
__global__ void KernelMaxPool3DGrad( __global__ void KernelMaxPool3DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_depth,
const int input_depth, const int input_height, const int input_width, const int input_height, const int input_width, const int output_depth,
const int output_depth, const int output_height, const int output_width, const int output_height, const int output_width, const int ksize_depth,
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_depth, const int stride_height, const int stride_width, const int stride_height, const int stride_width, const int padding_depth,
const int padding_depth, const int padding_height, const int padding_height, const int padding_width, T* input_grad) {
const int padding_width) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -480,18 +481,18 @@ template <typename PoolProcess, class T> ...@@ -480,18 +481,18 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> { class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
const int padding_width = paddings[2]; const int padding_width = paddings[2];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, input_channels, input_depth, nthreads, input_data, input_channels, input_depth, input_height,
input_height, input_width, output_depth, output_height, output_width, input_width, output_depth, output_height, output_width, ksize_depth,
ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
stride_width, padding_depth, padding_height, padding_width, padding_depth, padding_height, padding_width, pool_process,
pool_process); output_data);
} }
}; };
...@@ -533,11 +534,11 @@ template <typename PoolProcess, class T> ...@@ -533,11 +534,11 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> { class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = int nthreads =
batch_size * input_channels * input_depth * input_height * input_width; batch_size * input_channels * input_depth * input_height * input_width;
...@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_depth, input_height, input_width, output_depth, input_depth, input_height, input_width, output_depth, output_height,
output_height, output_width, ksize_depth, ksize_height, ksize_width, output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_depth, stride_height, stride_width, padding_depth, stride_height, stride_width, padding_depth, padding_height,
padding_height, padding_width, pool_process); padding_width, pool_process, input_grad_data);
} }
}; };
...@@ -590,10 +591,11 @@ template <class T> ...@@ -590,10 +591,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::GPUPlace, T> { class MaxPool3dGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> { ...@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> { ...@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_depth, input_height, input_width, output_depth, input_depth, input_height, input_width, output_depth, output_height,
output_height, output_width, ksize_depth, ksize_height, ksize_width, output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_depth, stride_height, stride_width, padding_depth, stride_height, stride_width, padding_depth, padding_height,
padding_height, padding_width); padding_width, input_grad_data);
} }
}; };
...@@ -658,11 +660,11 @@ template class Pool3dGradFunctor< ...@@ -658,11 +660,11 @@ template class Pool3dGradFunctor<
template <typename T> template <typename T>
__global__ void KernelMaxPool2dWithIdx( __global__ void KernelMaxPool2dWithIdx(
const int nthreads, const T* input_data, T* output_data, T* mask_data, const int nthreads, const T* input_data, const int channels,
const int channels, const int input_height, const int input_width, const int input_height, const int input_width, const int output_height,
const int output_height, const int output_width, const int ksize_height, const int output_width, const int ksize_height, const int ksize_width,
const int ksize_width, const int stride_height, const int stride_width, const int stride_height, const int stride_width, const int padding_height,
const int padding_height, const int padding_width) { const int padding_width, T* output_data, T* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx( ...@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx(
template <typename T> template <typename T>
__global__ void KernelMaxPool2DWithIdxGrad( __global__ void KernelMaxPool2DWithIdxGrad(
const int nthreads, T* input_grad, const T* output_grad, const T* mask_data, const int nthreads, const T* output_grad, const T* mask_data,
const int channels, const int input_height, const int input_width, const int channels, const int input_height, const int input_width,
const int output_height, const int output_width, const int ksize_height, const int output_height, const int output_width, const int ksize_height,
const int ksize_width, const int stride_height, const int stride_width, const int ksize_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width) { const int padding_height, const int padding_width, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
...@@ -748,16 +750,16 @@ template <typename T> ...@@ -748,16 +750,16 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_width = paddings[1]; const int padding_width = paddings[1];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
KernelMaxPool2dWithIdx< KernelMaxPool2dWithIdx<
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, output_data, mask_data, .stream()>>>(
input_channels, input_height, input_width, nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, output_height, output_width, ksize_height, ksize_width, stride_height,
ksize_width, stride_height, stride_width, stride_width, padding_height, padding_width, output_data, mask_data);
padding_height, padding_width);
} }
}; };
...@@ -794,14 +795,14 @@ template <typename T> ...@@ -794,14 +795,14 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_channels = input_grad.dims()[1]; const int batch_size = input_grad->dims()[0];
const int input_height = input_grad.dims()[2]; const int input_channels = input_grad->dims()[1];
const int input_width = input_grad.dims()[3]; const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_height = output_grad.dims()[2]; const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3]; const int output_width = output_grad.dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
...@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width; int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
KernelMaxPool2DWithIdxGrad< KernelMaxPool2DWithIdxGrad<
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_grad_data, output_grad_data, .stream()>>>(nthreads, output_grad_data, mask_data,
mask_data, input_channels, input_height, input_channels, input_height, input_width,
input_width, output_height, output_width, output_height, output_width, ksize_height,
ksize_height, ksize_width, stride_height, ksize_width, stride_height, stride_width,
stride_width, padding_height, padding_width); padding_height, padding_width, input_grad_data);
} }
}; };
...@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>; ...@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>;
template <typename T> template <typename T>
__global__ void KernelMaxPool3DWithIdx( __global__ void KernelMaxPool3DWithIdx(
const int nthreads, const T* input_data, T* output_data, T* mask_data, const int nthreads, const T* input_data, const int channels,
const int channels, const int input_depth, const int input_height, const int input_depth, const int input_height, const int input_width,
const int input_width, const int output_depth, const int output_height, const int output_depth, const int output_height, const int output_width,
const int output_width, const int ksize_depth, const int ksize_height, const int ksize_depth, const int ksize_height, const int ksize_width,
const int ksize_width, const int stride_depth, const int stride_height, const int stride_depth, const int stride_height, const int stride_width,
const int stride_width, const int padding_depth, const int padding_height, const int padding_depth, const int padding_height, const int padding_width,
const int padding_width) { T* output_data, T* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx( ...@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx(
template <typename T> template <typename T>
__global__ void KernelMaxPool3DWithIdxGrad( __global__ void KernelMaxPool3DWithIdxGrad(
const int nthreads, T* input_grad, const T* output_grad, const T* mask, const int nthreads, const T* output_grad, const T* mask, const int channels,
const int channels, const int input_depth, const int input_height, const int input_depth, const int input_height, const int input_width,
const int input_width, const int output_depth, const int output_height, const int output_depth, const int output_height, const int output_width,
const int output_width, const int ksize_depth, const int ksize_height, const int ksize_depth, const int ksize_height, const int ksize_width,
const int ksize_width, const int stride_depth, const int stride_height, const int stride_depth, const int stride_height, const int stride_width,
const int stride_width, const int padding_depth, const int padding_height, const int padding_depth, const int padding_height, const int padding_width,
const int padding_width) { T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
...@@ -952,18 +953,18 @@ template <typename T> ...@@ -952,18 +953,18 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_width = paddings[2]; const int padding_width = paddings[2];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, mask_data, input_channels, nthreads, input_data, input_channels, input_depth, input_height,
input_depth, input_height, input_width, output_depth, output_height, input_width, output_depth, output_height, output_width, ksize_depth,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
stride_height, stride_width, padding_depth, padding_height, padding_depth, padding_height, padding_width, output_data, mask_data);
padding_width);
} }
}; };
...@@ -1005,15 +1005,15 @@ template <typename T> ...@@ -1005,15 +1005,15 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_channels = input_grad.dims()[1]; const int batch_size = input_grad->dims()[0];
const int input_depth = input_grad.dims()[2]; const int input_channels = input_grad->dims()[1];
const int input_height = input_grad.dims()[3]; const int input_depth = input_grad->dims()[2];
const int input_width = input_grad.dims()[4]; const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_depth = output_grad.dims()[2]; const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3]; const int output_height = output_grad.dims()[3];
const int output_width = output_grad.dims()[4]; const int output_width = output_grad.dims()[4];
...@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = int nthreads =
batch_size * input_channels * input_depth * input_height * input_width; batch_size * input_channels * input_depth * input_height * input_width;
...@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_grad_data, output_grad_data, mask_data, input_channels, nthreads, output_grad_data, mask_data, input_channels, input_depth,
input_depth, input_height, input_width, output_depth, output_height, input_height, input_width, output_depth, output_height, output_width,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height,
stride_height, stride_width, padding_depth, padding_height, stride_width, padding_depth, padding_height, padding_width,
padding_width); input_grad_data);
} }
}; };
......
...@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T> ...@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T>
class Pool2dFunctor { class Pool2dFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* output);
}; };
template <typename Place, typename PoolProcess, typename T> template <typename Place, typename PoolProcess, typename T>
class Pool2dGradFunctor { class Pool2dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* input_grad);
}; };
template <typename Place, class T> template <typename Place, class T>
class MaxPool2dGradFunctor { class MaxPool2dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
template <typename Place, typename PoolProcess, typename T> template <typename Place, typename PoolProcess, typename T>
class Pool3dFunctor { class Pool3dFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* output);
}; };
template <typename Place, typename PoolProcess, typename T> template <typename Place, typename PoolProcess, typename T>
class Pool3dGradFunctor { class Pool3dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* input_grad);
}; };
template <typename Place, class T> template <typename Place, class T>
class MaxPool3dGradFunctor { class MaxPool3dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
/* /*
...@@ -155,38 +157,38 @@ template <typename Place, typename T> ...@@ -155,38 +157,38 @@ template <typename Place, typename T>
class MaxPool2dWithIndexFunctor { class MaxPool2dWithIndexFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MaxPool2dWithIndexGradFunctor { class MaxPool2dWithIndexGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MaxPool3dWithIndexFunctor { class MaxPool3dWithIndexFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MaxPool3dWithIndexGradFunctor { class MaxPool3dWithIndexGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
} // namespace math } // namespace math
......
...@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T> Place, paddle::operators::math::MaxPool<T>, T>
pool2d_forward; pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process; paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides, pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool2dFunctor< paddle::operators::math::Pool2dFunctor<
Place, paddle::operators::math::AvgPool<T>, T> Place, paddle::operators::math::AvgPool<T>, T>
pool2d_forward; pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process; paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides, pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} }
} break; } break;
case 3: { case 3: {
...@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T> Place, paddle::operators::math::MaxPool<T>, T>
pool3d_forward; pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process; paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides, pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor< paddle::operators::math::Pool3dFunctor<
Place, paddle::operators::math::AvgPool<T>, T> Place, paddle::operators::math::AvgPool<T>, T>
pool3d_forward; pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process; paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides, pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} }
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
...@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> { ...@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> {
if (pooling_type == "max") { if (pooling_type == "max") {
paddle::operators::math::MaxPool2dGradFunctor<Place, T> paddle::operators::math::MaxPool2dGradFunctor<Place, T>
pool2d_backward; pool2d_backward;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings); ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool2dGradFunctor< paddle::operators::math::Pool2dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T> Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool2d_backward; pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process; paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings, pool_process); ksize, strides, paddings, pool_process, in_x_grad);
} }
} break; } break;
case 3: { case 3: {
if (pooling_type == "max") { if (pooling_type == "max") {
paddle::operators::math::MaxPool3dGradFunctor<Place, T> paddle::operators::math::MaxPool3dGradFunctor<Place, T>
pool3d_backward; pool3d_backward;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings); ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool3dGradFunctor< paddle::operators::math::Pool3dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T> Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool3d_backward; pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process; paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings, pool_process); ksize, strides, paddings, pool_process, in_x_grad);
} }
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......
...@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> { ...@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
case 2: { case 2: {
paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T> paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T>
pool2d_forward; pool2d_forward;
pool2d_forward(context.device_context(), *in_x, *out, *mask, ksize, pool2d_forward(context.device_context(), *in_x, ksize, strides,
strides, paddings); paddings, out, mask);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T> paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T>
pool3d_forward; pool3d_forward;
pool3d_forward(context.device_context(), *in_x, *out, *mask, ksize, pool3d_forward(context.device_context(), *in_x, ksize, strides,
strides, paddings); paddings, out, mask);
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
} }
...@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> { ...@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
case 2: { case 2: {
paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T> paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T>
pool2d_backward; pool2d_backward;
pool2d_backward(context.device_context(), *in_x_grad, *out_grad, pool2d_backward(context.device_context(), *out_grad, *mask, ksize,
*mask, ksize, strides, paddings); strides, paddings, in_x_grad);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T> paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T>
pool3d_backward; pool3d_backward;
pool3d_backward(context.device_context(), *in_x_grad, *out_grad, pool3d_backward(context.device_context(), *out_grad, *mask, ksize,
*mask, ksize, strides, paddings); strides, paddings, in_x_grad);
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
...@@ -26,6 +27,10 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor, ...@@ -26,6 +27,10 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>; using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
struct SumFunctor { struct SumFunctor {
template <typename Place, typename X, typename Y, typename Dim> template <typename Place, typename X, typename Y, typename Dim>
void operator()(const Place& place, X& x, Y& y, const Dim& dim) { void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
...@@ -133,11 +138,18 @@ class ReduceKernel : public framework::OpKernel<T> { ...@@ -133,11 +138,18 @@ class ReduceKernel : public framework::OpKernel<T> {
dims_vector.erase(dims_vector.begin() + dim); dims_vector.erase(dims_vector.begin() + dim);
dims = framework::make_ddim(dims_vector); dims = framework::make_ddim(dims_vector);
} }
auto out = EigenTensor < T, D == 1 ? 1 : (D - 1) > ::From(*output, dims);
auto& place = context.GetEigenDevice<Place>(); auto& place = context.GetEigenDevice<Place>();
Functor functor; Functor functor;
if (D == 1) {
auto out = EigenScalar<T>::From(*output);
functor(place, x, out, reduce_dim);
} else {
auto out = EigenTensor<T, (D - 1)>::From(*output, dims);
functor(place, x, out, reduce_dim); functor(place, x, out, reduce_dim);
} }
}
}; };
template <typename Place, typename T, typename Functor> template <typename Place, typename T, typename Functor>
...@@ -186,13 +198,13 @@ class ReduceGradKernel : public framework::OpKernel<T> { ...@@ -186,13 +198,13 @@ class ReduceGradKernel : public framework::OpKernel<T> {
auto x_reduce = EigenTensor<T, D>::From(*input1, dims); auto x_reduce = EigenTensor<T, D>::From(*input1, dims);
auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims); auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims);
Eigen::array<int, D> braodcast_dim; Eigen::array<int, D> broadcast_dim;
for (size_t i = 0; i < D; ++i) braodcast_dim[i] = 1; for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
braodcast_dim[dim] = input0->dims()[dim]; broadcast_dim[dim] = input0->dims()[dim];
auto& place = context.GetEigenDevice<Place>(); auto& place = context.GetEigenDevice<Place>();
Functor functor; Functor functor;
functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim, functor(place, x, x_reduce, x_grad, x_reduce_grad, broadcast_dim,
braodcast_dim[dim]); broadcast_dim[dim]);
} }
}; };
......
...@@ -200,7 +200,10 @@ void Parameter::setMat(ParameterType pType, int matType) { ...@@ -200,7 +200,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
false, false,
useGpu_); useGpu_);
} }
} else if (matType == MAT_NORMAL_SHARED) { }
#ifndef PADDLE_MOBILE_INFERENCE
// NOLINTNEXTLINE
else if (matType == MAT_NORMAL_SHARED) {
CHECK_EQ(height * width, bufs_[pType]->getSize()); CHECK_EQ(height * width, bufs_[pType]->getSize());
size_t blockNum = 0; size_t blockNum = 0;
CHECK(isGradShared(&blockNum)); CHECK(isGradShared(&blockNum));
...@@ -259,7 +262,10 @@ void Parameter::setMat(ParameterType pType, int matType) { ...@@ -259,7 +262,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
} else if (matType == MAT_SPARSE_ROW_AUTO_GROW) { } else if (matType == MAT_SPARSE_ROW_AUTO_GROW) {
CHECK(isGradSparseUpdate()); CHECK(isGradSparseUpdate());
mats_[pType] = std::make_shared<SparseAutoGrowRowCpuMatrix>(height, width); mats_[pType] = std::make_shared<SparseAutoGrowRowCpuMatrix>(height, width);
} else { }
#endif
// NOLINTNEXTLINE
else {
LOG(FATAL) << "Unsupported mat type" << matType; LOG(FATAL) << "Unsupported mat type" << matType;
} }
} }
......
#!/bin/bash
set -e
echo "Post install paddle debian package."
echo "Install some python package used for paddle. You can run "
echo " pip install /usr/opt/paddle/share/wheels/*.whl to install them."
find /usr/ -name '*paddle*.whl' | xargs pip install
...@@ -2,178 +2,198 @@ ...@@ -2,178 +2,198 @@
## Goals ## Goals
We want the building procedure generates Docker images so that we can run PaddlePaddle applications on Kubernetes clusters. We want to make the building procedures:
We want to build .deb packages so that enterprise users can run PaddlePaddle applications without Docker. 1. Static, can reproduce easily.
1. Generate python `whl` packages that can be widely use cross many distributions.
1. Build different binaries per release to satisfy different environments:
- Binaries for different CUDA and CUDNN versions, like CUDA 7.5, 8.0, 9.0
- Binaries containing only capi
- Binaries for python with wide unicode support or not.
1. Build docker images with PaddlePaddle pre-installed, so that we can run
PaddlePaddle applications directly in docker or on Kubernetes clusters.
We want to minimize the size of generated Docker images and .deb packages so to reduce the download time. To achieve this, we created a repo: https://github.com/PaddlePaddle/buildtools
which gives several docker images that are `manylinux1` sufficient. Then we
can build PaddlePaddle using these images to generate corresponding `whl`
binaries.
We want to encapsulate building tools and dependencies in a *development* Docker image so to ease the tools installation for developers. ## Run The Build
Developers use various editors (emacs, vim, Eclipse, Jupyter Notebook), so the development Docker image contains only building tools, not editing tools, and developers are supposed to git clone source code into their development computers and map the code into the development container. ### Build Evironments
We want the procedure and tools also work with testing, continuous integration, and releasing. The pre-built build environment images are:
| Image | Tag |
| ----- | --- |
| paddlepaddle/paddle_manylinux_devel | cuda7.5_cudnn5 |
| paddlepaddle/paddle_manylinux_devel | cuda8.0_cudnn5 |
| paddlepaddle/paddle_manylinux_devel | cuda7.5_cudnn7 |
| paddlepaddle/paddle_manylinux_devel | cuda9.0_cudnn7 |
## Docker Images ### Start Build
So we need two Docker images for each version of PaddlePaddle:
1. `paddle:<version>-dev`
This a development image contains only the development tools and standardizes the building procedure. Users include:
- developers -- no longer need to install development tools on the host, and can build their current work on the host (development computer). Choose one docker image that suit your environment and run the following
- release engineers -- use this to build the official release from certain branch/tag on Github.com. command to start a build:
- document writers / Website developers -- Our documents are in the source repo in the form of .md/.rst files and comments in source code. We need tools to extract the information, typeset, and generate Web pages.
Of course, developers can install building tools on their development computers. But different versions of PaddlePaddle might require different set or version of building tools. Also, it makes collaborative debugging easier if all developers use a unified development environment. ```bash
git clone https://github.com/PaddlePaddle/Paddle.git
The development image should include the following tools: cd Paddle
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=OFF" -e "RUN_TEST=OFF" -e "PYTHON_ABI=cp27-cp27mu" paddlepaddle/paddle_manylinux_devel /paddle/paddle/scripts/docker/build.sh
- gcc/clang ```
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers work on a remote computer with GPU; they could ssh into the computer and `docker exec` into the development container. However, running `sshd` in the container allows developers to ssh into the container directly. After the build finishes, you can get output `whl` package under
`build/python/dist`.
1. `paddle:<version>` This command mounts the source directory on the host into `/paddle` in the container, then run the build script `/paddle/paddle/scripts/docker/build.sh`
in the container. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed.
This is the production image, generated using the development image. This image might have multiple variants: ### Build Options
- GPU/AVX `paddle:<version>-gpu` Users can specify the following Docker build arguments with either "ON" or "OFF" value:
- GPU/no-AVX `paddle:<version>-gpu-noavx`
- no-GPU/AVX `paddle:<version>`
- no-GPU/no-AVX `paddle:<version>-noavx`
We allow users to choose between GPU and no-GPU because the GPU version image is much larger than then the no-GPU version. | Option | Default | Description |
| ------ | -------- | ----------- |
| `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
| `WITH_AVX` | OFF | Set to "ON" to enable AVX support. |
| `WITH_TESTING` | ON | Build unit tests binaries. |
| `WITH_MKLDNN` | ON | Build with [Intel® MKL DNN](https://github.com/01org/mkl-dnn) support. |
| `WITH_MKLML` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) support. |
| `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. |
| `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
| `WITH_C_API` | OFF | Build capi libraries for inference. |
| `WITH_PYTHON` | ON | Build with python support. Turn this off if build is only for capi. |
| `WITH_STYLE_CHECK` | ON | Check the code style when building. |
| `PYTHON_ABI` | "" | Build for different python ABI support, can be cp27-cp27m or cp27-cp27mu |
| `RUN_TEST` | OFF | Run unit test immediently after the build. |
| `WITH_DOC` | OFF | Build docs after build binaries. |
| `WOBOQ` | OFF | Generate WOBOQ code viewer under `build/woboq_out` |
We allow users the choice between AVX and no-AVX, because some cloud providers don't provide AVX-enabled VMs.
## Docker Images
## Development Environment You can get the latest PaddlePaddle docker images by
`docker pull paddlepaddle/paddle:<version>` or build one by yourself.
Here we describe how to use above two images. We start from considering our daily development environment. ### Official Docker Releases
Developers work on a computer, which is usually a laptop or desktop: Official docker images at
[here](https://hub.docker.com/r/paddlepaddle/paddle/tags/),
you can choose either latest or images with a release tag like `0.10.0`,
Currently available tags are:
<img src="doc/paddle-development-environment.png" width=500 /> | Tag | Description |
| ------ | --------------------- |
| latest | latest CPU only image |
| latest-gpu | latest binary with GPU support |
| 0.10.0 | release 0.10.0 CPU only binary image |
| 0.10.0-gpu | release 0.10.0 with GPU support |
or, they might rely on a more sophisticated box (like with GPUs): ### Build Your Own Image
<img src="doc/paddle-development-environment-gpu.png" width=500 /> Build PaddlePaddle docker images are quite simple since PaddlePaddle can
be installed by just running `pip install`. A sample `Dockerfile` is:
A principle here is that source code lies on the development computer (host) so that editors like Eclipse can parse the source code to support auto-completion. ```dockerfile
FROM nvidia/cuda:7.5-cudnn5-runtime-centos6
RUN yum install -y centos-release-SCL
RUN yum install -y python27
# This whl package is generated by previous build steps.
ADD python/dist/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl /
RUN pip install /paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl && rm -f /*.whl
```
Then build the image by running `docker build -t [REPO]/paddle:[TAG] .` under
the directory containing your own `Dockerfile`.
## Usages - NOTE: note that you can choose different base images for your environment, you can find all the versions [here](https://hub.docker.com/r/nvidia/cuda/).
### Build the Development Docker Image ### Use Docker Images
The following commands check out the source code to the host and build the development image `paddle:dev`: Suppose that you have written an application program `train.py` using
PaddlePaddle, we can test and run it using docker:
```bash ```bash
git clone https://github.com/PaddlePaddle/Paddle paddle docker run --rm -it -v $PWD:/work paddlepaddle/paddle /work/a.py
cd paddle
docker build -t paddle:dev .
``` ```
The `docker build` command assumes that `Dockerfile` is in the root source tree. Note that in this design, this `Dockerfile` is this only one in our repo. But this works only if all dependencies of `train.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image and with more dependencies installs.
Users can specify a Ubuntu mirror server for faster downloading:
```bash
docker build -t paddle:dev --build-arg UBUNTU_MIRROR=mirror://mirrors.ubuntu.com/mirrors.txt .
```
### Build PaddlePaddle from Source Code ### Run PaddlePaddle Book In Docker
Given the development image `paddle:dev`, the following command builds PaddlePaddle from the source tree on the development computer (host): Our [book repo](https://github.com/paddlepaddle/book) also provide a docker
image to start a jupiter notebook inside docker so that you can run this book
using docker:
```bash ```bash
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TESTING=OFF" -e "RUN_TEST=OFF" paddle:dev docker run -d -p 8888:8888 paddlepaddle/book
``` ```
This command mounts the source directory on the host into `/paddle` in the container, so the default entry point of `paddle:dev`, `build.sh`, could build the source code with possible local changes. When it writes to `/paddle/build` in the container, it writes to `$PWD/build` on the host indeed. Please refer to https://github.com/paddlepaddle/book if you want to build this
docker image by your self.
`build.sh` builds the following:
- PaddlePaddle binaries,
- `$PWD/build/paddle-<version>.deb` for production installation, and
- `$PWD/build/Dockerfile`, which builds the production Docker image.
Users can specify the following Docker build arguments with either "ON" or "OFF" value: ### Run Distributed Applications
- `WITH_GPU`: ***Required***. Generates NVIDIA CUDA GPU code and relies on CUDA libraries.
- `WITH_AVX`: ***Required***. Set to "OFF" prevents from generating AVX instructions. If you don't know what is AVX, you might want to set "ON".
- `WITH_TEST`: ***Optional, default OFF***. Build unit tests binaries. Once you've built the unit tests, you can run these test manually by the following command:
```bash
docker run --rm -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" paddle:dev sh -c "cd /paddle/build; make coverall"
```
- `RUN_TEST`: ***Optional, default OFF***. Run unit tests after building. You can't run unit tests without building it.
### Build the Production Docker Image In our [API design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md#distributed-training), we proposed an API that starts a distributed training job on a cluster. This API need to build a PaddlePaddle application into a Docker image as above and calls kubectl to run it on the cluster. This API might need to generate a Dockerfile look like above and call `docker build`.
The following command builds the production image: Of course, we can manually build an application image and launch the job using the kubectl tool:
```bash ```bash
docker build -t paddle -f build/Dockerfile ./build docker build -f some/Dockerfile -t myapp .
docker tag myapp me/myapp
docker push
kubectl ...
``` ```
This production image is minimal -- it includes binary `paddle`, the shared library `libpaddle.so`, and Python runtime. ## Docker Images for Developers
### Run PaddlePaddle Applications We have a special docker image for developers:
`paddlepaddle/paddle:<version>-dev`. This image is also generated from
https://github.com/PaddlePaddle/buildtools
Again the development happens on the host. Suppose that we have a simple application program in `a.py`, we can test and run it using the production image: This a development image contains only the
development tools and standardizes the building procedure. Users include:
```bash - developers -- no longer need to install development tools on the host, and can build their current work on the host (development computer).
docker run --rm -it -v $PWD:/work paddle /work/a.py - release engineers -- use this to build the official release from certain branch/tag on Github.com.
``` - document writers / Website developers -- Our documents are in the source repo in the form of .md/.rst files and comments in source code. We need tools to extract the information, typeset, and generate Web pages.
But this works only if all dependencies of `a.py` are in the production image. If this is not the case, we need to build a new Docker image from the production image and with more dependencies installs. Of course, developers can install building tools on their development computers. But different versions of PaddlePaddle might require different set or version of building tools. Also, it makes collaborative debugging easier if all developers use a unified development environment.
### Build and Run PaddlePaddle Applications The development image contains the following tools:
We need a Dockerfile in https://github.com/paddlepaddle/book that builds Docker image `paddlepaddle/book:<version>`, basing on the PaddlePaddle production image: - gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
``` Many developers work on a remote computer with GPU; they could ssh into the computer and `docker exec` into the development container. However, running `sshd` in the container allows developers to ssh into the container directly.
FROM paddlepaddle/paddle:<version>
RUN pip install -U matplotlib jupyter ...
COPY . /book
EXPOSE 8080
CMD ["jupyter"]
```
The book image is an example of PaddlePaddle application image. We can build it
```bash ### Development Workflow
git clone https://github.com/paddlepaddle/book
cd book
docker build -t book .
```
### Build and Run Distributed Applications Here we describe how the workflow goes on. We start from considering our daily development environment.
In our [API design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/api.md#distributed-training), we proposed an API that starts a distributed training job on a cluster. This API need to build a PaddlePaddle application into a Docker image as above and calls kubectl to run it on the cluster. This API might need to generate a Dockerfile look like above and call `docker build`. Developers work on a computer, which is usually a laptop or desktop:
Of course, we can manually build an application image and launch the job using the kubectl tool: <img src="doc/paddle-development-environment.png" width=500 />
```bash or, they might rely on a more sophisticated box (like with GPUs):
docker build -f some/Dockerfile -t myapp .
docker tag myapp me/myapp <img src="doc/paddle-development-environment-gpu.png" width=500 />
docker push
kubectl ... A principle here is that source code lies on the development computer (host) so that editors like Eclipse can parse the source code to support auto-completion.
```
### Reading source code with woboq codebrowser ### Reading source code with woboq codebrowser
For developers who are interested in the C++ source code, please use -e "WOBOQ=ON" to enable the building of C++ source code into HTML pages using [Woboq codebrowser](https://github.com/woboq/woboq_codebrowser). For developers who are interested in the C++ source code, please use -e "WOBOQ=ON" to enable the building of C++ source code into HTML pages using [Woboq codebrowser](https://github.com/woboq/woboq_codebrowser).
- The following command builds PaddlePaddle, generates HTML pages from C++ source code, and writes HTML pages into `$HOME/woboq_out` on the host: - The following command builds PaddlePaddle, generates HTML pages from C++ source code, and writes HTML pages into `$HOME/woboq_out` on the host:
```bash ```bash
docker run -v $PWD:/paddle -v $HOME/woboq_out:/woboq_out -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" -e "WOBOQ=ON" paddle:dev docker run -v $PWD:/paddle -v $HOME/woboq_out:/woboq_out -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" -e "WOBOQ=ON" paddlepaddle/paddle:latest-dev
``` ```
- You can open the generated HTML files in your Web browser. Or, if you want to run a Nginx container to serve them for a wider audience, you can run: - You can open the generated HTML files in your Web browser. Or, if you want to run a Nginx container to serve them for a wider audience, you can run:
......
#!/bin/bash #!/bin/bash
set -xe
function cmake_gen() { function cmake_gen() {
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
else
BASE_IMAGE="ubuntu:16.04"
fi
DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
mkdir -p /paddle/build mkdir -p /paddle/build
cd /paddle/build cd /paddle/build
...@@ -26,10 +9,29 @@ function cmake_gen() { ...@@ -26,10 +9,29 @@ function cmake_gen() {
# delete previous built whl packages # delete previous built whl packages
rm -rf /paddle/paddle/dist 2>/dev/null || true rm -rf /paddle/paddle/dist 2>/dev/null || true
# Support build for all python versions, currently
# including cp27-cp27m and cp27-cp27mu.
PYTHON_FLAGS=""
if [ "$1" != "" ]; then
echo "using python abi: $1"
if [ "$1" == "cp27-cp27m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
elif [ "$1" == "cp27-cp27mu" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
fi
fi
cat <<EOF cat <<EOF
======================================== ========================================
Configuring cmake in /paddle/build ... Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
${PYTHON_FLAGS}
-DWITH_DOC=OFF -DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF} -DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_MKLDNN=${WITH_MKLDNN:-ON} -DWITH_MKLDNN=${WITH_MKLDNN:-ON}
...@@ -46,12 +48,12 @@ function cmake_gen() { ...@@ -46,12 +48,12 @@ function cmake_gen() {
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
======================================== ========================================
EOF EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because # Disable UNITTEST_USE_VIRTUALENV in docker because
# docker environment is fully controlled by this script. # docker environment is fully controlled by this script.
# See /Paddle/CMakeLists.txt, UNITTEST_USE_VIRTUALENV option. # See /Paddle/CMakeLists.txt, UNITTEST_USE_VIRTUALENV option.
cmake .. \ cmake .. \
-DCMAKE_BUILD_TYPE=Release \ -DCMAKE_BUILD_TYPE=Release \
${PYTHON_FLAGS} \
-DWITH_DOC=OFF \ -DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_MKLDNN=${WITH_MKLDNN:-ON} \ -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \
...@@ -134,6 +136,19 @@ EOF ...@@ -134,6 +136,19 @@ EOF
function gen_dockerfile() { function gen_dockerfile() {
# Set BASE_IMAGE according to env variables
if [[ ${WITH_GPU} == "ON" ]]; then
BASE_IMAGE="nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04"
else
BASE_IMAGE="ubuntu:16.04"
fi
DOCKERFILE_GPU_ENV=""
DOCKERFILE_CUDNN_DSO=""
if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then
DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so"
fi
cat <<EOF cat <<EOF
======================================== ========================================
...@@ -168,13 +183,14 @@ EOF ...@@ -168,13 +183,14 @@ EOF
${DOCKERFILE_GPU_ENV} ${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/ ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/ ADD go/cmd/master/master /usr/bin/
ADD paddle/pybind/print_operators_doc /usr/bin/
# default command shows the paddle version and exit # default command shows the paddle version and exit
CMD ["paddle", "version"] CMD ["paddle", "version"]
EOF EOF
} }
cmake_gen set -xe
cmake_gen ${PYTHON_ABI:-""}
run_build run_build
run_test run_test
gen_docs gen_docs
......
...@@ -44,7 +44,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then ...@@ -44,7 +44,7 @@ if [ $ANDROID_ABI == "armeabi-v7a" ]; then
-DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \ -DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=Release \ -DCMAKE_BUILD_TYPE=MinSizeRel \
-DUSE_EIGEN_FOR_BLAS=ON \ -DUSE_EIGEN_FOR_BLAS=ON \
-DWITH_C_API=ON \ -DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \ -DWITH_SWIG_PY=OFF \
...@@ -58,7 +58,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then ...@@ -58,7 +58,7 @@ elif [ $ANDROID_ABI == "arm64-v8a" ]; then
-DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \ -DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=Release \ -DCMAKE_BUILD_TYPE=MinSizeRel \
-DUSE_EIGEN_FOR_BLAS=OFF \ -DUSE_EIGEN_FOR_BLAS=OFF \
-DWITH_C_API=ON \ -DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \ -DWITH_SWIG_PY=OFF \
...@@ -72,7 +72,7 @@ elif [ $ANDROID_ABI == "armeabi" ]; then ...@@ -72,7 +72,7 @@ elif [ $ANDROID_ABI == "armeabi" ]; then
-DHOST_C_COMPILER=/usr/bin/gcc \ -DHOST_C_COMPILER=/usr/bin/gcc \
-DHOST_CXX_COMPILER=/usr/bin/g++ \ -DHOST_CXX_COMPILER=/usr/bin/g++ \
-DCMAKE_INSTALL_PREFIX=$DEST_ROOT \ -DCMAKE_INSTALL_PREFIX=$DEST_ROOT \
-DCMAKE_BUILD_TYPE=Release \ -DCMAKE_BUILD_TYPE=MinSizeRel \
-DWITH_C_API=ON \ -DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \ -DWITH_SWIG_PY=OFF \
-DWITH_STYLE_CHECK=OFF \ -DWITH_STYLE_CHECK=OFF \
......
...@@ -33,6 +33,7 @@ MatrixPtr makeRandomSparseMatrix(size_t height, ...@@ -33,6 +33,7 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
bool withValue, bool withValue,
bool useGpu, bool useGpu,
bool equalNnzPerSample) { bool equalNnzPerSample) {
#ifndef PADDLE_MOBILE_INFERENCE
std::vector<int64_t> ids(height); std::vector<int64_t> ids(height);
std::vector<int64_t> indices(height + 1); std::vector<int64_t> indices(height + 1);
indices[0] = 0; indices[0] = 0;
...@@ -84,6 +85,8 @@ MatrixPtr makeRandomSparseMatrix(size_t height, ...@@ -84,6 +85,8 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
} }
return mat; return mat;
} }
#endif
return nullptr;
} }
void generateSequenceStartPositions(size_t batchSize, void generateSequenceStartPositions(size_t batchSize,
......
...@@ -27,6 +27,9 @@ using namespace paddle; // NOLINT ...@@ -27,6 +27,9 @@ using namespace paddle; // NOLINT
using namespace std; // NOLINT using namespace std; // NOLINT
int main(int argc, char** argv) { int main(int argc, char** argv) {
initMain(argc, argv);
initPython(argc, argv);
if (FLAGS_model_dir.empty() || FLAGS_config_file.empty() || if (FLAGS_model_dir.empty() || FLAGS_config_file.empty() ||
FLAGS_model_file.empty()) { FLAGS_model_file.empty()) {
LOG(INFO) << "Usage: ./paddle_merge_model --model_dir=pass-00000 " LOG(INFO) << "Usage: ./paddle_merge_model --model_dir=pass-00000 "
...@@ -34,9 +37,6 @@ int main(int argc, char** argv) { ...@@ -34,9 +37,6 @@ int main(int argc, char** argv) {
return 0; return 0;
} }
initMain(argc, argv);
initPython(argc, argv);
string confFile = FLAGS_config_file; string confFile = FLAGS_config_file;
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
FLAGS_use_gpu = false; FLAGS_use_gpu = false;
......
...@@ -37,10 +37,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -37,10 +37,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so
DEPENDS paddle_pybind) DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
...@@ -66,7 +66,7 @@ if (WITH_TESTING) ...@@ -66,7 +66,7 @@ if (WITH_TESTING)
add_subdirectory(paddle/v2/tests) add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests) add_subdirectory(paddle/v2/plot/tests)
add_subdirectory(paddle/v2/framework/tests) add_subdirectory(paddle/v2/fluid/tests)
endif() endif()
endif() endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
......
...@@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None): ...@@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None):
#caffe_mode: compute the output size using floor instead of ceil, #caffe_mode: compute the output size using floor instead of ceil,
# which is consistent of caffe and CuDNN's convention. # which is consistent of caffe and CuDNN's convention.
def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): def cnn_output_size(img_size,
output = (2 * padding + img_size - filter_size) / float(stride) filter_size,
padding,
stride,
caffe_mode,
dilation=1):
filter_s = (filter_size - 1) * dilation + 1
output = (2 * padding + img_size - filter_s) / float(stride)
if caffe_mode: if caffe_mode:
return 1 + int(math.floor(output)) return 1 + int(math.floor(output))
else: else:
...@@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): ...@@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
#calcualte image_size based on output_size for de-convolution (ConvTransLayer). #calcualte image_size based on output_size for de-convolution (ConvTransLayer).
#It is the reverse function of cnn_output_size #It is the reverse function of cnn_output_size
def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): def cnn_image_size(output_size,
img_size = (output_size - 1) * stride + filter_size - 2 * padding filter_size,
padding,
stride,
caffe_mode,
dilation=1):
filter_s = (filter_size - 1) * dilation + 1
img_size = (output_size - 1) * stride + filter_s - 2 * padding
if not caffe_mode: if not caffe_mode:
img_size = img_size + 1 img_size = img_size + 1
return img_size return img_size
...@@ -1253,9 +1265,9 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf): ...@@ -1253,9 +1265,9 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): def parse_pool(pool, input_layer_name, pool_conf, ceil_mode):
pool_conf.pool_type = pool.pool_type pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in [ config_assert(pool.pool_type in [
'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool' 'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool'
], "pool-type %s is not in " ], "pool-type %s is not in " \
"['max-projection', 'avg-projection', " "['max-projection', 'avg-projection', 'max-pool-with-mask'," \
"'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type) "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type)
pool_conf.channels = pool.channels pool_conf.channels = pool.channels
...@@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): ...@@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.stride_y = conv.stride_y conv_conf.stride_y = conv.stride_y
conv_conf.groups = conv.groups conv_conf.groups = conv.groups
conv_conf.caffe_mode = conv.caffe_mode conv_conf.caffe_mode = conv.caffe_mode
if not conv.dilation:
conv.dilation = 1
conv.dilation_y = 1
else:
conv_conf.dilation = conv.dilation
conv_conf.dilation_y = conv.dilation_y
if not trans: if not trans:
conv_conf.filter_channels = conv.channels / conv.groups conv_conf.filter_channels = conv.channels / conv.groups
...@@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): ...@@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
get_img_size(input_layer_name, conv.channels) get_img_size(input_layer_name, conv.channels)
conv_conf.output_x = cnn_output_size( conv_conf.output_x = cnn_output_size(
conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, conv_conf.img_size, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode) conv_conf.stride, conv_conf.caffe_mode, conv.dilation)
conv_conf.output_y = cnn_output_size( conv_conf.output_y = cnn_output_size(
conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode) conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y)
else: else:
conv_conf.filter_channels = num_filters / conv.groups conv_conf.filter_channels = num_filters / conv.groups
conv_conf.output_x, conv_conf.output_y = \ conv_conf.output_x, conv_conf.output_y = \
get_img_size(input_layer_name, conv.channels) get_img_size(input_layer_name, conv.channels)
conv_conf.img_size = cnn_image_size( conv_conf.img_size = cnn_image_size(
conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, conv_conf.output_x, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode) conv_conf.stride, conv_conf.caffe_mode, conv.dilation)
conv_conf.img_size_y = cnn_image_size( conv_conf.img_size_y = cnn_image_size(
conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode) conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y)
#caffe_mode: compute the output size using floor instead of ceil, #caffe_mode: compute the output size using floor instead of ceil,
......
...@@ -20,7 +20,7 @@ from paddle.trainer.config_parser import * ...@@ -20,7 +20,7 @@ from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import * from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType, \ from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \
CudnnAvgPooling, CudnnMaxPooling CudnnAvgPooling, CudnnMaxPooling
from .attrs import * from .attrs import *
from .default_decorators import * from .default_decorators import *
...@@ -888,7 +888,7 @@ def mixed_layer(size=0, ...@@ -888,7 +888,7 @@ def mixed_layer(size=0,
:type size: int :type size: int
:param input: The input of this layer. It is an optional parameter. If set, :param input: The input of this layer. It is an optional parameter. If set,
then this function will just return layer's name. then this function will just return layer's name.
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
...@@ -1030,7 +1030,7 @@ def fc_layer(input, ...@@ -1030,7 +1030,7 @@ def fc_layer(input,
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param size: The layer dimension. :param size: The layer dimension.
:type size: int :type size: int
:param act: Activation Type. TanhActivation is the default. :param act: Activation Type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute|list. :param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -1527,7 +1527,7 @@ def lstmemory(input, ...@@ -1527,7 +1527,7 @@ def lstmemory(input,
:type input: LayerOutput :type input: LayerOutput
:param reverse: is sequence process reversed or not. :param reverse: is sequence process reversed or not.
:type reverse: bool :type reverse: bool
:param act: Activation type. TanhActivation is the default. :math:`h_t` :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activation type, SigmoidActivation by default. :param gate_act: gate activation type, SigmoidActivation by default.
:type gate_act: BaseActivation :type gate_act: BaseActivation
...@@ -1920,7 +1920,7 @@ def repeat_layer(input, ...@@ -1920,7 +1920,7 @@ def repeat_layer(input,
False for treating input as column vector and repeating False for treating input as column vector and repeating
in the row direction. in the row direction.
:type as_row_vector: bool :type as_row_vector: bool
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
...@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input, ...@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input,
:type reshape_size: int :type reshape_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
...@@ -2487,7 +2487,7 @@ def img_conv_layer(input, ...@@ -2487,7 +2487,7 @@ def img_conv_layer(input,
shape will be (filter_size, filter_size_y). shape will be (filter_size, filter_size_y).
:type filter_size_y: int | None :type filter_size_y: int | None
:param num_filters: Each filter group's number of filter :param num_filters: Each filter group's number of filter
:param act: Activation type. ReluActivation is the default. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: Group size of filters. :param groups: Group size of filters.
:type groups: int :type groups: int
...@@ -2571,7 +2571,9 @@ def img_conv_layer(input, ...@@ -2571,7 +2571,9 @@ def img_conv_layer(input,
if layer_type: if layer_type:
if dilation > 1 or dilation_y > 1: if dilation > 1 or dilation_y > 1:
assert layer_type in ["cudnn_conv", "cudnn_convt"] assert layer_type in [
"cudnn_conv", "cudnn_convt", "exconv", "exconvt"
]
if trans: if trans:
assert layer_type in ["exconvt", "cudnn_convt"] assert layer_type in ["exconvt", "cudnn_convt"]
else: else:
...@@ -2699,9 +2701,9 @@ def img_pool_layer(input, ...@@ -2699,9 +2701,9 @@ def img_pool_layer(input,
elif isinstance(pool_type, AvgPooling): elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg' pool_type.name = 'avg'
assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling,
CudnnMaxPooling], \ CudnnMaxPooling], \
"only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" "only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling are supported"
type_name = pool_type.name + '-projection' \ type_name = pool_type.name + '-projection' \
if ( if (
...@@ -3253,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): ...@@ -3253,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
:param input: Input layers. It could be a LayerOutput or list/tuple of :param input: Input layers. It could be a LayerOutput or list/tuple of
LayerOutput. LayerOutput.
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
...@@ -3311,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -3311,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
:type name: basestring :type name: basestring
:param input: input layers or projections :param input: input layers or projections
:type input: list | tuple | collections.Sequence :type input: list | tuple | collections.Sequence
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3406,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -3406,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
:type a: LayerOutput :type a: LayerOutput
:param b: input sequence layer :param b: input sequence layer
:type b: LayerOutput :type b: LayerOutput
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3572,30 +3574,32 @@ def lstm_step_layer(input, ...@@ -3572,30 +3574,32 @@ def lstm_step_layer(input,
... ...
This layer has two outputs. Default output is :math:`h_t`. The other This layer has two outputs. The default output is :math:`h_t`. The other
output is :math:`o_t`, whose name is 'state' and can use output is :math:`o_t`, whose name is 'state' and users can use
:code:`get_output_layer` to extract this output. :code:`get_output_layer` to extract this output.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param size: Layer's size. NOTE: lstm layer's size, should be equal to :param size: The dimension of this layer's output, which must be
:code:`input.size/4`, and should be equal to equal to the dimension of the state.
:code:`state.size`.
:type size: int :type size: int
:param input: input layer. :math:`Wx_t + Wh_{t-1}` :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param state: State Layer. :math:`c_{t-1}` :param state: The state of the LSTM unit.
:type state: LayerOutput :type state: LayerOutput
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Gate Activation Type. SigmoidActivation is the default. :param gate_act: Activation type of the gate. SigmoidActivation is the
default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: State Activation Type. TanhActivation is the default. :param state_act: Activation type of the state. TanhActivation is the
default activation.
:type state_act: BaseActivation :type state_act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is :param bias_attr: The bias attribute. If the parameter is set to False or an object
set to True or None, the bias is initialized to zero. whose type is not ParameterAttribute, no bias is defined. If the
:type bias_attr: ParameterAttribute | None | True parameter is set to True, the bias is initialized to zero.
:param layer_attr: layer's extra attribute. :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3640,23 +3644,31 @@ def gru_step_layer(input, ...@@ -3640,23 +3644,31 @@ def gru_step_layer(input,
layer_attr=None): layer_attr=None):
""" """
:param input: :param input: The input of this layer, whose dimension can be divided by 3.
:type input: LayerOutput :type input: LayerOutput
:param output_mem: :param output_mem: A memory which memorizes the output of this layer at previous
:param size: time step.
:param act: :type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :type name: basestring
:param gate_act: Activation type of this layer's two gates. SigmoidActivation is
the default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, no bias False or an object whose type is not ParameterAttribute, no bias
is defined. If this parameter is set to True, is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: the parameter_attribute for transforming the output_mem :param param_attr: The parameter attribute. See ParameterAttribute for details.
from previous step. :type param_attr: ParameterAttribute
:param layer_attr: :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3701,25 +3713,34 @@ def gru_step_naive_layer(input, ...@@ -3701,25 +3713,34 @@ def gru_step_naive_layer(input,
param_attr=None, param_attr=None,
layer_attr=None): layer_attr=None):
""" """
GRU Step Layer, but using MixedLayer to generate. It support ERROR_CLIPPING GRU Step Layer, which is realized using PaddlePaddle API. It supports ERROR_CLIPPING
and DROPOUT. and DROPOUT.
:param input: :param input: The input of this layer, whose dimensionality can be divided by 3.
:param output_mem: :param output_mem: A memory which memorizes the output of this layer at previous
:param size: time step.
:type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param act: :type name: basestring
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :param gate_act: Activation type of this layer's two gates. SigmoidActivation
is the default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, no bias False or an object whose type is not ParameterAttribute, no bias
is defined. If this parameter is set to True, is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: :param param_attr: The parameter attribute. See ParameterAttribute for details.
:param layer_attr: :type param_attr: ParameterAttribute
:return: :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if input.size % 3 != 0: if input.size % 3 != 0:
...@@ -3781,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): ...@@ -3781,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: get output layer's input. And this layer should contains :param input: The input layer. And this layer should contain
multiple outputs. multiple outputs.
:type input: LayerOutput :type input: LayerOutput
:param arg_name: Output name from input. :param arg_name: The name of the output to be extracted from the input layer.
:type arg_name: basestring :type arg_name: basestring
:param layer_attr: Layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3843,18 +3865,20 @@ def recurrent_layer(input, ...@@ -3843,18 +3865,20 @@ def recurrent_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, False or an object whose type is not ParameterAttribute,
no bias is defined. If the parameter is set to True, no bias is defined. If the parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: parameter attribute. :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3879,7 +3903,7 @@ def recurrent_layer(input, ...@@ -3879,7 +3903,7 @@ def recurrent_layer(input,
class StaticInput(object): class StaticInput(object):
""" """
StaticInput is only used in recurrent_group which defines a read-only memory StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence. and can be a sequence or non-sequence.
:param size: DEPRECATED :param size: DEPRECATED
:param is_seq: DEPRECATED :param is_seq: DEPRECATED
""" """
...@@ -3912,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3912,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
Recurrent layer group is an extremely flexible recurrent unit in Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a PaddlePaddle. As long as the user defines the calculation done within a
time step, PaddlePaddle will iterate such a recurrent calculation over time step, PaddlePaddle will iterate such a recurrent calculation over
sequence input. This is extremely usefull for attention based model, or sequence input. This is useful for attention-based models, or Neural
Neural Turning Machine like models. Turning Machine like models.
The basic usage (time steps) is: The basic usage (time steps) is:
...@@ -3935,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3935,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
demo/seqToseq/seqToseq_net.py demo/seqToseq/seqToseq_net.py
- sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf - sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf
:param step: recurrent one time step function.The input of this function is :param step: A step function which takes the input of recurrent_group as its own
input of the group. The return of this function will be input and returns values as recurrent_group's output every time step.
recurrent group's return value.
The recurrent group scatter a sequence into time steps. And The recurrent group scatters a sequence into time steps. And
for each time step, will invoke step function, and return for each time step, it will invoke step function, and return
a time step result. Then gather each time step of output into a time step result. Then gather outputs of each time step into
layer group's output. layer group's output.
:type step: callable :type step: callable
:param name: recurrent_group's name. :param name: The recurrent_group's name. It is optional.
:type name: basestring :type name: basestring
:param input: Input links array. :param input: Input links array.
...@@ -3954,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3954,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
LayerOutput will be scattered into time steps. LayerOutput will be scattered into time steps.
SubsequenceInput will be scattered into sequence steps. SubsequenceInput will be scattered into sequence steps.
StaticInput will be imported to each time step, and doesn't change StaticInput will be imported to each time step, and doesn't change
through time. It's a mechanism to access layer outside step function. over time. It's a mechanism to access layer outside step function.
:type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple
:param reverse: If reverse is set true, the recurrent unit will process the :param reverse: If reverse is set to True, the recurrent unit will process the
input sequence in a reverse order. input sequence in a reverse order.
:type reverse: bool :type reverse: bool
...@@ -4093,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None): ...@@ -4093,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None):
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4126,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): ...@@ -4126,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input1: The first input layer name. :param input1: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param input2: The second input layer name. :param input2: The second input layer.
:type input2: LayerOutput :type input2: LayerOutput
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4169,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): ...@@ -4169,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param eos_id: end id of sequence :param eos_id: End id of sequence
:type eos_id: int :type eos_id: int
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4232,8 +4258,9 @@ def beam_search(step, ...@@ -4232,8 +4258,9 @@ def beam_search(step,
- machine translation : demo/seqToseq/translation/gen.conf \ - machine translation : demo/seqToseq/translation/gen.conf \
demo/seqToseq/seqToseq_net.py demo/seqToseq/seqToseq_net.py
:param name: Name of the recurrent unit that generates sequences. :param name: The name of the recurrent unit that is responsible for
:type name: base string generating sequences. It is optional.
:type name: basestring
:param step: A callable function that defines the calculation in a time :param step: A callable function that defines the calculation in a time
step, and it is applied to sequences with arbitrary length by step, and it is applied to sequences with arbitrary length by
sharing a same set of weights. sharing a same set of weights.
...@@ -4358,16 +4385,18 @@ def square_error_cost(input, ...@@ -4358,16 +4385,18 @@ def square_error_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Network prediction. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: Data label. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4400,17 +4429,20 @@ def classification_cost(input, ...@@ -4400,17 +4429,20 @@ def classification_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: input layer name. network output. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: label layer name. data_layer often. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param evaluator: Evaluator method. :param evaluator: Evaluator method. classification_error_evaluator is the default.
:param layer_attr: layer's extra attribute. :type evaluator: Evaluator method
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4463,7 +4495,7 @@ def conv_operator(img, ...@@ -4463,7 +4495,7 @@ def conv_operator(img,
Different from img_conv_layer, conv_op is an Operator, which can be used Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution. in mixed_layer. And conv_op takes two inputs to perform convolution.
The first input is the image and the second is filter kernel. It only The first input is the image and the second is filter kernel. It only
support GPU mode. supports GPU mode.
The example usage is: The example usage is:
...@@ -4475,27 +4507,31 @@ def conv_operator(img, ...@@ -4475,27 +4507,31 @@ def conv_operator(img,
num_filters=64, num_filters=64,
num_channels=64) num_channels=64)
:param img: input image :param img: The input image.
:type img: LayerOutput :type img: LayerOutput
:param filter: input filter :param filter: The input filter.
:type filter: LayerOutput :type filter: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The dimension of the filter kernel on the x axis.
:type filter_size: int :type filter_size: int
:param filter_size_y: The y dimension of a filter kernel. Since :param filter_size_y: The dimension of the filter kernel on the y axis.
PaddlePaddle now supports rectangular filters, If the parameter is not set or set to None, it will
the filter's shape can be (filter_size, filter_size_y). set to 'filter_size' automatically.
:type filter_size_y: int :type filter_size_y: int
:param num_filters: channel of output data. :param num_filters: The number of the output channels.
:type num_filters: int :type num_filters: int
:param num_channels: channel of input data. :param num_channels: The number of the input channels. If the parameter is not set
or set to None, it will be automatically set to the channel
number of the 'img'.
:type num_channels: int :type num_channels: int
:param stride: The x dimension of the stride. :param stride: The stride on the x axis.
:type stride: int :type stride: int
:param stride_y: The y dimension of the stride. :param stride_y: The stride on the y axis. If the parameter is not set or
set to None, it will be set to 'stride' automatically.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of padding. :param padding: The padding size on the x axis.
:type padding: int :type padding: int
:param padding_y: The y dimension of padding. :param padding_y: The padding size on the y axis. If the parameter is not set
or set to None, it will be set to 'padding' automatically.
:type padding_y: int :type padding_y: int
:return: A ConvOperator Object. :return: A ConvOperator Object.
:rtype: ConvOperator :rtype: ConvOperator
...@@ -4546,9 +4582,9 @@ def conv_projection(input, ...@@ -4546,9 +4582,9 @@ def conv_projection(input,
param_attr=None, param_attr=None,
trans=False): trans=False):
""" """
Different from img_conv_layer and conv_op, conv_projection is an Projection, Different from img_conv_layer and conv_op, conv_projection is a Projection,
which can be used in mixed_layer and conat_layer. It use cudnn to implement which can be used in mixed_layer and concat_layer. It uses cudnn to implement
conv and only support GPU mode. convolution and only supports GPU mode.
The example usage is: The example usage is:
...@@ -4561,32 +4597,45 @@ def conv_projection(input, ...@@ -4561,32 +4597,45 @@ def conv_projection(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The dimensions of the filter kernel. If the parameter is
:type filter_size: int set to one integer, the two dimensions on x and y axises
:param filter_size_y: The y dimension of a filter kernel. Since will be same when filter_size_y is not set. If it is set
PaddlePaddle now supports rectangular filters, to a list, the first element indicates the dimension on
the filter's shape can be (filter_size, filter_size_y). the x axis, and the second is used to specify the dimension
on the y axis when filter_size is not provided.
:type filter_size: int | tuple | list
:param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
is not set, it will be set automatically according to filter_size.
:type filter_size_y: int :type filter_size_y: int
:param num_filters: channel of output data. :param num_filters: The number of filters.
:type num_filters: int :type num_filters: int
:param num_channels: channel of input data. :param num_channels: The number of the input channels.
:type num_channels: int :type num_channels: int
:param stride: The x dimension of the stride. :param stride: The strides. If the parameter is set to one integer, the strides
:type stride: int on x and y axises will be same when stride_y is not set. If it is
:param stride_y: The y dimension of the stride. set to a list, the first element indicates the stride on the x axis,
and the second is used to specify the stride on the y axis when
stride_y is not provided.
:type stride: int | tuple | list
:param stride_y: The stride on the y axis.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of padding. :param padding: The padding sizes. If the parameter is set to one integer, the padding
:type padding: int sizes on x and y axises will be same when padding_y is not set. If it
:param padding_y: The y dimension of padding. is set to a list, the first element indicates the padding size on the
x axis, and the second is used to specify the padding size on the y axis
when padding_y is not provided.
:type padding: int | tuple | list
:param padding_y: The padding size on the y axis.
:type padding_y: int :type padding_y: int
:param groups: The group number. :param groups: The group number.
:type groups: int :type groups: int
:param param_attr: Convolution param attribute. None means default attribute :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param trans: whether it is convTrans or conv :param trans: Whether it is ConvTransProjection or ConvProjection
:type trans: bool :type trans: bool
:return: A DotMulProjection Object. :return: A Projection Object.
:rtype: DotMulProjection :rtype: ConvTransProjection | ConvProjection
""" """
if num_channels is None: if num_channels is None:
assert input.num_filters is not None assert input.num_filters is not None
...@@ -4651,13 +4700,13 @@ def pad_layer(input, ...@@ -4651,13 +4700,13 @@ def pad_layer(input,
layer_attr=None): layer_attr=None):
""" """
This operation pads zeros to the input data according to pad_c,pad_h This operation pads zeros to the input data according to pad_c,pad_h
and pad_w. pad_c, pad_h, pad_w specifies the which dimension and size and pad_w. pad_c, pad_h, pad_w specify the size in the corresponding
of padding. And the input data shape is NCHW. dimension. And the input data shape is NCHW.
For example, pad_c=[2,3] means padding 2 zeros before the For example, pad_c=[2,3] means padding 2 zeros before the input data
input data and 3 zeros after the input data in channel dimension. and 3 zeros after the input data in the channel dimension. pad_h means
pad_h means padding zeros in height dimension. pad_w means padding zeros padding zeros in the height dimension. pad_w means padding zeros in the
in width dimension. width dimension.
For example, For example,
...@@ -4694,13 +4743,14 @@ def pad_layer(input, ...@@ -4694,13 +4743,14 @@ def pad_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pad_c: padding size in channel dimension. :param pad_c: The padding size in the channel dimension.
:type pad_c: list | None :type pad_c: list | None
:param pad_h: padding size in height dimension. :param pad_h: The padding size in the height dimension.
:type pad_h: list | None :type pad_h: list | None
:param pad_w: padding size in width dimension. :param pad_w: The padding size in the width dimension.
:type pad_w: list | None :type pad_w: list | None
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -4749,7 +4799,7 @@ def pad_layer(input, ...@@ -4749,7 +4799,7 @@ def pad_layer(input,
@layer_support() @layer_support()
def conv_shift_layer(a, b, name=None, layer_attr=None): def conv_shift_layer(a, b, name=None, layer_attr=None):
""" """
This layer performs cyclic convolution for two input. For example: This layer performs cyclic convolution on two inputs. For example:
- a[in]: contains M elements. - a[in]: contains M elements.
- b[in]: contains N elements (N should be odd). - b[in]: contains N elements (N should be odd).
- c[out]: contains M elements. - c[out]: contains M elements.
...@@ -4758,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -4758,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j} c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j}
In this formular: In this formula:
- a's index is computed modulo M. When it is negative, then get item from - a's index is computed modulo M. When it is negative, then get item from
the right side (which is the end of array) to the left. the right side (which is the end of array) to the left.
- b's index is computed modulo N. When it is negative, then get item from - b's index is computed modulo N. When it is negative, then get item from
...@@ -4772,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -4772,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: The first input of this layer.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b. :param b: The second input of this layer.
:type b: LayerOutput :type b: LayerOutput
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4807,8 +4858,8 @@ def tensor_layer(a, ...@@ -4807,8 +4858,8 @@ def tensor_layer(a,
bias_attr=None, bias_attr=None,
layer_attr=None): layer_attr=None):
""" """
This layer performs tensor operation for two input. This layer performs tensor operation on two inputs.
For example, each sample: For example:
.. math:: .. math::
y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1 y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1
...@@ -4828,22 +4879,24 @@ def tensor_layer(a, ...@@ -4828,22 +4879,24 @@ def tensor_layer(a,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: The first input of this layer.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b. :param b: The second input of this layer.
:type b: LayerOutput :type b: LayerOutput
:param size: the layer dimension. :param size: The dimension of this layer.
:type size: int. :type size: int
:param act: Activation type. LinearActivation is the default. :param act: Activation type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True, no bias is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4879,7 +4932,7 @@ def selective_fc_layer(input, ...@@ -4879,7 +4932,7 @@ def selective_fc_layer(input,
layer_attr=None): layer_attr=None):
""" """
Selectived fully connected layer. Different from fc_layer, the output Selectived fully connected layer. Different from fc_layer, the output
of this layer maybe sparse. It requires an additional input to indicate of this layer can be sparse. It requires an additional input to indicate
several selected columns for output. If the selected columns is not several selected columns for output. If the selected columns is not
specified, selective_fc_layer acts exactly like fc_layer. specified, selective_fc_layer acts exactly like fc_layer.
...@@ -4893,22 +4946,34 @@ def selective_fc_layer(input, ...@@ -4893,22 +4946,34 @@ def selective_fc_layer(input,
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param select: The select layer. The output of select layer should be a :param select: The layer to select columns to output. It should be a sparse
sparse binary matrix, and treat as the mask of selective fc. binary matrix, and is treated as the mask of selective fc. If
If is None, acts exactly like fc_layer. it is not set or set to None, selective_fc_layer acts exactly
like fc_layer.
:type select: LayerOutput :type select: LayerOutput
:param size: The layer dimension. :param size: The dimension of this layer, which should be equal to that of
the layer 'select'.
:type size: int :type size: int
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param pass_generation: The flag which indicates whether it is during generation.
:type pass_generation: bool
:param has_selected_colums: The flag which indicates whether the parameter 'select'
has been set. True is the default.
:type has_selected_colums: bool
:param mul_ratio: A ratio helps to judge how sparse the output is and determine
the computation method for speed consideration.
:type mul_ratio: float
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The parameter attribute for bias. If this parameter is set to :param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True, no bias is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4959,7 +5024,7 @@ def selective_fc_layer(input, ...@@ -4959,7 +5024,7 @@ def selective_fc_layer(input,
@layer_support() @layer_support()
def sampling_id_layer(input, name=None, layer_attr=None): def sampling_id_layer(input, name=None, layer_attr=None):
""" """
A layer for sampling id from multinomial distribution from the input layer. A layer for sampling id from a multinomial distribution from the input layer.
Sampling one id for one sample. Sampling one id for one sample.
The simple usage is: The simple usage is:
...@@ -4972,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None): ...@@ -4972,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None):
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -4994,8 +5060,7 @@ def slope_intercept_layer(input, ...@@ -4994,8 +5060,7 @@ def slope_intercept_layer(input,
intercept=0.0, intercept=0.0,
layer_attr=None): layer_attr=None):
""" """
This layer for applying a slope and an intercept to the input This layer for applying a slope and an intercept to the input.
element-wise. There is no activation and weight.
.. math:: .. math::
y = slope * x + intercept y = slope * x + intercept
...@@ -5010,12 +5075,13 @@ def slope_intercept_layer(input, ...@@ -5010,12 +5075,13 @@ def slope_intercept_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param slope: the scale factor. :param slope: The scale factor.
:type slope: float. :type slope: float
:param intercept: the offset. :param intercept: The offset.
:type intercept: float. :type intercept: float
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5070,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): ...@@ -5070,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
:type weights: LayerOutput :type weights: LayerOutput
:param vectors: The vector layer. :param vectors: The vector layer.
:type vectors: LayerOutput :type vectors: LayerOutput
:param size: the dimension of this layer. :param size: The dimension of this layer.
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5122,11 +5189,11 @@ def block_expand_layer(input, ...@@ -5122,11 +5189,11 @@ def block_expand_layer(input,
outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x
The expand method is the same with ExpandConvLayer, but saved the transposed The expanding method is the same with ExpandConvLayer, but saved the transposed
value. After expanding, output.sequenceStartPositions will store timeline. value. After expanding, output.sequenceStartPositions will store timeline.
The number of time steps are outputH * outputW and the dimension of each The number of time steps is outputH * outputW and the dimension of each
time step is block_y * block_x * num_channels. This layer can be used after time step is block_y * block_x * num_channels. This layer can be used after
convolution neural network, and before recurrent neural network. convolutional neural network, and before recurrent neural network.
The simple usage is: The simple usage is:
...@@ -5141,8 +5208,10 @@ def block_expand_layer(input, ...@@ -5141,8 +5208,10 @@ def block_expand_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. :param num_channels: The number of input channels. If the parameter is not set or
:type num_channels: int | None set to None, its actual value will be automatically set to
the channels number of the input.
:type num_channels: int
:param block_x: The width of sub block. :param block_x: The width of sub block.
:type block_x: int :type block_x: int
:param block_y: The width of sub block. :param block_y: The width of sub block.
...@@ -5156,9 +5225,10 @@ def block_expand_layer(input, ...@@ -5156,9 +5225,10 @@ def block_expand_layer(input,
:param padding_y: The padding size in vertical direction. :param padding_y: The padding size in vertical direction.
:type padding_y: int :type padding_y: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring. :type name: basestring.
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5188,12 +5258,19 @@ def block_expand_layer(input, ...@@ -5188,12 +5258,19 @@ def block_expand_layer(input,
@layer_support() @layer_support()
def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
""" """
A layer to do max out on conv layer output. A layer to do max out on convolutional layer output.
- Input: output of a conv layer. - Input: the output of a convolutional layer.
- Output: feature map size same as input. Channel is (input channel) / groups. - Output: feature map size same as the input's, and its channel number is
(input channel) / groups.
So groups should be larger than 1, and the num of channels should be able So groups should be larger than 1, and the num of channels should be able
to devided by groups. to be devided by groups.
Reference:
Maxout Networks
http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
https://arxiv.org/pdf/1312.6082v4.pdf
.. math:: .. math::
y_{si+j} = \max_k x_{gsi + sk + j} y_{si+j} = \max_k x_{gsi + sk + j}
...@@ -5203,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5203,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
0 \le j < s 0 \le j < s
0 \le k < groups 0 \le k < groups
Please refer to Paper:
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
The simple usage is: The simple usage is:
.. code-block:: python .. code-block:: python
...@@ -5219,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5219,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set :param num_channels: The number of input channels. If the parameter is not set or
automatically from previous output. set to None, its actual value will be automatically set to
:type num_channels: int | None the channels number of the input.
:type num_channels: int
:param groups: The group number of input layer. :param groups: The group number of input layer.
:type groups: int :type groups: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring. :type name: basestring
:param layer_attr: Extra Layer attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5258,20 +5331,20 @@ def ctc_layer(input, ...@@ -5258,20 +5331,20 @@ def ctc_layer(input,
layer_attr=None): layer_attr=None):
""" """
Connectionist Temporal Classification (CTC) is designed for temporal Connectionist Temporal Classification (CTC) is designed for temporal
classication task. That is, for sequence labeling problems where the classication task. e.g. sequence labeling problems where the
alignment between the inputs and the target labels is unknown. alignment between the inputs and the target labels is unknown.
More details can be found by referring to `Connectionist Temporal Reference:
Classification: Labelling Unsegmented Sequence Data with Recurrent Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ with Recurrent Neural Networks
icml2006_GravesFGS06.pdf>`_ http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note: Note:
Considering the 'blank' label needed by CTC, you need to use Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
(num_classes + 1) as the input size. num_classes is the category number. as the size of the input, where num_classes is the category number.
And the 'blank' is the last category index. So the size of 'input' layer, such as And the 'blank' is the last category index. So the size of 'input' layer (e.g.
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer fc_layer with softmax activation) should be (num_classes + 1). The size of
should also be num_classes + 1. ctc_layer should also be (num_classes + 1).
The example usage is: The example usage is:
...@@ -5284,16 +5357,17 @@ def ctc_layer(input, ...@@ -5284,16 +5357,17 @@ def ctc_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring | None :type name: basestring
:param norm_by_times: Whether to normalization by times. False by default. :param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5334,20 +5408,19 @@ def warp_ctc_layer(input, ...@@ -5334,20 +5408,19 @@ def warp_ctc_layer(input,
building process, PaddlePaddle will clone the source codes, build and building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory. install it to :code:`third_party/install/warpctc` directory.
More details of CTC can be found by referring to `Connectionist Temporal Reference:
Classification: Labelling Unsegmented Sequence Data with Recurrent Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ with Recurrent Neural Networks
icml2006_GravesFGS06.pdf>`_. http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note: Note:
- Let num_classes represent the category number. Considering the 'blank' - Let num_classes represents the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the input size. label needed by CTC, you need to use (num_classes + 1) as the size of
Thus, the size of both warp_ctc layer and 'input' layer should be set to warp_ctc layer.
num_classes + 1.
- You can set 'blank' to any value ranged in [0, num_classes], which - You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels. should be consistent with those used in your labels.
- As a native 'softmax' activation is interated to the warp-ctc library, - As a native 'softmax' activation is interated to the warp-ctc library,
'linear' activation is expected instead in the 'input' layer. 'linear' activation is expected to be used instead in the 'input' layer.
The example usage is: The example usage is:
...@@ -5361,18 +5434,19 @@ def warp_ctc_layer(input, ...@@ -5361,18 +5434,19 @@ def warp_ctc_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring | None :type name: basestring
:param blank: the 'blank' label used in ctc :param blank: The 'blank' label used in ctc.
:type blank: int :type blank: int
:param norm_by_times: Whether to normalization by times. False by default. :param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5418,23 +5492,26 @@ def crf_layer(input, ...@@ -5418,23 +5492,26 @@ def crf_layer(input,
label=label, label=label,
size=label_dim) size=label_dim)
:param input: The first input layer is the feature. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The second input layer is label. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: The category number. :param size: The category number.
:type size: int :type size: int
:param weight: The third layer is "weight" of each sample, which is an :param weight: The weight layer defines a weight for each sample in the
optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param param_attr: Parameter attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5480,9 +5557,9 @@ def crf_decoding_layer(input, ...@@ -5480,9 +5557,9 @@ def crf_decoding_layer(input,
""" """
A layer for calculating the decoding sequence of sequential conditional A layer for calculating the decoding sequence of sequential conditional
random field model. The decoding sequence is stored in output.ids. random field model. The decoding sequence is stored in output.ids.
If a second input is provided, it is treated as the ground-truth label, and If the input 'label' is provided, it is treated as the ground-truth label, and
this layer will also calculate error. output.value[i] is 1 for incorrect this layer will also calculate error. output.value[i] is 1 for an incorrect
decoding or 0 for correct decoding. decoding and 0 for the correct.
The example usage is: The example usage is:
...@@ -5493,16 +5570,18 @@ def crf_decoding_layer(input, ...@@ -5493,16 +5570,18 @@ def crf_decoding_layer(input,
:param input: The first input layer. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param size: size of this layer. :param size: The dimension of this layer.
:type size: int :type size: int
:param label: None or ground-truth label. :param label: The input label.
:type label: LayerOutput or None :type label: LayerOutput | None
:param param_attr: Parameter attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5549,8 +5628,7 @@ def nce_layer(input, ...@@ -5549,8 +5628,7 @@ def nce_layer(input,
bias_attr=None, bias_attr=None,
layer_attr=None): layer_attr=None):
""" """
Noise-contrastive estimation. This layer implements the method in the Noise-contrastive estimation.
following paper:
Reference: Reference:
A fast and simple algorithm for training neural probabilistic language A fast and simple algorithm for training neural probabilistic language
...@@ -5566,25 +5644,27 @@ def nce_layer(input, ...@@ -5566,25 +5644,27 @@ def nce_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layers. It should be a LayerOutput or a list/tuple :param input: The first input of this layer.
of LayerOutput.
:type input: LayerOutput | list | tuple | collections.Sequence :type input: LayerOutput | list | tuple | collections.Sequence
:param label: The ground truth. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight layer defines a weight for each sample in the :param weight: The weight layer defines a weight for each sample in the
mini-batch. The default value is None. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param num_classes: The class number. :param num_classes: The number of classes.
:type num_classes: int :type num_classes: int
:param param_attr: The parameter attributes. :param act: Activation type. SigmoidActivation is the default activation.
:type param_attr: ParameterAttribute|list :type act: BaseActivation
:param num_neg_samples: The number of sampled negative labels. The default :param param_attr: The parameter attribute. See ParameterAttribute for
value is 10. details.
:type param_attr: ParameterAttribute
:param num_neg_samples: The number of sampled negative labels. 10 is the
default value.
:type num_neg_samples: int :type num_neg_samples: int
:param neg_distribution: The discrete noisy distribution over the output :param neg_distribution: The discrete noisy distribution over the output
space from which num_neg_samples negative labels space from which num_neg_samples negative labels
are sampled. If this parameter is not set, a are sampled. If this parameter is not set, a
uniform distribution will be used. A user defined uniform distribution will be used. A user-defined
distribution is a list whose length must be equal distribution is a list whose length must be equal
to the num_classes. Each member of the list defines to the num_classes. Each member of the list defines
the probability of a class given input x. the probability of a class given input x.
...@@ -5594,9 +5674,10 @@ def nce_layer(input, ...@@ -5594,9 +5674,10 @@ def nce_layer(input,
no bias is defined. If this parameter is set to True, no bias is defined. If this parameter is set to True,
the bias is initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: The LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if isinstance(input, LayerOutput): if isinstance(input, LayerOutput):
...@@ -5663,11 +5744,11 @@ def rank_cost(left, ...@@ -5663,11 +5744,11 @@ def rank_cost(left,
coeff=1.0, coeff=1.0,
layer_attr=None): layer_attr=None):
""" """
A cost Layer for learning to rank using gradient descent. Details can refer A cost Layer for learning to rank using gradient descent.
to `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/
ICML_ranking.pdf>`_. Reference:
This layer contains at least three inputs. The weight is an optional Learning to Rank using Gradient Descent
argument, which affects the cost. http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf
.. math:: .. math::
...@@ -5698,14 +5779,16 @@ def rank_cost(left, ...@@ -5698,14 +5779,16 @@ def rank_cost(left,
:type right: LayerOutput :type right: LayerOutput
:param label: Label is 1 or 0, means positive order and reverse order. :param label: Label is 1 or 0, means positive order and reverse order.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5750,25 +5833,25 @@ def lambda_cost(input, ...@@ -5750,25 +5833,25 @@ def lambda_cost(input,
NDCG_num=8, NDCG_num=8,
max_sort_size=-1) max_sort_size=-1)
:param input: Samples of the same query should be loaded as sequence. :param input: The first input of this layer, which is often a document
samples list of the same query and whose type must be sequence.
:type input: LayerOutput :type input: LayerOutput
:param score: The 2nd input. Score of each sample. :param score: The scores of the samples.
:type input: LayerOutput :type input: LayerOutput
:param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain), :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain),
e.g., 5 for NDCG@5. It must be less than or equal to the e.g., 5 for NDCG@5. It must be less than or equal to the
minimum size of lists. minimum size of the list.
:type NDCG_num: int :type NDCG_num: int
:param max_sort_size: The size of partial sorting in calculating gradient. :param max_sort_size: The size of partial sorting in calculating gradient. If
If max_sort_size = -1, then for each list, the max_sort_size is equal to -1 or greater than the number
algorithm will sort the entire list to get gradient. of the samples in the list, then the algorithm will sort
In other cases, max_sort_size must be greater than or the entire list to compute the gradient. In other cases,
equal to NDCG_num. And if max_sort_size is greater max_sort_size must be greater than or equal to NDCG_num.
than the size of a list, the algorithm will sort the
entire list of get gradient.
:type max_sort_size: int :type max_sort_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5813,11 +5896,10 @@ def cross_entropy(input, ...@@ -5813,11 +5896,10 @@ def cross_entropy(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param weight: The cost of each sample is multiplied with each weight. :param weight: The weight layer defines a weight for each sample in the
The weight should be a layer with size=1. Note that gradient mini-batch. It is optional.
will not be calculated for weight.
:type weight: LayerOutout :type weight: LayerOutout
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -5862,7 +5944,7 @@ def cross_entropy_with_selfnorm(input, ...@@ -5862,7 +5944,7 @@ def cross_entropy_with_selfnorm(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param softmax_selfnorm_alpha: The scale factor affects the cost. :param softmax_selfnorm_alpha: The scale factor affects the cost.
:type softmax_selfnorm_alpha: float :type softmax_selfnorm_alpha: float
...@@ -5952,7 +6034,7 @@ def huber_regression_cost(input, ...@@ -5952,7 +6034,7 @@ def huber_regression_cost(input,
:param delta: The difference between the observed and predicted values. :param delta: The difference between the observed and predicted values.
:type delta: float :type delta: float
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6002,7 +6084,7 @@ def huber_classification_cost(input, ...@@ -6002,7 +6084,7 @@ def huber_classification_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6047,7 +6129,7 @@ def multi_binary_label_cross_entropy(input, ...@@ -6047,7 +6129,7 @@ def multi_binary_label_cross_entropy(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6218,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -6218,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6370,7 +6452,7 @@ def row_conv_layer(input, ...@@ -6370,7 +6452,7 @@ def row_conv_layer(input,
:param context_len: The context length equals the lookahead step number :param context_len: The context length equals the lookahead step number
plus one. plus one.
:type context_len: int :type context_len: int
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The parameter attribute. See ParameterAttribute for :param param_attr: The parameter attribute. See ParameterAttribute for
details. details.
...@@ -6492,7 +6574,8 @@ def gated_unit_layer(input, ...@@ -6492,7 +6574,8 @@ def gated_unit_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param size: The dimension of this layer's output. :param size: The dimension of this layer's output.
:type size: int :type size: int
:param act: Activation type of the projection. LinearActivation is the default. :param act: Activation type of the projection. LinearActivation is the default
activation.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -6873,7 +6956,7 @@ def img_conv3d_layer(input, ...@@ -6873,7 +6956,7 @@ def img_conv3d_layer(input,
:type filter_size: int | tuple | list :type filter_size: int | tuple | list
:param num_filters: The number of filters in each group. :param num_filters: The number of filters in each group.
:type num_filters: int :type num_filters: int
:param act: Activation type. ReluActivation is the default. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: The number of the filter groups. :param groups: The number of the filter groups.
:type groups: int :type groups: int
...@@ -6889,7 +6972,7 @@ def img_conv3d_layer(input, ...@@ -6889,7 +6972,7 @@ def img_conv3d_layer(input,
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: The number of input channels. If the parameter is not set or :param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to set to None, its actual value will be automatically set to
the channels number of the input . the channels number of the input.
:type num_channels: int :type num_channels: int
:param param_attr: The parameter attribute of the convolution. See ParameterAttribute for :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details. details.
...@@ -7065,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): ...@@ -7065,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None):
:type offsets: LayerOutput :type offsets: LayerOutput
:param sizes: The sizes of the sub-sequences, which should be sequence type. :param sizes: The sizes of the sub-sequences, which should be sequence type.
:type sizes: LayerOutput :type sizes: LayerOutput
:param act: Activation type, LinearActivation is the default. :param act: Activation type, LinearActivation is the default activation.
:type act: BaseActivation. :type act: BaseActivation.
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
""" """
__all__ = [ __all__ = [
"BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling", "BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling",
"CudnnAvgPooling", "SumPooling", "SquareRootNPooling" "CudnnMaxPooling", "CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
] ]
...@@ -55,6 +55,19 @@ class MaxPooling(BasePoolingType): ...@@ -55,6 +55,19 @@ class MaxPooling(BasePoolingType):
self.output_max_index = output_max_index self.output_max_index = output_max_index
class MaxWithMaskPooling(BasePoolingType):
"""
MaxWithMask pooling.
Not only return the very large values for each dimension in sequence or time steps,
but also the location indices of found maxinum values.
"""
def __init__(self):
BasePoolingType.__init__(self, "max-pool-with-mask")
class CudnnMaxPooling(BasePoolingType): class CudnnMaxPooling(BasePoolingType):
""" """
Cudnn max pooling only support GPU. Return the maxinum value in the Cudnn max pooling only support GPU. Return the maxinum value in the
......
...@@ -28,6 +28,8 @@ layers { ...@@ -28,6 +28,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 227 output_y: 227
img_size_y: 256 img_size_y: 256
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -28,6 +28,8 @@ layers { ...@@ -28,6 +28,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 227 output_y: 227
img_size_y: 256 img_size_y: 256
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -28,6 +28,8 @@ layers { ...@@ -28,6 +28,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 48 output_y: 48
img_size_y: 48 img_size_y: 48
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -30,6 +30,8 @@ layers { ...@@ -30,6 +30,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 48 output_y: 48
img_size_y: 48 img_size_y: 48
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
...@@ -105,6 +107,8 @@ layers { ...@@ -105,6 +107,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 24 output_y: 24
img_size_y: 24 img_size_y: 24
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_1__.wbias" bias_parameter_name: "___conv_1__.wbias"
......
...@@ -30,6 +30,8 @@ layers { ...@@ -30,6 +30,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 48 output_y: 48
img_size_y: 48 img_size_y: 48
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -36,6 +36,8 @@ layers { ...@@ -36,6 +36,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 14 output_y: 14
img_size_y: 14 img_size_y: 14
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -37,6 +37,8 @@ import model ...@@ -37,6 +37,8 @@ import model
import paddle.trainer.config_parser as cp import paddle.trainer.config_parser as cp
__all__ = [ __all__ = [
'default_startup_program',
'default_main_program',
'optimizer', 'optimizer',
'layer', 'layer',
'activation', 'activation',
......
from paddle.v2.framework import framework as framework from paddle.v2.fluid import framework as framework
__all__ = ['append_backward_ops'] __all__ = ['append_backward_ops']
......
...@@ -13,7 +13,7 @@ A `scoped_function` will take a `function` as input. That function will be ...@@ -13,7 +13,7 @@ A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope. invoked in a new local scope.
""" """
import paddle.v2.framework.core import paddle.v2.fluid.core
import threading import threading
__tl_scope__ = threading.local() __tl_scope__ = threading.local()
...@@ -27,13 +27,13 @@ __all__ = [ ...@@ -27,13 +27,13 @@ __all__ = [
def get_cur_scope(): def get_cur_scope():
""" """
Get current scope. Get current scope.
:rtype: paddle.v2.framework.core.Scope :rtype: paddle.v2.fluid.core.Scope
""" """
cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None) cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None)
if cur_scope_stack is None: if cur_scope_stack is None:
__tl_scope__.cur_scope = list() __tl_scope__.cur_scope = list()
if len(__tl_scope__.cur_scope) == 0: if len(__tl_scope__.cur_scope) == 0:
__tl_scope__.cur_scope.append(paddle.v2.framework.core.Scope()) __tl_scope__.cur_scope.append(paddle.v2.fluid.core.Scope())
return __tl_scope__.cur_scope[-1] return __tl_scope__.cur_scope[-1]
......
import numpy as np
from paddle.v2.fluid.framework import Program, g_main_program, unique_name, Variable
import paddle.v2.fluid.core as core
def _clone_var_in_block_(block, var):
assert isinstance(var, Variable)
return block.create_var(
name=var.name,
shape=var.shape,
dtype=var.data_type,
type=var.type,
lod_level=var.lod_level,
persistable=True)
class Evaluator(object):
"""
Evalutor Base class.
create metric states
add mini-batch evaluator caculate operator
add increment operator to accumulate the metric states
"""
def __init__(self, name, **kwargs):
"""
init the global states
"""
self._states = {}
if kwargs.has_key("main_program"):
self._main_program = kwargs.get("main_program")
else:
self._main_program = g_main_program
def _update_ops(self, *args, **kwargs):
"""
append update ops to the global states
"""
raise NotImplementedError()
def reset(self, executor, reset_program=None):
"""
Clear metric states at the begin of each pass/user specified batch
"""
if reset_program == None:
reset_program = Program()
else:
reset_program = program
block = reset_program.global_block()
for k, var in self._states.iteritems():
g_var = _clone_var_in_block_(block, var)
zeros = block.create_var(dtype="float32", persistable=True)
block.append_op(
type="fill_constant",
outputs={"Out": [zeros]},
attrs={
"shape": g_var.shape,
"value": .0,
"data_type": 5,
})
block.append_op(
type="scale", inputs={"X": zeros}, outputs={"Out": g_var})
executor.run(reset_program, fetch_list=self._states.values())
def eval(self, executor, eval_program=None):
"""
Merge the mini-batch statistics to form the evaluation result for multiple mini-batches.
"""
raise NotImplementedError()
class Accuracy(Evaluator):
"""
Accuracy need two state variable Total, Correct
"""
def __init__(self, *args, **kwargs):
super(Accuracy, self).__init__("accuracy", **kwargs)
block = self._main_program.global_block()
g_total = block.create_var(
name=unique_name("Total"),
persistable=True,
dtype="int64",
shape=[1])
g_correct = block.create_var(
name=unique_name("Correct"),
persistable=True,
dtype="int64",
shape=[1])
self._states["Total"] = g_total
self._states["Correct"] = g_correct
def _update_ops(self, input, label, k=1, **kwargs):
block = self._main_program.global_block()
topk_out = block.create_var(dtype=input.data_type)
topk_indices = block.create_var(dtype="int64")
block.append_op(
type="top_k",
inputs={"X": [input]},
outputs={"Out": [topk_out],
"Indices": [topk_indices]},
attrs={"k": k})
acc_out = block.create_var(dtype=kwargs.get("out_dtype", "float32"))
correct = block.create_var(dtype="int64", persistable=True)
total = block.create_var(dtype="int64", persistable=True)
block.append_op(
type="accuracy",
inputs={
"Out": [topk_out],
"Indices": [topk_indices],
"Label": [label]
},
outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
block.append_op(
type="cast",
inputs={"X": [self._states["Total"]]},
outputs={"Out": [self._states["Total"]]},
attrs={
"in_data_type": 5, # float32
"out_data_type": 2, #int32
})
block.append_op(
type="cast",
inputs={"X": [self._states["Correct"]]},
outputs={"Out": [self._states["Correct"]]},
attrs={
"in_data_type": 5,
"out_data_type": 2,
})
block.append_op(
type="elementwise_add",
inputs={"X": [self._states["Total"]],
"Y": [total]},
outputs={"Out": [self._states["Total"]]})
block.append_op(
type="elementwise_add",
inputs={"X": [self._states["Correct"]],
"Y": [correct]},
outputs={"Out": [self._states["Correct"]]})
return acc_out
def eval(self, executor, eval_program=None):
if eval_program != None:
eval_program = eval_program
else:
eval_program = Program()
block = eval_program.global_block()
eval_out = block.create_var(dtype=self._states["Total"].data_type)
e_total = _clone_var_in_block_(block, self._states["Total"])
e_correct = _clone_var_in_block_(block, self._states["Correct"])
block.append_op(
type="cast",
inputs={"X": [e_total]},
outputs={"Out": [e_total]},
attrs={
"in_data_type": 2, #int32
"out_data_type": 5, #float32
})
block.append_op(
type="cast",
inputs={"X": [e_correct]},
outputs={"Out": [e_correct]},
attrs={
"in_data_type": 2,
"out_data_type": 5,
})
block.append_op(
type="elementwise_div",
inputs={"X": e_correct,
"Y": e_total},
outputs={"Out": eval_out})
out = executor.run(eval_program, fetch_list=[eval_out])
return np.array(out[0])
def accuracy(*args, **kwargs):
cls = Accuracy(*args, **kwargs)
out = cls._update_ops(*args, **kwargs)
return cls, out
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.framework import Block, Program, g_main_program from paddle.v2.fluid.framework import Block, Program, g_main_program
g_scope = core.Scope() g_scope = core.Scope()
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
import collections import collections
import numpy as np import numpy as np
import copy import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator'] __all__ = ['Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'default_main_program']
def unique_name(prefix): def unique_name(prefix):
...@@ -562,3 +562,9 @@ class Parameter(Variable): ...@@ -562,3 +562,9 @@ class Parameter(Variable):
# program is a global instance. # program is a global instance.
g_main_program = Program() g_main_program = Program()
g_startup_program = Program() g_startup_program = Program()
def default_startup_program():
return g_startup_program
def default_main_program():
return g_main_program
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import numpy as np import numpy as np
__all__ = [ __all__ = [
......
import os import os
import cPickle as pickle import cPickle as pickle
from paddle.v2.framework.framework import Program, Parameter, g_main_program, \ from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \
Variable Variable
__all__ = [ __all__ = [
......
import copy import copy
import itertools import itertools
from paddle.v2.framework.framework import Variable, g_main_program, \ from paddle.v2.fluid.framework import Variable, g_main_program, \
g_startup_program, unique_name, Program g_startup_program, unique_name, Program
from paddle.v2.framework.initializer import ConstantInitializer, \ from paddle.v2.fluid.initializer import ConstantInitializer, \
UniformInitializer, XavierInitializer UniformInitializer, XavierInitializer
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \ from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \
Operator Operator
from paddle.v2.framework.initializer import ConstantInitializer, \ from paddle.v2.fluid.initializer import ConstantInitializer, \
NormalInitializer NormalInitializer
from paddle.v2.framework.layer_helper import LayerHelper, unique_name from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
import re import re
import cStringIO import cStringIO
...@@ -574,7 +574,9 @@ def accuracy(input, label, k=1, **kwargs): ...@@ -574,7 +574,9 @@ def accuracy(input, label, k=1, **kwargs):
"Indices": [topk_indices]}, "Indices": [topk_indices]},
attrs={"k": k}) attrs={"k": k})
acc_out_dtype = kwargs.get("out_dtype", "float32") acc_out_dtype = kwargs.get("out_dtype", "float32")
acc_out = helper.create_tmp_variable(dtype=acc_out_dtype) acc_out = helper.create_tmp_variable(dtype="float32")
correct = helper.create_tmp_variable(dtype="int64")
total = helper.create_tmp_variable(dtype="int64")
helper.append_op( helper.append_op(
type="accuracy", type="accuracy",
inputs={ inputs={
...@@ -582,7 +584,11 @@ def accuracy(input, label, k=1, **kwargs): ...@@ -582,7 +584,11 @@ def accuracy(input, label, k=1, **kwargs):
"Indices": [topk_indices], "Indices": [topk_indices],
"Label": [label] "Label": [label]
}, },
outputs={"Accuracy": [acc_out]}) outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
return acc_out return acc_out
......
...@@ -3,8 +3,8 @@ import json ...@@ -3,8 +3,8 @@ import json
import logging import logging
from collections import defaultdict from collections import defaultdict
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
__all__ = ["simple_img_conv_pool", "sequence_conv_pool"] __all__ = ["simple_img_conv_pool", "sequence_conv_pool"]
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
def get_all_op_protos(): def get_all_op_protos():
......
from collections import defaultdict from collections import defaultdict
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import unique_name, Program from paddle.v2.fluid.framework import unique_name, Program
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.initializer import ConstantInitializer from paddle.v2.fluid.initializer import ConstantInitializer
from paddle.v2.framework.regularizer import append_regularization_ops from paddle.v2.fluid.regularizer import append_regularization_ops
from paddle.v2.framework.layer_helper import LayerHelper from paddle.v2.fluid.layer_helper import LayerHelper
__all__ = [ __all__ = [
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
'AdamaxOptimizer' 'AdamaxOptimizer', 'DecayedAdagradOptimizer'
] ]
...@@ -85,7 +85,7 @@ class Optimizer(object): ...@@ -85,7 +85,7 @@ class Optimizer(object):
""" """
if (name in self._accumulators and if (name in self._accumulators and
param.name in self._accumulators[name]): param.name in self._accumulators[name]):
raise Exception("Accumulator {} already exists for parmeter {}". raise Exception("Accumulator {} already exists for parameter {}".
format(name, param.name)) format(name, param.name))
assert isinstance(self.helper, LayerHelper) assert isinstance(self.helper, LayerHelper)
...@@ -307,7 +307,7 @@ class AdagradOptimizer(Optimizer): ...@@ -307,7 +307,7 @@ class AdagradOptimizer(Optimizer):
moment_acc = self._get_accumulator(self._moment_acc_str, moment_acc = self._get_accumulator(self._moment_acc_str,
param_and_grad[0]) param_and_grad[0])
# create the adagrad optimizer op # Create the adagrad optimizer op
adagrad_op = block.append_op( adagrad_op = block.append_op(
type=self.type, type=self.type,
inputs={ inputs={
...@@ -510,3 +510,51 @@ class AdamaxOptimizer(Optimizer): ...@@ -510,3 +510,51 @@ class AdamaxOptimizer(Optimizer):
attrs={"scale": self._beta1}) attrs={"scale": self._beta1})
return [scale_beta1] return [scale_beta1]
class DecayedAdagradOptimizer(Optimizer):
"""Simple Decayed Adagrad optimizer with moment state
"""
_moment_acc_str = "moment"
def __init__(self,
learning_rate,
decay=0.95,
epsilon=1.0e-6,
global_step=None):
assert learning_rate is not None
assert decay is not None
assert epsilon is not None
super(DecayedAdagradOptimizer, self).__init__(global_step)
self.type = "decayed_adagrad"
self._learning_rate = learning_rate
self._decay = decay
self._epsilon = epsilon
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
for p in parameters:
self._add_accumulator(self._moment_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
moment_acc = self._get_accumulator(self._moment_acc_str,
param_and_grad[0])
# Create the decayed adagrad optimizer op
decayed_adagrad_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"Moment": moment_acc,
"LearningRate": self._create_param_lr(param_and_grad)
},
outputs={"ParamOut": param_and_grad[0],
"MomentOut": moment_acc},
attrs={"epsilon": self._epsilon})
return decayed_adagrad_op
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
__all__ = [ __all__ = [
'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer' 'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer'
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.io import save_persistables, load_persistables
from paddle.v2.framework.io import save_persistables, load_persistables from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
x = layers.data( x = layers.data(
name='x', name='x',
shape=[13], shape=[13],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
y_predict = layers.fc(input=x, y_predict = layers.fc(input=x,
size=1, size=1,
act=None, act=None)
main_program=main_program,
startup_program=startup_program)
y = layers.data( y = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, input=y_predict,
label=y, label=y)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20 BATCH_SIZE = 20
...@@ -52,12 +40,12 @@ train_reader = paddle.batch( ...@@ -52,12 +40,12 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", main_program=main_program) save_persistables(exe, "./fit_a_line.model/")
load_persistables(exe, "./fit_a_line.model/", main_program=main_program) load_persistables(exe, "./fit_a_line.model/")
for data in train_reader(): for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32") x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("float32")
...@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM): ...@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
# print tensor_y.get_dims() # print tensor_y.get_dims()
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost])
......
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import g_startup_program, g_main_program import paddle.v2.fluid.framework as framework
from paddle.v2.framework.initializer import XavierInitializer from paddle.v2.fluid.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): def resnet_cifar10(input, depth=32):
def conv_bn_layer(input, def conv_bn_layer(input,
ch_out, ch_out,
filter_size, filter_size,
stride, stride,
padding, padding,
act='relu', act='relu'):
main_program=None,
startup_program=None):
tmp = layers.conv2d( tmp = layers.conv2d(
input=input, input=input,
filter_size=filter_size, filter_size=filter_size,
...@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
stride=stride, stride=stride,
padding=padding, padding=padding,
act=None, act=None,
bias_attr=False, bias_attr=False)
main_program=main_program,
startup_program=startup_program)
return layers.batch_norm( return layers.batch_norm(
input=tmp, input=tmp,
act=act, act=act)
main_program=main_program,
startup_program=startup_program)
def shortcut(input, ch_in, ch_out, stride, program, init_program): def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out: if ch_in != ch_out:
...@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def basicblock(input, def basicblock(input,
ch_in, ch_in,
ch_out, ch_out,
stride, stride):
main_program=main_program,
startup_program=startup_program):
tmp = conv_bn_layer( tmp = conv_bn_layer(
input, input,
ch_out, ch_out,
3, 3,
stride, stride,
1, 1)
main_program=main_program,
startup_program=startup_program)
tmp = conv_bn_layer( tmp = conv_bn_layer(
tmp, tmp,
ch_out, ch_out,
3, 3,
1, 1,
1, 1,
act=None, act=None)
main_program=main_program, short = shortcut(input, ch_in, ch_out, stride)
startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
return layers.elementwise_add( return layers.elementwise_add(
x=tmp, x=tmp,
y=short, y=short,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program, def layer_warp(block_func, input, ch_in, ch_out, count, stride):
startup_program): tmp = block_func(input, ch_in, ch_out, stride)
tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
for i in range(1, count): for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program) tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp return tmp
assert (depth - 2) % 6 == 0 assert (depth - 2) % 6 == 0
...@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
ch_out=16, ch_out=16,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1, padding=1)
main_program=main_program,
startup_program=startup_program)
res1 = layer_warp( res1 = layer_warp(
basicblock, basicblock,
conv1, conv1,
16, 16,
16, 16,
n, n,
1, 1)
main_program=main_program,
startup_program=startup_program)
res2 = layer_warp( res2 = layer_warp(
basicblock, basicblock,
res1, res1,
16, 16,
32, 32,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
res3 = layer_warp( res3 = layer_warp(
basicblock, basicblock,
res2, res2,
32, 32,
64, 64,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
pool = layers.pool2d( pool = layers.pool2d(
input=res3, input=res3,
pool_size=8, pool_size=8,
pool_type='avg', pool_type='avg',
pool_stride=1, pool_stride=1)
main_program=main_program,
startup_program=startup_program)
return pool return pool
def vgg16_bn_drop(input, main_program=None, startup_program=None): def vgg16_bn_drop(input):
def conv_block(input, def conv_block(input,
num_filter, num_filter,
groups, groups,
dropouts, dropouts):
main_program=None,
startup_program=None):
return nets.img_conv_group( return nets.img_conv_group(
input=input, input=input,
pool_size=2, pool_size=2,
...@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None): ...@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None):
conv_act='relu', conv_act='relu',
conv_with_batchnorm=True, conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts, conv_batchnorm_drop_rate=dropouts,
pool_type='max', pool_type='max')
main_program=main_program,
startup_program=startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program) conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program) conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program, conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
startup_program) conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program, conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
drop = layers.dropout( drop = layers.dropout(
x=conv5, x=conv5,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc1 = layers.fc(input=drop, fc1 = layers.fc(input=drop,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
reshape1 = layers.reshape( reshape1 = layers.reshape(
x=fc1, x=fc1,
shape=list(fc1.shape + (1, 1)), shape=list(fc1.shape + (1, 1)))
main_program=main_program,
startup_program=startup_program)
bn = layers.batch_norm( bn = layers.batch_norm(
input=reshape1, input=reshape1,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
drop2 = layers.dropout( drop2 = layers.dropout(
x=bn, x=bn,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc2 = layers.fc(input=drop2, fc2 = layers.fc(input=drop2,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
return fc2 return fc2
...@@ -225,7 +180,7 @@ train_reader = paddle.batch( ...@@ -225,7 +180,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
batch_id = 0 batch_id = 0
...@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM): ...@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.evaluator as evaluator
from paddle.v2.framework.framework import Program import paddle.v2.fluid.framework as framework
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[1, 28, 28], shape=[1, 28, 28],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='label', name='label',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
conv_pool_1 = nets.simple_img_conv_pool( conv_pool_1 = nets.simple_img_conv_pool(
input=images, input=images,
filter_size=5, filter_size=5,
num_filters=20, num_filters=20,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
conv_pool_2 = nets.simple_img_conv_pool( conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
num_filters=50, num_filters=50,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
predict = layers.fc(input=conv_pool_2, predict = layers.fc(input=conv_pool_2,
size=10, size=10,
act="softmax", act="softmax")
main_program=main_program, cost = layers.cross_entropy(input=predict, label=label)
startup_program=startup_program) avg_cost = layers.mean(x=cost)
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
accuracy, acc_out = evaluator.accuracy(
input=predict,
label=label)
BATCH_SIZE = 50 BATCH_SIZE = 50
PASS_NUM = 3 PASS_NUM = 3
...@@ -75,10 +54,11 @@ train_reader = paddle.batch( ...@@ -75,10 +54,11 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
count = 0 count = 0
accuracy.reset(exe)
for data in train_reader(): for data in train_reader():
img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]), img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
data)).astype("float32") data)).astype("float32")
...@@ -90,14 +70,20 @@ for pass_id in range(PASS_NUM): ...@@ -90,14 +70,20 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, acc_out])
loss = np.array(outs[0]) loss = np.array(outs[0])
acc = np.array(outs[1]) acc = np.array(outs[1])
pass_acc = accuracy.eval(exe)
print "pass id : ", pass_id, pass_acc
# print loss, acc
if loss < 10.0 and acc > 0.9: if loss < 10.0 and acc > 0.9:
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. # if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good.
exit(0) exit(0)
pass_acc = accuracy.eval(exe)
print "pass id : ", pass_id, pass_acc
exit(1) exit(1)
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.framework.regularizer import L2DecayRegularizer from paddle.v2.fluid.initializer import UniformInitializer
from paddle.v2.framework.initializer import UniformInitializer
import numpy as np import numpy as np
BATCH_SIZE = 128 BATCH_SIZE = 128
startup_program = Program()
main_program = Program()
image = layers.data( image = layers.data(
name='x', name='x',
shape=[784], shape=[784],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
param_attr = { param_attr = {
'name': None, 'name': None,
...@@ -30,45 +25,30 @@ param_attr = { ...@@ -30,45 +25,30 @@ param_attr = {
hidden1 = layers.fc(input=image, hidden1 = layers.fc(input=image,
size=128, size=128,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
hidden2 = layers.fc(input=hidden1, hidden2 = layers.fc(input=hidden1,
size=64, size=64,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
predict = layers.fc(input=hidden2, predict = layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
label = layers.data( label = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(input=predict, label=label)
input=predict, avg_cost = layers.mean(x=cost)
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
accuracy = layers.accuracy( accuracy = layers.accuracy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -78,7 +58,7 @@ train_reader = paddle.batch( ...@@ -78,7 +58,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
...@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM): ...@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
startup_program = Program() IS_SPARSE = True
main_program = Program() USE_GPU = False
is_sparse = True
use_gpu = False
BATCH_SIZE = 256 BATCH_SIZE = 256
...@@ -25,99 +22,71 @@ def get_usr_combined_features(): ...@@ -25,99 +22,71 @@ def get_usr_combined_features():
uid = layers.data( uid = layers.data(
name='user_id', name='user_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
usr_emb = layers.embedding( usr_emb = layers.embedding(
input=uid, input=uid,
data_type='float32', data_type='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'}, param_attr={'name': 'user_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_fc = layers.fc(input=usr_emb, usr_fc = layers.fc(input=usr_emb,
size=32, size=32)
main_program=main_program,
startup_program=startup_program)
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data( usr_gender_id = layers.data(
name='gender_id', name='gender_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
usr_gender_emb = layers.embedding( usr_gender_emb = layers.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'}, param_attr={'name': 'gender_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_gender_fc = layers.fc(input=usr_gender_emb, usr_gender_fc = layers.fc(input=usr_gender_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data( usr_age_id = layers.data(
name='age_id', name='age_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64")
main_program=main_program,
startup_program=startup_program)
usr_age_emb = layers.embedding( usr_age_emb = layers.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'}, param_attr={'name': 'age_table'})
main_program=main_program,
startup_program=startup_program)
usr_age_fc = layers.fc(input=usr_age_emb, usr_age_fc = layers.fc(input=usr_age_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data( usr_job_id = layers.data(
name='job_id', name='job_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64")
main_program=main_program,
startup_program=startup_program)
usr_job_emb = layers.embedding( usr_job_emb = layers.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'}, param_attr={'name': 'job_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_job_fc = layers.fc(input=usr_job_emb, usr_job_fc = layers.fc(input=usr_job_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
usr_combined_features = layers.fc(input=concat_embed, usr_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh")
main_program=main_program,
startup_program=startup_program)
return usr_combined_features return usr_combined_features
...@@ -129,83 +98,61 @@ def get_mov_combined_features(): ...@@ -129,83 +98,61 @@ def get_mov_combined_features():
mov_id = layers.data( mov_id = layers.data(
name='movie_id', name='movie_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_emb = layers.embedding( mov_emb = layers.embedding(
input=mov_id, input=mov_id,
data_type='float32', data_type='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'}, param_attr={'name': 'movie_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_fc = layers.fc(input=mov_emb, mov_fc = layers.fc(input=mov_emb,
size=32, size=32)
main_program=main_program,
startup_program=startup_program)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = layers.data(
name='category_id', name='category_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_categories_emb = layers.embedding( mov_categories_emb = layers.embedding(
input=category_id, input=category_id,
size=[CATEGORY_DICT_SIZE, 32], size=[CATEGORY_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb, input=mov_categories_emb,
pool_type="sum", pool_type="sum")
main_program=main_program,
startup_program=startup_program)
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = layers.data(
name='movie_title', name='movie_title',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_title_emb = layers.embedding( mov_title_emb = layers.embedding(
input=mov_title_id, input=mov_title_id,
size=[MOV_TITLE_DICT_SIZE, 32], size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb, input=mov_title_emb,
num_filters=32, num_filters=32,
filter_size=3, filter_size=3,
act="tanh", act="tanh",
pool_type="sum", pool_type="sum")
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv], input=[mov_fc, mov_categories_hidden, mov_title_conv],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
# FIXME(dzh) : need tanh operator # FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed, mov_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh")
main_program=main_program,
startup_program=startup_program)
return mov_combined_features return mov_combined_features
...@@ -217,27 +164,18 @@ def model(): ...@@ -217,27 +164,18 @@ def model():
# need cos sim # need cos sim
inference = layers.cos_sim( inference = layers.cos_sim(
X=usr_combined_features, X=usr_combined_features,
Y=mov_combined_features, Y=mov_combined_features)
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='score', name='score',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
square_cost = layers.square_error_cost( square_cost = layers.square_error_cost(
input=inference, input=inference,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean( avg_cost = layers.mean(x=square_cost)
x=square_cost,
main_program=main_program,
startup_program=startup_program)
return avg_cost return avg_cost
...@@ -245,16 +183,15 @@ def model(): ...@@ -245,16 +183,15 @@ def model():
def main(): def main():
cost = model() cost = model()
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost, startup_program=startup_program) opts = sgd_optimizer.minimize(cost)
block = main_program.block(0)
if use_gpu: if USE_GPU:
place = core.GPUPlace(0) place = core.GPUPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -303,7 +240,7 @@ def main(): ...@@ -303,7 +240,7 @@ def main():
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed=func_feed(feeding, data), feed=func_feed(feeding, data),
fetch_list=[cost]) fetch_list=[cost])
out = np.array(outs[0]) out = np.array(outs[0])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -70,7 +69,7 @@ def main(): ...@@ -70,7 +69,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -82,7 +81,7 @@ def main(): ...@@ -82,7 +81,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -81,7 +80,7 @@ def main(): ...@@ -81,7 +80,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -93,7 +92,7 @@ def main(): ...@@ -93,7 +92,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -88,10 +87,10 @@ def main(): ...@@ -88,10 +87,10 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
tensor_words, tensor_label = prepare_feed_data(data, place) tensor_words, tensor_label = prepare_feed_data(data, place)
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
while True: while True:
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
startup_program = Program() PASS_NUM = 100
main_program = Program() EMBED_SIZE = 32
HIDDEN_SIZE = 256
embed_size = 32
hidden_size = 256
N = 5 N = 5
batch_size = 32 BATCH_SIZE = 32
is_sparse = True IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) dict_size = len(word_dict)
...@@ -23,97 +20,67 @@ dict_size = len(word_dict) ...@@ -23,97 +20,67 @@ dict_size = len(word_dict)
first_word = layers.data( first_word = layers.data(
name='firstw', name='firstw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
second_word = layers.data( second_word = layers.data(
name='secondw', name='secondw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
third_word = layers.data( third_word = layers.data(
name='thirdw', name='thirdw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
forth_word = layers.data( forth_word = layers.data(
name='forthw', name='forthw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
next_word = layers.data( next_word = layers.data(
name='nextw', name='nextw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
embed_first = layers.embedding( embed_first = layers.embedding(
input=first_word, input=first_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], input=[embed_first, embed_second, embed_third, embed_forth],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
hidden1 = layers.fc(input=concat_embed, hidden1 = layers.fc(input=concat_embed,
size=hidden_size, size=HIDDEN_SIZE,
act='sigmoid', act='sigmoid')
main_program=main_program,
startup_program=startup_program)
predict_word = layers.fc(input=hidden1, predict_word = layers.fc(input=hidden1,
size=dict_size, size=dict_size,
act='softmax', act='softmax')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict_word, input=predict_word,
label=next_word, label=next_word)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size) paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
...@@ -122,8 +89,8 @@ exe = Executor(place) ...@@ -122,8 +89,8 @@ exe = Executor(place)
# below exit line. # below exit line.
exit(0) exit(0)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)] input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)]
...@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM): ...@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor() next_tensor = core.LoDTensor()
next_tensor.set(next_data, place) next_tensor.set(next_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={ feed={
'firstw': first_tensor, 'firstw': first_tensor,
'secondw': second_tensor, 'secondw': second_tensor,
......
...@@ -2,12 +2,12 @@ import unittest ...@@ -2,12 +2,12 @@ import unittest
import numpy as np import numpy as np
import random import random
import itertools import itertools
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import collections import collections
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import Program, OpProtoHolder from paddle.v2.fluid.framework import Program, OpProtoHolder
def randomize_probability(batch_size, class_num, dtype='float32'): def randomize_probability(batch_size, class_num, dtype='float32'):
......
...@@ -18,7 +18,9 @@ class TestAccuracyOp(OpTest): ...@@ -18,7 +18,9 @@ class TestAccuracyOp(OpTest):
num_correct += 1 num_correct += 1
break break
self.outputs = { self.outputs = {
'Accuracy': np.array([num_correct / float(n)]).astype("float32") 'Accuracy': np.array([num_correct / float(n)]).astype("float32"),
'Correct': np.array([num_correct]).astype("int32"),
'Total': np.array([n]).astype("int32")
} }
def test_check_output(self): def test_check_output(self):
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import numpy import numpy
......
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
def grad_var_name(var_name): def grad_var_name(var_name):
......
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
class TestBeamSearchDecodeOp(unittest.TestCase): class TestBeamSearchDecodeOp(unittest.TestCase):
......
import op_test import op_test
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestCastOp(op_test.OpTest): class TestCastOp(op_test.OpTest):
......
...@@ -23,6 +23,9 @@ def create_test_class(op_type, typename, callback): ...@@ -23,6 +23,9 @@ def create_test_class(op_type, typename, callback):
for _type_name in {'float32', 'float64', 'int32', 'int64'}: for _type_name in {'float32', 'float64', 'int32', 'int64'}:
create_test_class('less_than', _type_name, lambda _a, _b: _a < _b) create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
create_test_class('equal', _type_name, lambda _a, _b: _a == _b) create_test_class('equal', _type_name, lambda _a, _b: _a == _b)
if __name__ == '__main__': if __name__ == '__main__':
......
import logging import logging
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
from paddle.v2.framework.op import Operator, CondOp from paddle.v2.fluid.op import Operator, CondOp
class PySimpleCond(object): class PySimpleCond(object):
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.framework import g_startup_program, g_main_program from paddle.v2.fluid.framework import g_startup_program, g_main_program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
class TestDocString(unittest.TestCase): class TestDocString(unittest.TestCase):
......
from paddle.v2.framework.default_scope_funcs import * from paddle.v2.fluid.default_scope_funcs import *
import unittest import unittest
......
import logging import logging
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
from paddle.v2.framework.op import Operator, DynamicRecurrentOp from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
import numpy as np import numpy as np
# for siplicity, just one level LoD # for siplicity, just one level LoD
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
......
import unittest import unittest
from paddle.v2.framework.layers import mul, data from paddle.v2.fluid.layers import mul, data
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import numpy import numpy
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
import unittest import unittest
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
class TestDebugStringFramework(unittest.TestCase): class TestDebugStringFramework(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
def conv_block(input, def conv_block(input,
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestInferShape(unittest.TestCase): class TestInferShape(unittest.TestCase):
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.io import save_inference_model, load_inference_model from paddle.v2.fluid.io import save_inference_model, load_inference_model
import paddle.v2.framework.executor as executor import paddle.v2.fluid.executor as executor
import unittest import unittest
import numpy as np import numpy as np
......
import numpy as np import numpy as np
import unittest import unittest
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.framework.initializer as initializer import paddle.v2.fluid.initializer as initializer
DELTA = 0.00001 DELTA = 0.00001
......
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
from paddle.v2.framework.layers import lod_rank_table, data from paddle.v2.fluid.layers import lod_rank_table, data
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
......
import unittest, os import unittest, os
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from op_test import OpTest, create_op, set_input from op_test import OpTest, create_op, set_input
if not core.is_compile_gpu(): if not core.is_compile_gpu():
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestOpSupportGPU(unittest.TestCase): class TestOpSupportGPU(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.op as op import paddle.v2.fluid.op as op
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
class TestGetAllProtos(unittest.TestCase): class TestGetAllProtos(unittest.TestCase):
......
import unittest import unittest
from paddle.v2.framework.framework import Variable, Program, g_main_program from paddle.v2.fluid.framework import Variable, Program, g_main_program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestOperator(unittest.TestCase): class TestOperator(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestOptimizer(unittest.TestCase): class TestOptimizer(unittest.TestCase):
...@@ -198,7 +198,7 @@ class TestAdagradOptimizer(unittest.TestCase): ...@@ -198,7 +198,7 @@ class TestAdagradOptimizer(unittest.TestCase):
adagrad_op = opts[0] adagrad_op = opts[0]
self.assertEqual(adagrad_op.type, "adagrad") self.assertEqual(adagrad_op.type, "adagrad")
# check accumulators # Check accumulators
accumulators = adagrad_optimizer.get_accumulators() accumulators = adagrad_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 1) self.assertEqual(len(accumulators), 1)
self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators) self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)
...@@ -331,5 +331,59 @@ class TestAdamaxOptimizer(unittest.TestCase): ...@@ -331,5 +331,59 @@ class TestAdamaxOptimizer(unittest.TestCase):
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate) self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
class TestDecayedAdagradOptimizer(unittest.TestCase):
class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
def get_accumulators(self):
return self._accumulators
def get_moment_str(self):
return self._moment_acc_str
def test_decayed_adagrad_optimizer(self):
init_program = framework.Program()
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
learning_rate = 0.01
decayed_adagrad_optimizer = self.MockDecayedAdagrad(
learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass(
params_grads, mul_out, init_program)
self.assertEqual(len(opts), 1)
decayed_adagrad_op = opts[0]
self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")
# Check accumulators
accumulators = decayed_adagrad_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 1)
self.assertTrue(
decayed_adagrad_optimizer.get_moment_str() in accumulators)
moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
self.assertEqual(len(moment_acc), 1)
self.assertTrue(mul_x.name in moment_acc)
# Check init_program
init_ops = init_program.global_block().ops
self.assertEqual(len(init_ops), 2)
self.assertEqual(init_ops[0].type, "fill_constant")
self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
self.assertEqual(init_ops[1].type, "fill_constant")
self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
import unittest import unittest
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestParameter(unittest.TestCase): class TestParameter(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
class TestProgram(unittest.TestCase): class TestProgram(unittest.TestCase):
......
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestOpDesc(unittest.TestCase): class TestOpDesc(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class PyRNNBase(object): class PyRNNBase(object):
......
import unittest import unittest
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer import paddle.v2.fluid.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestL2DecayRegularizer(unittest.TestCase): class TestL2DecayRegularizer(unittest.TestCase):
......
import unittest import unittest
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
def create_tensor(np_data, place): def create_tensor(np_data, place):
......
import paddle.v2.framework.core import paddle.v2.fluid.core
import unittest import unittest
class TestScope(unittest.TestCase): class TestScope(unittest.TestCase):
def test_create_destroy(self): def test_create_destroy(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
self.assertIsNotNone(scope) self.assertIsNotNone(scope)
scope_with_parent = scope.new_scope() scope_with_parent = scope.new_scope()
self.assertIsNotNone(scope_with_parent) self.assertIsNotNone(scope_with_parent)
def test_none_variable(self): def test_none_variable(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
self.assertIsNone(scope.find_var("test")) self.assertIsNone(scope.find_var("test"))
def test_create_var_get_var(self): def test_create_var_get_var(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var_a = scope.var("var_a") var_a = scope.var("var_a")
self.assertIsNotNone(var_a) self.assertIsNotNone(var_a)
...@@ -25,7 +25,7 @@ class TestScope(unittest.TestCase): ...@@ -25,7 +25,7 @@ class TestScope(unittest.TestCase):
self.assertIsNotNone(scope2.find_var('var_a')) self.assertIsNotNone(scope2.find_var('var_a'))
def test_var_get_int(self): def test_var_get_int(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var = scope.var("test_int") var = scope.var("test_int")
var.set_int(10) var.set_int(10)
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
from op_test import OpTest from op_test import OpTest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy as np import numpy as np
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy import numpy
......
import logging import logging
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
import unittest import unittest
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
import unittest import unittest
from paddle.v2.framework.framework import Variable, g_main_program, Program from paddle.v2.fluid.framework import Variable, g_main_program, Program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy as np import numpy as np
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
import paddle.v2.framework.op as op
import numpy as np
import paddle.v2.framework.core as core
def avg_accumulate(accumulated_var, per_eval, num_batches, place):
t = np.array(accumulated_var.get_tensor())
t[0] += per_eval[0]
accumulated_var.get_tensor().set([t[0] / float(num_batches)], place)
class Evaluator(object):
def __init__(self,
scope,
operator='accuracy',
input='Inference',
label='Label',
output='Output',
place=core.CPUPlace()):
"""
create an evaluator for evaluating the inference.
NOTE: default run on CPUPlace(), running on GPUPlace doesn't improve performance much.
:param scope: the scope instance contains the input.
:type scope: paddle.v2.framework.core.scope
:param operator: operator name for caculating the evaluation for each mini-batch.
:type operator: string
:param input: output variable name of forward network.
:type input: string
:param label: variable name of label
:type label: string
"""
self.scope = scope
self.place = place
self.output_name = output
self.num_batches = 0
# create variable to store accumulated evaluator output
eval_name = ''.join([operator, "@Eval"])
if scope.find_var(eval_name):
raise Exception("evaluator already exist in scope: %s" % eval_name)
self.accumulated_var = scope.var(eval_name)
t = self.accumulated_var.get_tensor()
t.set_dims((1, ))
t.set([0.0], place)
# self.accumulated_var = block.create_var(block, name=eval_name, shape=(1,))
# self.accumulated_var.get_tensor().set([0.0])
# create operator of evaluation
var_map = dict() # var name -> variable
var_map[input] = [input]
var_map[label] = [label]
var_map[output] = [output]
self.op = op.Operator(operator, **var_map)
def evaluate(self, ctx, accumulator=avg_accumulate):
self.op.run(self.scope, ctx)
per_eval = np.array(self.scope.find_var(self.output_name).get_tensor())
self.num_batches += 1
accumulator(self.accumulated_var, per_eval, self.num_batches,
self.place)
from paddle.v2.framework.evaluator import Evaluator
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
import unittest
import op_test
import numpy as np
class TestEvaluator(unittest.TestCase):
def setup(self, scope, inputs, outputs):
def __create_var__(var_name, arr):
np_arr = np.array(arr)
scope.var(var_name)
# tensor = var.get_tensor()
# tensor.set_dims(np_arr.shape)
for var_name, arr in inputs.iteritems():
__create_var__(var_name, arr)
for var_name, arr in outputs.iteritems():
__create_var__(var_name, arr)
def test_evaluator(self):
inputs = {
'Inference': np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 1]]).T,
'Label': np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
}
outputs = {'Accuracy': np.array([0.9])}
out_name = 'Accuracy'
places = [core.CPUPlace()]
if core.is_compile_gpu():
places.append(core.GPUPlace(0))
for place in places:
scope = core.Scope()
self.setup(scope, inputs, outputs)
evaluator = Evaluator(
scope,
operator='accuracy',
input='Inference',
label='Label',
output=out_name,
place=place)
op_test.set_input(scope, evaluator.op, inputs, place)
ctx = core.DeviceContext.create(place)
for i in range(10): # simulate 10 mini-batches
evaluator.evaluate(ctx)
actual = np.array(scope.find_var(out_name).get_tensor())
print actual
self.assertTrue(
np.allclose(
actual, outputs[out_name], atol=1e-5),
"output name: " + out_name + " has diff.")
if __name__ == '__main__':
exit(0)
unittest.main()
...@@ -13,8 +13,8 @@ packages=['paddle', ...@@ -13,8 +13,8 @@ packages=['paddle',
'paddle.v2.reader', 'paddle.v2.reader',
'paddle.v2.master', 'paddle.v2.master',
'paddle.v2.plot', 'paddle.v2.plot',
'paddle.v2.framework', 'paddle.v2.fluid',
'paddle.v2.framework.proto', 'paddle.v2.fluid.proto',
'py_paddle'] 'py_paddle']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
...@@ -44,14 +44,14 @@ setup(name='paddlepaddle', ...@@ -44,14 +44,14 @@ setup(name='paddlepaddle',
ext_modules=[Extension('_foo', ['stub.cc'])], ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={ package_data={
'paddle.v2.master': ['libpaddle_master.so'], 'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so'], 'paddle.v2.fluid': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so'] 'py_paddle':['*.py','_swig_paddle.so']
}, },
package_dir={ package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling. # The paddle.v2.fluid.proto will be generated while compiling.
# So that package points to other directory. # So that package points to other directory.
'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework', 'paddle.v2.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
}, },
scripts=paddle_bins, scripts=paddle_bins,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册