Commit b7cb9561 authored by Luo Tao

Merge branch 'develop' into ProtoDataProvider

...@@ -21,7 +21,7 @@ third_party/ ...@@ -21,7 +21,7 @@ third_party/
cmake-build-* cmake-build-*
# generated while compiling # generated while compiling
python/paddle/v2/framework/core.so python/paddle/v2/fluid/core.so
paddle/pybind/pybind.h paddle/pybind/pybind.h
CMakeFiles CMakeFiles
cmake_install.cmake cmake_install.cmake
......
## Evaluator Design
### The Problem
During training or serving, we provide evaluation functions to measure model performance, e.g., accuracy and precision. In the operator-based framework design, data flow through the network pipeline batch by batch, so inside an operator we can only compute metrics for a single mini-batch. We need a mechanism that calculates the metrics over every N passes/batches the user specifies.
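For instance, naively averaging per-batch accuracy is wrong as soon as batch sizes differ; the raw statistics have to be accumulated and merged once per pass. A minimal sketch in plain Python (the numbers are made up for illustration):
```python
# Accumulate raw statistics across mini-batches, then merge once per pass.
batches = [(48, 64), (30, 64), (5, 10)]  # hypothetical (correct, size) pairs

num_correct = sum(c for c, _ in batches)
num_samples = sum(n for _, n in batches)
pass_accuracy = num_correct / float(num_samples)  # 83 / 138 ~= 0.60

# Averaging the per-batch ratios gives a different (wrong) answer here.
naive_average = sum(c / float(n) for c, n in batches) / len(batches)  # ~= 0.57
print(pass_accuracy, naive_average)
```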
### Evaluator Design
Currently, every operation is expressed in the graph, so we divide the evaluation process into three steps.
1. Initialize the metric state and add it to the block.
2. Update the metric state in every mini-batch. A single operator is only responsible for computing the necessary statistics of one mini-batch; for example, one run of the accuracy operator only processes one mini-batch of data.
3. Merge the mini-batch statistics to form the evaluation result over multiple mini-batches. For distributed or multi-GPU training, also aggregate the values from different devices.
### Implementation
This design is shown in the Python API below.
Each metric operator needs to calculate the metric statistics and return the batch-level states; the Python side is responsible for accumulating the states over each pass.
```python
class Evaluator(object):
    """
    Evaluator base class.
    """

    def __init__(self, name, **kwargs):
        """
        Different evaluators may have different metric states. E.g., Accuracy
        needs two variables, the total and correct sample counts; Auc needs
        four variables, `true_positives`, `true_negatives`, `false_positives`
        and `false_negatives`. So every evaluator should create the variables
        it needs and append them to main_program.

        The initialization of an Evaluator is responsible for:
        creating the metric states and appending them to the main_program.
        """
        pass

    def _update_ops(self, input, label, **kwargs):
        """
        Add the mini-batch evaluator calculation operators to the main_program.
        Add increment operators to accumulate the metric states.
        """

    def reset(self, executor, reset_program=None):
        """
        Reset the metric states at the beginning of each pass or at a
        user-specified batch number.
        Execute the reset_program to reset the states.
        """

    def eval(self, executor, eval_program=None):
        """
        Merge the mini-batch statistics to form the evaluation result for
        multiple mini-batches.
        Execute the eval_program and return the result.
        """
        return eval_result
```
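As a standalone sketch of how the three steps map onto this interface, the class below keeps its state in plain attributes instead of program variables and operators; the `AccuracyEvaluator` name and its `update` method are assumptions for illustration, not the actual framework API:
```python
class AccuracyEvaluator(object):
    """Illustrative analogue of the Evaluator life cycle (not framework code)."""

    def __init__(self, name):
        # Step 1: create the metric states (here plain attributes; in the
        # design above these would be variables appended to main_program).
        self.name = name
        self.correct = 0
        self.total = 0

    def update(self, num_correct, batch_size):
        # Step 2: accumulate per-mini-batch statistics (the role of the
        # operators added by _update_ops).
        self.correct += num_correct
        self.total += batch_size

    def reset(self):
        self.correct = 0
        self.total = 0

    def eval(self):
        # Step 3: merge the accumulated statistics into the pass-level result.
        return float(self.correct) / self.total if self.total else 0.0


acc = AccuracyEvaluator("acc")
acc.reset()
for num_correct, batch_size in [(50, 64), (60, 64)]:
    acc.update(num_correct, batch_size)
print(acc.eval())  # 110 / 128
```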
...@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat, ...@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat,
paddle_matrix paddle_matrix_create_sparse( paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) { uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) {
#ifndef PADDLE_MOBILE_INFERENCE
auto ptr = new paddle::capi::CMatrix(); auto ptr = new paddle::capi::CMatrix();
ptr->mat = paddle::Matrix::createSparseMatrix( ptr->mat = paddle::Matrix::createSparseMatrix(
height, height,
...@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse( ...@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse(
false, false,
useGpu); useGpu);
return ptr; return ptr;
#else
return nullptr;
#endif
} }
paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
...@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, ...@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
uint64_t colSize, uint64_t colSize,
float* valueArray, float* valueArray,
uint64_t valueSize) { uint64_t valueSize) {
#ifndef PADDLE_MOBILE_INFERENCE
if (mat == nullptr) return kPD_NULLPTR; if (mat == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat); auto ptr = cast(mat);
if (rowArray == nullptr || colArray == nullptr || if (rowArray == nullptr || colArray == nullptr ||
...@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, ...@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
} else { } else {
return kPD_NOT_SUPPORTED; return kPD_NOT_SUPPORTED;
} }
#else
return kPD_NOT_SUPPORTED;
#endif
} }
...@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height, ...@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height,
* @param isBinary is binary (either 1 or 0 in matrix) or not. * @param isBinary is binary (either 1 or 0 in matrix) or not.
* @param useGpu is using GPU or not. * @param useGpu is using GPU or not.
* @return paddle_matrix. * @return paddle_matrix.
* @note Mobile inference does not support this interface.
*/ */
PD_API paddle_matrix paddle_matrix_create_sparse( PD_API paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu); uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
...@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat, ...@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat,
* NULL if the matrix is binary. * NULL if the matrix is binary.
* @param [in] valueSize length of value array. Zero if the matrix is binary. * @param [in] valueSize length of value array. Zero if the matrix is binary.
* @return paddle_error * @return paddle_error
* @note Mobile inference does not support this interface.
*/ */
PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat, PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
int* rowArray, int* rowArray,
......
...@@ -27,7 +27,9 @@ if(WITH_GPU) ...@@ -27,7 +27,9 @@ if(WITH_GPU)
set_source_files_properties(${CUDA_CXX_SOURCES} set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__") PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else() else()
if (NOT MOBILE_INFERENCE)
set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc) set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
endif()
endif() endif()
set(CUDA_CU_SOURCES set(CUDA_CU_SOURCES
......
...@@ -18,7 +18,7 @@ limitations under the License. */ ...@@ -18,7 +18,7 @@ limitations under the License. */
#include "hl_base.h" #include "hl_base.h"
/** /**
* @brief Maximum pool forward. * @brief Maximum pool forward with Mask output.
* *
* @param[in] frameCnt batch size of input image. * @param[in] frameCnt batch size of input image.
* @param[in] inputData input data. * @param[in] inputData input data.
...@@ -35,7 +35,7 @@ limitations under the License. */ ...@@ -35,7 +35,7 @@ limitations under the License. */
* @param[in] paddingW padding width. * @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples. * @param[in] tgtStride stride between output data samples.
* * @param[out] maskData the location indices of select max data.
*/ */
extern void hl_maxpool_forward(const int frameCnt, extern void hl_maxpool_forward(const int frameCnt,
const real* inputData, const real* inputData,
...@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt, ...@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt,
const int paddingH, const int paddingH,
const int paddingW, const int paddingW,
real* tgtData, real* tgtData,
const int tgtStride); const int tgtStride,
real* maskData = NULL);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
......
...@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt, ...@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt,
const int paddingH, const int paddingH,
const int paddingW, const int paddingW,
real* tgtData, real* tgtData,
const int tgtStride) {} const int tgtStride,
real* MaskData) {}
inline void hl_maxpool_backward(const int frameCnt, inline void hl_maxpool_backward(const int frameCnt,
const real* inputData, const real* inputData,
......
...@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads, ...@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads,
const int offsetH, const int offsetH,
const int offsetW, const int offsetW,
real* tgtData, real* tgtData,
const int tgtStride) { const int tgtStride,
real* maskData) {
int index = blockIdx.x * blockDim.x + threadIdx.x; int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) { if (index < nthreads) {
int pw = index % pooledW; int pw = index % pooledW;
...@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads, ...@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads,
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
real maxval = -FLT_MAX; real maxval = -FLT_MAX;
int max_index = -1;
inputData += (frameNum * channels + c) * height * width; inputData += (frameNum * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
if (maxval < inputData[h * width + w]) if (maxval < inputData[h * width + w]) {
maxval = inputData[h * width + w]; max_index = h * width + w;
maxval = inputData[max_index];
}
} }
} }
int tgtIndex = int tgtIndex =
index % (pooledW * pooledH * channels) + frameNum * tgtStride; index % (pooledW * pooledH * channels) + frameNum * tgtStride;
tgtData[tgtIndex] = maxval; tgtData[tgtIndex] = maxval;
if (maskData != NULL) {
maskData[tgtIndex] = max_index;
}
} }
} }
...@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt, ...@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt,
const int paddingH, const int paddingH,
const int paddingW, const int paddingW,
real* tgtData, real* tgtData,
const int tgtStride) { const int tgtStride,
real* maskData) {
int num_kernels = pooledH * pooledW * channels * frameCnt; int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024; int blocks = (num_kernels + 1024 - 1) / 1024;
dim3 threads(1024, 1); dim3 threads(1024, 1);
...@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt, ...@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt,
paddingH, paddingH,
paddingW, paddingW,
tgtData, tgtData,
tgtStride); tgtStride,
maskData);
CHECK_SYNC("hl_maxpool_forward failed"); CHECK_SYNC("hl_maxpool_forward failed");
} }
......
...@@ -61,6 +61,7 @@ public: ...@@ -61,6 +61,7 @@ public:
// function arguments // function arguments
strides_ = config.get<std::vector<size_t>>("strides"); strides_ = config.get<std::vector<size_t>>("strides");
paddings_ = config.get<std::vector<size_t>>("paddings"); paddings_ = config.get<std::vector<size_t>>("paddings");
dilations_ = config.get<std::vector<size_t>>("dilations");
groups_ = config.get<size_t>("groups"); groups_ = config.get<size_t>("groups");
// number of inputs and outputs // number of inputs and outputs
...@@ -118,6 +119,7 @@ protected: ...@@ -118,6 +119,7 @@ protected:
std::vector<size_t> strides_; std::vector<size_t> strides_;
std::vector<size_t> paddings_; std::vector<size_t> paddings_;
std::vector<size_t> dilations_;
/// Group size, refer to grouped convolution in /// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the /// Alex Krizhevsky's paper: when group=2, the first half of the
...@@ -133,6 +135,10 @@ protected: ...@@ -133,6 +135,10 @@ protected:
inline int paddingW() const { return paddings_[1]; } inline int paddingW() const { return paddings_[1]; }
inline int dilationH() const { return dilations_[0]; }
inline int dilationW() const { return dilations_[1]; }
// A temporary memory in convolution calculation. // A temporary memory in convolution calculation.
MemoryHandlePtr memory_; MemoryHandlePtr memory_;
......
...@@ -79,45 +79,59 @@ void Convolution(const std::string& conv1, ...@@ -79,45 +79,59 @@ void Convolution(const std::string& conv1,
if (outputChannels < inputChannels) continue; if (outputChannels < inputChannels) continue;
for (size_t stride : {1, 2}) { for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) { for (size_t padding : {0, 1}) {
if (padding >= filterSize) break; for (size_t dilation : {1, 3}) {
if (padding >= filterSize) break;
size_t filterS = (filterSize - 1) * dilation + 1;
// NNPACK only supports stride = 1 if batchSize > 1 if (inputSize + 2 * padding < filterS) break;
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
size_t outputSize = if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" ||
(inputSize - filterSize + 2 * padding + stride) / stride; conv1 == "NNPACKConv-CPU" ||
VLOG(3) << " batchSize=" << batchSize conv2 == "NNPACKConv-CPU") &&
<< " inputChannels=" << inputChannels dilation > 1)
<< " inputHeight=" << inputSize break;
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
std::vector<size_t> paddings = {padding, padding}; // NNPACK only supports stride = 1 if batchSize > 1
std::vector<size_t> strides = {stride, stride}; if ((conv1 == "NNPACKConv-CPU" ||
Compare2Function<DType1, DType2> test( conv2 == "NNPACKConv-CPU") &&
conv1, batchSize > 1 && stride > 1)
conv2, break;
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
TensorShape input{ size_t outputSize =
batchSize, inputChannels, inputSize, inputSize}; (inputSize - filterS + 2 * padding + stride) / stride;
TensorShape filter{ VLOG(3) << " batchSize=" << batchSize
outputChannels, inputChannels, filterSize, filterSize}; << " inputChannels=" << inputChannels
TensorShape output{ << " inputHeight=" << inputSize
batchSize, outputChannels, outputSize, outputSize}; << " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;
function(test, input, filter, output); std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
function(test, input, filter, output);
}
} }
} }
} }
...@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1, ...@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1,
for (size_t outputChannels : {7}) { for (size_t outputChannels : {7}) {
size_t stride = 1; size_t stride = 1;
size_t padding = 0; size_t padding = 0;
size_t dilation = 1;
size_t outputHeight = size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) / (inputHeight - filterHeight + 2 * padding + stride) /
stride; stride;
...@@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1, ...@@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding}; std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride}; std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test( Compare2Function<DType1, DType2> test(
conv1, conv1,
conv2, conv2,
...@@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1, ...@@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1,
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", (size_t)1) .set("groups", (size_t)1)
.set("dilations", dilations)
.set("algo", (std::string) "auto")); .set("algo", (std::string) "auto"));
TensorShape input{ TensorShape input{
...@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1, ...@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding}; std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride}; std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {1, 1};
size_t groups = inputChannels; size_t groups = inputChannels;
Compare2Function<DType1, DType2> test( Compare2Function<DType1, DType2> test(
conv1, conv1,
...@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1, ...@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1,
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("groups", groups) .set("groups", groups)
.set("dilations", dilations)
.set("algo", (std::string) "auto")); .set("algo", (std::string) "auto"));
TensorShape input{ TensorShape input{
......
...@@ -100,7 +100,9 @@ public: ...@@ -100,7 +100,9 @@ public:
strideH(), strideH(),
strideW(), strideW(),
paddingH(), paddingH(),
paddingW()); paddingW(),
dilationH(),
dilationW());
} else { } else {
colData = inputData + g * inputOffset; colData = inputData + g * inputOffset;
} }
...@@ -223,7 +225,9 @@ public: ...@@ -223,7 +225,9 @@ public:
strideH(), strideH(),
strideW(), strideW(),
paddingH(), paddingH(),
paddingW()); paddingW(),
dilationH(),
dilationW());
} }
} }
inputGrad += inputChannels * inputHeight * inputWidth; inputGrad += inputChannels * inputHeight * inputWidth;
...@@ -310,7 +314,9 @@ public: ...@@ -310,7 +314,9 @@ public:
strideH(), strideH(),
strideW(), strideW(),
paddingH(), paddingH(),
paddingW()); paddingW(),
dilationH(),
dilationW());
} else { } else {
colData = inputData + g * inputOffset; colData = inputData + g * inputOffset;
} }
......
...@@ -78,7 +78,9 @@ public: ...@@ -78,7 +78,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth); int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
}; };
template <ColFormat Format, DeviceType Device, class T> template <ColFormat Format, DeviceType Device, class T>
...@@ -91,7 +93,9 @@ public: ...@@ -91,7 +93,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth); int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
}; };
} // namespace paddle } // namespace paddle
...@@ -31,7 +31,9 @@ public: ...@@ -31,7 +31,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -47,8 +49,8 @@ public: ...@@ -47,8 +49,8 @@ public:
int c_im = c / filterWidth / filterHeight; int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) { for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) { for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset; int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset; int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) < 0 || if ((imRowIdx - paddingHeight) < 0 ||
(imRowIdx - paddingHeight) >= inputHeight || (imRowIdx - paddingHeight) >= inputHeight ||
(imColIdx - paddingWidth) < 0 || (imColIdx - paddingWidth) < 0 ||
...@@ -81,7 +83,9 @@ public: ...@@ -81,7 +83,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -97,8 +101,8 @@ public: ...@@ -97,8 +101,8 @@ public:
int c_im = c / filterWidth / filterHeight; int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) { for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) { for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset; int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset; int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) >= 0 && if ((imRowIdx - paddingHeight) >= 0 &&
(imRowIdx - paddingHeight) < inputHeight && (imRowIdx - paddingHeight) < inputHeight &&
(imColIdx - paddingWidth) >= 0 && (imColIdx - paddingWidth) >= 0 &&
...@@ -134,7 +138,9 @@ public: ...@@ -134,7 +138,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -147,9 +153,10 @@ public: ...@@ -147,9 +153,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) { for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) { for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset = int imRowOffset = outputH * strideHeight +
outputH * strideHeight + filterH - paddingHeight; filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth; int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset = int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels + (((outputH * outputWidth + outputW) * inputChannels +
channel) * channel) *
...@@ -189,7 +196,9 @@ public: ...@@ -189,7 +196,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -202,9 +211,10 @@ public: ...@@ -202,9 +211,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) { for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) { for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) { for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset = int imRowOffset = outputH * strideHeight +
outputH * strideHeight + filterH - paddingHeight; filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth; int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset = int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels + (((outputH * outputWidth + outputW) * inputChannels +
channel) * channel) *
......
...@@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im, ...@@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im,
int strideW, int strideW,
int paddingH, int paddingH,
int paddingW, int paddingW,
int dilationH,
int dilationW,
int height_col, int height_col,
int width_col, int width_col,
T* data_col) { T* data_col) {
...@@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im, ...@@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im,
data_col += (channel_out * height_col + h_out) * width_col + w_out; data_col += (channel_out * height_col + h_out) * width_col + w_out;
for (int i = 0; i < blockH; ++i) { for (int i = 0; i < blockH; ++i) {
for (int j = 0; j < blockW; ++j) { for (int j = 0; j < blockW; ++j) {
int rIdx = int(h_in + i); int rIdx = int(h_in + i * dilationH);
int cIdx = int(w_in + j); int cIdx = int(w_in + j * dilationW);
if ((rIdx - (int)paddingH) >= (int)height || if ((rIdx - (int)paddingH) >= (int)height ||
(rIdx - (int)paddingH) < 0 || (rIdx - (int)paddingH) < 0 ||
(cIdx - (int)paddingW) >= (int)width || (cIdx - (int)paddingW) >= (int)width ||
...@@ -77,7 +79,9 @@ public: ...@@ -77,7 +79,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -102,6 +106,8 @@ public: ...@@ -102,6 +106,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth, outputWidth,
colData); colData);
...@@ -121,6 +127,8 @@ __global__ void col2im(size_t n, ...@@ -121,6 +127,8 @@ __global__ void col2im(size_t n,
size_t strideW, size_t strideW,
size_t paddingH, size_t paddingH,
size_t paddingW, size_t paddingW,
size_t dilationH,
size_t dilationW,
size_t height_col, size_t height_col,
size_t width_col, size_t width_col,
T* data_im) { T* data_im) {
...@@ -131,23 +139,34 @@ __global__ void col2im(size_t n, ...@@ -131,23 +139,34 @@ __global__ void col2im(size_t n,
int w = int(index % width); int w = int(index % width);
int h = int((index / width) % height); int h = int((index / width) % height);
int c = int(index / (width * height)); int c = int(index / (width * height));
int filterH = (blockH - 1) * dilationH + 1;
int filterW = (blockW - 1) * dilationW + 1;
if ((w - (int)paddingW) >= 0 && if ((w - (int)paddingW) >= 0 &&
(w - (int)paddingW) < (width - 2 * paddingW) && (w - (int)paddingW) < (width - 2 * paddingW) &&
(h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) { (h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) {
// compute the start and end of the output // compute the start and end of the output
int w_col_start = int w_col_start =
(w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; (w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1;
int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col)); int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col));
int h_col_start = int h_col_start =
(h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1; (h < (int)filterH) ? 0 : (h - (int)filterH) / (int)strideH + 1;
int h_col_end = min(int(h / strideH + 1), int(height_col)); int h_col_end = min(int(h / strideH + 1), int(height_col));
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// the col location: [c * width * height + h_out, w_out] // the col location: [c * width * height + h_out, w_out]
int c_col = int(c * blockH * blockW) + int h_k = (h - h_col * strideH);
(h - h_col * (int)strideH) * (int)blockW + int w_k = (w - w_col * strideW);
(w - w_col * (int)strideW); if (h_k % dilationH == 0 && w_k % dilationW == 0) {
val += data_col[(c_col * height_col + h_col) * width_col + w_col]; h_k /= dilationH;
w_k /= dilationW;
int c_col =
(((c * blockH + h_k) * blockW + w_k) * height_col + h_col) *
width_col +
w_col;
val += data_col[c_col];
}
} }
} }
h -= paddingH; h -= paddingH;
...@@ -173,7 +192,9 @@ public: ...@@ -173,7 +192,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -205,6 +226,8 @@ public: ...@@ -205,6 +226,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth, outputWidth,
imData); imData);
...@@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData, ...@@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth, int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight, int outputHeight,
int outputWidth) { int outputWidth) {
int swId = blockIdx.x; int swId = blockIdx.x;
...@@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData, ...@@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData,
channelId += blockDim.z) { channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth; int widthOffset =
int heightOffset = idy + shId * strideHeight - paddingHeight; idx * dilationHeight + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationWidth + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth + int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth; channelId * inputHeight * inputWidth;
...@@ -273,7 +300,9 @@ public: ...@@ -273,7 +300,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -312,6 +341,8 @@ public: ...@@ -312,6 +341,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth); outputWidth);
CHECK_SYNC("Im2ColFunctor GPU failed"); CHECK_SYNC("Im2ColFunctor GPU failed");
...@@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData, ...@@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth, int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight, int outputHeight,
int outputWidth) { int outputWidth) {
int swId = blockIdx.x; int swId = blockIdx.x;
...@@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData, ...@@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData,
channelId += blockDim.z) { channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) { for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) { for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth; int widthOffset =
int heightOffset = idy + shId * strideHeight - paddingHeight; idx * dilationWidth + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationHeight + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth + int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth; channelId * inputHeight * inputWidth;
...@@ -372,7 +407,9 @@ public: ...@@ -372,7 +407,9 @@ public:
int strideHeight, int strideHeight,
int strideWidth, int strideWidth,
int paddingHeight, int paddingHeight,
int paddingWidth) { int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0]; int inputChannels = imShape[0];
int inputHeight = imShape[1]; int inputHeight = imShape[1];
int inputWidth = imShape[2]; int inputWidth = imShape[2];
...@@ -411,6 +448,8 @@ public: ...@@ -411,6 +448,8 @@ public:
strideWidth, strideWidth,
paddingHeight, paddingHeight,
paddingWidth, paddingWidth,
dilationHeight,
dilationWidth,
outputHeight, outputHeight,
outputWidth); outputWidth);
CHECK_SYNC("Col2ImFunctor GPU failed"); CHECK_SYNC("Col2ImFunctor GPU failed");
......
...@@ -29,82 +29,98 @@ void TestIm2ColFunctor() { ...@@ -29,82 +29,98 @@ void TestIm2ColFunctor() {
for (size_t filterWidth : {3, 7}) { for (size_t filterWidth : {3, 7}) {
for (size_t stride : {1, 2}) { for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) { for (size_t padding : {0, 1}) {
if (inputHeight <= filterHeight || inputWidth <= filterWidth) for (size_t dilation : {1, 3}) {
break; size_t filterSizeH = (filterHeight - 1) * dilation + 1;
if (padding >= filterHeight || padding >= filterWidth) break; size_t filterSizeW = (filterWidth - 1) * dilation + 1;
size_t outputHeight = if (inputHeight + 2 * padding < filterSizeH ||
(inputHeight - filterHeight + 2 * padding + stride) / inputWidth + 2 * padding < filterSizeW)
stride; break;
size_t outputWidth = if (padding >= filterSizeH || padding >= filterSizeW) break;
(inputWidth - filterWidth + 2 * padding + stride) / stride; size_t outputHeight =
(inputHeight - filterSizeH + 2 * padding) / stride + 1;
TensorShape imShape = size_t outputWidth =
TensorShape({channels, inputHeight, inputWidth}); (inputWidth - filterSizeW + 2 * padding) / stride + 1;
TensorShape colShape1 = TensorShape({channels,
filterHeight, TensorShape imShape =
filterWidth, TensorShape({channels, inputHeight, inputWidth});
outputHeight, TensorShape colShape1 = TensorShape({channels,
outputWidth}); filterHeight,
TensorShape colShape2 = TensorShape({outputHeight, filterWidth,
outputWidth, outputHeight,
channels, outputWidth});
filterHeight, TensorShape colShape2 = TensorShape({outputHeight,
filterWidth}); outputWidth,
channels,
size_t height = channels * filterHeight * filterWidth; filterHeight,
size_t width = outputHeight * outputWidth; filterWidth});
VectorPtr input1 = Vector::create(imShape.getElements(), false);
VectorPtr input2 = Vector::create(imShape.getElements(), false); size_t height = channels * filterHeight * filterWidth;
MatrixPtr output1 = Matrix::create(height, width, false, false); size_t width = outputHeight * outputWidth;
MatrixPtr output2 = Matrix::create(width, height, false, false); VectorPtr input1 =
input1->uniform(0.001, 1); Vector::create(imShape.getElements(), false);
input2->copyFrom(*input1); VectorPtr input2 =
Vector::create(imShape.getElements(), false);
Im2ColFunctor<kCFO, Device, T> im2Col1; MatrixPtr output1 =
Im2ColFunctor<kOCF, Device, T> im2Col2; Matrix::create(height, width, false, false);
im2Col1(input1->getData(), MatrixPtr output2 =
imShape, Matrix::create(width, height, false, false);
output1->getData(), input1->uniform(0.001, 1);
colShape1, input2->copyFrom(*input1);
stride,
stride, Im2ColFunctor<kCFO, Device, T> im2Col1;
padding, Im2ColFunctor<kOCF, Device, T> im2Col2;
padding); im2Col1(input1->getData(),
im2Col2(input2->getData(), imShape,
imShape, output1->getData(),
output2->getData(), colShape1,
colShape2, stride,
stride, stride,
stride, padding,
padding, padding,
padding); dilation,
dilation);
// The transposition of the result of ColFormat == kCFO im2Col2(input2->getData(),
// is equal to the result of ColFormat == kOCF. imShape,
MatrixPtr test; output2->getData(),
output2->transpose(test, true); colShape2,
autotest::TensorCheckErr(*output1, *test); stride,
stride,
Col2ImFunctor<kCFO, Device, T> col2Im1; padding,
Col2ImFunctor<kOCF, Device, T> col2Im2; padding,
col2Im1(input1->getData(), dilation,
imShape, dilation);
output1->getData(),
colShape1, // The transposition of the result of ColFormat == kCFO
stride, // is equal to the result of ColFormat == kOCF.
stride, MatrixPtr test;
padding, output2->transpose(test, true);
padding); autotest::TensorCheckErr(*output1, *test);
col2Im2(input2->getData(),
imShape, Col2ImFunctor<kCFO, Device, T> col2Im1;
output2->getData(), Col2ImFunctor<kOCF, Device, T> col2Im2;
colShape2,
stride, col2Im1(input1->getData(),
stride, imShape,
padding, output1->getData(),
padding); colShape1,
stride,
autotest::TensorCheckErr(*input1, *input2); stride,
padding,
padding,
dilation,
dilation);
col2Im2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding,
dilation,
dilation);
autotest::TensorCheckErr(*input1, *input2);
}
} }
} }
} }
......
...@@ -84,9 +84,49 @@ if(MOBILE_INFERENCE) ...@@ -84,9 +84,49 @@ if(MOBILE_INFERENCE)
gradientmachines/GradientMachineMode.cpp gradientmachines/GradientMachineMode.cpp
gradientmachines/MultiGradientMachine.cpp) gradientmachines/MultiGradientMachine.cpp)
# Remove useless layers # Remove layers that used in training
list(REMOVE_ITEM GSERVER_SOURCES list(REMOVE_ITEM GSERVER_SOURCES
layers/RecurrentLayerGroup.cpp) layers/RecurrentLayerGroup.cpp
layers/CostLayer.cpp
layers/MultiBoxLossLayer.cpp
layers/WarpCTCLayer.cpp
layers/CTCLayer.cpp
layers/LinearChainCTC.cpp
layers/PrintLayer.cpp)
list(REMOVE_ITEM GSERVER_SOURCES
layers/OuterProdLayer.cpp
layers/SumToOneNormLayer.cpp
layers/ConvShiftLayer.cpp
layers/InterpolationLayer.cpp
layers/AgentLayer.cpp
layers/DotMulOperator.cpp
layers/GruStepLayer.cpp
layers/LstmStepLayer.cpp
layers/ConvexCombinationLayer.cpp
layers/Conv3DLayer.cpp
layers/DeConv3DLayer.cpp
layers/CropLayer.cpp
layers/CrossEntropyOverBeam.cpp
layers/DataNormLayer.cpp
layers/FeatureMapExpandLayer.cpp
layers/HierarchicalSigmoidLayer.cpp
layers/MultinomialSampler.cpp
layers/NCELayer.cpp
layers/KmaxSeqScoreLayer.cpp
layers/MDLstmLayer.cpp
layers/MultiplexLayer.cpp
layers/PadLayer.cpp
layers/Pool3DLayer.cpp
layers/ResizeLayer.cpp
layers/RotateLayer.cpp
layers/RowConvLayer.cpp
layers/RowL2NormLayer.cpp
layers/SamplingIdLayer.cpp
layers/ScaleShiftLayer.cpp
layers/SelectiveFullyConnectedLayer.cpp
layers/SpatialPyramidPoolLayer.cpp
layers/BilinearInterpLayer.cpp
layers/ClipLayer.cpp)
endif() endif()
if(WITH_GPU) if(WITH_GPU)
......
...@@ -16,7 +16,6 @@ limitations under the License. */ ...@@ -16,7 +16,6 @@ limitations under the License. */
#include "NeuralNetwork.h" #include "NeuralNetwork.h"
#include "hl_gpu.h" #include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/CustomStackTrace.h" #include "paddle/utils/CustomStackTrace.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
...@@ -28,6 +27,7 @@ limitations under the License. */ ...@@ -28,6 +27,7 @@ limitations under the License. */
#ifndef PADDLE_MOBILE_INFERENCE #ifndef PADDLE_MOBILE_INFERENCE
#include "MultiNetwork.h" #include "MultiNetwork.h"
#include "RecurrentGradientMachine.h" #include "RecurrentGradientMachine.h"
#include "paddle/gserver/layers/AgentLayer.h"
#endif #endif
namespace paddle { namespace paddle {
...@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config, ...@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config,
void NeuralNetwork::connect(LayerPtr agentLayer, void NeuralNetwork::connect(LayerPtr agentLayer,
LayerPtr realLayer, LayerPtr realLayer,
int height) { int height) {
#ifndef PADDLE_MOBILE_INFERENCE
AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get()); AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get());
CHECK_NOTNULL(agent); CHECK_NOTNULL(agent);
agent->setRealLayer(realLayer, height); agent->setRealLayer(realLayer, height);
#endif
} }
void NeuralNetwork::connect(std::string agentLayerName, void NeuralNetwork::connect(std::string agentLayerName,
......
...@@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
for (int i = 0; i < config_.inputs_size(); i++) { for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
std::vector<size_t> dilations = {(size_t)dilationY_[i],
(size_t)dilation_[i]};
bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1);
// Convolution Layer uses the GemmConv function by default. // Convolution Layer uses the GemmConv function by default.
convType = "GemmConv"; convType = "GemmConv";
...@@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
#if defined(__ARM_NEON__) || defined(__ARM_NEON) #if defined(__ARM_NEON__) || defined(__ARM_NEON)
if ((filterSize_[i] == filterSizeY_[i]) && if ((filterSize_[i] == filterSizeY_[i]) &&
(filterSize_[i] == 3 || filterSize_[i] == 4) && (filterSize_[i] == 3 || filterSize_[i] == 4) &&
(stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) { (stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) &&
!useDilation) {
convType = "NeonDepthwiseConv"; convType = "NeonDepthwiseConv";
} }
#endif #endif
} }
if (FLAGS_use_nnpack && !isDeconv_) { if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) {
createFunction(forward_, createFunction(forward_,
"NNPACKConv", "NNPACKConv",
FuncConfig() FuncConfig()
...@@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
createFunction(backward_, createFunction(backward_,
...@@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
createFunction(backward_, createFunction(backward_,
...@@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap, ...@@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig() FuncConfig()
.set("paddings", paddings) .set("paddings", paddings)
.set("strides", strides) .set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i])); .set("groups", (size_t)groups_[i]));
} }
} }
......
...@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_; ...@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_;
LayerPtr Layer::create(const LayerConfig& config) { LayerPtr Layer::create(const LayerConfig& config) {
std::string type = config.type(); std::string type = config.type();
#ifndef PADDLE_MOBILE_INFERENCE
// NOTE: As following types have illegal character '-', // NOTE: As following types have illegal character '-',
// they can not use REGISTER_LAYER to registrar. // they can not use REGISTER_LAYER to registrar.
// Besides, to fit with old training models, // Besides, to fit with old training models,
...@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) { ...@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) {
return LayerPtr(new MultiClassCrossEntropy(config)); return LayerPtr(new MultiClassCrossEntropy(config));
else if (type == "rank-cost") else if (type == "rank-cost")
return LayerPtr(new RankingCost(config)); return LayerPtr(new RankingCost(config));
#ifndef PADDLE_MOBILE_INFERENCE
else if (type == "auc-validation") else if (type == "auc-validation")
return LayerPtr(new AucValidation(config)); return LayerPtr(new AucValidation(config));
else if (type == "pnpair-validation") else if (type == "pnpair-validation")
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MaxPoolWithMaskLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
bool MaxPoolWithMaskLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
PoolLayer::init(layerMap, parameterMap);
setOutput("mask", &mask_);
return true;
}
size_t MaxPoolWithMaskLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
outputY_ = outputSize(imgSizeY_,
sizeY_,
confPaddingY_,
strideY_,
/* caffeMode */ false);
outputX_ = outputSize(imgSize_,
sizeX_,
confPadding_,
stride_,
/* caffeMode */ false);
layerSize = outputX_ * outputY_ * channels_;
getOutput().setFrameHeight(outputY_);
getOutput().setFrameWidth(outputX_);
return layerSize;
}
void MaxPoolWithMaskLayer::forward(PassType passType) {
size_t size = getSize();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
int batchSize = inputV->getHeight();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
CHECK_EQ(size, outV->getWidth());
resetSpecifyOutput(mask_,
batchSize,
size,
/* isValueClean */ false,
/* isGradClean */ true);
MatrixPtr maskV = mask_.value;
outV->maxPoolForward(*inputV,
imgSizeY_,
imgSize_,
channels_,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
confPaddingY_,
confPadding_,
maskV);
}
void MaxPoolWithMaskLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
MatrixPtr outGrad = getOutputGrad();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
MatrixPtr outV = getOutputValue();
MatrixPtr inputGrad = inputLayers_[0]->getOutputGrad();
inputGrad->maxPoolBackward(*inputV,
imgSizeY_,
imgSize_,
*outGrad,
*outV,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
1,
1,
confPaddingY_,
confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "PoolLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief Basic parent layer of different kinds of pooling
*/
class MaxPoolWithMaskLayer : public PoolLayer {
protected:
Argument mask_;
public:
explicit MaxPoolWithMaskLayer(const LayerConfig& config)
: PoolLayer(config) {}
size_t getSize();
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
};
} // namespace paddle
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PoolLayer.h" #include "PoolLayer.h"
#include "MaxPoolWithMaskLayer.h"
#include "PoolProjectionLayer.h" #include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap, ...@@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride();
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
return true; return true;
} }
...@@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) { ...@@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) {
} else if (CudnnPoolLayer::typeCheck(pool)) { } else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config); return new CudnnPoolLayer(config);
#endif #endif
} else if (pool == "max-pool-with-mask") {
return new MaxPoolWithMaskLayer(config);
} else { } else {
LOG(FATAL) << "Unknown pool type: " << pool; LOG(FATAL) << "Unknown pool type: " << pool;
return nullptr; return nullptr;
......
# gserver pacakge unittests # gserver pacakge unittests
add_simple_unittest(test_LinearChainCRF) add_simple_unittest(test_LinearChainCRF)
add_simple_unittest(test_MultinomialSampler)
add_simple_unittest(test_RecurrentLayer) add_simple_unittest(test_RecurrentLayer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_MultinomialSampler)
endif()
function(gserver_test TARGET) function(gserver_test TARGET)
add_unittest_without_exec(${TARGET} add_unittest_without_exec(${TARGET}
${TARGET}.cpp ${TARGET}.cpp
...@@ -24,6 +27,7 @@ gserver_test(test_ConvUnify) ...@@ -24,6 +27,7 @@ gserver_test(test_ConvUnify)
gserver_test(test_BatchNorm) gserver_test(test_BatchNorm)
gserver_test(test_KmaxSeqScore) gserver_test(test_KmaxSeqScore)
gserver_test(test_Expand) gserver_test(test_Expand)
gserver_test(test_MaxPoolingWithMaskOutput)
########## test_Mkldnn layers and activations ########## ########## test_Mkldnn layers and activations ##########
if(WITH_MKLDNN) if(WITH_MKLDNN)
...@@ -48,7 +52,7 @@ if(WITH_PYTHON) ...@@ -48,7 +52,7 @@ if(WITH_PYTHON)
endif() endif()
############### test_WarpCTCLayer ####################### ############### test_WarpCTCLayer #######################
if(NOT WITH_DOUBLE) if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
add_unittest_without_exec(test_WarpCTCLayer add_unittest_without_exec(test_WarpCTCLayer
test_WarpCTCLayer.cpp) test_WarpCTCLayer.cpp)
......
...@@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { ...@@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1); config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true); config.layerConfig.set_shared_biases(true);
int dilation = 1; int dilation = 2;
if (type == "cudnn_conv") { if (type == "cudnn_conv") {
#if CUDNN_VERSION >= 6000 #if CUDNN_VERSION >= 6000
dilation = 2; dilation = 2;
...@@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { ...@@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
TEST(Layer, PoolLayer) { TEST(Layer, PoolLayer) {
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
...@@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) { ...@@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true);
#endif #endif
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle;
void setPoolConfig(TestConfig* config,
PoolConfig* pool,
const string& poolType) {
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(1);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(1);
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow);
pool->set_output_y(oh);
}
void doOneMaxPoolingWithMaskOutputTest(MatrixPtr& inputMat,
const string& poolType,
bool use_gpu,
MatrixPtr& maskMat) {
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 25, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
pool->set_img_size(5);
pool->set_img_size_y(5);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
config.layerConfig.set_name("MaxPoolWithMask");
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(config,
&dataLayers,
&datas,
&layerMap,
"MaxPoolWithMask",
1,
false,
use_gpu);
dataLayers[0]->getOutputValue()->copyFrom(*inputMat);
FLAGS_use_gpu = use_gpu;
std::vector<ParameterPtr> parameters;
LayerPtr maxPoolingWithMaskOutputLayer;
initTestLayer(config, &layerMap, &parameters, &maxPoolingWithMaskOutputLayer);
maxPoolingWithMaskOutputLayer->forward(PASS_GC);
checkMatrixEqual(maxPoolingWithMaskOutputLayer->getOutput("mask").value,
maskMat);
}
TEST(Layer, maxPoolingWithMaskOutputLayerFwd) {
bool useGpu = false;
MatrixPtr inputMat;
MatrixPtr maskMat;
real inputData[] = {0.1, 0.1, 0.5, 0.5, 1.1, 0.2, 0.2, 0.6, 0.1,
0.1, 0.3, 0.3, 0.7, 0.1, 0.1, 0.4, 0.4, 0.8,
0.8, 0.1, 1.0, 2.0, 3.0, 0.0, 9.0};
real maskData[] = {12, 4, 22, 24};
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->setData(inputData);
maskMat->setData(maskData);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#ifdef PADDLE_WITH_CUDA
useGpu = true;
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->copyFrom(inputData, 25);
maskMat->copyFrom(maskData, 4);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#endif
}
...@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b, ...@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b,
} }
template class BaseMatrixT<real>; template class BaseMatrixT<real>;
#ifndef PADDLE_MOBILE_INFERENCE
template class BaseMatrixT<int>; template class BaseMatrixT<int>;
#else
template <>
void BaseMatrixT<int>::zero() {
applyUnary(unary::Zero<int>());
}
template <>
void BaseMatrixT<int>::assign(int p) {
applyUnary(unary::Assign<int>(p));
}
template <>
void BaseMatrixT<int>::isEqualTo(BaseMatrixT& b, int value) {
applyBinary(binary::IsEqual<int>(value), b);
}
template <>
void BaseMatrixT<int>::neg() {
applyUnary(unary::Neg<int>());
}
template <>
void BaseMatrixT<int>::abs2() {
applyUnary(unary::Abs<int>());
}
template <>
void BaseMatrixT<int>::add(int p) {
applyUnary(unary::Add<int>(p));
}
template <>
void BaseMatrixT<int>::add(int p1, int p2) {
applyUnary(unary::Add2<int>(p1, p2));
}
template <>
void BaseMatrixT<int>::applyL1(int learningRate, int decayRate) {
applyUnary(unary::ApplyL1<int>(learningRate * decayRate));
}
#endif
} // namespace paddle } // namespace paddle
...@@ -25,6 +25,19 @@ else() ...@@ -25,6 +25,19 @@ else()
message(STATUS "Compile with MKLDNNMatrix") message(STATUS "Compile with MKLDNNMatrix")
endif() endif()
if(MOBILE_INFERENCE)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/SIMDFunctions.cpp)
# Remove sparse
list(REMOVE_ITEM MATH_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.h)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp)
endif()
set(MATH_SOURCES set(MATH_SOURCES
"${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu"
"${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu"
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef> #include <cstddef>
#include "Matrix.h" #include "Matrix.h"
...@@ -309,3 +312,57 @@ private: ...@@ -309,3 +312,57 @@ private:
using Matrix::subMatrix; using Matrix::subMatrix;
}; };
} // namespace paddle } // namespace paddle
#else
#include "Matrix.h"
namespace paddle {
class CpuSparseMatrix : public Matrix {
public:
CpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
CpuSparseMatrix(real* data,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, false) {}
real* getValue() const { return nullptr; }
size_t getColStartIdx(size_t i) const { return 0; }
size_t getRowStartIdx(size_t i) const { return 0; }
size_t getColNum(size_t i) const { return 0; }
int* getRowCols(size_t i) const { return nullptr; }
CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width) {
return nullptr;
}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
...@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) { ...@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) {
} }
void GpuMatrix::collectBias(Matrix& a, real scale) { void GpuMatrix::collectBias(Matrix& a, real scale) {
#ifdef PADDLE_WITH_CUDA
CHECK_EQ(getHeight(), (size_t)1); CHECK_EQ(getHeight(), (size_t)1);
CHECK_EQ(width_, a.getWidth()); CHECK_EQ(width_, a.getWidth());
GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a); GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
...@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) { ...@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get(); hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale); hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale);
} }
#endif
} }
void GpuMatrix::collectSharedBias(Matrix& a, real scale) { void GpuMatrix::collectSharedBias(Matrix& a, real scale) {
...@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, ...@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
const GpuMatrix& b, const GpuMatrix& b,
real scaleAB, real scaleAB,
real scaleT) { real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous()); CHECK(isContiguous());
CHECK(b.isContiguous()); CHECK(b.isContiguous());
CHECK(b.useGpu_ == true) << "Matrix type are not equal"; CHECK(b.useGpu_ == true) << "Matrix type are not equal";
...@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a, ...@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
b.height_, b.height_,
scaleAB, scaleAB,
scaleT); scaleT);
#endif
} }
void GpuMatrix::mul(const GpuMatrix& a, void GpuMatrix::mul(const GpuMatrix& a,
const GpuSparseMatrix& b, const GpuSparseMatrix& b,
real scaleAB, real scaleAB,
real scaleT) { real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous()); CHECK(isContiguous());
CHECK(a.isContiguous()); CHECK(a.isContiguous());
CHECK(a.useGpu_ == true) << "Matrix type are not equal"; CHECK(a.useGpu_ == true) << "Matrix type are not equal";
...@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a, ...@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a,
scaleAB, scaleAB,
scaleT); scaleT);
} }
#endif
} }
/* this = a*b */ /* this = a*b */
...@@ -1028,15 +1034,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1028,15 +1034,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW,
MatrixPtr maskMatP) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* maskData = NULL;
size_t frameNum = inputMat.getHeight(); size_t frameNum = inputMat.getHeight();
CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight()); CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels); CHECK(width_ == outputH * outputW * channels);
if (maskMatP != NULL) {
CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal";
CHECK(outputH * outputW * channels == maskMatP->getWidth());
maskData = maskMatP->getData();
}
hl_maxpool_forward(frameNum, hl_maxpool_forward(frameNum,
inputData, inputData,
channels, channels,
...@@ -1051,7 +1065,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1051,7 +1065,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
paddingH, paddingH,
paddingW, paddingW,
data_, data_,
getStride()); getStride(),
maskData);
} }
void GpuMatrix::maxPoolBackward(Matrix& inputMat, void GpuMatrix::maxPoolBackward(Matrix& inputMat,
...@@ -1548,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out, ...@@ -1548,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
} }
void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output); GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label); auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
...@@ -1563,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { ...@@ -1563,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get(); hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy( hl_matrix_multi_binary_cross_entropy(
output_d, entropy_d, mat_d, height_, outputPtr->width_); output_d, entropy_d, mat_d, height_, outputPtr->width_);
#endif
} }
void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output); GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label); auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
...@@ -1581,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { ...@@ -1581,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get(); hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy_bp( hl_matrix_multi_binary_cross_entropy_bp(
output_d, grad_d, mat_d, height_, width_); output_d, grad_d, mat_d, height_, width_);
#endif
} }
void GpuMatrix::vol2Col(real* dataSrc, void GpuMatrix::vol2Col(real* dataSrc,
...@@ -1973,9 +1992,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1973,9 +1992,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW,
MatrixPtr maskMatP) {
real* inputData = inputMat.getData(); real* inputData = inputMat.getData();
real* outData = data_; real* outData = data_;
real* maskData = NULL;
size_t num = inputMat.getHeight(); size_t num = inputMat.getHeight();
size_t inLength = imgSizeH * imgSizeW; size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW; size_t outLength = outputH * outputW;
...@@ -1984,6 +2005,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -1984,6 +2005,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
CHECK_EQ(channels * outLength, this->getWidth()); CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride(); size_t outStride = getStride();
if (maskMatP != NULL) {
maskData = maskMatP->getData();
CHECK_EQ(channels * outLength, maskMatP->getWidth());
}
/* initialize the data_ */ /* initialize the data_ */
for (size_t i = 0; i < height_; i++) { for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) { for (size_t j = 0; j < width_; j++) {
...@@ -2005,10 +2031,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -2005,10 +2031,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
int wstart = pw * strideW - paddingW; int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW); int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) { if (maskData == NULL) {
for (int w = wstart; w < wend; ++w) { for (int h = hstart; h < hend; ++h) {
outData[ph * outputW + pw] = std::max( for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw], inputData[h * imgSizeW + w]); outData[ph * outputW + pw] = std::max(
outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
}
}
} else {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (outData[ph * outputW + pw] < inputData[h * imgSizeW + w]) {
outData[ph * outputW + pw] = inputData[h * imgSizeW + w];
maskData[ph * outputW + pw] = h * imgSizeW + w;
}
}
} }
} }
} }
...@@ -2016,6 +2053,8 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, ...@@ -2016,6 +2053,8 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
// compute offset // compute offset
inputData += inLength; inputData += inLength;
outData += outLength; outData += outLength;
if (maskData != NULL) maskData += outLength;
} }
} }
} }
...@@ -3226,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a, ...@@ -3226,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a,
real scaleAB, real scaleAB,
real scaleT); real scaleT);
#ifndef PADDLE_MOBILE_INFERENCE
void SharedCpuMatrix::mul(CpuSparseMatrix* a, void SharedCpuMatrix::mul(CpuSparseMatrix* a,
CpuMatrix* b, CpuMatrix* b,
real scaleAB, real scaleAB,
...@@ -3354,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) { ...@@ -3354,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) {
} }
} }
#endif
/* Add a (column) vector b to matrix a, column by column */ /* Add a (column) vector b to matrix a, column by column */
void CpuMatrix::addColumnVector(const Matrix& b) { void CpuMatrix::addColumnVector(const Matrix& b) {
BaseMatrix::addColVector(const_cast<Matrix&>(b)); BaseMatrix::addColVector(const_cast<Matrix&>(b));
......
...@@ -861,7 +861,8 @@ public: ...@@ -861,7 +861,8 @@ public:
/** /**
* Pooling forward operation, pick out the largest element * Pooling forward operation, pick out the largest element
* in the sizeX of value * in the sizeX of value, if the maskMatP is not NULL, it will
 * also calculate the location indices.
*/ */
virtual void maxPoolForward(Matrix& inputMat, virtual void maxPoolForward(Matrix& inputMat,
size_t imgSizeH, size_t imgSizeH,
...@@ -874,7 +875,8 @@ public: ...@@ -874,7 +875,8 @@ public:
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW) { size_t paddingW,
MatrixPtr maskMatP = NULL) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
...@@ -1426,7 +1428,8 @@ public: ...@@ -1426,7 +1428,8 @@ public:
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW); size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image, void maxPoolBackward(Matrix& image,
size_t imgSizeH, size_t imgSizeH,
...@@ -1697,7 +1700,8 @@ public: ...@@ -1697,7 +1700,8 @@ public:
size_t outputH, size_t outputH,
size_t outputW, size_t outputW,
size_t paddingH, size_t paddingH,
size_t paddingW); size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image, void maxPoolBackward(Matrix& image,
size_t imgSizeH, size_t imgSizeH,
...@@ -2066,6 +2070,7 @@ public: ...@@ -2066,6 +2070,7 @@ public:
class SharedCpuMatrix : public CpuMatrix { class SharedCpuMatrix : public CpuMatrix {
public: public:
#ifndef PADDLE_MOBILE_INFERENCE
/* blockNum is number of partitions of the matrix */ /* blockNum is number of partitions of the matrix */
SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false) SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false)
: CpuMatrix(height, width, trans) { : CpuMatrix(height, width, trans) {
...@@ -2111,6 +2116,7 @@ private: ...@@ -2111,6 +2116,7 @@ private:
ThreadLocal<CpuMatrixPtr> localBuf_; ThreadLocal<CpuMatrixPtr> localBuf_;
ThreadLocal<std::vector<int>> localBufRows_; ThreadLocal<std::vector<int>> localBufRows_;
ThreadLocal<std::vector<int>> blockSeq_; ThreadLocal<std::vector<int>> blockSeq_;
#endif
}; };
typedef struct { unsigned int col; } sparse_non_value_t; typedef struct { unsigned int col; } sparse_non_value_t;
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef> #include <cstddef>
#include "CpuSparseMatrix.h" #include "CpuSparseMatrix.h"
#include "Matrix.h" #include "Matrix.h"
...@@ -237,3 +240,47 @@ private: ...@@ -237,3 +240,47 @@ private:
}; };
} // namespace paddle } // namespace paddle
#else
#include "CpuSparseMatrix.h"
namespace paddle {
class GpuSparseMatrix : public Matrix {
public:
GpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format_ = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
GpuSparseMatrix(real* value,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, true) {}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
...@@ -14,6 +14,8 @@ limitations under the License. */ ...@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once #pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <string.h> #include <string.h>
#include <algorithm> #include <algorithm>
...@@ -313,3 +315,27 @@ private: ...@@ -313,3 +315,27 @@ private:
}; };
} // namespace paddle } // namespace paddle
#else
namespace paddle {
class SparseRowCpuMatrix : public CpuMatrix {
public:
void reserveStore() {}
void clearIndices() {}
};
class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix {
public:
void setupIndices() {}
void addRows(MatrixPtr input) {}
void addRows(IVectorPtr ids) {}
};
class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix {};
class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix {};
class SparseRowIdsCpuMatrix : public CpuMatrix {};
} // namespace paddle
#endif
...@@ -3,8 +3,10 @@ ...@@ -3,8 +3,10 @@
add_simple_unittest(test_ExecViaCpu) add_simple_unittest(test_ExecViaCpu)
add_simple_unittest(test_SIMDFunctions) add_simple_unittest(test_SIMDFunctions)
add_simple_unittest(test_TrainingAlgorithm) add_simple_unittest(test_TrainingAlgorithm)
add_simple_unittest(test_SparseMatrix)
add_simple_unittest(test_RowBuffer) add_simple_unittest(test_RowBuffer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_SparseMatrix)
endif()
# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference. # TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
add_unittest(test_matrixCompare add_unittest(test_matrixCompare
......
...@@ -30,6 +30,10 @@ class AccuracyOp : public framework::OperatorWithKernel { ...@@ -30,6 +30,10 @@ class AccuracyOp : public framework::OperatorWithKernel {
"Input (Label) of accuracy op should not be null."); "Input (Label) of accuracy op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Accuracy"), PADDLE_ENFORCE(ctx->HasOutput("Accuracy"),
"Output (Accuracy) of AccuracyOp should not be null."); "Output (Accuracy) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Correct"),
"Output (Correct) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Total"),
"Output (Total) of AccuracyOp should not be null.");
auto inference_dim = ctx->GetInputDim("Out"); auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label"); auto label_dim = ctx->GetInputDim("Label");
...@@ -43,6 +47,8 @@ class AccuracyOp : public framework::OperatorWithKernel { ...@@ -43,6 +47,8 @@ class AccuracyOp : public framework::OperatorWithKernel {
" the same as label."); " the same as label.");
ctx->SetOutputDim("Accuracy", {1}); ctx->SetOutputDim("Accuracy", {1});
ctx->SetOutputDim("Correct", {1});
ctx->SetOutputDim("Total", {1});
ctx->ShareLoD("Out", /*->*/ "Accuracy"); ctx->ShareLoD("Out", /*->*/ "Accuracy");
} }
...@@ -66,6 +72,8 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -66,6 +72,8 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Label", "Label of the training data"); AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ... // TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch"); AddOutput("Accuracy", "The accuracy of current batch");
AddOutput("Correct", "The correct samples count of current batch");
AddOutput("Total", "The samples count of current batch");
AddComment(R"DOC( AddComment(R"DOC(
Accuracy Operator. Accuracy Operator.
......
...@@ -24,7 +24,8 @@ using platform::PADDLE_CUDA_NUM_THREADS; ...@@ -24,7 +24,8 @@ using platform::PADDLE_CUDA_NUM_THREADS;
template <int BlockSize> template <int BlockSize>
__global__ void AccuracyCudaKernel(const int N, const int D, __global__ void AccuracyCudaKernel(const int N, const int D,
const int64_t* Xdata, const int64_t* Xdata,
const int64_t* labeldata, float* accuracy) { const int64_t* labeldata, int* correct_data,
float* accuracy) {
int count = 0; int count = 0;
__shared__ int total[BlockSize]; __shared__ int total[BlockSize];
...@@ -43,6 +44,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D, ...@@ -43,6 +44,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D,
// reduce the count with init value 0, and output accuracy. // reduce the count with init value 0, and output accuracy.
int result = thrust::reduce(thrust::device, total, total + BlockSize, 0); int result = thrust::reduce(thrust::device, total, total + BlockSize, 0);
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
*correct_data = result;
*accuracy = static_cast<float>(result) / static_cast<float>(N); *accuracy = static_cast<float>(result) / static_cast<float>(N);
} }
} }
...@@ -56,31 +58,48 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> { ...@@ -56,31 +58,48 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
auto* inference = ctx.Input<Tensor>("Out"); auto* inference = ctx.Input<Tensor>("Out");
auto* indices = ctx.Input<Tensor>("Indices"); auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label"); auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy"); auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
// FIXME(typhoonzero): only support indices currently // FIXME(typhoonzero): only support indices currently
// if add support for output values, how to detect the data type? // if add support for output values, how to detect the data type?
const int64_t* indices_data = indices->data<int64_t>(); const int64_t* indices_data = indices->data<int64_t>();
const int64_t* label_data = label->data<int64_t>(); const int64_t* label_data = label->data<int64_t>();
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
size_t num_samples = inference->dims()[0]; int num_samples = static_cast<int>(inference->dims()[0]);
size_t infer_width = inference->dims()[1]; size_t infer_width = inference->dims()[1];
PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float))); PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float)));
// cudaMemset((void**)&correct_data, 0, sizeof(float));
if (num_samples == 0) { if (num_samples == 0) {
return; return;
} }
cudaMemcpy(total_data, &num_samples, sizeof(int), cudaMemcpyHostToDevice);
AccuracyCudaKernel<PADDLE_CUDA_NUM_THREADS><<< AccuracyCudaKernel<PADDLE_CUDA_NUM_THREADS><<<
1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>( 1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>(
num_samples, infer_width, indices_data, label_data, accuracy_data); num_samples, infer_width, indices_data, label_data, correct_data,
accuracy_data);
int d_num_samples, d_num_correct;
float d_accuracy;
cudaMemcpy(&d_num_correct, correct_data, sizeof(int),
cudaMemcpyDeviceToHost);
cudaMemcpy(&d_num_samples, total_data, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(&d_accuracy, accuracy_data, sizeof(float),
cudaMemcpyDeviceToHost);
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
// FIXME(typhoonzero): types of T is for infernece data. // FIXME(typhoonzero): types of T is for inference data.
// label data is always int // label data is always int64
REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>, REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>,
paddle::operators::AccuracyOpCUDAKernel<double>); paddle::operators::AccuracyOpCUDAKernel<double>);
...@@ -29,7 +29,11 @@ class AccuracyKernel : public framework::OpKernel<T> { ...@@ -29,7 +29,11 @@ class AccuracyKernel : public framework::OpKernel<T> {
auto* indices = ctx.Input<Tensor>("Indices"); auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label"); auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy"); auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
const int64_t* indices_data = indices->data<int64_t>(); const int64_t* indices_data = indices->data<int64_t>();
...@@ -55,7 +59,8 @@ class AccuracyKernel : public framework::OpKernel<T> { ...@@ -55,7 +59,8 @@ class AccuracyKernel : public framework::OpKernel<T> {
} }
} }
// FIXME(typhoonzero): we don't accumulate the accuracy for now. *correct_data = num_correct;
*total_data = num_samples;
*accuracy_data = *accuracy_data =
static_cast<float>(num_correct) / static_cast<float>(num_samples); static_cast<float>(num_correct) / static_cast<float>(num_samples);
} }
......
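With the new Correct and Total outputs, the per-batch counts can be accumulated outside the operator; summing the raw counts gives the exact pass-level accuracy even when the last batch is smaller. A minimal sketch of that accumulation follows; the AccuracyAccumulator struct is hypothetical and not part of the Paddle API.

```cpp
// Why the op now also emits Correct/Total: accumulating raw counts (instead of
// averaging per-batch accuracies) yields the exact pass-level accuracy when
// batch sizes differ. The struct below is a hypothetical illustration only.
#include <cstdio>

struct AccuracyAccumulator {
  long long correct = 0;
  long long total = 0;
  // Called once per mini-batch with the op's Correct and Total outputs.
  void update(int batch_correct, int batch_total) {
    correct += batch_correct;
    total += batch_total;
  }
  double eval() const {
    return total == 0 ? 0.0 : static_cast<double>(correct) / total;
  }
};

int main() {
  AccuracyAccumulator acc;
  acc.update(28, 32);  // batch 1
  acc.update(30, 32);  // batch 2
  acc.update(7, 10);   // last, smaller batch
  std::printf("pass accuracy: %.4f\n", acc.eval());  // 65/74 ~= 0.8784
  return 0;
}
```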
...@@ -27,6 +27,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase { ...@@ -27,6 +27,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
void Run(const framework::Scope& scope, void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override { const platform::DeviceContext& dev_ctx) const override {
framework::ExecutionContext ctx(*this, scope, dev_ctx); framework::ExecutionContext ctx(*this, scope, dev_ctx);
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids"); const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores"); const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size(); const size_t step_num = ids->size();
......
...@@ -94,5 +94,13 @@ class CompareOp : public framework::OperatorWithKernel { ...@@ -94,5 +94,13 @@ class CompareOp : public framework::OperatorWithKernel {
REGISTER_LOGICAL_OP(less_than, "Out = X < Y"); REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor); REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
REGISTER_LOGICAL_KERNEL(greater_than, CPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_OP(equal, "Out = X == Y"); REGISTER_LOGICAL_OP(equal, "Out = X == Y");
REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor); REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
...@@ -15,4 +15,9 @@ ...@@ -15,4 +15,9 @@
#include "paddle/operators/compare_op.h" #include "paddle/operators/compare_op.h"
REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor); REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_KERNEL(less_equal, GPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_KERNEL(greater_than, GPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_KERNEL(greater_equal, GPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor); REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor);
...@@ -27,6 +27,24 @@ struct LessThanFunctor { ...@@ -27,6 +27,24 @@ struct LessThanFunctor {
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; } HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; }
}; };
template <typename T>
struct LessEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
};
template <typename T>
struct GreaterThanFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
};
template <typename T>
struct GreaterEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
};
template <typename T> template <typename T>
struct EqualFunctor { struct EqualFunctor {
using ELEM_TYPE = T; using ELEM_TYPE = T;
......
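The added functors follow the same pattern as LessThanFunctor and EqualFunctor: a stateless struct whose operator() is applied element-wise by the shared comparison kernel. A host-only sketch of that pattern, with HOSTDEVICE omitted so it builds with a plain C++ compiler and with names that mirror but do not reuse the operator headers:

```cpp
// Host-only sketch of the comparison-functor pattern: a stateless struct with
// operator()(a, b), applied element-wise over two inputs of equal length.
#include <algorithm>
#include <cassert>
#include <vector>

template <typename T>
struct GreaterEqual {
  using ELEM_TYPE = T;
  bool operator()(const T& a, const T& b) const { return a >= b; }
};

template <typename T, typename Functor>
std::vector<bool> Apply(const std::vector<T>& x, const std::vector<T>& y,
                        Functor func) {
  std::vector<bool> out(x.size());
  std::transform(x.begin(), x.end(), y.begin(), out.begin(), func);
  return out;
}

int main() {
  std::vector<int> x = {1, 2, 3}, y = {2, 2, 2};
  auto out = Apply(x, y, GreaterEqual<int>());  // Out = X >= Y
  assert((out == std::vector<bool>{false, true, true}));
  return 0;
}
```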
...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker, ...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker,
elementwise_add_grad, ops::ElementwiseOpGrad); elementwise_add_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_add, elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_add_grad, elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -35,7 +35,13 @@ REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker, ...@@ -35,7 +35,13 @@ REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker,
elementwise_div_grad, ops::ElementwiseOpGrad); elementwise_div_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_div, elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_div_grad, elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -37,8 +37,12 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker, ...@@ -37,8 +37,12 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_mul, elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, float>, ops::ElementwiseMulKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, double>); ops::ElementwiseMulKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_mul_grad, elementwise_mul_grad,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, float>, ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, double>); ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker, ...@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker,
elementwise_sub_grad, ops::ElementwiseOpGrad); elementwise_sub_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_sub, elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_sub_grad, elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>); ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T> ...@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> { class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -87,11 +87,12 @@ template <typename PoolProcess, class T> ...@@ -87,11 +87,12 @@ template <typename PoolProcess, class T>
class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> { class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) { PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -154,10 +155,11 @@ template <class T> ...@@ -154,10 +155,11 @@ template <class T>
class MaxPool2dGradFunctor<platform::CPUPlace, T> { class MaxPool2dGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> { ...@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -240,17 +242,17 @@ template <typename PoolProcess, class T> ...@@ -240,17 +242,17 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> { class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_depth * output_height * output_width; const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -315,11 +317,12 @@ template <typename PoolProcess, class T> ...@@ -315,11 +317,12 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> { class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) { PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> { ...@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -398,10 +401,11 @@ template <class T> ...@@ -398,10 +401,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::CPUPlace, T> { class MaxPool3dGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> { ...@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -498,15 +502,15 @@ template <typename T> ...@@ -498,15 +502,15 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -563,13 +567,13 @@ template <typename T> ...@@ -563,13 +567,13 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_height = input_grad.dims()[2]; const int batch_size = input_grad->dims()[0];
const int input_width = input_grad.dims()[3]; const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_channels = output_grad.dims()[1]; const int output_channels = output_grad.dims()[1];
const int output_height = output_grad.dims()[2]; const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3]; const int output_width = output_grad.dims()[3];
...@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -612,17 +616,17 @@ template <typename T> ...@@ -612,17 +616,17 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> { ...@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_depth * output_height * output_width; const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
...@@ -691,14 +695,14 @@ template <typename T> ...@@ -691,14 +695,14 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_depth = input_grad.dims()[2]; const int batch_size = input_grad->dims()[0];
const int input_height = input_grad.dims()[3]; const int input_depth = input_grad->dims()[2];
const int input_width = input_grad.dims()[4]; const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_channels = output_grad.dims()[1]; const int output_channels = output_grad.dims()[1];
const int output_depth = output_grad.dims()[2]; const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3]; const int output_height = output_grad.dims()[3];
...@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> { ...@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) { for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
......
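The signature changes above follow one convention: tensors that are only read are passed by const reference, tensors that are written are passed by pointer, and the output pointer comes last, after the pooling parameters. A minimal 1-D analogue of that calling convention, using std::vector as a stand-in for framework::Tensor (the MaxPool1d helper is hypothetical, not Paddle code):

```cpp
// Sketch of the calling convention adopted in the refactor: read-only data by
// const reference, mutated data by pointer, output parameter placed last.
#include <algorithm>
#include <cassert>
#include <vector>

// Hypothetical 1-D analogue of Pool2dFunctor::operator(): input first,
// pooling parameters next, the output pointer last.
void MaxPool1d(const std::vector<float>& input, int ksize, int stride,
               std::vector<float>* output) {
  output->clear();
  for (size_t start = 0; start + ksize <= input.size(); start += stride) {
    output->push_back(*std::max_element(input.begin() + start,
                                        input.begin() + start + ksize));
  }
}

int main() {
  std::vector<float> in = {1, 5, 2, 4, 3, 0};
  std::vector<float> out;
  MaxPool1d(in, /*ksize=*/2, /*stride=*/2, &out);
  assert((out == std::vector<float>{5, 4, 3}));
  return 0;
}
```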
...@@ -21,13 +21,13 @@ namespace math { ...@@ -21,13 +21,13 @@ namespace math {
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool2D(const int nthreads, const T* input_data, __global__ void KernelPool2D(const int nthreads, const T* input_data,
T* output_data, const int channels, const int channels, const int input_height,
const int input_height, const int input_width, const int input_width, const int output_height,
const int output_height, const int output_width, const int output_width, const int ksize_height,
const int ksize_height, const int ksize_width, const int ksize_width, const int stride_height,
const int stride_height, const int stride_width, const int stride_width, const int padding_height,
const int padding_height, const int padding_width, const int padding_width, PoolProcess pool_process,
PoolProcess pool_process) { T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool2DGrad( __global__ void KernelPool2DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_height,
const int input_height, const int input_width, const int output_height, const int input_width, const int output_height, const int output_width,
const int output_width, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_height,
const int stride_height, const int stride_width, const int padding_height, const int stride_width, const int padding_height, const int padding_width,
const int padding_width, PoolProcess pool_process) { PoolProcess pool_process, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width; int offsetW = index % input_width + padding_width;
...@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad( ...@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad(
template <typename T> template <typename T>
__global__ void KernelMaxPool2DGrad( __global__ void KernelMaxPool2DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_height,
const int input_height, const int input_width, const int output_height, const int input_width, const int output_height, const int output_width,
const int output_width, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_height,
const int stride_height, const int stride_width, const int padding_height, const int stride_width, const int padding_height, const int padding_width,
const int padding_width) { T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T> ...@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> { class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
const int padding_width = paddings[1]; const int padding_width = paddings[1];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
PoolProcess, PoolProcess,
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, output_data, input_channels, .stream()>>>(
input_height, input_width, output_height, nthreads, input_data, input_channels, input_height, input_width,
output_width, ksize_height, ksize_width, output_height, output_width, ksize_height, ksize_width, stride_height,
stride_height, stride_width, padding_height, stride_width, padding_height, padding_width, pool_process, output_data);
padding_width, pool_process);
} }
}; };
...@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T> ...@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T>
class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> { class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width; int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_height, input_width, output_height, output_width, input_height, input_width, output_height, output_width, ksize_height,
ksize_height, ksize_width, stride_height, stride_width, padding_height, ksize_width, stride_height, stride_width, padding_height, padding_width,
padding_width, pool_process); pool_process, input_grad_data);
} }
}; };
...@@ -253,10 +252,11 @@ template <typename T> ...@@ -253,10 +252,11 @@ template <typename T>
class MaxPool2dGradFunctor<platform::GPUPlace, T> { class MaxPool2dGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> { ...@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> { ...@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_height, input_width, output_height, output_width, input_height, input_width, output_height, output_width, ksize_height,
ksize_height, ksize_width, stride_height, stride_width, padding_height, ksize_width, stride_height, stride_width, padding_height, padding_width,
padding_width); input_grad_data);
} }
}; };
...@@ -313,14 +313,16 @@ template class Pool2dGradFunctor< ...@@ -313,14 +313,16 @@ template class Pool2dGradFunctor<
platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>; platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>;
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool3D( __global__ void KernelPool3D(const int nthreads, const T* input_data,
const int nthreads, const T* input_data, T* output_data, const int channels, const int channels, const int input_depth,
const int input_depth, const int input_height, const int input_width, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width, const int output_depth, const int output_height,
const int ksize_depth, const int ksize_height, const int ksize_width, const int output_width, const int ksize_depth,
const int stride_depth, const int stride_height, const int stride_width, const int ksize_height, const int ksize_width,
const int padding_depth, const int padding_height, const int padding_width, const int stride_depth, const int stride_height,
PoolProcess pool_process) { const int stride_width, const int padding_depth,
const int padding_height, const int padding_width,
PoolProcess pool_process, T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -358,13 +360,13 @@ __global__ void KernelPool3D( ...@@ -358,13 +360,13 @@ __global__ void KernelPool3D(
template <typename PoolProcess, typename T> template <typename PoolProcess, typename T>
__global__ void KernelPool3DGrad( __global__ void KernelPool3DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_depth,
const int input_depth, const int input_height, const int input_width, const int input_height, const int input_width, const int output_depth,
const int output_depth, const int output_height, const int output_width, const int output_height, const int output_width, const int ksize_depth,
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_depth, const int stride_height, const int stride_width, const int stride_height, const int stride_width, const int padding_depth,
const int padding_depth, const int padding_height, const int padding_width, const int padding_height, const int padding_width, PoolProcess pool_process,
PoolProcess pool_process) { T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width; int offsetW = index % input_width + padding_width;
...@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad( ...@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad(
template <typename T> template <typename T>
__global__ void KernelMaxPool3DGrad( __global__ void KernelMaxPool3DGrad(
const int nthreads, const T* input_data, const T* output_data, const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels, const T* output_grad, const int channels, const int input_depth,
const int input_depth, const int input_height, const int input_width, const int input_height, const int input_width, const int output_depth,
const int output_depth, const int output_height, const int output_width, const int output_height, const int output_width, const int ksize_depth,
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_depth, const int stride_height, const int stride_width, const int stride_height, const int stride_width, const int padding_depth,
const int padding_depth, const int padding_height, const int padding_height, const int padding_width, T* input_grad) {
const int padding_width) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -480,18 +481,18 @@ template <typename PoolProcess, class T> ...@@ -480,18 +481,18 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> { class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
const int padding_width = paddings[2]; const int padding_width = paddings[2];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, input_channels, input_depth, nthreads, input_data, input_channels, input_depth, input_height,
input_height, input_width, output_depth, output_height, output_width, input_width, output_depth, output_height, output_width, ksize_depth,
ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
stride_width, padding_depth, padding_height, padding_width, padding_depth, padding_height, padding_width, pool_process,
pool_process); output_data);
} }
}; };
...@@ -533,11 +534,11 @@ template <typename PoolProcess, class T> ...@@ -533,11 +534,11 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> { class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process) { PoolProcess pool_process, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = int nthreads =
batch_size * input_channels * input_depth * input_height * input_width; batch_size * input_channels * input_depth * input_height * input_width;
...@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> { ...@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_depth, input_height, input_width, output_depth, input_depth, input_height, input_width, output_depth, output_height,
output_height, output_width, ksize_depth, ksize_height, ksize_width, output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_depth, stride_height, stride_width, padding_depth, stride_height, stride_width, padding_depth, padding_height,
padding_height, padding_width, pool_process); padding_width, pool_process, input_grad_data);
} }
}; };
...@@ -590,10 +591,11 @@ template <class T> ...@@ -590,10 +591,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::GPUPlace, T> { class MaxPool3dGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> { ...@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
const T* output_data = output.data<T>(); const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> { ...@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data, nthreads, input_data, output_data, output_grad_data, input_channels,
input_channels, input_depth, input_height, input_width, output_depth, input_depth, input_height, input_width, output_depth, output_height,
output_height, output_width, ksize_depth, ksize_height, ksize_width, output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_depth, stride_height, stride_width, padding_depth, stride_height, stride_width, padding_depth, padding_height,
padding_height, padding_width); padding_width, input_grad_data);
} }
}; };
...@@ -658,11 +660,11 @@ template class Pool3dGradFunctor< ...@@ -658,11 +660,11 @@ template class Pool3dGradFunctor<
template <typename T> template <typename T>
__global__ void KernelMaxPool2dWithIdx( __global__ void KernelMaxPool2dWithIdx(
const int nthreads, const T* input_data, T* output_data, T* mask_data, const int nthreads, const T* input_data, const int channels,
const int channels, const int input_height, const int input_width, const int input_height, const int input_width, const int output_height,
const int output_height, const int output_width, const int ksize_height, const int output_width, const int ksize_height, const int ksize_width,
const int ksize_width, const int stride_height, const int stride_width, const int stride_height, const int stride_width, const int padding_height,
const int padding_height, const int padding_width) { const int padding_width, T* output_data, T* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx( ...@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx(
template <typename T> template <typename T>
__global__ void KernelMaxPool2DWithIdxGrad( __global__ void KernelMaxPool2DWithIdxGrad(
const int nthreads, T* input_grad, const T* output_grad, const T* mask_data, const int nthreads, const T* output_grad, const T* mask_data,
const int channels, const int input_height, const int input_width, const int channels, const int input_height, const int input_width,
const int output_height, const int output_width, const int ksize_height, const int output_height, const int output_width, const int ksize_height,
const int ksize_width, const int stride_height, const int stride_width, const int ksize_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width) { const int padding_height, const int padding_width, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
...@@ -748,16 +750,16 @@ template <typename T> ...@@ -748,16 +750,16 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_height = output.dims()[2]; const int output_height = output->dims()[2];
const int output_width = output.dims()[3]; const int output_width = output->dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
const int ksize_width = ksize[1]; const int ksize_width = ksize[1];
const int stride_height = strides[0]; const int stride_height = strides[0];
...@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_width = paddings[1]; const int padding_width = paddings[1];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width; int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
KernelMaxPool2dWithIdx< KernelMaxPool2dWithIdx<
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, output_data, mask_data, .stream()>>>(
input_channels, input_height, input_width, nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, output_height, output_width, ksize_height, ksize_width, stride_height,
ksize_width, stride_height, stride_width, stride_width, padding_height, padding_width, output_data, mask_data);
padding_height, padding_width);
} }
}; };
...@@ -794,14 +795,14 @@ template <typename T> ...@@ -794,14 +795,14 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_channels = input_grad.dims()[1]; const int batch_size = input_grad->dims()[0];
const int input_height = input_grad.dims()[2]; const int input_channels = input_grad->dims()[1];
const int input_width = input_grad.dims()[3]; const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_height = output_grad.dims()[2]; const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3]; const int output_width = output_grad.dims()[3];
const int ksize_height = ksize[0]; const int ksize_height = ksize[0];
...@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width; int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024; int blocks = (nthreads + 1024 - 1) / 1024;
...@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
KernelMaxPool2DWithIdxGrad< KernelMaxPool2DWithIdxGrad<
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_grad_data, output_grad_data, .stream()>>>(nthreads, output_grad_data, mask_data,
mask_data, input_channels, input_height, input_channels, input_height, input_width,
input_width, output_height, output_width, output_height, output_width, ksize_height,
ksize_height, ksize_width, stride_height, ksize_width, stride_height, stride_width,
stride_width, padding_height, padding_width); padding_height, padding_width, input_grad_data);
} }
}; };
...@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>; ...@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>;
template <typename T> template <typename T>
__global__ void KernelMaxPool3DWithIdx( __global__ void KernelMaxPool3DWithIdx(
const int nthreads, const T* input_data, T* output_data, T* mask_data, const int nthreads, const T* input_data, const int channels,
const int channels, const int input_depth, const int input_height, const int input_depth, const int input_height, const int input_width,
const int input_width, const int output_depth, const int output_height, const int output_depth, const int output_height, const int output_width,
const int output_width, const int ksize_depth, const int ksize_height, const int ksize_depth, const int ksize_height, const int ksize_width,
const int ksize_width, const int stride_depth, const int stride_height, const int stride_depth, const int stride_height, const int stride_width,
const int stride_width, const int padding_depth, const int padding_height, const int padding_depth, const int padding_height, const int padding_width,
const int padding_width) { T* output_data, T* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx( ...@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx(
template <typename T> template <typename T>
__global__ void KernelMaxPool3DWithIdxGrad( __global__ void KernelMaxPool3DWithIdxGrad(
const int nthreads, T* input_grad, const T* output_grad, const T* mask, const int nthreads, const T* output_grad, const T* mask, const int channels,
const int channels, const int input_depth, const int input_height, const int input_depth, const int input_height, const int input_width,
const int input_width, const int output_depth, const int output_height, const int output_depth, const int output_height, const int output_width,
const int output_width, const int ksize_depth, const int ksize_height, const int ksize_depth, const int ksize_height, const int ksize_width,
const int ksize_width, const int stride_depth, const int stride_height, const int stride_depth, const int stride_height, const int stride_width,
const int stride_width, const int padding_depth, const int padding_height, const int padding_depth, const int padding_height, const int padding_width,
const int padding_width) { T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
...@@ -952,18 +953,18 @@ template <typename T> ...@@ -952,18 +953,18 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
const int input_width = input.dims()[4]; const int input_width = input.dims()[4];
const int output_channels = output.dims()[1]; const int output_channels = output->dims()[1];
const int output_depth = output.dims()[2]; const int output_depth = output->dims()[2];
const int output_height = output.dims()[3]; const int output_height = output->dims()[3];
const int output_width = output.dims()[4]; const int output_width = output->dims()[4];
const int ksize_depth = ksize[0]; const int ksize_depth = ksize[0];
const int ksize_height = ksize[1]; const int ksize_height = ksize[1];
const int ksize_width = ksize[2]; const int ksize_width = ksize[2];
...@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_width = paddings[2]; const int padding_width = paddings[2];
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace()); T* mask_data = mask->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height * int nthreads = batch_size * output_channels * output_depth * output_height *
output_width; output_width;
...@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> { ...@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_data, output_data, mask_data, input_channels, nthreads, input_data, input_channels, input_depth, input_height,
input_depth, input_height, input_width, output_depth, output_height, input_width, output_depth, output_height, output_width, ksize_depth,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
stride_height, stride_width, padding_depth, padding_height, padding_depth, padding_height, padding_width, output_data, mask_data);
padding_width);
} }
}; };
...@@ -1005,15 +1005,15 @@ template <typename T> ...@@ -1005,15 +1005,15 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) { std::vector<int>& strides, std::vector<int>& paddings,
const int batch_size = input_grad.dims()[0]; framework::Tensor* input_grad) {
const int input_channels = input_grad.dims()[1]; const int batch_size = input_grad->dims()[0];
const int input_depth = input_grad.dims()[2]; const int input_channels = input_grad->dims()[1];
const int input_height = input_grad.dims()[3]; const int input_depth = input_grad->dims()[2];
const int input_width = input_grad.dims()[4]; const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_depth = output_grad.dims()[2]; const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3]; const int output_height = output_grad.dims()[3];
const int output_width = output_grad.dims()[4]; const int output_width = output_grad.dims()[4];
...@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
const T* mask_data = mask.data<T>(); const T* mask_data = mask.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = int nthreads =
batch_size * input_channels * input_depth * input_height * input_width; batch_size * input_channels * input_depth * input_height * input_width;
...@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> { ...@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0, T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context) reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>( .stream()>>>(
nthreads, input_grad_data, output_grad_data, mask_data, input_channels, nthreads, output_grad_data, mask_data, input_channels, input_depth,
input_depth, input_height, input_width, output_depth, output_height, input_height, input_width, output_depth, output_height, output_width,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height,
stride_height, stride_width, padding_depth, padding_height, stride_width, padding_depth, padding_height, padding_width,
padding_width); input_grad_data);
} }
}; };
......
...@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T> ...@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T>
class Pool2dFunctor { class Pool2dFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* output);
}; };
template <typename Place, typename PoolProcess, typename T> template <typename Place, typename PoolProcess, typename T>
class Pool2dGradFunctor { class Pool2dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* input_grad);
}; };
template <typename Place, class T> template <typename Place, class T>
class MaxPool2dGradFunctor { class MaxPool2dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
template <typename Place, typename PoolProcess, typename T> template <typename Place, typename PoolProcess, typename T>
class Pool3dFunctor { class Pool3dFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& paddings, PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* output);
}; };
template <typename Place, typename PoolProcess, typename T> template <typename Place, typename PoolProcess, typename T>
class Pool3dGradFunctor { class Pool3dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings, std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute); PoolProcess pool_compute, framework::Tensor* input_grad);
}; };
template <typename Place, class T> template <typename Place, class T>
class MaxPool3dGradFunctor { class MaxPool3dGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize, const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
/* /*
...@@ -155,38 +157,38 @@ template <typename Place, typename T> ...@@ -155,38 +157,38 @@ template <typename Place, typename T>
class MaxPool2dWithIndexFunctor { class MaxPool2dWithIndexFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MaxPool2dWithIndexGradFunctor { class MaxPool2dWithIndexGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MaxPool3dWithIndexFunctor { class MaxPool3dWithIndexFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output, const framework::Tensor& input, std::vector<int>& ksize,
framework::Tensor& mask, std::vector<int>& ksize, std::vector<int>& strides, std::vector<int>& paddings,
std::vector<int>& strides, std::vector<int>& paddings); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MaxPool3dWithIndexGradFunctor { class MaxPool3dWithIndexGradFunctor {
public: public:
void operator()(const platform::DeviceContext& context, void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize, const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings); std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
}; };
} // namespace math } // namespace math
......
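The hunks above all apply one refactoring pattern: tensors that a functor writes into (`output`, `mask`, `input_grad`) are no longer taken as mutable references mixed in with the read-only arguments, but as raw pointers placed at the end of the parameter list, while everything read-only stays a `const` reference. Below is a minimal, self-contained sketch of that convention; the `Tensor` struct and `Pool2dSketch` function are simplified stand-ins for illustration, not the Paddle API.

```cpp
#include <cassert>
#include <vector>

// Simplified stand-in for framework::Tensor, for illustration only.
struct Tensor {
  std::vector<float> data;
};

// Read-only arguments by const reference, the result by pointer, outputs last --
// the same ordering the refactored Pool2dFunctor / Pool2dGradFunctor use.
void Pool2dSketch(const Tensor& input, const std::vector<int>& ksize,
                  Tensor* output) {
  assert(output != nullptr);  // passing a pointer makes the in/out role explicit
  output->data.assign(input.data.size() / (ksize[0] * ksize[1]), 0.0f);
  // ... actual pooling arithmetic elided ...
}

int main() {
  Tensor in{std::vector<float>(16, 1.0f)};
  Tensor out;
  Pool2dSketch(in, {2, 2}, &out);  // the destination is passed last, by address
  return 0;
}
```

The caller-side effect is visible in the `pool_op.h` and `pool_with_index_op.h` hunks further down: call sites such as `pool2d_forward(..., pool_process, out)` now pass the destination tensor as the trailing argument, so a reader can tell at a glance which parameters are written to.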
...@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T> Place, paddle::operators::math::MaxPool<T>, T>
pool2d_forward; pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process; paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides, pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool2dFunctor< paddle::operators::math::Pool2dFunctor<
Place, paddle::operators::math::AvgPool<T>, T> Place, paddle::operators::math::AvgPool<T>, T>
pool2d_forward; pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process; paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides, pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} }
} break; } break;
case 3: { case 3: {
...@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T> Place, paddle::operators::math::MaxPool<T>, T>
pool3d_forward; pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process; paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides, pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor< paddle::operators::math::Pool3dFunctor<
Place, paddle::operators::math::AvgPool<T>, T> Place, paddle::operators::math::AvgPool<T>, T>
pool3d_forward; pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process; paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides, pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process); paddings, pool_process, out);
} }
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
...@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> { ...@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> {
if (pooling_type == "max") { if (pooling_type == "max") {
paddle::operators::math::MaxPool2dGradFunctor<Place, T> paddle::operators::math::MaxPool2dGradFunctor<Place, T>
pool2d_backward; pool2d_backward;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings); ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool2dGradFunctor< paddle::operators::math::Pool2dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T> Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool2d_backward; pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process; paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings, pool_process); ksize, strides, paddings, pool_process, in_x_grad);
} }
} break; } break;
case 3: { case 3: {
if (pooling_type == "max") { if (pooling_type == "max") {
paddle::operators::math::MaxPool3dGradFunctor<Place, T> paddle::operators::math::MaxPool3dGradFunctor<Place, T>
pool3d_backward; pool3d_backward;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings); ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool3dGradFunctor< paddle::operators::math::Pool3dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T> Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool3d_backward; pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process; paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out, pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
*out_grad, ksize, strides, paddings, pool_process); ksize, strides, paddings, pool_process, in_x_grad);
} }
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......
...@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> { ...@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
case 2: { case 2: {
paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T> paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T>
pool2d_forward; pool2d_forward;
pool2d_forward(context.device_context(), *in_x, *out, *mask, ksize, pool2d_forward(context.device_context(), *in_x, ksize, strides,
strides, paddings); paddings, out, mask);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T> paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T>
pool3d_forward; pool3d_forward;
pool3d_forward(context.device_context(), *in_x, *out, *mask, ksize, pool3d_forward(context.device_context(), *in_x, ksize, strides,
strides, paddings); paddings, out, mask);
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
} }
...@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> { ...@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
case 2: { case 2: {
paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T> paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T>
pool2d_backward; pool2d_backward;
pool2d_backward(context.device_context(), *in_x_grad, *out_grad, pool2d_backward(context.device_context(), *out_grad, *mask, ksize,
*mask, ksize, strides, paddings); strides, paddings, in_x_grad);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T> paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T>
pool3d_backward; pool3d_backward;
pool3d_backward(context.device_context(), *in_x_grad, *out_grad, pool3d_backward(context.device_context(), *out_grad, *mask, ksize,
*mask, ksize, strides, paddings); strides, paddings, in_x_grad);
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
...@@ -26,6 +27,10 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor, ...@@ -26,6 +27,10 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>; using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
struct SumFunctor { struct SumFunctor {
template <typename Place, typename X, typename Y, typename Dim> template <typename Place, typename X, typename Y, typename Dim>
void operator()(const Place& place, X& x, Y& y, const Dim& dim) { void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
...@@ -133,10 +138,17 @@ class ReduceKernel : public framework::OpKernel<T> { ...@@ -133,10 +138,17 @@ class ReduceKernel : public framework::OpKernel<T> {
dims_vector.erase(dims_vector.begin() + dim); dims_vector.erase(dims_vector.begin() + dim);
dims = framework::make_ddim(dims_vector); dims = framework::make_ddim(dims_vector);
} }
auto out = EigenTensor < T, D == 1 ? 1 : (D - 1) > ::From(*output, dims);
auto& place = context.GetEigenDevice<Place>(); auto& place = context.GetEigenDevice<Place>();
Functor functor; Functor functor;
functor(place, x, out, reduce_dim);
if (D == 1) {
auto out = EigenScalar<T>::From(*output);
functor(place, x, out, reduce_dim);
} else {
auto out = EigenTensor<T, (D - 1)>::From(*output, dims);
functor(place, x, out, reduce_dim);
}
} }
}; };
...@@ -186,13 +198,13 @@ class ReduceGradKernel : public framework::OpKernel<T> { ...@@ -186,13 +198,13 @@ class ReduceGradKernel : public framework::OpKernel<T> {
auto x_reduce = EigenTensor<T, D>::From(*input1, dims); auto x_reduce = EigenTensor<T, D>::From(*input1, dims);
auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims); auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims);
Eigen::array<int, D> braodcast_dim; Eigen::array<int, D> broadcast_dim;
for (size_t i = 0; i < D; ++i) braodcast_dim[i] = 1; for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
braodcast_dim[dim] = input0->dims()[dim]; broadcast_dim[dim] = input0->dims()[dim];
auto& place = context.GetEigenDevice<Place>(); auto& place = context.GetEigenDevice<Place>();
Functor functor; Functor functor;
functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim, functor(place, x, x_reduce, x_grad, x_reduce_grad, broadcast_dim,
braodcast_dim[dim]); broadcast_dim[dim]);
} }
}; };
......
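A numpy analogy (not the Eigen code) for why the `D == 1` case above needs its own branch: reducing one axis of a rank-D tensor yields a rank-(D - 1) tensor, but reducing the only axis of a rank-1 tensor leaves a rank-0 scalar, which is why the kernel switches to `EigenScalar` instead of a rank-(D - 1) `EigenTensor`.

```python
import numpy as np

x = np.arange(6, dtype=np.float32).reshape(2, 3)
print(np.sum(x, axis=1).shape)  # (2,)  -- rank drops from 2 to 1

v = np.arange(4, dtype=np.float32)
print(np.sum(v, axis=0).shape)  # ()    -- rank-1 input reduces to a scalar
```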
...@@ -200,7 +200,10 @@ void Parameter::setMat(ParameterType pType, int matType) { ...@@ -200,7 +200,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
false, false,
useGpu_); useGpu_);
} }
} else if (matType == MAT_NORMAL_SHARED) { }
#ifndef PADDLE_MOBILE_INFERENCE
// NOLINTNEXTLINE
else if (matType == MAT_NORMAL_SHARED) {
CHECK_EQ(height * width, bufs_[pType]->getSize()); CHECK_EQ(height * width, bufs_[pType]->getSize());
size_t blockNum = 0; size_t blockNum = 0;
CHECK(isGradShared(&blockNum)); CHECK(isGradShared(&blockNum));
...@@ -259,7 +262,10 @@ void Parameter::setMat(ParameterType pType, int matType) { ...@@ -259,7 +262,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
} else if (matType == MAT_SPARSE_ROW_AUTO_GROW) { } else if (matType == MAT_SPARSE_ROW_AUTO_GROW) {
CHECK(isGradSparseUpdate()); CHECK(isGradSparseUpdate());
mats_[pType] = std::make_shared<SparseAutoGrowRowCpuMatrix>(height, width); mats_[pType] = std::make_shared<SparseAutoGrowRowCpuMatrix>(height, width);
} else { }
#endif
// NOLINTNEXTLINE
else {
LOG(FATAL) << "Unsupported mat type" << matType; LOG(FATAL) << "Unsupported mat type" << matType;
} }
} }
......
...@@ -33,6 +33,7 @@ MatrixPtr makeRandomSparseMatrix(size_t height, ...@@ -33,6 +33,7 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
bool withValue, bool withValue,
bool useGpu, bool useGpu,
bool equalNnzPerSample) { bool equalNnzPerSample) {
#ifndef PADDLE_MOBILE_INFERENCE
std::vector<int64_t> ids(height); std::vector<int64_t> ids(height);
std::vector<int64_t> indices(height + 1); std::vector<int64_t> indices(height + 1);
indices[0] = 0; indices[0] = 0;
...@@ -84,6 +85,8 @@ MatrixPtr makeRandomSparseMatrix(size_t height, ...@@ -84,6 +85,8 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
} }
return mat; return mat;
} }
#endif
return nullptr;
} }
void generateSequenceStartPositions(size_t batchSize, void generateSequenceStartPositions(size_t batchSize,
......
...@@ -37,10 +37,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -37,10 +37,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so
DEPENDS paddle_pybind) DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
...@@ -66,7 +66,7 @@ if (WITH_TESTING) ...@@ -66,7 +66,7 @@ if (WITH_TESTING)
add_subdirectory(paddle/v2/tests) add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests) add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests) add_subdirectory(paddle/v2/plot/tests)
add_subdirectory(paddle/v2/framework/tests) add_subdirectory(paddle/v2/fluid/tests)
endif() endif()
endif() endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
......
...@@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None): ...@@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None):
#caffe_mode: compute the output size using floor instead of ceil, #caffe_mode: compute the output size using floor instead of ceil,
# which is consistent of caffe and CuDNN's convention. # which is consistent of caffe and CuDNN's convention.
def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): def cnn_output_size(img_size,
output = (2 * padding + img_size - filter_size) / float(stride) filter_size,
padding,
stride,
caffe_mode,
dilation=1):
filter_s = (filter_size - 1) * dilation + 1
output = (2 * padding + img_size - filter_s) / float(stride)
if caffe_mode: if caffe_mode:
return 1 + int(math.floor(output)) return 1 + int(math.floor(output))
else: else:
...@@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): ...@@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
#calculate image_size based on output_size for de-convolution (ConvTransLayer). #calculate image_size based on output_size for de-convolution (ConvTransLayer).

#It is the reverse function of cnn_output_size #It is the reverse function of cnn_output_size
def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): def cnn_image_size(output_size,
img_size = (output_size - 1) * stride + filter_size - 2 * padding filter_size,
padding,
stride,
caffe_mode,
dilation=1):
filter_s = (filter_size - 1) * dilation + 1
img_size = (output_size - 1) * stride + filter_s - 2 * padding
if not caffe_mode: if not caffe_mode:
img_size = img_size + 1 img_size = img_size + 1
return img_size return img_size
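The two hunks above change the size arithmetic to use the dilated (effective) filter size. A self-contained restatement of that arithmetic, with the branch elided in the first hunk filled in from the comment at the top of this section (floor for caffe_mode, ceil otherwise):

```python
import math

def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode,
                    dilation=1):
    # Dilation enlarges the effective filter size, as in the hunk above.
    filter_s = (filter_size - 1) * dilation + 1
    output = (2 * padding + img_size - filter_s) / float(stride)
    return 1 + int(math.floor(output) if caffe_mode else math.ceil(output))

def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode,
                   dilation=1):
    # Reverse mapping, used for transposed convolution.
    filter_s = (filter_size - 1) * dilation + 1
    img_size = (output_size - 1) * stride + filter_s - 2 * padding
    return img_size if caffe_mode else img_size + 1

# A 32x32 input, 3x3 filter with dilation 2 (effective size 5), stride 1 and
# padding 2 keeps the spatial size unchanged:
assert cnn_output_size(32, 3, 2, 1, caffe_mode=True, dilation=2) == 32
assert cnn_image_size(32, 3, 2, 1, caffe_mode=True, dilation=2) == 32
```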
...@@ -1253,9 +1265,9 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf): ...@@ -1253,9 +1265,9 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): def parse_pool(pool, input_layer_name, pool_conf, ceil_mode):
pool_conf.pool_type = pool.pool_type pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in [ config_assert(pool.pool_type in [
'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool' 'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool'
], "pool-type %s is not in " ], "pool-type %s is not in " \
"['max-projection', 'avg-projection', " "['max-projection', 'avg-projection', 'max-pool-with-mask'," \
"'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type) "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type)
pool_conf.channels = pool.channels pool_conf.channels = pool.channels
...@@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): ...@@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.stride_y = conv.stride_y conv_conf.stride_y = conv.stride_y
conv_conf.groups = conv.groups conv_conf.groups = conv.groups
conv_conf.caffe_mode = conv.caffe_mode conv_conf.caffe_mode = conv.caffe_mode
if not conv.dilation:
conv.dilation = 1
conv.dilation_y = 1
else:
conv_conf.dilation = conv.dilation
conv_conf.dilation_y = conv.dilation_y
if not trans: if not trans:
conv_conf.filter_channels = conv.channels / conv.groups conv_conf.filter_channels = conv.channels / conv.groups
...@@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False): ...@@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
get_img_size(input_layer_name, conv.channels) get_img_size(input_layer_name, conv.channels)
conv_conf.output_x = cnn_output_size( conv_conf.output_x = cnn_output_size(
conv_conf.img_size, conv_conf.filter_size, conv_conf.padding, conv_conf.img_size, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode) conv_conf.stride, conv_conf.caffe_mode, conv.dilation)
conv_conf.output_y = cnn_output_size( conv_conf.output_y = cnn_output_size(
conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y, conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode) conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y)
else: else:
conv_conf.filter_channels = num_filters / conv.groups conv_conf.filter_channels = num_filters / conv.groups
conv_conf.output_x, conv_conf.output_y = \ conv_conf.output_x, conv_conf.output_y = \
get_img_size(input_layer_name, conv.channels) get_img_size(input_layer_name, conv.channels)
conv_conf.img_size = cnn_image_size( conv_conf.img_size = cnn_image_size(
conv_conf.output_x, conv_conf.filter_size, conv_conf.padding, conv_conf.output_x, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode) conv_conf.stride, conv_conf.caffe_mode, conv.dilation)
conv_conf.img_size_y = cnn_image_size( conv_conf.img_size_y = cnn_image_size(
conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y, conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode) conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y)
#caffe_mode: compute the output size using floor instead of ceil, #caffe_mode: compute the output size using floor instead of ceil,
......
...@@ -20,7 +20,7 @@ from paddle.trainer.config_parser import * ...@@ -20,7 +20,7 @@ from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import * from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType, \ from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \
CudnnAvgPooling, CudnnMaxPooling CudnnAvgPooling, CudnnMaxPooling
from .attrs import * from .attrs import *
from .default_decorators import * from .default_decorators import *
...@@ -888,7 +888,7 @@ def mixed_layer(size=0, ...@@ -888,7 +888,7 @@ def mixed_layer(size=0,
:type size: int :type size: int
:param input: The input of this layer. It is an optional parameter. If set, :param input: The input of this layer. It is an optional parameter. If set,
then this function will just return layer's name. then this function will just return layer's name.
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
...@@ -1030,7 +1030,7 @@ def fc_layer(input, ...@@ -1030,7 +1030,7 @@ def fc_layer(input,
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param size: The layer dimension. :param size: The layer dimension.
:type size: int :type size: int
:param act: Activation Type. TanhActivation is the default. :param act: Activation Type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute|list. :param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -1527,7 +1527,7 @@ def lstmemory(input, ...@@ -1527,7 +1527,7 @@ def lstmemory(input,
:type input: LayerOutput :type input: LayerOutput
:param reverse: is sequence process reversed or not. :param reverse: is sequence process reversed or not.
:type reverse: bool :type reverse: bool
:param act: Activation type. TanhActivation is the default. :math:`h_t` :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activation type, SigmoidActivation by default. :param gate_act: gate activation type, SigmoidActivation by default.
:type gate_act: BaseActivation :type gate_act: BaseActivation
...@@ -1920,7 +1920,7 @@ def repeat_layer(input, ...@@ -1920,7 +1920,7 @@ def repeat_layer(input,
False for treating input as column vector and repeating False for treating input as column vector and repeating
in the row direction. in the row direction.
:type as_row_vector: bool :type as_row_vector: bool
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
...@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input, ...@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input,
:type reshape_size: int :type reshape_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
...@@ -2487,7 +2487,7 @@ def img_conv_layer(input, ...@@ -2487,7 +2487,7 @@ def img_conv_layer(input,
shape will be (filter_size, filter_size_y). shape will be (filter_size, filter_size_y).
:type filter_size_y: int | None :type filter_size_y: int | None
:param num_filters: Each filter group's number of filter :param num_filters: Each filter group's number of filter
:param act: Activation type. ReluActivation is the default. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: Group size of filters. :param groups: Group size of filters.
:type groups: int :type groups: int
...@@ -2571,7 +2571,9 @@ def img_conv_layer(input, ...@@ -2571,7 +2571,9 @@ def img_conv_layer(input,
if layer_type: if layer_type:
if dilation > 1 or dilation_y > 1: if dilation > 1 or dilation_y > 1:
assert layer_type in ["cudnn_conv", "cudnn_convt"] assert layer_type in [
"cudnn_conv", "cudnn_convt", "exconv", "exconvt"
]
if trans: if trans:
assert layer_type in ["exconvt", "cudnn_convt"] assert layer_type in ["exconvt", "cudnn_convt"]
else: else:
...@@ -2699,9 +2701,9 @@ def img_pool_layer(input, ...@@ -2699,9 +2701,9 @@ def img_pool_layer(input,
elif isinstance(pool_type, AvgPooling): elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg' pool_type.name = 'avg'
assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling, assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling,
CudnnMaxPooling], \ CudnnMaxPooling], \
"only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported" "only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling are supported"
type_name = pool_type.name + '-projection' \ type_name = pool_type.name + '-projection' \
if ( if (
...@@ -3253,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): ...@@ -3253,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
:param input: Input layers. It could be a LayerOutput or list/tuple of :param input: Input layers. It could be a LayerOutput or list/tuple of
LayerOutput. LayerOutput.
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
...@@ -3311,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -3311,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
:type name: basestring :type name: basestring
:param input: input layers or projections :param input: input layers or projections
:type input: list | tuple | collections.Sequence :type input: list | tuple | collections.Sequence
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3406,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -3406,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
:type a: LayerOutput :type a: LayerOutput
:param b: input sequence layer :param b: input sequence layer
:type b: LayerOutput :type b: LayerOutput
:param act: Activation type. IdentityActivation is the default. :param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3572,31 +3574,32 @@ def lstm_step_layer(input, ...@@ -3572,31 +3574,32 @@ def lstm_step_layer(input,
... ...
This layer has two outputs. Default output is :math:`h_t`. The other This layer has two outputs. The default output is :math:`h_t`. The other
output is :math:`o_t`, whose name is 'state' and can use output is :math:`o_t`, whose name is 'state' and users can use
:code:`get_output_layer` to extract this output. :code:`get_output_layer` to extract this output.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param size: Layer's size. NOTE: lstm layer's size, should be equal to :param size: The dimension of this layer's output, which must be
:code:`input.size/4`, and should be equal to equal to the dimension of the state.
:code:`state.size`.
:type size: int :type size: int
:param input: input layer. :math:`Wx_t + Wh_{t-1}` :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param state: State Layer. :math:`c_{t-1}` :param state: The state of the LSTM unit.
:type state: LayerOutput :type state: LayerOutput
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Gate Activation Type. SigmoidActivation is the default. :param gate_act: Activation type of the gate. SigmoidActivation is the
default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: State Activation Type. TanhActivation is the default. :param state_act: Activation type of the state. TanhActivation is the
default activation.
:type state_act: BaseActivation :type state_act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero. parameter is set to True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3641,22 +3644,31 @@ def gru_step_layer(input, ...@@ -3641,22 +3644,31 @@ def gru_step_layer(input,
layer_attr=None): layer_attr=None):
""" """
:param input: :param input: The input of this layer, whose dimension can be divided by 3.
:type input: LayerOutput :type input: LayerOutput
:param output_mem: :param output_mem: A memory which memorizes the output of this layer at previous
:param size: time step.
:param act: :type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :type name: basestring
:param gate_act: Activation type of this layer's two gates. SigmoidActivation is
the default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The parameter attribute for bias. If this parameter is set to
whose type is not ParameterAttribute, no bias is defined. If the False or an object whose type is not ParameterAttribute, no bias
parameter is set to True, the bias is initialized to zero. is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: the parameter_attribute for transforming the output_mem :param param_attr: The parameter attribute. See ParameterAttribute for details.
from previous step. :type param_attr: ParameterAttribute
:param layer_attr: :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3701,24 +3713,34 @@ def gru_step_naive_layer(input, ...@@ -3701,24 +3713,34 @@ def gru_step_naive_layer(input,
param_attr=None, param_attr=None,
layer_attr=None): layer_attr=None):
""" """
GRU Step Layer, but using MixedLayer to generate. It support ERROR_CLIPPING GRU Step Layer, which is realized using PaddlePaddle API. It supports ERROR_CLIPPING
and DROPOUT. and DROPOUT.
:param input: :param input: The input of this layer, whose dimensionality can be divided by 3.
:param output_mem: :param output_mem: A memory which memorizes the output of this layer at previous
:param size: time step.
:type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param act: :type name: basestring
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid. :param gate_act: Activation type of this layer's two gates. SigmoidActivation
is the default activation.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The parameter attribute for bias. If this parameter is set to
whose type is not ParameterAttribute, no bias is defined. If the False or an object whose type is not ParameterAttribute, no bias
parameter is set to True, the bias is initialized to zero. is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: :param param_attr: The parameter attribute. See ParameterAttribute for details.
:param layer_attr: :type param_attr: ParameterAttribute
:return: :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if input.size % 3 != 0: if input.size % 3 != 0:
...@@ -3780,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None): ...@@ -3780,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: get output layer's input. And this layer should contains :param input: The input layer. And this layer should contain
multiple outputs. multiple outputs.
:type input: LayerOutput :type input: LayerOutput
:param arg_name: Output name from input. :param arg_name: The name of the output to be extracted from the input layer.
:type arg_name: basestring :type arg_name: basestring
:param layer_attr: Layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3842,17 +3865,20 @@ def recurrent_layer(input, ...@@ -3842,17 +3865,20 @@ def recurrent_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The parameter attribute for bias. If this parameter is set to
whose type is not ParameterAttribute, no bias is defined. If the False or an object whose type is not ParameterAttribute,
parameter is set to True, the bias is initialized to zero. no bias is defined. If the parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: parameter attribute. :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -3877,7 +3903,7 @@ def recurrent_layer(input, ...@@ -3877,7 +3903,7 @@ def recurrent_layer(input,
class StaticInput(object): class StaticInput(object):
""" """
StaticInput is only used in recurrent_group which defines a read-only memory StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence. and can be a sequence or non-sequence.
:param size: DEPRECATED :param size: DEPRECATED
:param is_seq: DEPRECATED :param is_seq: DEPRECATED
""" """
...@@ -3910,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3910,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
Recurrent layer group is an extremely flexible recurrent unit in Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a PaddlePaddle. As long as the user defines the calculation done within a
time step, PaddlePaddle will iterate such a recurrent calculation over time step, PaddlePaddle will iterate such a recurrent calculation over
sequence input. This is extremely usefull for attention based model, or sequence input. This is useful for attention-based models, or Neural
Neural Turing Machine like models. Turing Machine like models.
The basic usage (time steps) is: The basic usage (time steps) is:
...@@ -3933,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3933,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
demo/seqToseq/seqToseq_net.py demo/seqToseq/seqToseq_net.py
- sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf - sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf
:param step: recurrent one time step function.The input of this function is :param step: A step function which takes the input of recurrent_group as its own
input of the group. The return of this function will be input and returns values as recurrent_group's output every time step.
recurrent group's return value.
The recurrent group scatter a sequence into time steps. And The recurrent group scatters a sequence into time steps. And
for each time step, will invoke step function, and return for each time step, it will invoke step function, and return
a time step result. Then gather each time step of output into a time step result. Then gather outputs of each time step into
layer group's output. layer group's output.
:type step: callable :type step: callable
:param name: recurrent_group's name. :param name: The recurrent_group's name. It is optional.
:type name: basestring :type name: basestring
:param input: Input links array. :param input: Input links array.
...@@ -3952,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3952,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
LayerOutput will be scattered into time steps. LayerOutput will be scattered into time steps.
SubsequenceInput will be scattered into sequence steps. SubsequenceInput will be scattered into sequence steps.
StaticInput will be imported to each time step, and doesn't change StaticInput will be imported to each time step, and doesn't change
through time. It's a mechanism to access layer outside step function. over time. It's a mechanism to access layer outside step function.
:type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple
:param reverse: If reverse is set true, the recurrent unit will process the :param reverse: If reverse is set to True, the recurrent unit will process the
input sequence in a reverse order. input sequence in a reverse order.
:type reverse: bool :type reverse: bool
...@@ -4091,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None): ...@@ -4091,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None):
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4124,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None): ...@@ -4124,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input1: The first input layer name. :param input1: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param input2: The second input layer name. :param input2: The second input layer.
:type input2: LayerOutput :type input2: LayerOutput
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4167,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): ...@@ -4167,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param eos_id: end id of sequence :param eos_id: End id of sequence
:type eos_id: int :type eos_id: int
:param layer_attr: extra layer attributes. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4230,8 +4258,9 @@ def beam_search(step, ...@@ -4230,8 +4258,9 @@ def beam_search(step,
- machine translation : demo/seqToseq/translation/gen.conf \ - machine translation : demo/seqToseq/translation/gen.conf \
demo/seqToseq/seqToseq_net.py demo/seqToseq/seqToseq_net.py
:param name: Name of the recurrent unit that generates sequences. :param name: The name of the recurrent unit that is responsible for
:type name: base string generating sequences. It is optional.
:type name: basestring
:param step: A callable function that defines the calculation in a time :param step: A callable function that defines the calculation in a time
step, and it is applied to sequences with arbitrary length by step, and it is applied to sequences with arbitrary length by
sharing a same set of weights. sharing a same set of weights.
...@@ -4356,16 +4385,18 @@ def square_error_cost(input, ...@@ -4356,16 +4385,18 @@ def square_error_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Network prediction. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: Data label. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4398,17 +4429,20 @@ def classification_cost(input, ...@@ -4398,17 +4429,20 @@ def classification_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: input layer name. network output. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: label layer name. data_layer often. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param evaluator: Evaluator method. :param evaluator: Evaluator method. classification_error_evaluator is the default.
:param layer_attr: layer's extra attribute. :type evaluator: Evaluator method
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4461,7 +4495,7 @@ def conv_operator(img, ...@@ -4461,7 +4495,7 @@ def conv_operator(img,
Different from img_conv_layer, conv_op is an Operator, which can be used Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution. in mixed_layer. And conv_op takes two inputs to perform convolution.
The first input is the image and the second is filter kernel. It only The first input is the image and the second is filter kernel. It only
support GPU mode. supports GPU mode.
The example usage is: The example usage is:
...@@ -4473,27 +4507,31 @@ def conv_operator(img, ...@@ -4473,27 +4507,31 @@ def conv_operator(img,
num_filters=64, num_filters=64,
num_channels=64) num_channels=64)
:param img: input image :param img: The input image.
:type img: LayerOutput :type img: LayerOutput
:param filter: input filter :param filter: The input filter.
:type filter: LayerOutput :type filter: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The dimension of the filter kernel on the x axis.
:type filter_size: int :type filter_size: int
:param filter_size_y: The y dimension of a filter kernel. Since :param filter_size_y: The dimension of the filter kernel on the y axis.
PaddlePaddle now supports rectangular filters, If the parameter is not set or set to None, it will
the filter's shape can be (filter_size, filter_size_y). be set to 'filter_size' automatically.
:type filter_size_y: int :type filter_size_y: int
:param num_filters: channel of output data. :param num_filters: The number of the output channels.
:type num_filters: int :type num_filters: int
:param num_channels: channel of input data. :param num_channels: The number of the input channels. If the parameter is not set
or set to None, it will be automatically set to the channel
number of the 'img'.
:type num_channels: int :type num_channels: int
:param stride: The x dimension of the stride. :param stride: The stride on the x axis.
:type stride: int :type stride: int
:param stride_y: The y dimension of the stride. :param stride_y: The stride on the y axis. If the parameter is not set or
set to None, it will be set to 'stride' automatically.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of padding. :param padding: The padding size on the x axis.
:type padding: int :type padding: int
:param padding_y: The y dimension of padding. :param padding_y: The padding size on the y axis. If the parameter is not set
or set to None, it will be set to 'padding' automatically.
:type padding_y: int :type padding_y: int
:return: A ConvOperator Object. :return: A ConvOperator Object.
:rtype: ConvOperator :rtype: ConvOperator
...@@ -4544,9 +4582,9 @@ def conv_projection(input, ...@@ -4544,9 +4582,9 @@ def conv_projection(input,
param_attr=None, param_attr=None,
trans=False): trans=False):
""" """
Different from img_conv_layer and conv_op, conv_projection is an Projection, Different from img_conv_layer and conv_op, conv_projection is a Projection,
which can be used in mixed_layer and conat_layer. It use cudnn to implement which can be used in mixed_layer and concat_layer. It uses cudnn to implement
conv and only support GPU mode. convolution and only supports GPU mode.
The example usage is: The example usage is:
...@@ -4559,32 +4597,45 @@ def conv_projection(input, ...@@ -4559,32 +4597,45 @@ def conv_projection(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The dimensions of the filter kernel. If the parameter is
:type filter_size: int set to one integer, the two dimensions on x and y axes
:param filter_size_y: The y dimension of a filter kernel. Since will be the same when filter_size_y is not set. If it is set
PaddlePaddle now supports rectangular filters, to a list, the first element indicates the dimension on
the filter's shape can be (filter_size, filter_size_y). the x axis, and the second is used to specify the dimension
on the y axis when filter_size_y is not provided.
:type filter_size: int | tuple | list
:param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
is not set, it will be set automatically according to filter_size.
:type filter_size_y: int :type filter_size_y: int
:param num_filters: channel of output data. :param num_filters: The number of filters.
:type num_filters: int :type num_filters: int
:param num_channels: channel of input data. :param num_channels: The number of the input channels.
:type num_channels: int :type num_channels: int
:param stride: The x dimension of the stride. :param stride: The strides. If the parameter is set to one integer, the strides
:type stride: int on x and y axes will be the same when stride_y is not set. If it is
:param stride_y: The y dimension of the stride. set to a list, the first element indicates the stride on the x axis,
and the second is used to specify the stride on the y axis when
stride_y is not provided.
:type stride: int | tuple | list
:param stride_y: The stride on the y axis.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of padding. :param padding: The padding sizes. If the parameter is set to one integer, the padding
:type padding: int sizes on x and y axes will be the same when padding_y is not set. If it
:param padding_y: The y dimension of padding. is set to a list, the first element indicates the padding size on the
x axis, and the second is used to specify the padding size on the y axis
when padding_y is not provided.
:type padding: int | tuple | list
:param padding_y: The padding size on the y axis.
:type padding_y: int :type padding_y: int
:param groups: The group number. :param groups: The group number.
:type groups: int :type groups: int
:param param_attr: Convolution param attribute. None means default attribute :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param trans: whether it is convTrans or conv :param trans: Whether it is ConvTransProjection or ConvProjection
:type trans: bool :type trans: bool
:return: A DotMulProjection Object. :return: A Projection Object.
:rtype: DotMulProjection :rtype: ConvTransProjection | ConvProjection
""" """
if num_channels is None: if num_channels is None:
assert input.num_filters is not None assert input.num_filters is not None
...@@ -4649,13 +4700,13 @@ def pad_layer(input, ...@@ -4649,13 +4700,13 @@ def pad_layer(input,
layer_attr=None): layer_attr=None):
""" """
This operation pads zeros to the input data according to pad_c,pad_h This operation pads zeros to the input data according to pad_c,pad_h
and pad_w. pad_c, pad_h, pad_w specifies the which dimension and size and pad_w. pad_c, pad_h, pad_w specify the size in the corresponding
of padding. And the input data shape is NCHW. dimension. And the input data shape is NCHW.
For example, pad_c=[2,3] means padding 2 zeros before the For example, pad_c=[2,3] means padding 2 zeros before the input data
input data and 3 zeros after the input data in channel dimension. and 3 zeros after the input data in the channel dimension. pad_h means
pad_h means padding zeros in height dimension. pad_w means padding zeros padding zeros in the height dimension. pad_w means padding zeros in the
in width dimension. width dimension.
For example, For example,
...@@ -4692,13 +4743,14 @@ def pad_layer(input, ...@@ -4692,13 +4743,14 @@ def pad_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pad_c: padding size in channel dimension. :param pad_c: The padding size in the channel dimension.
:type pad_c: list | None :type pad_c: list | None
:param pad_h: padding size in height dimension. :param pad_h: The padding size in the height dimension.
:type pad_h: list | None :type pad_h: list | None
:param pad_w: padding size in width dimension. :param pad_w: The padding size in the width dimension.
:type pad_w: list | None :type pad_w: list | None
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
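The pad_c/pad_h/pad_w semantics described in the pad_layer docstring above can be mimicked with numpy; a small illustration (not the layer itself), using a hypothetical NCHW batch:

```python
import numpy as np

# Hypothetical NCHW batch: 1 sample, 4 channels, 2x3 feature map.
x = np.ones((1, 4, 2, 3), dtype=np.float32)

# pad_c=[2, 3]: 2 zero channels before, 3 after; height and width untouched.
padded = np.pad(x, ((0, 0), (2, 3), (0, 0), (0, 0)),
                mode='constant', constant_values=0)
print(padded.shape)  # (1, 9, 2, 3)
```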
...@@ -4747,7 +4799,7 @@ def pad_layer(input, ...@@ -4747,7 +4799,7 @@ def pad_layer(input,
@layer_support() @layer_support()
def conv_shift_layer(a, b, name=None, layer_attr=None): def conv_shift_layer(a, b, name=None, layer_attr=None):
""" """
This layer performs cyclic convolution for two input. For example: This layer performs cyclic convolution on two inputs. For example:
- a[in]: contains M elements. - a[in]: contains M elements.
- b[in]: contains N elements (N should be odd). - b[in]: contains N elements (N should be odd).
- c[out]: contains M elements. - c[out]: contains M elements.
...@@ -4756,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -4756,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j} c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j}
In this formular: In this formula:
- a's index is computed modulo M. When it is negative, then get item from - a's index is computed modulo M. When it is negative, then get item from
the right side (which is the end of array) to the left. the right side (which is the end of array) to the left.
- b's index is computed modulo N. When it is negative, then get item from - b's index is computed modulo N. When it is negative, then get item from
...@@ -4770,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): ...@@ -4770,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: The first input of this layer.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b. :param b: The second input of this layer.
:type b: LayerOutput :type b: LayerOutput
:param layer_attr: layer's extra attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
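A minimal numpy sketch of the cyclic convolution formula quoted in the conv_shift_layer docstring above (indices into `a` wrap modulo M, indices into `b` modulo N; N must be odd). This is an illustration of the formula only, not the layer implementation.

```python
import numpy as np

def conv_shift(a, b):
    """c[i] = sum_{j=-(N-1)/2}^{(N-1)/2} a[(i+j) mod M] * b[j mod N]."""
    M, N = len(a), len(b)
    assert N % 2 == 1
    half = (N - 1) // 2
    c = np.zeros(M, dtype=np.result_type(a, b))
    for i in range(M):
        for j in range(-half, half + 1):
            c[i] += a[(i + j) % M] * b[j % N]
    return c

print(conv_shift(np.arange(5.0), np.array([1.0, 2.0, 1.0])))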
...@@ -4805,8 +4858,8 @@ def tensor_layer(a, ...@@ -4805,8 +4858,8 @@ def tensor_layer(a,
bias_attr=None, bias_attr=None,
layer_attr=None): layer_attr=None):
""" """
This layer performs tensor operation for two input. This layer performs tensor operation on two inputs.
For example, each sample: For example:
.. math:: .. math::
y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1 y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1
...@@ -4826,21 +4879,24 @@ def tensor_layer(a, ...@@ -4826,21 +4879,24 @@ def tensor_layer(a,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param a: Input layer a. :param a: The first input of this layer.
:type a: LayerOutput :type a: LayerOutput
:param b: input layer b. :param b: The second input of this layer.
:type b: LayerOutput :type b: LayerOutput
:param size: the layer dimension. :param size: The dimension of this layer.
:type size: int. :type size: int
:param act: Activation type. LinearActivation is the default. :param act: Activation type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The parameter attribute for bias. If this parameter is set to
whose type is not ParameterAttribute, no bias is defined. If the False or an object whose type is not ParameterAttribute,
parameter is set to True, the bias is initialized to zero. no bias is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4876,7 +4932,7 @@ def selective_fc_layer(input, ...@@ -4876,7 +4932,7 @@ def selective_fc_layer(input,
layer_attr=None): layer_attr=None):
""" """
Selective fully connected layer. Different from fc_layer, the output Selective fully connected layer. Different from fc_layer, the output
of this layer maybe sparse. It requires an additional input to indicate of this layer can be sparse. It requires an additional input to indicate
several selected columns for output. If the selected columns is not several selected columns for output. If the selected columns is not
specified, selective_fc_layer acts exactly like fc_layer. specified, selective_fc_layer acts exactly like fc_layer.
...@@ -4890,21 +4946,34 @@ def selective_fc_layer(input, ...@@ -4890,21 +4946,34 @@ def selective_fc_layer(input,
:type name: basestring :type name: basestring
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput | list | tuple :type input: LayerOutput | list | tuple
:param select: The select layer. The output of select layer should be a :param select: The layer to select columns to output. It should be a sparse
sparse binary matrix, and treat as the mask of selective fc. binary matrix, and is treated as the mask of selective fc. If
If is None, acts exactly like fc_layer. it is not set or set to None, selective_fc_layer acts exactly
like fc_layer.
:type select: LayerOutput :type select: LayerOutput
:param size: The layer dimension. :param size: The dimension of this layer, which should be equal to that of
the layer 'select'.
:type size: int :type size: int
:param act: Activation type. TanhActivation is the default. :param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param pass_generation: The flag which indicates whether it is during generation.
:type pass_generation: bool
:param has_selected_colums: The flag which indicates whether the parameter 'select'
has been set. True is the default.
:type has_selected_colums: bool
:param mul_ratio: A ratio that helps to judge how sparse the output is and determines
the computation method for speed consideration.
:type mul_ratio: float
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The parameter attribute for bias. If this parameter is set to
whose type is not ParameterAttribute, no bias is defined. If the False or an object whose type is not ParameterAttribute,
parameter is set to True, the bias is initialized to zero. no bias is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4955,7 +5024,7 @@ def selective_fc_layer(input, ...@@ -4955,7 +5024,7 @@ def selective_fc_layer(input,
@layer_support() @layer_support()
def sampling_id_layer(input, name=None, layer_attr=None): def sampling_id_layer(input, name=None, layer_attr=None):
""" """
A layer for sampling id from multinomial distribution from the input layer. A layer for sampling an id from the multinomial distribution given by the input layer.
Sampling one id for one sample. Sampling one id for one sample.
The simple usage is: The simple usage is:
...@@ -4968,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None): ...@@ -4968,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None):
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -4990,8 +5060,7 @@ def slope_intercept_layer(input, ...@@ -4990,8 +5060,7 @@ def slope_intercept_layer(input,
intercept=0.0, intercept=0.0,
layer_attr=None): layer_attr=None):
""" """
This layer for applying a slope and an intercept to the input This layer applies a slope and an intercept to the input.
element-wise. There is no activation and weight.
.. math:: .. math::
y = slope * x + intercept y = slope * x + intercept
...@@ -5006,12 +5075,13 @@ def slope_intercept_layer(input, ...@@ -5006,12 +5075,13 @@ def slope_intercept_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param slope: the scale factor. :param slope: The scale factor.
:type slope: float. :type slope: float
:param intercept: the offset. :param intercept: The offset.
:type intercept: float. :type intercept: float
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5066,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): ...@@ -5066,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
:type weights: LayerOutput :type weights: LayerOutput
:param vectors: The vector layer. :param vectors: The vector layer.
:type vectors: LayerOutput :type vectors: LayerOutput
:param size: the dimension of this layer. :param size: The dimension of this layer.
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5118,11 +5189,11 @@ def block_expand_layer(input, ...@@ -5118,11 +5189,11 @@ def block_expand_layer(input,
outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x
The expand method is the same with ExpandConvLayer, but saved the transposed The expanding method is the same as that of ExpandConvLayer, but saves the transposed
value. After expanding, output.sequenceStartPositions will store timeline. value. After expanding, output.sequenceStartPositions will store timeline.
The number of time steps are outputH * outputW and the dimension of each The number of time steps is outputH * outputW and the dimension of each
time step is block_y * block_x * num_channels. This layer can be used after time step is block_y * block_x * num_channels. This layer can be used after
convolution neural network, and before recurrent neural network. convolutional neural network, and before recurrent neural network.
The simple usage is: The simple usage is:
...@@ -5137,8 +5208,10 @@ def block_expand_layer(input, ...@@ -5137,8 +5208,10 @@ def block_expand_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. :param num_channels: The number of input channels. If the parameter is not set or
:type num_channels: int | None set to None, its actual value will be automatically set to
the channels number of the input.
:type num_channels: int
:param block_x: The width of sub block. :param block_x: The width of sub block.
:type block_x: int :type block_x: int
:param block_y: The height of sub block. :param block_y: The height of sub block.
...@@ -5152,9 +5225,10 @@ def block_expand_layer(input, ...@@ -5152,9 +5225,10 @@ def block_expand_layer(input,
:param padding_y: The padding size in vertical direction. :param padding_y: The padding size in vertical direction.
:type padding_y: int :type padding_y: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring. :type name: basestring.
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5184,12 +5258,19 @@ def block_expand_layer(input, ...@@ -5184,12 +5258,19 @@ def block_expand_layer(input,
@layer_support() @layer_support()
def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
""" """
A layer to do max out on conv layer output. A layer to do max out on convolutional layer output.
- Input: output of a conv layer. - Input: the output of a convolutional layer.
- Output: feature map size same as input. Channel is (input channel) / groups. - Output: feature map size same as the input's, and its channel number is
(input channel) / groups.
So groups should be larger than 1, and the num of channels should be able So groups should be larger than 1, and the num of channels should be able
to devided by groups. to be divided by groups.
Reference:
Maxout Networks
http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
https://arxiv.org/pdf/1312.6082v4.pdf
.. math:: .. math::
y_{si+j} = \max_k x_{gsi + sk + j} y_{si+j} = \max_k x_{gsi + sk + j}
...@@ -5199,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5199,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
0 \le j < s 0 \le j < s
0 \le k < groups 0 \le k < groups
Please refer to Paper:
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
The simple usage is: The simple usage is:
.. code-block:: python .. code-block:: python
...@@ -5215,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5215,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set :param num_channels: The number of input channels. If the parameter is not set or
automatically from previous output. set to None, its actual value will be automatically set to
:type num_channels: int | None the channels number of the input.
:type num_channels: int
:param groups: The group number of input layer. :param groups: The group number of input layer.
:type groups: int :type groups: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring. :type name: basestring
:param layer_attr: Extra Layer attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5254,20 +5331,20 @@ def ctc_layer(input, ...@@ -5254,20 +5331,20 @@ def ctc_layer(input,
layer_attr=None): layer_attr=None):
""" """
Connectionist Temporal Classification (CTC) is designed for temporal Connectionist Temporal Classification (CTC) is designed for temporal
classication task. That is, for sequence labeling problems where the classification task, e.g. sequence labeling problems where the
alignment between the inputs and the target labels is unknown. alignment between the inputs and the target labels is unknown.
More details can be found by referring to `Connectionist Temporal Reference:
Classification: Labelling Unsegmented Sequence Data with Recurrent Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ with Recurrent Neural Networks
icml2006_GravesFGS06.pdf>`_ http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note: Note:
Considering the 'blank' label needed by CTC, you need to use Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
(num_classes + 1) as the input size. num_classes is the category number. as the size of the input, where num_classes is the category number.
And the 'blank' is the last category index. So the size of 'input' layer, such as And the 'blank' is the last category index. So the size of 'input' layer (e.g.
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer fc_layer with softmax activation) should be (num_classes + 1). The size of
should also be num_classes + 1. ctc_layer should also be (num_classes + 1).
The example usage is: The example usage is:
...@@ -5280,16 +5357,17 @@ def ctc_layer(input, ...@@ -5280,16 +5357,17 @@ def ctc_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring | None :type name: basestring
:param norm_by_times: Whether to normalization by times. False by default. :param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5330,20 +5408,19 @@ def warp_ctc_layer(input, ...@@ -5330,20 +5408,19 @@ def warp_ctc_layer(input,
building process, PaddlePaddle will clone the source codes, build and building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory. install it to :code:`third_party/install/warpctc` directory.
More details of CTC can be found by referring to `Connectionist Temporal Reference:
Classification: Labelling Unsegmented Sequence Data with Recurrent Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/ with Recurrent Neural Networks
icml2006_GravesFGS06.pdf>`_. http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note: Note:
- Let num_classes represent the category number. Considering the 'blank' - Let num_classes represent the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the input size. label needed by CTC, you need to use (num_classes + 1) as the size of
Thus, the size of both warp_ctc layer and 'input' layer should be set to warp_ctc layer.
num_classes + 1.
- You can set 'blank' to any value ranged in [0, num_classes], which - You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels. should be consistent with those used in your labels.
- As a native 'softmax' activation is integrated into the warp-ctc library, - As a native 'softmax' activation is integrated into the warp-ctc library,
'linear' activation is expected instead in the 'input' layer. 'linear' activation is expected to be used instead in the 'input' layer.
The example usage is: The example usage is:
...@@ -5357,18 +5434,19 @@ def warp_ctc_layer(input, ...@@ -5357,18 +5434,19 @@ def warp_ctc_layer(input,
:param input: The input of this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring | None :type name: basestring
:param blank: the 'blank' label used in ctc :param blank: The 'blank' label used in ctc.
:type blank: int :type blank: int
:param norm_by_times: Whether to normalization by times. False by default. :param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5414,23 +5492,26 @@ def crf_layer(input, ...@@ -5414,23 +5492,26 @@ def crf_layer(input,
label=label, label=label,
size=label_dim) size=label_dim)
:param input: The first input layer is the feature. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The second input layer is label. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param size: The category number. :param size: The category number.
:type size: int :type size: int
:param weight: The third layer is "weight" of each sample, which is an :param weight: The weight layer defines a weight for each sample in the
optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param param_attr: Parameter attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5476,9 +5557,9 @@ def crf_decoding_layer(input, ...@@ -5476,9 +5557,9 @@ def crf_decoding_layer(input,
""" """
A layer for calculating the decoding sequence of sequential conditional A layer for calculating the decoding sequence of sequential conditional
random field model. The decoding sequence is stored in output.ids. random field model. The decoding sequence is stored in output.ids.
If a second input is provided, it is treated as the ground-truth label, and If the input 'label' is provided, it is treated as the ground-truth label, and
this layer will also calculate error. output.value[i] is 1 for incorrect this layer will also calculate error. output.value[i] is 1 for an incorrect
decoding or 0 for correct decoding. decoding and 0 for the correct.
The example usage is: The example usage is:
...@@ -5489,16 +5570,18 @@ def crf_decoding_layer(input, ...@@ -5489,16 +5570,18 @@ def crf_decoding_layer(input,
:param input: The first input layer. :param input: The first input layer.
:type input: LayerOutput :type input: LayerOutput
:param size: size of this layer. :param size: The dimension of this layer.
:type size: int :type size: int
:param label: None or ground-truth label. :param label: The input label.
:type label: LayerOutput or None :type label: LayerOutput | None
:param param_attr: Parameter attribute. None means default attribute :param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
:type layer_attr: ExtraLayerAttribute | None details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5545,8 +5628,7 @@ def nce_layer(input, ...@@ -5545,8 +5628,7 @@ def nce_layer(input,
bias_attr=None, bias_attr=None,
layer_attr=None): layer_attr=None):
""" """
Noise-contrastive estimation. This layer implements the method in the Noise-contrastive estimation.
following paper:
Reference: Reference:
A fast and simple algorithm for training neural probabilistic language A fast and simple algorithm for training neural probabilistic language
...@@ -5562,37 +5644,40 @@ def nce_layer(input, ...@@ -5562,37 +5644,40 @@ def nce_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layers. It should be a LayerOutput or a list/tuple :param input: The first input of this layer.
of LayerOutput.
:type input: LayerOutput | list | tuple | collections.Sequence :type input: LayerOutput | list | tuple | collections.Sequence
:param label: The ground truth. :param label: The input label.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight layer defines a weight for each sample in the :param weight: The weight layer defines a weight for each sample in the
mini-batch. The default value is None. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param num_classes: The class number. :param num_classes: The number of classes.
:type num_classes: int :type num_classes: int
:param param_attr: The parameter attributes. :param act: Activation type. SigmoidActivation is the default activation.
:type param_attr: ParameterAttribute|list :type act: BaseActivation
:param num_neg_samples: The number of sampled negative labels. The default :param param_attr: The parameter attribute. See ParameterAttribute for
value is 10. details.
:type param_attr: ParameterAttribute
:param num_neg_samples: The number of sampled negative labels. 10 is the
default value.
:type num_neg_samples: int :type num_neg_samples: int
:param neg_distribution: The discrete noisy distribution over the output :param neg_distribution: The discrete noisy distribution over the output
space from which num_neg_samples negative labels space from which num_neg_samples negative labels
are sampled. If this parameter is not set, a are sampled. If this parameter is not set, a
uniform distribution will be used. A user defined uniform distribution will be used. A user-defined
distribution is a list whose length must be equal distribution is a list whose length must be equal
to the num_classes. Each member of the list defines to the num_classes. Each member of the list defines
the probability of a class given input x. the probability of a class given input x.
:type neg_distribution: list | tuple | collections.Sequence | None :type neg_distribution: list | tuple | collections.Sequence | None
:param bias_attr: The attribute for bias. If this parameter is set False or :param bias_attr: The parameter attribute for bias. If this parameter is set to
any object whose type is not ParameterAttribute, no bias False or an object whose type is not ParameterAttribute,
is added. If this parameter is set True, the bias is no bias is defined. If this parameter is set to True,
initialized to zero. the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: The LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if isinstance(input, LayerOutput): if isinstance(input, LayerOutput):
...@@ -5659,11 +5744,11 @@ def rank_cost(left, ...@@ -5659,11 +5744,11 @@ def rank_cost(left,
coeff=1.0, coeff=1.0,
layer_attr=None): layer_attr=None):
""" """
A cost Layer for learning to rank using gradient descent. Details can refer A cost Layer for learning to rank using gradient descent.
to `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/
ICML_ranking.pdf>`_. Reference:
This layer contains at least three inputs. The weight is an optional Learning to Rank using Gradient Descent
argument, which affects the cost. http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf
.. math:: .. math::
...@@ -5694,14 +5779,16 @@ def rank_cost(left, ...@@ -5694,14 +5779,16 @@ def rank_cost(left,
:type right: LayerOutput :type right: LayerOutput
:param label: Label is 1 or 0, means positive order and reverse order. :param label: Label is 1 or 0, means positive order and reverse order.
:type label: LayerOutput :type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost. :param weight: The weight layer defines a weight for each sample in the
It is an optional argument. mini-batch. It is optional.
:type weight: LayerOutput :type weight: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5746,25 +5833,25 @@ def lambda_cost(input, ...@@ -5746,25 +5833,25 @@ def lambda_cost(input,
NDCG_num=8, NDCG_num=8,
max_sort_size=-1) max_sort_size=-1)
:param input: Samples of the same query should be loaded as sequence. :param input: The first input of this layer, which is often a list of
document samples of the same query, and whose type must be sequence.
:type input: LayerOutput :type input: LayerOutput
:param score: The 2nd input. Score of each sample. :param score: The scores of the samples.
:type score: LayerOutput :type score: LayerOutput
:param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain), :param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain),
e.g., 5 for NDCG@5. It must be less than or equal to the e.g., 5 for NDCG@5. It must be less than or equal to the
minimum size of lists. minimum size of the list.
:type NDCG_num: int :type NDCG_num: int
:param max_sort_size: The size of partial sorting in calculating gradient. :param max_sort_size: The size of partial sorting in calculating gradient. If
If max_sort_size = -1, then for each list, the max_sort_size is equal to -1 or greater than the number
algorithm will sort the entire list to get gradient. of the samples in the list, then the algorithm will sort
In other cases, max_sort_size must be greater than or the entire list to compute the gradient. In other cases,
equal to NDCG_num. And if max_sort_size is greater max_sort_size must be greater than or equal to NDCG_num.
than the size of a list, the algorithm will sort the
entire list of get gradient.
:type max_sort_size: int :type max_sort_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None | basestring :type name: basestring
:param layer_attr: Extra Layer Attribute. :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -5809,11 +5896,10 @@ def cross_entropy(input, ...@@ -5809,11 +5896,10 @@ def cross_entropy(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param weight: The cost of each sample is multiplied with each weight. :param weight: The weight layer defines a weight for each sample in the
The weight should be a layer with size=1. Note that gradient mini-batch. It is optional.
will not be calculated for weight.
:type weight: LayerOutout :type weight: LayerOutout
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -5858,7 +5944,7 @@ def cross_entropy_with_selfnorm(input, ...@@ -5858,7 +5944,7 @@ def cross_entropy_with_selfnorm(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param softmax_selfnorm_alpha: The scale factor affects the cost. :param softmax_selfnorm_alpha: The scale factor affects the cost.
:type softmax_selfnorm_alpha: float :type softmax_selfnorm_alpha: float
...@@ -5948,7 +6034,7 @@ def huber_regression_cost(input, ...@@ -5948,7 +6034,7 @@ def huber_regression_cost(input,
:param delta: The difference between the observed and predicted values. :param delta: The difference between the observed and predicted values.
:type delta: float :type delta: float
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -5998,7 +6084,7 @@ def huber_classification_cost(input, ...@@ -5998,7 +6084,7 @@ def huber_classification_cost(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6043,7 +6129,7 @@ def multi_binary_label_cross_entropy(input, ...@@ -6043,7 +6129,7 @@ def multi_binary_label_cross_entropy(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6214,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -6214,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param coeff: The weight of the gradient in the back propagation. :param coeff: The weight of the gradient in the back propagation.
1.0 is the default. 1.0 is the default value.
:type coeff: float :type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details. details.
...@@ -6366,7 +6452,7 @@ def row_conv_layer(input, ...@@ -6366,7 +6452,7 @@ def row_conv_layer(input,
:param context_len: The context length equals the lookahead step number :param context_len: The context length equals the lookahead step number
plus one. plus one.
:type context_len: int :type context_len: int
:param act: Activation Type. LinearActivation is the default. :param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The parameter attribute. See ParameterAttribute for :param param_attr: The parameter attribute. See ParameterAttribute for
details. details.
...@@ -6488,7 +6574,8 @@ def gated_unit_layer(input, ...@@ -6488,7 +6574,8 @@ def gated_unit_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param size: The dimension of this layer's output. :param size: The dimension of this layer's output.
:type size: int :type size: int
:param act: Activation type of the projection. LinearActivation is the default. :param act: Activation type of the projection. LinearActivation is the default
activation.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -6498,9 +6585,9 @@ def gated_unit_layer(input, ...@@ -6498,9 +6585,9 @@ def gated_unit_layer(input,
:param gate_param_attr: The parameter attribute of the gate. See ParameterAttribute :param gate_param_attr: The parameter attribute of the gate. See ParameterAttribute
for details. for details.
:type gate_param_attr: ParameterAttribute :type gate_param_attr: ParameterAttribute
:param gate_bias_attr: The bias attribute of the gate. If the parameter is set to False or :param gate_bias_attr: The bias attribute of the gate. If this parameter is set to False or
an object whose type is not ParameterAttribute, no bias is defined. an object whose type is not ParameterAttribute, no bias is defined.
If the parameter is set to True, the bias is initialized to zero. If this parameter is set to True, the bias is initialized to zero.
:type gate_bias_attr: ParameterAttribute | bool | None | Any :type gate_bias_attr: ParameterAttribute | bool | None | Any
:param inproj_attr: Extra layer attributes of the projection. See ExtraLayerAttribute for :param inproj_attr: Extra layer attributes of the projection. See ExtraLayerAttribute for
details. details.
...@@ -6508,9 +6595,9 @@ def gated_unit_layer(input, ...@@ -6508,9 +6595,9 @@ def gated_unit_layer(input,
:param inproj_param_attr: The parameter attribute of the projection. See ParameterAttribute :param inproj_param_attr: The parameter attribute of the projection. See ParameterAttribute
for details. for details.
:type inproj_param_attr: ParameterAttribute :type inproj_param_attr: ParameterAttribute
:param inproj_bias_attr: The bias attribute of the projection. If the parameter is set to False :param inproj_bias_attr: The bias attribute of the projection. If this parameter is set to False
or an object whose type is not ParameterAttribute, no bias is defined. or an object whose type is not ParameterAttribute, no bias is defined.
If the parameter is set to True, the bias is initialized to zero. If this parameter is set to True, the bias is initialized to zero.
:type inproj_bias_attr: ParameterAttribute | bool | None | Any :type inproj_bias_attr: ParameterAttribute | bool | None | Any
:param layer_attr: Extra layer attribute of the product. See ExtraLayerAttribute for :param layer_attr: Extra layer attribute of the product. See ExtraLayerAttribute for
details. details.
...@@ -6869,7 +6956,7 @@ def img_conv3d_layer(input, ...@@ -6869,7 +6956,7 @@ def img_conv3d_layer(input,
:type filter_size: int | tuple | list :type filter_size: int | tuple | list
:param num_filters: The number of filters in each group. :param num_filters: The number of filters in each group.
:type num_filters: int :type num_filters: int
:param act: Activation type. ReluActivation is the default. :param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation :type act: BaseActivation
:param groups: The number of the filter groups. :param groups: The number of the filter groups.
:type groups: int :type groups: int
...@@ -6884,8 +6971,8 @@ def img_conv3d_layer(input, ...@@ -6884,8 +6971,8 @@ def img_conv3d_layer(input,
parameter is set to True, the bias is initialized to zero. parameter is set to True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any :type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: The number of input channels. If the parameter is not set or :param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to set to None, its actual value will be automatically set to
the channels number of the input . the channels number of the input.
:type num_channels: int :type num_channels: int
:param param_attr: The parameter attribute of the convolution. See ParameterAttribute for :param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details. details.
...@@ -7061,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None): ...@@ -7061,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None):
:type offsets: LayerOutput :type offsets: LayerOutput
:param sizes: The sizes of the sub-sequences, which should be sequence type. :param sizes: The sizes of the sub-sequences, which should be sequence type.
:type sizes: LayerOutput :type sizes: LayerOutput
:param act: Activation type, LinearActivation is the default. :param act: Activation type, LinearActivation is the default activation.
:type act: BaseActivation. :type act: BaseActivation.
:param bias_attr: The bias attribute. If the parameter is set to False or an object :param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the whose type is not ParameterAttribute, no bias is defined. If the
......
...@@ -681,34 +681,42 @@ def lstmemory_unit(input, ...@@ -681,34 +681,42 @@ def lstmemory_unit(input,
state_act=TanhActivation()) state_act=TanhActivation())
:param input: input layer. :param input: Input layer.
:type input: LayerOutput :type input: LayerOutput
:param out_memory: output of previous time step :param out_memory: The output of previous time step.
:type out_memory: LayerOutput | None :type out_memory: LayerOutput | None
:param name: lstmemory unit name. :param name: The lstmemory unit name.
:type name: basestring :type name: basestring
:param size: lstmemory unit size. :param size: The lstmemory unit size.
:type size: int :type size: int
:param param_attr: parameter attribute, None means default attribute. :param param_attr: The parameter attribute for the weights in
input to hidden projection.
None means default attribute.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param act: last activiation type of lstm. :param act: The last activation type of lstm.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activiation type of lstm. :param gate_act: The gate activation type of lstm.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: state activiation type of lstm. :param state_act: The state activation type of lstm.
:type state_act: BaseActivation :type state_act: BaseActivation
:param input_proj_bias_attr: bias attribute for input to hidden projection. :param input_proj_bias_attr: The parameter attribute for the bias in
False means no bias, None means default bias. input to hidden projection.
:type input_proj_bias_attr: ParameterAttribute|False|None False or None means no bias.
:param input_proj_layer_attr: extra layer attribute for input to hidden If this parameter is set to True,
projection of the LSTM unit, such as dropout, error clipping. the bias is initialized to zero.
:type input_proj_bias_attr: ParameterAttribute|bool|None
:param input_proj_layer_attr: The extra layer attribute for
input to hidden projection of the LSTM unit,
such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute :type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_bias_attr: bias parameter attribute of lstm layer. :param lstm_bias_attr: The parameter attribute for the bias in lstm layer.
False means no bias, None means default bias. False or None means no bias.
:type lstm_bias_attr: ParameterAttribute|False|None If this parameter is set to True,
:param lstm_layer_attr: extra attribute of lstm layer. the bias is initialized to zero.
:type lstm_bias_attr: ParameterAttribute|bool|None
:param lstm_layer_attr: The extra attribute of lstm layer.
:type lstm_layer_attr: ExtraLayerAttribute :type lstm_layer_attr: ExtraLayerAttribute
:return: lstmemory unit name. :return: The lstmemory unit name.
:rtype: LayerOutput :rtype: LayerOutput
""" """
if size is None: if size is None:
...@@ -786,34 +794,42 @@ def lstmemory_group(input, ...@@ -786,34 +794,42 @@ def lstmemory_group(input,
gate_act=SigmoidActivation(), gate_act=SigmoidActivation(),
state_act=TanhActivation()) state_act=TanhActivation())
:param input: input layer. :param input: Input layer.
:type input: LayerOutput :type input: LayerOutput
:param size: lstmemory group size. :param size: The lstmemory group size.
:type size: int :type size: int
:param name: name of lstmemory group. :param name: The name of lstmemory group.
:type name: basestring :type name: basestring
:param out_memory: output of previous time step. :param out_memory: The output of previous time step.
:type out_memory: LayerOutput | None :type out_memory: LayerOutput | None
:param reverse: process the input in a reverse order or not. :param reverse: Process the input in a reverse order or not.
:type reverse: bool :type reverse: bool
:param param_attr: parameter attribute, None means default attribute. :param param_attr: The parameter attribute for the weights in
input to hidden projection.
None means default attribute.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param act: last activiation type of lstm. :param act: The last activation type of lstm.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activiation type of lstm. :param gate_act: The gate activation type of lstm.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: state activiation type of lstm. :param state_act: The state activation type of lstm.
:type state_act: BaseActivation :type state_act: BaseActivation
:param lstm_bias_attr: bias parameter attribute of lstm layer. :param input_proj_bias_attr: The parameter attribute for the bias in
False means no bias, None means default bias. input to hidden projection.
:type lstm_bias_attr: ParameterAttribute|False|None False or None means no bias.
:param input_proj_bias_attr: bias attribute for input to hidden projection. If this parameter is set to True,
False means no bias, None means default bias. the bias is initialized to zero.
:type input_proj_bias_attr: ParameterAttribute|False|None :type input_proj_bias_attr: ParameterAttribute|bool|None
:param input_proj_layer_attr: extra layer attribute for input to hidden :param input_proj_layer_attr: The extra layer attribute for
projection of the LSTM unit, such as dropout, error clipping. input to hidden projection of the LSTM unit,
such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute :type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute. :param lstm_bias_attr: The parameter attribute for the bias in lstm layer.
False or None means no bias.
If this parameter is set to True,
the bias is initialized to zero.
:type lstm_bias_attr: ParameterAttribute|bool|None
:param lstm_layer_attr: The extra attribute of lstm layer.
:type lstm_layer_attr: ExtraLayerAttribute :type lstm_layer_attr: ExtraLayerAttribute
:return: the lstmemory group. :return: the lstmemory group.
:rtype: LayerOutput :rtype: LayerOutput
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
""" """
__all__ = [ __all__ = [
"BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling", "BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling",
"CudnnAvgPooling", "SumPooling", "SquareRootNPooling" "CudnnMaxPooling", "CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
] ]
...@@ -55,6 +55,19 @@ class MaxPooling(BasePoolingType): ...@@ -55,6 +55,19 @@ class MaxPooling(BasePoolingType):
self.output_max_index = output_max_index self.output_max_index = output_max_index
class MaxWithMaskPooling(BasePoolingType):
"""
MaxWithMask pooling.
Return not only the maximum values for each dimension in the sequence or time steps,
but also the location indices of the found maximum values.
"""
def __init__(self):
BasePoolingType.__init__(self, "max-pool-with-mask")
class CudnnMaxPooling(BasePoolingType): class CudnnMaxPooling(BasePoolingType):
""" """
Cudnn max pooling only supports GPU. Return the maximum value in the Cudnn max pooling only supports GPU. Return the maximum value in the
......
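A hypothetical usage sketch for the new `MaxWithMaskPooling` type (it assumes a pooling layer such as `img_pool_layer` accepts it as `pool_type`, and `conv_out` stands in for the output of an earlier convolution layer; neither is part of this diff):

```python
from paddle.trainer_config_helpers import img_pool_layer
from paddle.trainer_config_helpers.poolings import MaxWithMaskPooling

# conv_out: assumed output of a preceding conv layer (hypothetical name)
pool = img_pool_layer(
    input=conv_out,
    pool_size=2,
    stride=2,
    pool_type=MaxWithMaskPooling())  # keeps the indices of the max values as well
```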
...@@ -28,6 +28,8 @@ layers { ...@@ -28,6 +28,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 227 output_y: 227
img_size_y: 256 img_size_y: 256
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -28,6 +28,8 @@ layers { ...@@ -28,6 +28,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 227 output_y: 227
img_size_y: 256 img_size_y: 256
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -28,6 +28,8 @@ layers { ...@@ -28,6 +28,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 48 output_y: 48
img_size_y: 48 img_size_y: 48
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -30,6 +30,8 @@ layers { ...@@ -30,6 +30,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 48 output_y: 48
img_size_y: 48 img_size_y: 48
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
...@@ -105,6 +107,8 @@ layers { ...@@ -105,6 +107,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 24 output_y: 24
img_size_y: 24 img_size_y: 24
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_1__.wbias" bias_parameter_name: "___conv_1__.wbias"
......
...@@ -30,6 +30,8 @@ layers { ...@@ -30,6 +30,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 48 output_y: 48
img_size_y: 48 img_size_y: 48
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -36,6 +36,8 @@ layers { ...@@ -36,6 +36,8 @@ layers {
stride_y: 1 stride_y: 1
output_y: 14 output_y: 14
img_size_y: 14 img_size_y: 14
dilation: 1
dilation_y: 1
} }
} }
bias_parameter_name: "___conv_0__.wbias" bias_parameter_name: "___conv_0__.wbias"
......
...@@ -37,6 +37,8 @@ import model ...@@ -37,6 +37,8 @@ import model
import paddle.trainer.config_parser as cp import paddle.trainer.config_parser as cp
__all__ = [ __all__ = [
'default_startup_program',
'default_main_program',
'optimizer', 'optimizer',
'layer', 'layer',
'activation', 'activation',
......
from paddle.v2.framework import framework as framework from paddle.v2.fluid import framework as framework
__all__ = ['append_backward_ops'] __all__ = ['append_backward_ops']
......
...@@ -13,7 +13,7 @@ A `scoped_function` will take a `function` as input. That function will be ...@@ -13,7 +13,7 @@ A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope. invoked in a new local scope.
""" """
import paddle.v2.framework.core import paddle.v2.fluid.core
import threading import threading
__tl_scope__ = threading.local() __tl_scope__ = threading.local()
...@@ -27,13 +27,13 @@ __all__ = [ ...@@ -27,13 +27,13 @@ __all__ = [
def get_cur_scope(): def get_cur_scope():
""" """
Get current scope. Get current scope.
:rtype: paddle.v2.framework.core.Scope :rtype: paddle.v2.fluid.core.Scope
""" """
cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None) cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None)
if cur_scope_stack is None: if cur_scope_stack is None:
__tl_scope__.cur_scope = list() __tl_scope__.cur_scope = list()
if len(__tl_scope__.cur_scope) == 0: if len(__tl_scope__.cur_scope) == 0:
__tl_scope__.cur_scope.append(paddle.v2.framework.core.Scope()) __tl_scope__.cur_scope.append(paddle.v2.fluid.core.Scope())
return __tl_scope__.cur_scope[-1] return __tl_scope__.cur_scope[-1]
......
import numpy as np
from paddle.v2.fluid.framework import Program, g_main_program, unique_name, Variable
import paddle.v2.fluid.core as core
def _clone_var_in_block_(block, var):
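# Create a persistable copy of `var` inside `block`, keeping its name, shape, dtype, type and LoD level.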
assert isinstance(var, Variable)
return block.create_var(
name=var.name,
shape=var.shape,
dtype=var.data_type,
type=var.type,
lod_level=var.lod_level,
persistable=True)
class Evaluator(object):
"""
Evaluator Base class.
create metric states
add mini-batch evaluator calculate operators
add increment operators to accumulate the metric states
"""
def __init__(self, name, **kwargs):
"""
Initialize the global states.
"""
self._states = {}
if kwargs.has_key("main_program"):
self._main_program = kwargs.get("main_program")
else:
self._main_program = g_main_program
def _update_ops(self, *args, **kwargs):
"""
append update ops to the global states
"""
raise NotImplementedError()
def reset(self, executor, reset_program=None):
"""
Clear metric states at the beginning of each pass or user-specified batch interval.
"""
if reset_program is None:
reset_program = Program()
block = reset_program.global_block()
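# Zero every metric state: fill a temporary tensor with zeros of the state's shape, then copy it into the state variable with a scale op.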
for k, var in self._states.iteritems():
g_var = _clone_var_in_block_(block, var)
zeros = block.create_var(dtype="float32", persistable=True)
block.append_op(
type="fill_constant",
outputs={"Out": [zeros]},
attrs={
"shape": g_var.shape,
"value": .0,
"data_type": 5,
})
block.append_op(
type="scale", inputs={"X": zeros}, outputs={"Out": g_var})
executor.run(reset_program, fetch_list=self._states.values())
def eval(self, executor, eval_program=None):
"""
Merge the mini-batch statistics to form the evaluation result for multiple mini-batches.
"""
raise NotImplementedError()
class Accuracy(Evaluator):
"""
Accuracy needs two state variables: Total and Correct.
"""
def __init__(self, *args, **kwargs):
super(Accuracy, self).__init__("accuracy", **kwargs)
block = self._main_program.global_block()
g_total = block.create_var(
name=unique_name("Total"),
persistable=True,
dtype="int64",
shape=[1])
g_correct = block.create_var(
name=unique_name("Correct"),
persistable=True,
dtype="int64",
shape=[1])
self._states["Total"] = g_total
self._states["Correct"] = g_correct
def _update_ops(self, input, label, k=1, **kwargs):
block = self._main_program.global_block()
topk_out = block.create_var(dtype=input.data_type)
topk_indices = block.create_var(dtype="int64")
block.append_op(
type="top_k",
inputs={"X": [input]},
outputs={"Out": [topk_out],
"Indices": [topk_indices]},
attrs={"k": k})
acc_out = block.create_var(dtype=kwargs.get("out_dtype", "float32"))
correct = block.create_var(dtype="int64", persistable=True)
total = block.create_var(dtype="int64", persistable=True)
block.append_op(
type="accuracy",
inputs={
"Out": [topk_out],
"Indices": [topk_indices],
"Label": [label]
},
outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
block.append_op(
type="cast",
inputs={"X": [self._states["Total"]]},
outputs={"Out": [self._states["Total"]]},
attrs={
"in_data_type": 5, # float32
"out_data_type": 2, #int32
})
block.append_op(
type="cast",
inputs={"X": [self._states["Correct"]]},
outputs={"Out": [self._states["Correct"]]},
attrs={
"in_data_type": 5,
"out_data_type": 2,
})
block.append_op(
type="elementwise_add",
inputs={"X": [self._states["Total"]],
"Y": [total]},
outputs={"Out": [self._states["Total"]]})
block.append_op(
type="elementwise_add",
inputs={"X": [self._states["Correct"]],
"Y": [correct]},
outputs={"Out": [self._states["Correct"]]})
return acc_out
def eval(self, executor, eval_program=None):
if eval_program is None:
eval_program = Program()
block = eval_program.global_block()
eval_out = block.create_var(dtype=self._states["Total"].data_type)
e_total = _clone_var_in_block_(block, self._states["Total"])
e_correct = _clone_var_in_block_(block, self._states["Correct"])
block.append_op(
type="cast",
inputs={"X": [e_total]},
outputs={"Out": [e_total]},
attrs={
"in_data_type": 2, #int32
"out_data_type": 5, #float32
})
block.append_op(
type="cast",
inputs={"X": [e_correct]},
outputs={"Out": [e_correct]},
attrs={
"in_data_type": 2,
"out_data_type": 5,
})
block.append_op(
type="elementwise_div",
inputs={"X": e_correct,
"Y": e_total},
outputs={"Out": eval_out})
out = executor.run(eval_program, fetch_list=[eval_out])
return np.array(out[0])
def accuracy(*args, **kwargs):
cls = Accuracy(*args, **kwargs)
out = cls._update_ops(*args, **kwargs)
return cls, out
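A minimal usage sketch of the accuracy evaluator (hedged: `prediction`, `label`, `train_reader`, `feed_data` and the `evaluator` module path are assumed names, not part of this diff):

```python
import paddle.v2.fluid.core as core
import paddle.v2.fluid.evaluator as evaluator   # assumed module path for this file
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor

# prediction/label: Variables built earlier with layers.fc / layers.data (assumed)
accuracy_evaluator, batch_acc = evaluator.accuracy(input=prediction, label=label, k=1)

exe = Executor(core.CPUPlace())
exe.run(framework.default_startup_program())

for pass_id in range(10):
    accuracy_evaluator.reset(exe)                    # zero the Total/Correct states
    for data in train_reader():                      # train_reader: assumed reader
        exe.run(framework.default_main_program(),
                feed=feed_data(data),                # feed_data: assumed helper
                fetch_list=[batch_acc])              # mini-batch accuracy
    pass_acc = accuracy_evaluator.eval(exe)          # accumulated accuracy of the pass
```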
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.framework import Block, Program, g_main_program from paddle.v2.fluid.framework import Block, Program, g_main_program
g_scope = core.Scope() g_scope = core.Scope()
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
import collections import collections
import numpy as np import numpy as np
import copy import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator'] __all__ = ['Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'default_main_program']
def unique_name(prefix): def unique_name(prefix):
...@@ -562,3 +562,9 @@ class Parameter(Variable): ...@@ -562,3 +562,9 @@ class Parameter(Variable):
# program is a global instance. # program is a global instance.
g_main_program = Program() g_main_program = Program()
g_startup_program = Program() g_startup_program = Program()
def default_startup_program():
return g_startup_program
def default_main_program():
return g_main_program
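A small sketch of the two new helpers (layers built without an explicit program are appended to these globals):

```python
import paddle.v2.fluid.framework as framework

startup = framework.default_startup_program()   # global startup program
main = framework.default_main_program()         # global main program
assert isinstance(main, framework.Program)
```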
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import numpy as np import numpy as np
__all__ = [ __all__ = [
......
import os import os
import cPickle as pickle import cPickle as pickle
from paddle.v2.framework.framework import Program, Parameter, g_main_program, \ from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \
Variable Variable
__all__ = [ __all__ = [
......
import copy import copy
import itertools import itertools
from paddle.v2.framework.framework import Variable, g_main_program, \ from paddle.v2.fluid.framework import Variable, g_main_program, \
g_startup_program, unique_name, Program g_startup_program, unique_name, Program
from paddle.v2.framework.initializer import ConstantInitializer, \ from paddle.v2.fluid.initializer import ConstantInitializer, \
UniformInitializer, XavierInitializer UniformInitializer, XavierInitializer
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \ from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \
Operator Operator
from paddle.v2.framework.initializer import ConstantInitializer, \ from paddle.v2.fluid.initializer import ConstantInitializer, \
NormalInitializer NormalInitializer
from paddle.v2.framework.layer_helper import LayerHelper, unique_name from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
import re import re
import cStringIO import cStringIO
...@@ -574,7 +574,9 @@ def accuracy(input, label, k=1, **kwargs): ...@@ -574,7 +574,9 @@ def accuracy(input, label, k=1, **kwargs):
"Indices": [topk_indices]}, "Indices": [topk_indices]},
attrs={"k": k}) attrs={"k": k})
acc_out_dtype = kwargs.get("out_dtype", "float32") acc_out_dtype = kwargs.get("out_dtype", "float32")
acc_out = helper.create_tmp_variable(dtype=acc_out_dtype) acc_out = helper.create_tmp_variable(dtype="float32")
correct = helper.create_tmp_variable(dtype="int64")
total = helper.create_tmp_variable(dtype="int64")
helper.append_op( helper.append_op(
type="accuracy", type="accuracy",
inputs={ inputs={
...@@ -582,7 +584,11 @@ def accuracy(input, label, k=1, **kwargs): ...@@ -582,7 +584,11 @@ def accuracy(input, label, k=1, **kwargs):
"Indices": [topk_indices], "Indices": [topk_indices],
"Label": [label] "Label": [label]
}, },
outputs={"Accuracy": [acc_out]}) outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
return acc_out return acc_out
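A brief, hedged sketch of calling the updated `accuracy` layer (`predict` and `label` are hypothetical Variables assumed to exist):

```python
import paddle.v2.fluid.layers as layers

# predict: class scores from an fc/softmax layer; label: int64 ground-truth ids (assumed)
batch_acc = layers.accuracy(input=predict, label=label, k=5)  # top-5 mini-batch accuracy
```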
...@@ -839,6 +845,23 @@ def batch_norm(input, ...@@ -839,6 +845,23 @@ def batch_norm(input,
return helper.append_activation(batch_norm_out) return helper.append_activation(batch_norm_out)
def beam_search_decode(ids, scores, main_program=None, startup_program=None):
helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_tmp_variable(dtype=ids.data_type)
sentence_scores = helper.create_tmp_variable(dtype=ids.data_type)
helper.append_op(
type="beam_search_decode",
inputs={"Ids": ids,
"Scores": scores},
outputs={
"SentenceIds": sentence_ids,
"SentenceScores": sentence_scores
})
return sentence_ids, sentence_scores
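A hedged sketch of wiring the new `beam_search_decode` layer; `ids` and `scores` are assumed to be LoD tensor Variables produced by an earlier beam-search step and are not defined here:

```python
import paddle.v2.fluid.layers as layers

# ids/scores: hypothetical Variables holding per-step beam ids and scores
sentence_ids, sentence_scores = layers.beam_search_decode(ids=ids, scores=scores)
```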
class BlockGuard(object): class BlockGuard(object):
""" """
BlockGuard class. BlockGuard class.
......
...@@ -3,8 +3,8 @@ import json ...@@ -3,8 +3,8 @@ import json
import logging import logging
from collections import defaultdict from collections import defaultdict
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
__all__ = ["simple_img_conv_pool", "sequence_conv_pool"] __all__ = ["simple_img_conv_pool", "sequence_conv_pool"]
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
def get_all_op_protos(): def get_all_op_protos():
......
from collections import defaultdict from collections import defaultdict
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import unique_name, Program from paddle.v2.fluid.framework import unique_name, Program
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.initializer import ConstantInitializer from paddle.v2.fluid.initializer import ConstantInitializer
from paddle.v2.framework.regularizer import append_regularization_ops from paddle.v2.fluid.regularizer import append_regularization_ops
from paddle.v2.framework.layer_helper import LayerHelper from paddle.v2.fluid.layer_helper import LayerHelper
__all__ = [ __all__ = [
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
......
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
__all__ = [ __all__ = [
'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer' 'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer'
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.io import save_persistables, load_persistables
from paddle.v2.framework.io import save_persistables, load_persistables from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
x = layers.data( x = layers.data(
name='x', name='x',
shape=[13], shape=[13],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
y_predict = layers.fc(input=x, y_predict = layers.fc(input=x,
size=1, size=1,
act=None, act=None)
main_program=main_program,
startup_program=startup_program)
y = layers.data( y = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, input=y_predict,
label=y, label=y)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20 BATCH_SIZE = 20
...@@ -52,12 +40,12 @@ train_reader = paddle.batch( ...@@ -52,12 +40,12 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", main_program=main_program) save_persistables(exe, "./fit_a_line.model/")
load_persistables(exe, "./fit_a_line.model/", main_program=main_program) load_persistables(exe, "./fit_a_line.model/")
for data in train_reader(): for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32") x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("float32")
...@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM): ...@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
# print tensor_y.get_dims() # print tensor_y.get_dims()
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost])
......
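Read together, the fit_a_line changes above drop the explicit `main_program`/`startup_program` plumbing in favor of the framework's default programs. Below is a minimal, hypothetical end-to-end sketch of the post-change flow; it assumes `paddle.v2.fluid` is importable and feeds one synthetic mini-batch in place of the elided `train_reader`.

```python
import numpy as np
import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor

# Layers no longer take main_program/startup_program; they append their ops
# to the default programs maintained by the framework module.
x = layers.data(name='x', shape=[13], data_type='float32')
y = layers.data(name='y', shape=[1], data_type='float32')
y_predict = layers.fc(input=x, size=1, act=None)
cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(x=cost)

sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)  # no startup_program argument any more

place = core.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())

# One synthetic mini-batch standing in for the elided train_reader.
x_data = np.random.random((20, 13)).astype("float32")
y_data = np.random.random((20, 1)).astype("float32")
tensor_x, tensor_y = core.LoDTensor(), core.LoDTensor()
tensor_x.set(x_data, place)
tensor_y.set(y_data, place)

outs = exe.run(framework.default_main_program(),
               feed={'x': tensor_x, 'y': tensor_y},
               fetch_list=[avg_cost])
print np.array(outs[0])
```

Every call in the sketch appears in the right-hand column of the diff above; only the random input data is new.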
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import g_startup_program, g_main_program import paddle.v2.fluid.framework as framework
from paddle.v2.framework.initializer import XavierInitializer from paddle.v2.fluid.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): def resnet_cifar10(input, depth=32):
def conv_bn_layer(input, def conv_bn_layer(input,
ch_out, ch_out,
filter_size, filter_size,
stride, stride,
padding, padding,
act='relu', act='relu'):
main_program=None,
startup_program=None):
tmp = layers.conv2d( tmp = layers.conv2d(
input=input, input=input,
filter_size=filter_size, filter_size=filter_size,
...@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
stride=stride, stride=stride,
padding=padding, padding=padding,
act=None, act=None,
bias_attr=False, bias_attr=False)
main_program=main_program,
startup_program=startup_program)
return layers.batch_norm( return layers.batch_norm(
input=tmp, input=tmp,
act=act, act=act)
main_program=main_program,
startup_program=startup_program)
def shortcut(input, ch_in, ch_out, stride, program, init_program): def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out: if ch_in != ch_out:
...@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def basicblock(input, def basicblock(input,
ch_in, ch_in,
ch_out, ch_out,
stride, stride):
main_program=main_program,
startup_program=startup_program):
tmp = conv_bn_layer( tmp = conv_bn_layer(
input, input,
ch_out, ch_out,
3, 3,
stride, stride,
1, 1)
main_program=main_program,
startup_program=startup_program)
tmp = conv_bn_layer( tmp = conv_bn_layer(
tmp, tmp,
ch_out, ch_out,
3, 3,
1, 1,
1, 1,
act=None, act=None)
main_program=main_program, short = shortcut(input, ch_in, ch_out, stride)
startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
return layers.elementwise_add( return layers.elementwise_add(
x=tmp, x=tmp,
y=short, y=short,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program, def layer_warp(block_func, input, ch_in, ch_out, count, stride):
startup_program): tmp = block_func(input, ch_in, ch_out, stride)
tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
for i in range(1, count): for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program) tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp return tmp
assert (depth - 2) % 6 == 0 assert (depth - 2) % 6 == 0
...@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): ...@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
ch_out=16, ch_out=16,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1, padding=1)
main_program=main_program,
startup_program=startup_program)
res1 = layer_warp( res1 = layer_warp(
basicblock, basicblock,
conv1, conv1,
16, 16,
16, 16,
n, n,
1, 1)
main_program=main_program,
startup_program=startup_program)
res2 = layer_warp( res2 = layer_warp(
basicblock, basicblock,
res1, res1,
16, 16,
32, 32,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
res3 = layer_warp( res3 = layer_warp(
basicblock, basicblock,
res2, res2,
32, 32,
64, 64,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
pool = layers.pool2d( pool = layers.pool2d(
input=res3, input=res3,
pool_size=8, pool_size=8,
pool_type='avg', pool_type='avg',
pool_stride=1, pool_stride=1)
main_program=main_program,
startup_program=startup_program)
return pool return pool
def vgg16_bn_drop(input, main_program=None, startup_program=None): def vgg16_bn_drop(input):
def conv_block(input, def conv_block(input,
num_filter, num_filter,
groups, groups,
dropouts, dropouts):
main_program=None,
startup_program=None):
return nets.img_conv_group( return nets.img_conv_group(
input=input, input=input,
pool_size=2, pool_size=2,
...@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None): ...@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None):
conv_act='relu', conv_act='relu',
conv_with_batchnorm=True, conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts, conv_batchnorm_drop_rate=dropouts,
pool_type='max', pool_type='max')
main_program=main_program,
startup_program=startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program) conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program) conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program, conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
startup_program) conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program, conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
drop = layers.dropout( drop = layers.dropout(
x=conv5, x=conv5,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc1 = layers.fc(input=drop, fc1 = layers.fc(input=drop,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
reshape1 = layers.reshape( reshape1 = layers.reshape(
x=fc1, x=fc1,
shape=list(fc1.shape + (1, 1)), shape=list(fc1.shape + (1, 1)))
main_program=main_program,
startup_program=startup_program)
bn = layers.batch_norm( bn = layers.batch_norm(
input=reshape1, input=reshape1,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
drop2 = layers.dropout( drop2 = layers.dropout(
x=bn, x=bn,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc2 = layers.fc(input=drop2, fc2 = layers.fc(input=drop2,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
return fc2 return fc2
...@@ -225,7 +180,7 @@ train_reader = paddle.batch( ...@@ -225,7 +180,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
batch_id = 0 batch_id = 0
...@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM): ...@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
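One detail worth isolating from the vgg16_bn_drop changes above is how a weight initializer is now passed straight through `param_attr`, again without any program arguments. A small, hypothetical fragment (the flat `pixel` input is only a stand-in for the real feature map):

```python
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.initializer import XavierInitializer

# A plain data layer standing in for the dropout output used above.
images = layers.data(name='pixel', shape=[784], data_type='float32')

# The fully connected layer takes its weight initializer via param_attr.
fc1 = layers.fc(input=images,
                size=512,
                act=None,
                param_attr={"initializer": XavierInitializer()})
```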
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.evaluator as evaluator
from paddle.v2.framework.framework import Program import paddle.v2.fluid.framework as framework
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[1, 28, 28], shape=[1, 28, 28],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='label', name='label',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
conv_pool_1 = nets.simple_img_conv_pool( conv_pool_1 = nets.simple_img_conv_pool(
input=images, input=images,
filter_size=5, filter_size=5,
num_filters=20, num_filters=20,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
conv_pool_2 = nets.simple_img_conv_pool( conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
num_filters=50, num_filters=50,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
predict = layers.fc(input=conv_pool_2, predict = layers.fc(input=conv_pool_2,
size=10, size=10,
act="softmax", act="softmax")
main_program=main_program, cost = layers.cross_entropy(input=predict, label=label)
startup_program=startup_program) avg_cost = layers.mean(x=cost)
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
accuracy, acc_out = evaluator.accuracy(
input=predict,
label=label)
BATCH_SIZE = 50 BATCH_SIZE = 50
PASS_NUM = 3 PASS_NUM = 3
...@@ -75,10 +54,11 @@ train_reader = paddle.batch( ...@@ -75,10 +54,11 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
count = 0 count = 0
accuracy.reset(exe)
for data in train_reader(): for data in train_reader():
img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]), img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
data)).astype("float32") data)).astype("float32")
...@@ -90,14 +70,20 @@ for pass_id in range(PASS_NUM): ...@@ -90,14 +70,20 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, acc_out])
loss = np.array(outs[0]) loss = np.array(outs[0])
acc = np.array(outs[1]) acc = np.array(outs[1])
pass_acc = accuracy.eval(exe)
print "pass id : ", pass_id, pass_acc
# print loss, acc
if loss < 10.0 and acc > 0.9: if loss < 10.0 and acc > 0.9:
# if avg cost is less than 10.0 and accuracy is larger than 0.9, we think our code is good. # if avg cost is less than 10.0 and accuracy is larger than 0.9, we think our code is good.
exit(0) exit(0)
pass_acc = accuracy.eval(exe)
print "pass id : ", pass_id, pass_acc
exit(1) exit(1)
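The recognize_digits_conv script above also switches from an in-graph `accuracy` layer to the new `paddle.v2.fluid.evaluator` module. A hypothetical fragment showing just the reset/fetch/eval protocol, reusing `predict`, `label`, `exe`, `train_reader`, `avg_cost`, `PASS_NUM` and the feed tensors from the script above:

```python
import numpy as np
import paddle.v2.fluid.evaluator as evaluator
import paddle.v2.fluid.framework as framework

# The metric state is created once, next to the rest of the network.
accuracy, acc_out = evaluator.accuracy(input=predict, label=label)

for pass_id in range(PASS_NUM):
    accuracy.reset(exe)                       # clear the accumulated counts
    for data in train_reader():
        # tensor_img / tensor_y are prepared exactly as in the script above
        outs = exe.run(framework.default_main_program(),
                       feed={"pixel": tensor_img, "label": tensor_y},
                       fetch_list=[avg_cost, acc_out])
        loss, batch_acc = np.array(outs[0]), np.array(outs[1])
    pass_acc = accuracy.eval(exe)             # merged result for the whole pass
    print "pass id : ", pass_id, pass_acc
```

Per-batch values come back through `acc_out` in the fetch list, while `accuracy.eval(exe)` returns the statistics merged over the entire pass.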
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.framework.regularizer import L2DecayRegularizer from paddle.v2.fluid.initializer import UniformInitializer
from paddle.v2.framework.initializer import UniformInitializer
import numpy as np import numpy as np
BATCH_SIZE = 128 BATCH_SIZE = 128
startup_program = Program()
main_program = Program()
image = layers.data( image = layers.data(
name='x', name='x',
shape=[784], shape=[784],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
param_attr = { param_attr = {
'name': None, 'name': None,
...@@ -30,45 +25,30 @@ param_attr = { ...@@ -30,45 +25,30 @@ param_attr = {
hidden1 = layers.fc(input=image, hidden1 = layers.fc(input=image,
size=128, size=128,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
hidden2 = layers.fc(input=hidden1, hidden2 = layers.fc(input=hidden1,
size=64, size=64,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
predict = layers.fc(input=hidden2, predict = layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
label = layers.data( label = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(input=predict, label=label)
input=predict, avg_cost = layers.mean(x=cost)
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
accuracy = layers.accuracy( accuracy = layers.accuracy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -78,7 +58,7 @@ train_reader = paddle.batch( ...@@ -78,7 +58,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
...@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM): ...@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
startup_program = Program() IS_SPARSE = True
main_program = Program() USE_GPU = False
is_sparse = True
use_gpu = False
BATCH_SIZE = 256 BATCH_SIZE = 256
...@@ -25,99 +22,71 @@ def get_usr_combined_features(): ...@@ -25,99 +22,71 @@ def get_usr_combined_features():
uid = layers.data( uid = layers.data(
name='user_id', name='user_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
usr_emb = layers.embedding( usr_emb = layers.embedding(
input=uid, input=uid,
data_type='float32', data_type='float32',
size=[USR_DICT_SIZE, 32], size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'}, param_attr={'name': 'user_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_fc = layers.fc(input=usr_emb, usr_fc = layers.fc(input=usr_emb,
size=32, size=32)
main_program=main_program,
startup_program=startup_program)
USR_GENDER_DICT_SIZE = 2 USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data( usr_gender_id = layers.data(
name='gender_id', name='gender_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
usr_gender_emb = layers.embedding( usr_gender_emb = layers.embedding(
input=usr_gender_id, input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16], size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'}, param_attr={'name': 'gender_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_gender_fc = layers.fc(input=usr_gender_emb, usr_gender_fc = layers.fc(input=usr_gender_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data( usr_age_id = layers.data(
name='age_id', name='age_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64")
main_program=main_program,
startup_program=startup_program)
usr_age_emb = layers.embedding( usr_age_emb = layers.embedding(
input=usr_age_id, input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16], size=[USR_AGE_DICT_SIZE, 16],
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'}, param_attr={'name': 'age_table'})
main_program=main_program,
startup_program=startup_program)
usr_age_fc = layers.fc(input=usr_age_emb, usr_age_fc = layers.fc(input=usr_age_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data( usr_job_id = layers.data(
name='job_id', name='job_id',
shape=[1], shape=[1],
data_type="int64", data_type="int64")
main_program=main_program,
startup_program=startup_program)
usr_job_emb = layers.embedding( usr_job_emb = layers.embedding(
input=usr_job_id, input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16], size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'}, param_attr={'name': 'job_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
usr_job_fc = layers.fc(input=usr_job_emb, usr_job_fc = layers.fc(input=usr_job_emb,
size=16, size=16)
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
usr_combined_features = layers.fc(input=concat_embed, usr_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh")
main_program=main_program,
startup_program=startup_program)
return usr_combined_features return usr_combined_features
...@@ -129,83 +98,61 @@ def get_mov_combined_features(): ...@@ -129,83 +98,61 @@ def get_mov_combined_features():
mov_id = layers.data( mov_id = layers.data(
name='movie_id', name='movie_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_emb = layers.embedding( mov_emb = layers.embedding(
input=mov_id, input=mov_id,
data_type='float32', data_type='float32',
size=[MOV_DICT_SIZE, 32], size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'}, param_attr={'name': 'movie_table'},
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_fc = layers.fc(input=mov_emb, mov_fc = layers.fc(input=mov_emb,
size=32, size=32)
main_program=main_program,
startup_program=startup_program)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data( category_id = layers.data(
name='category_id', name='category_id',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_categories_emb = layers.embedding( mov_categories_emb = layers.embedding(
input=category_id, input=category_id,
size=[CATEGORY_DICT_SIZE, 32], size=[CATEGORY_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_categories_hidden = layers.sequence_pool( mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb, input=mov_categories_emb,
pool_type="sum", pool_type="sum")
main_program=main_program,
startup_program=startup_program)
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data( mov_title_id = layers.data(
name='movie_title', name='movie_title',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
mov_title_emb = layers.embedding( mov_title_emb = layers.embedding(
input=mov_title_id, input=mov_title_id,
size=[MOV_TITLE_DICT_SIZE, 32], size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=is_sparse, is_sparse=IS_SPARSE)
main_program=main_program,
startup_program=startup_program)
mov_title_conv = nets.sequence_conv_pool( mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb, input=mov_title_emb,
num_filters=32, num_filters=32,
filter_size=3, filter_size=3,
act="tanh", act="tanh",
pool_type="sum", pool_type="sum")
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv], input=[mov_fc, mov_categories_hidden, mov_title_conv],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
# FIXME(dzh) : need tanh operator # FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed, mov_combined_features = layers.fc(input=concat_embed,
size=200, size=200,
act="tanh", act="tanh")
main_program=main_program,
startup_program=startup_program)
return mov_combined_features return mov_combined_features
...@@ -217,27 +164,18 @@ def model(): ...@@ -217,27 +164,18 @@ def model():
# need cos sim # need cos sim
inference = layers.cos_sim( inference = layers.cos_sim(
X=usr_combined_features, X=usr_combined_features,
Y=mov_combined_features, Y=mov_combined_features)
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='score', name='score',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
square_cost = layers.square_error_cost( square_cost = layers.square_error_cost(
input=inference, input=inference,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean( avg_cost = layers.mean(x=square_cost)
x=square_cost,
main_program=main_program,
startup_program=startup_program)
return avg_cost return avg_cost
...@@ -245,16 +183,15 @@ def model(): ...@@ -245,16 +183,15 @@ def model():
def main(): def main():
cost = model() cost = model()
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost, startup_program=startup_program) opts = sgd_optimizer.minimize(cost)
block = main_program.block(0)
if use_gpu: if USE_GPU:
place = core.GPUPlace(0) place = core.GPUPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -303,7 +240,7 @@ def main(): ...@@ -303,7 +240,7 @@ def main():
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed=func_feed(feeding, data), feed=func_feed(feeding, data),
fetch_list=[cost]) fetch_list=[cost])
out = np.array(outs[0]) out = np.array(outs[0])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -70,7 +69,7 @@ def main(): ...@@ -70,7 +69,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -82,7 +81,7 @@ def main(): ...@@ -82,7 +81,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -81,7 +80,7 @@ def main(): ...@@ -81,7 +80,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
...@@ -93,7 +92,7 @@ def main(): ...@@ -93,7 +92,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import g_main_program, g_startup_program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
...@@ -88,10 +87,10 @@ def main(): ...@@ -88,10 +87,10 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
tensor_words, tensor_label = prepare_feed_data(data, place) tensor_words, tensor_label = prepare_feed_data(data, place)
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
while True: while True:
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.framework.framework import Program from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.executor import Executor
import numpy as np import numpy as np
startup_program = Program() PASS_NUM = 100
main_program = Program() EMBED_SIZE = 32
HIDDEN_SIZE = 256
embed_size = 32
hidden_size = 256
N = 5 N = 5
batch_size = 32 BATCH_SIZE = 32
is_sparse = True IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) dict_size = len(word_dict)
...@@ -23,97 +20,67 @@ dict_size = len(word_dict) ...@@ -23,97 +20,67 @@ dict_size = len(word_dict)
first_word = layers.data( first_word = layers.data(
name='firstw', name='firstw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
second_word = layers.data( second_word = layers.data(
name='secondw', name='secondw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
third_word = layers.data( third_word = layers.data(
name='thirdw', name='thirdw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
forth_word = layers.data( forth_word = layers.data(
name='forthw', name='forthw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
next_word = layers.data( next_word = layers.data(
name='nextw', name='nextw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
embed_first = layers.embedding( embed_first = layers.embedding(
input=first_word, input=first_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], input=[embed_first, embed_second, embed_third, embed_forth],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
hidden1 = layers.fc(input=concat_embed, hidden1 = layers.fc(input=concat_embed,
size=hidden_size, size=HIDDEN_SIZE,
act='sigmoid', act='sigmoid')
main_program=main_program,
startup_program=startup_program)
predict_word = layers.fc(input=hidden1, predict_word = layers.fc(input=hidden1,
size=dict_size, size=dict_size,
act='softmax', act='softmax')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict_word, input=predict_word,
label=next_word, label=next_word)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size) paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
...@@ -122,8 +89,8 @@ exe = Executor(place) ...@@ -122,8 +89,8 @@ exe = Executor(place)
# below exit line. # below exit line.
exit(0) exit(0)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)] input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)]
...@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM): ...@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor() next_tensor = core.LoDTensor()
next_tensor.set(next_data, place) next_tensor.set(next_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={ feed={
'firstw': first_tensor, 'firstw': first_tensor,
'secondw': second_tensor, 'secondw': second_tensor,
......
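The word2vec changes keep one pattern worth calling out: all context words share a single embedding table because every `layers.embedding` call uses the same `param_attr` name. A hypothetical two-word fragment (the dictionary size is a made-up placeholder; the real value comes from `paddle.dataset.imikolov.build_dict()`):

```python
import paddle.v2.fluid.layers as layers

EMBED_SIZE = 32
DICT_SIZE = 2000  # placeholder; the real size comes from the imikolov dict

first_word = layers.data(name='firstw', shape=[1], data_type='int64')
second_word = layers.data(name='secondw', shape=[1], data_type='int64')

embed_first = layers.embedding(
    input=first_word,
    size=[DICT_SIZE, EMBED_SIZE],
    data_type='float32',
    is_sparse=True,
    param_attr={'name': 'shared_w'})
embed_second = layers.embedding(
    input=second_word,
    size=[DICT_SIZE, EMBED_SIZE],
    data_type='float32',
    is_sparse=True,
    param_attr={'name': 'shared_w'})  # same name, so the same parameter is reused

concat_embed = layers.concat(
    input=[embed_first, embed_second], axis=1)
```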
...@@ -2,12 +2,12 @@ import unittest ...@@ -2,12 +2,12 @@ import unittest
import numpy as np import numpy as np
import random import random
import itertools import itertools
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import collections import collections
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import Program, OpProtoHolder from paddle.v2.fluid.framework import Program, OpProtoHolder
def randomize_probability(batch_size, class_num, dtype='float32'): def randomize_probability(batch_size, class_num, dtype='float32'):
......
...@@ -18,7 +18,9 @@ class TestAccuracyOp(OpTest): ...@@ -18,7 +18,9 @@ class TestAccuracyOp(OpTest):
num_correct += 1 num_correct += 1
break break
self.outputs = { self.outputs = {
'Accuracy': np.array([num_correct / float(n)]).astype("float32") 'Accuracy': np.array([num_correct / float(n)]).astype("float32"),
'Correct': np.array([num_correct]).astype("int32"),
'Total': np.array([n]).astype("int32")
} }
def test_check_output(self): def test_check_output(self):
......
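The accuracy operator test above now expects the raw `Correct` and `Total` counts next to the `Accuracy` ratio; exposing the counts is what makes it possible to accumulate accuracy across mini-batches by summing counts rather than averaging per-batch ratios. A trivial numpy sanity check of the relation, using the same expressions as the test:

```python
import numpy as np

num_correct, n = 7, 10
accuracy = np.array([num_correct / float(n)]).astype("float32")
correct = np.array([num_correct]).astype("int32")
total = np.array([n]).astype("int32")
assert np.isclose(accuracy[0], correct[0] / float(total[0]))
```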
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import numpy import numpy
......
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
def grad_var_name(var_name): def grad_var_name(var_name):
......
import op_test import op_test
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestCastOp(op_test.OpTest): class TestCastOp(op_test.OpTest):
......
...@@ -23,6 +23,9 @@ def create_test_class(op_type, typename, callback): ...@@ -23,6 +23,9 @@ def create_test_class(op_type, typename, callback):
for _type_name in {'float32', 'float64', 'int32', 'int64'}: for _type_name in {'float32', 'float64', 'int32', 'int64'}:
create_test_class('less_than', _type_name, lambda _a, _b: _a < _b) create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
create_test_class('equal', _type_name, lambda _a, _b: _a == _b) create_test_class('equal', _type_name, lambda _a, _b: _a == _b)
if __name__ == '__main__': if __name__ == '__main__':
......
import logging import logging
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
from paddle.v2.framework.op import Operator, CondOp from paddle.v2.fluid.op import Operator, CondOp
class PySimpleCond(object): class PySimpleCond(object):
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.framework import g_startup_program, g_main_program from paddle.v2.fluid.framework import g_startup_program, g_main_program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
class TestDocString(unittest.TestCase): class TestDocString(unittest.TestCase):
......
from paddle.v2.framework.default_scope_funcs import * from paddle.v2.fluid.default_scope_funcs import *
import unittest import unittest
......
import logging import logging
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
from paddle.v2.framework.op import Operator, DynamicRecurrentOp from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
import numpy as np import numpy as np
# for simplicity, just one level LoD # for simplicity, just one level LoD
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
......
import unittest import unittest
from paddle.v2.framework.layers import mul, data from paddle.v2.fluid.layers import mul, data
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import numpy import numpy
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
import unittest import unittest
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
class TestDebugStringFramework(unittest.TestCase): class TestDebugStringFramework(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
def conv_block(input, def conv_block(input,
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestInferShape(unittest.TestCase): class TestInferShape(unittest.TestCase):
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.io import save_inference_model, load_inference_model from paddle.v2.fluid.io import save_inference_model, load_inference_model
import paddle.v2.framework.executor as executor import paddle.v2.fluid.executor as executor
import unittest import unittest
import numpy as np import numpy as np
......
import numpy as np import numpy as np
import unittest import unittest
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.framework.initializer as initializer import paddle.v2.fluid.initializer as initializer
DELTA = 0.00001 DELTA = 0.00001
......
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.framework.nets as nets import paddle.v2.fluid.nets as nets
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
from paddle.v2.framework.layers import lod_rank_table, data from paddle.v2.fluid.layers import lod_rank_table, data
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
......
import unittest, os import unittest, os
import numpy as np import numpy as np
import paddle.v2 as paddle import paddle.v2 as paddle
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from op_test import OpTest, create_op, set_input from op_test import OpTest, create_op, set_input
if not core.is_compile_gpu(): if not core.is_compile_gpu():
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestOpSupportGPU(unittest.TestCase): class TestOpSupportGPU(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.op as op import paddle.v2.fluid.op as op
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
class TestGetAllProtos(unittest.TestCase): class TestGetAllProtos(unittest.TestCase):
......
import unittest import unittest
from paddle.v2.framework.framework import Variable, Program, g_main_program from paddle.v2.fluid.framework import Variable, Program, g_main_program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestOperator(unittest.TestCase): class TestOperator(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestOptimizer(unittest.TestCase): class TestOptimizer(unittest.TestCase):
......
import unittest import unittest
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestParameter(unittest.TestCase): class TestParameter(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
class TestProgram(unittest.TestCase): class TestProgram(unittest.TestCase):
......
import paddle.v2.framework.proto.framework_pb2 as framework_pb2 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
import unittest import unittest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class TestOpDesc(unittest.TestCase): class TestOpDesc(unittest.TestCase):
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
class PyRNNBase(object): class PyRNNBase(object):
......
import unittest import unittest
import paddle.v2.framework.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.framework.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer import paddle.v2.fluid.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestL2DecayRegularizer(unittest.TestCase): class TestL2DecayRegularizer(unittest.TestCase):
......
import unittest import unittest
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
def create_tensor(np_data, place): def create_tensor(np_data, place):
......
import paddle.v2.framework.core import paddle.v2.fluid.core
import unittest import unittest
class TestScope(unittest.TestCase): class TestScope(unittest.TestCase):
def test_create_destroy(self): def test_create_destroy(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
self.assertIsNotNone(scope) self.assertIsNotNone(scope)
scope_with_parent = scope.new_scope() scope_with_parent = scope.new_scope()
self.assertIsNotNone(scope_with_parent) self.assertIsNotNone(scope_with_parent)
def test_none_variable(self): def test_none_variable(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
self.assertIsNone(scope.find_var("test")) self.assertIsNone(scope.find_var("test"))
def test_create_var_get_var(self): def test_create_var_get_var(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var_a = scope.var("var_a") var_a = scope.var("var_a")
self.assertIsNotNone(var_a) self.assertIsNotNone(var_a)
...@@ -25,7 +25,7 @@ class TestScope(unittest.TestCase): ...@@ -25,7 +25,7 @@ class TestScope(unittest.TestCase):
self.assertIsNotNone(scope2.find_var('var_a')) self.assertIsNotNone(scope2.find_var('var_a'))
def test_var_get_int(self): def test_var_get_int(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var = scope.var("test_int") var = scope.var("test_int")
var.set_int(10) var.set_int(10)
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
import unittest import unittest
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
from op_test import OpTest from op_test import OpTest
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.framework.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
import numpy import numpy
......
import unittest import unittest
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy as np import numpy as np
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.framework.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
......
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy import numpy
......
import logging import logging
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
......
import unittest import unittest
from paddle.v2.framework.op import Operator from paddle.v2.fluid.op import Operator
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
import unittest import unittest
from paddle.v2.framework.framework import Variable, g_main_program, Program from paddle.v2.fluid.framework import Variable, g_main_program, Program
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy as np import numpy as np
......
import unittest import unittest
import paddle.v2.framework.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.framework.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.framework.core as core import paddle.v2.fluid.core as core
import numpy import numpy
......
import paddle.v2.framework.op as op
import numpy as np
import paddle.v2.framework.core as core
# Default accumulator: fold the current mini-batch result into the
# accumulated variable and write accumulated / num_batches back into it.
def avg_accumulate(accumulated_var, per_eval, num_batches, place):
t = np.array(accumulated_var.get_tensor())
t[0] += per_eval[0]
accumulated_var.get_tensor().set([t[0] / float(num_batches)], place)
class Evaluator(object):
def __init__(self,
scope,
operator='accuracy',
input='Inference',
label='Label',
output='Output',
place=core.CPUPlace()):
"""
create an evaluator for evaluating the inference.
NOTE: default run on CPUPlace(), running on GPUPlace doesn't improve performance much.
:param scope: the scope instance contains the input.
:type scope: paddle.v2.framework.core.scope
:param operator: operator name for caculating the evaluation for each mini-batch.
:type operator: string
:param input: output variable name of forward network.
:type input: string
:param label: variable name of label
:type label: string
"""
self.scope = scope
self.place = place
self.output_name = output
self.num_batches = 0
# create variable to store accumulated evaluator output
eval_name = ''.join([operator, "@Eval"])
if scope.find_var(eval_name):
raise Exception("evaluator already exists in scope: %s" % eval_name)
self.accumulated_var = scope.var(eval_name)
t = self.accumulated_var.get_tensor()
t.set_dims((1, ))
t.set([0.0], place)
# self.accumulated_var = block.create_var(block, name=eval_name, shape=(1,))
# self.accumulated_var.get_tensor().set([0.0])
# create operator of evaluation
var_map = dict() # var name -> variable
var_map[input] = [input]
var_map[label] = [label]
var_map[output] = [output]
self.op = op.Operator(operator, **var_map)
def evaluate(self, ctx, accumulator=avg_accumulate):
self.op.run(self.scope, ctx)
per_eval = np.array(self.scope.find_var(self.output_name).get_tensor())
self.num_batches += 1
accumulator(self.accumulated_var, per_eval, self.num_batches,
self.place)
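A note on the accumulation above: `avg_accumulate` re-reads the value already stored in `accumulated_var`, adds the new per-batch result, and divides by the batch count, so from the third batch onward the stored number is an average of averages rather than a true mean over all mini-batches (a true running mean would keep the raw sum and the batch count as separate states). The pure-Python trace below uses assumed values and no Paddle dependency; it only mirrors the arithmetic of `avg_accumulate`:

```python
# Pure-Python trace of the arithmetic in avg_accumulate (values are assumed,
# not taken from any real run); no Paddle tensors are involved.
per_batch = [0.9, 0.9, 0.9]

stored = 0.0                              # accumulated_var starts as [0.0]
for num_batches, per_eval in enumerate(per_batch, start=1):
    stored += per_eval                    # t[0] += per_eval[0]
    stored = stored / float(num_batches)  # set([t[0] / float(num_batches)], place)
    print("after batch %d: %.1f" % (num_batches, stored))  # 0.9, 0.9, then 0.6
```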
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
Operator
import unittest
import numpy as np
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
class TestBeamSearchDecodeOp(unittest.TestCase):
def setUp(self):
self.scope = core.Scope()
self.cpu_place = core.CPUPlace()
def append_lod_tensor(self, tensor_array, lod, data):
lod_tensor = core.LoDTensor()
lod_tensor.set_lod(lod)
lod_tensor.set(data, self.cpu_place)
tensor_array.append(lod_tensor)
def test_get_set(self):
ids = self.scope.var("ids").get_lod_tensor_array()
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]],
np.array(
[1, 2, 3, 4, 5, 6], dtype="int64"))
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]],
np.array(
[0, 1, 2, 3, 4, 5], dtype="int64"))
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]],
np.array(
[0, 1, 2, 3, 4], dtype="int64"))
scores = self.scope.var("scores").get_lod_tensor_array()
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]],
np.array(
[1, 2, 3, 4, 5, 6], dtype="float32"))
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]],
np.array(
[0, 1, 2, 3, 4, 5], dtype="float32"))
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]],
np.array(
[0, 1, 2, 3, 4], dtype="float32"))
sentence_ids = self.scope.var("sentence_ids").get_tensor()
sentence_scores = self.scope.var("sentence_scores").get_tensor()
beam_search_decode_op = Operator(
"beam_search_decode",
# inputs
Ids="ids",
Scores="scores",
# outputs
SentenceIds="sentence_ids",
SentenceScores="sentence_scores")
ctx = core.DeviceContext.create(self.cpu_place)
beam_search_decode_op.run(self.scope, ctx)
expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]]
self.assertEqual(sentence_ids.lod(), expected_lod)
self.assertEqual(sentence_scores.lod(), expected_lod)
expected_data = np.array(
[2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64")
self.assertTrue(np.array_equal(np.array(sentence_ids), expected_data))
self.assertTrue(
np.array_equal(np.array(sentence_scores), expected_data))
if __name__ == '__main__':
unittest.main()
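The nested lists passed to `append_lod_tensor` above are offset-style LoD (level-of-detail) descriptors: each level stores cumulative offsets, so consecutive differences give sequence lengths. A small standalone helper (hypothetical, not part of this patch) makes that reading explicit:

```python
def lod_level_to_lengths(level):
    """Turn one level of offset-style LoD into per-sequence lengths."""
    return [level[i + 1] - level[i] for i in range(len(level) - 1)]

lod = [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]]
print(lod_level_to_lengths(lod[0]))  # [3, 3]: two top-level sequences of 3 entries each
print(lod_level_to_lengths(lod[1]))  # [1, 0, 2, 2, 0, 1]: lengths of the six second-level sequences
```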
from paddle.v2.framework.evaluator import Evaluator
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
import unittest
import op_test
import numpy as np
class TestEvaluator(unittest.TestCase):
def setup(self, scope, inputs, outputs):
def __create_var__(var_name, arr):
np_arr = np.array(arr)
scope.var(var_name)
# tensor = var.get_tensor()
# tensor.set_dims(np_arr.shape)
for var_name, arr in inputs.iteritems():
__create_var__(var_name, arr)
for var_name, arr in outputs.iteritems():
__create_var__(var_name, arr)
def test_evaluator(self):
inputs = {
'Inference': np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 1]]).T,
'Label': np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
}
outputs = {'Accuracy': np.array([0.9])}
out_name = 'Accuracy'
places = [core.CPUPlace()]
if core.is_compile_gpu():
places.append(core.GPUPlace(0))
for place in places:
scope = core.Scope()
self.setup(scope, inputs, outputs)
evaluator = Evaluator(
scope,
operator='accuracy',
input='Inference',
label='Label',
output=out_name,
place=place)
op_test.set_input(scope, evaluator.op, inputs, place)
ctx = core.DeviceContext.create(place)
for i in range(10): # simulate 10 mini-batches
evaluator.evaluate(ctx)
actual = np.array(scope.find_var(out_name).get_tensor())
print actual
self.assertTrue(
np.allclose(
actual, outputs[out_name], atol=1e-5),
"output name: " + out_name + " has diff.")
if __name__ == '__main__':
exit(0)
unittest.main()
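The expected 'Accuracy' value of 0.9 in the fixture can be reproduced directly from the inputs: only the last of the ten predictions disagrees with its label. A quick numpy check, standalone and independent of the accuracy operator:

```python
import numpy as np

inference = np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 1]]).T
label = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

# 9 of the 10 predictions match the labels, so accuracy is 0.9.
print(np.mean(inference.flatten() == label))  # 0.9
```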
...@@ -13,8 +13,8 @@ packages=['paddle', ...@@ -13,8 +13,8 @@ packages=['paddle',
'paddle.v2.reader', 'paddle.v2.reader',
'paddle.v2.master', 'paddle.v2.master',
'paddle.v2.plot', 'paddle.v2.plot',
'paddle.v2.framework', 'paddle.v2.fluid',
'paddle.v2.framework.proto', 'paddle.v2.fluid.proto',
'py_paddle'] 'py_paddle']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
...@@ -44,14 +44,14 @@ setup(name='paddlepaddle', ...@@ -44,14 +44,14 @@ setup(name='paddlepaddle',
ext_modules=[Extension('_foo', ['stub.cc'])], ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={ package_data={
'paddle.v2.master': ['libpaddle_master.so'], 'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so'], 'paddle.v2.fluid': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so'] 'py_paddle':['*.py','_swig_paddle.so']
}, },
package_dir={ package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling. # The paddle.v2.fluid.proto will be generated while compiling.
# So that package points to other directory. # So that package points to other directory.
'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework', 'paddle.v2.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
}, },
scripts=paddle_bins, scripts=paddle_bins,
......