提交 b7cb9561 编写于 作者: L Luo Tao

Merge branch 'develop' into ProtoDataProvider

......@@ -21,7 +21,7 @@ third_party/
cmake-build-*
# generated while compiling
python/paddle/v2/framework/core.so
python/paddle/v2/fluid/core.so
paddle/pybind/pybind.h
CMakeFiles
cmake_install.cmake
......
## Evaluator Design
### The Problem
During training or serving, we provide the evaluation function to measure the model performance, e.g., accuracy, precision. In the operator based framework design, the data go through the network pipeline batch by batch. As a result, inside the operator, we only can calculate one minibatch metrics. We need to provide a mechanism to calculate the metrics for each N pass/batch the user wanted.
### Evaluator Design
Currently, every operation is expressed in the graph. we divide the evaluator process into three steps.
1. Initialize the metric state and add it into the block.
2. Calculate the statistic of the metric state in every mini-batch. The single operator is only responsible for calculating necessary statistics for one mini-batch. For example, accuracy operator only calculate a minibatch data if run once.
3. Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. When it comes to distributed training/Multi-GPU training, aggregate the value from different devices.
### Implementation
This design is shown in python API.
Each metric operator need to caculate the metric statistic and return the batch aware states, Python side responsible for accumulate the states for each pass.
```python
class Evaluator(object):
"""
Evaluator Base class.
"""
def __init__(self, name, **kwargs):
"""
Different evaluator may has different metric states. E.g, Accuracy need two variables, total and right sample counts.
Auc need four variables, `true_positives`,
`true_negatives`, `false_positives` and `false_negatives`. So every evaluator should create its needed variables and append to main_program
The initialization of Evaluator should be responsible for:
create metric states and append to the main_program
"""
pass
def _update_ops(self, input, label, **kwargs)
"""
Add mini-batch evaluator caculate operators to the main_program.
Add increment operator to accumulate the metric states.
"""
def reset(self, executor, reset_program=None):
"""
Reset metric states at the begin of each pass/user specified batch number.
Execute the reset_program to reset the states.
"""
def eval(self, executor, eval_program=None):
"""
Merge the mini-batch statistics to form the evaluation result for multiple mini-batches.
Execute the eval_program and return the result.
"""
return eval_result
```
......@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat,
paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) {
#ifndef PADDLE_MOBILE_INFERENCE
auto ptr = new paddle::capi::CMatrix();
ptr->mat = paddle::Matrix::createSparseMatrix(
height,
......@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse(
false,
useGpu);
return ptr;
#else
return nullptr;
#endif
}
paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
......@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
uint64_t colSize,
float* valueArray,
uint64_t valueSize) {
#ifndef PADDLE_MOBILE_INFERENCE
if (mat == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat);
if (rowArray == nullptr || colArray == nullptr ||
......@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
} else {
return kPD_NOT_SUPPORTED;
}
#else
return kPD_NOT_SUPPORTED;
#endif
}
......@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height,
* @param isBinary is binary (either 1 or 0 in matrix) or not.
* @param useGpu is using GPU or not.
* @return paddle_matrix.
* @note Mobile inference does not support this interface.
*/
PD_API paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
......@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat,
* NULL if the matrix is binary.
* @param [in] valueSize length of value array. Zero if the matrix is binary.
* @return paddle_error
* @note Mobile inference does not support this interface.
*/
PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
int* rowArray,
......
......@@ -27,7 +27,9 @@ if(WITH_GPU)
set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else()
if (NOT MOBILE_INFERENCE)
set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
endif()
endif()
set(CUDA_CU_SOURCES
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "hl_base.h"
/**
* @brief Maximum pool forward.
* @brief Maximum pool forward with Mask output.
*
* @param[in] frameCnt batch size of input image.
* @param[in] inputData input data.
......@@ -35,7 +35,7 @@ limitations under the License. */
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples.
*
* @param[out] maskData the location indices of select max data.
*/
extern void hl_maxpool_forward(const int frameCnt,
const real* inputData,
......@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride);
const int tgtStride,
real* maskData = NULL);
/**
* @brief Maximum pool backward.
......
......@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride) {}
const int tgtStride,
real* MaskData) {}
inline void hl_maxpool_backward(const int frameCnt,
const real* inputData,
......
......@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads,
const int offsetH,
const int offsetW,
real* tgtData,
const int tgtStride) {
const int tgtStride,
real* maskData) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
......@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads,
hstart = max(hstart, 0);
wstart = max(wstart, 0);
real maxval = -FLT_MAX;
int max_index = -1;
inputData += (frameNum * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (maxval < inputData[h * width + w])
maxval = inputData[h * width + w];
if (maxval < inputData[h * width + w]) {
max_index = h * width + w;
maxval = inputData[max_index];
}
}
}
int tgtIndex =
index % (pooledW * pooledH * channels) + frameNum * tgtStride;
tgtData[tgtIndex] = maxval;
if (maskData != NULL) {
maskData[tgtIndex] = max_index;
}
}
}
......@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride) {
const int tgtStride,
real* maskData) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
dim3 threads(1024, 1);
......@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt,
paddingH,
paddingW,
tgtData,
tgtStride);
tgtStride,
maskData);
CHECK_SYNC("hl_maxpool_forward failed");
}
......
......@@ -61,6 +61,7 @@ public:
// function arguments
strides_ = config.get<std::vector<size_t>>("strides");
paddings_ = config.get<std::vector<size_t>>("paddings");
dilations_ = config.get<std::vector<size_t>>("dilations");
groups_ = config.get<size_t>("groups");
// number of inputs and outputs
......@@ -118,6 +119,7 @@ protected:
std::vector<size_t> strides_;
std::vector<size_t> paddings_;
std::vector<size_t> dilations_;
/// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the
......@@ -133,6 +135,10 @@ protected:
inline int paddingW() const { return paddings_[1]; }
inline int dilationH() const { return dilations_[0]; }
inline int dilationW() const { return dilations_[1]; }
// A temporary memory in convolution calculation.
MemoryHandlePtr memory_;
......
......@@ -79,45 +79,59 @@ void Convolution(const std::string& conv1,
if (outputChannels < inputChannels) continue;
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
for (size_t dilation : {1, 3}) {
if (padding >= filterSize) break;
size_t filterS = (filterSize - 1) * dilation + 1;
// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
if (inputSize + 2 * padding < filterS) break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" ||
conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
dilation > 1)
break;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
size_t outputSize =
(inputSize - filterS + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;
function(test, input, filter, output);
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
function(test, input, filter, output);
}
}
}
}
......@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1,
for (size_t outputChannels : {7}) {
size_t stride = 1;
size_t padding = 0;
size_t dilation = 1;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
......@@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
......@@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1,
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("dilations", dilations)
.set("algo", (std::string) "auto"));
TensorShape input{
......@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {1, 1};
size_t groups = inputChannels;
Compare2Function<DType1, DType2> test(
conv1,
......@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1,
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("dilations", dilations)
.set("algo", (std::string) "auto"));
TensorShape input{
......
......@@ -100,7 +100,9 @@ public:
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
} else {
colData = inputData + g * inputOffset;
}
......@@ -223,7 +225,9 @@ public:
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
}
}
inputGrad += inputChannels * inputHeight * inputWidth;
......@@ -310,7 +314,9 @@ public:
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
} else {
colData = inputData + g * inputOffset;
}
......
......@@ -78,7 +78,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth);
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
};
template <ColFormat Format, DeviceType Device, class T>
......@@ -91,7 +93,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth);
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
};
} // namespace paddle
......@@ -31,7 +31,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -47,8 +49,8 @@ public:
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) < 0 ||
(imRowIdx - paddingHeight) >= inputHeight ||
(imColIdx - paddingWidth) < 0 ||
......@@ -81,7 +83,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -97,8 +101,8 @@ public:
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) >= 0 &&
(imRowIdx - paddingHeight) < inputHeight &&
(imColIdx - paddingWidth) >= 0 &&
......@@ -134,7 +138,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -147,9 +153,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset =
outputH * strideHeight + filterH - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth;
int imRowOffset = outputH * strideHeight +
filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels +
channel) *
......@@ -189,7 +196,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -202,9 +211,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset =
outputH * strideHeight + filterH - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth;
int imRowOffset = outputH * strideHeight +
filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels +
channel) *
......
......@@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im,
int strideW,
int paddingH,
int paddingW,
int dilationH,
int dilationW,
int height_col,
int width_col,
T* data_col) {
......@@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im,
data_col += (channel_out * height_col + h_out) * width_col + w_out;
for (int i = 0; i < blockH; ++i) {
for (int j = 0; j < blockW; ++j) {
int rIdx = int(h_in + i);
int cIdx = int(w_in + j);
int rIdx = int(h_in + i * dilationH);
int cIdx = int(w_in + j * dilationW);
if ((rIdx - (int)paddingH) >= (int)height ||
(rIdx - (int)paddingH) < 0 ||
(cIdx - (int)paddingW) >= (int)width ||
......@@ -77,7 +79,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -102,6 +106,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth,
colData);
......@@ -121,6 +127,8 @@ __global__ void col2im(size_t n,
size_t strideW,
size_t paddingH,
size_t paddingW,
size_t dilationH,
size_t dilationW,
size_t height_col,
size_t width_col,
T* data_im) {
......@@ -131,23 +139,34 @@ __global__ void col2im(size_t n,
int w = int(index % width);
int h = int((index / width) % height);
int c = int(index / (width * height));
int filterH = (blockH - 1) * dilationH + 1;
int filterW = (blockW - 1) * dilationW + 1;
if ((w - (int)paddingW) >= 0 &&
(w - (int)paddingW) < (width - 2 * paddingW) &&
(h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) {
// compute the start and end of the output
int w_col_start =
(w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1;
(w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1;
int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col));
int h_col_start =
(h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1;
(h < (int)filterH) ? 0 : (h - (int)filterH) / (int)strideH + 1;
int h_col_end = min(int(h / strideH + 1), int(height_col));
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// the col location: [c * width * height + h_out, w_out]
int c_col = int(c * blockH * blockW) +
(h - h_col * (int)strideH) * (int)blockW +
(w - w_col * (int)strideW);
val += data_col[(c_col * height_col + h_col) * width_col + w_col];
int h_k = (h - h_col * strideH);
int w_k = (w - w_col * strideW);
if (h_k % dilationH == 0 && w_k % dilationW == 0) {
h_k /= dilationH;
w_k /= dilationW;
int c_col =
(((c * blockH + h_k) * blockW + w_k) * height_col + h_col) *
width_col +
w_col;
val += data_col[c_col];
}
}
}
h -= paddingH;
......@@ -173,7 +192,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -205,6 +226,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth,
imData);
......@@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData,
int strideWidth,
int paddingHeight,
int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight,
int outputWidth) {
int swId = blockIdx.x;
......@@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData,
channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth;
int heightOffset = idy + shId * strideHeight - paddingHeight;
int widthOffset =
idx * dilationHeight + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationWidth + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth;
......@@ -273,7 +300,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -312,6 +341,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth);
CHECK_SYNC("Im2ColFunctor GPU failed");
......@@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData,
int strideWidth,
int paddingHeight,
int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight,
int outputWidth) {
int swId = blockIdx.x;
......@@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData,
channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth;
int heightOffset = idy + shId * strideHeight - paddingHeight;
int widthOffset =
idx * dilationWidth + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationHeight + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth;
......@@ -372,7 +407,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -411,6 +448,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth);
CHECK_SYNC("Col2ImFunctor GPU failed");
......
......@@ -29,82 +29,98 @@ void TestIm2ColFunctor() {
for (size_t filterWidth : {3, 7}) {
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (inputHeight <= filterHeight || inputWidth <= filterWidth)
break;
if (padding >= filterHeight || padding >= filterWidth) break;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) / stride;
TensorShape imShape =
TensorShape({channels, inputHeight, inputWidth});
TensorShape colShape1 = TensorShape({channels,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
TensorShape colShape2 = TensorShape({outputHeight,
outputWidth,
channels,
filterHeight,
filterWidth});
size_t height = channels * filterHeight * filterWidth;
size_t width = outputHeight * outputWidth;
VectorPtr input1 = Vector::create(imShape.getElements(), false);
VectorPtr input2 = Vector::create(imShape.getElements(), false);
MatrixPtr output1 = Matrix::create(height, width, false, false);
MatrixPtr output2 = Matrix::create(width, height, false, false);
input1->uniform(0.001, 1);
input2->copyFrom(*input1);
Im2ColFunctor<kCFO, Device, T> im2Col1;
Im2ColFunctor<kOCF, Device, T> im2Col2;
im2Col1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding);
im2Col2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding);
// The transposition of the result of ColFormat == kCFO
// is equal to the result of ColFormat == kOCF.
MatrixPtr test;
output2->transpose(test, true);
autotest::TensorCheckErr(*output1, *test);
Col2ImFunctor<kCFO, Device, T> col2Im1;
Col2ImFunctor<kOCF, Device, T> col2Im2;
col2Im1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding);
col2Im2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding);
autotest::TensorCheckErr(*input1, *input2);
for (size_t dilation : {1, 3}) {
size_t filterSizeH = (filterHeight - 1) * dilation + 1;
size_t filterSizeW = (filterWidth - 1) * dilation + 1;
if (inputHeight + 2 * padding < filterSizeH ||
inputWidth + 2 * padding < filterSizeW)
break;
if (padding >= filterSizeH || padding >= filterSizeW) break;
size_t outputHeight =
(inputHeight - filterSizeH + 2 * padding) / stride + 1;
size_t outputWidth =
(inputWidth - filterSizeW + 2 * padding) / stride + 1;
TensorShape imShape =
TensorShape({channels, inputHeight, inputWidth});
TensorShape colShape1 = TensorShape({channels,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
TensorShape colShape2 = TensorShape({outputHeight,
outputWidth,
channels,
filterHeight,
filterWidth});
size_t height = channels * filterHeight * filterWidth;
size_t width = outputHeight * outputWidth;
VectorPtr input1 =
Vector::create(imShape.getElements(), false);
VectorPtr input2 =
Vector::create(imShape.getElements(), false);
MatrixPtr output1 =
Matrix::create(height, width, false, false);
MatrixPtr output2 =
Matrix::create(width, height, false, false);
input1->uniform(0.001, 1);
input2->copyFrom(*input1);
Im2ColFunctor<kCFO, Device, T> im2Col1;
Im2ColFunctor<kOCF, Device, T> im2Col2;
im2Col1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding,
dilation,
dilation);
im2Col2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding,
dilation,
dilation);
// The transposition of the result of ColFormat == kCFO
// is equal to the result of ColFormat == kOCF.
MatrixPtr test;
output2->transpose(test, true);
autotest::TensorCheckErr(*output1, *test);
Col2ImFunctor<kCFO, Device, T> col2Im1;
Col2ImFunctor<kOCF, Device, T> col2Im2;
col2Im1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding,
dilation,
dilation);
col2Im2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding,
dilation,
dilation);
autotest::TensorCheckErr(*input1, *input2);
}
}
}
}
......
......@@ -84,9 +84,49 @@ if(MOBILE_INFERENCE)
gradientmachines/GradientMachineMode.cpp
gradientmachines/MultiGradientMachine.cpp)
# Remove useless layers
# Remove layers that used in training
list(REMOVE_ITEM GSERVER_SOURCES
layers/RecurrentLayerGroup.cpp)
layers/RecurrentLayerGroup.cpp
layers/CostLayer.cpp
layers/MultiBoxLossLayer.cpp
layers/WarpCTCLayer.cpp
layers/CTCLayer.cpp
layers/LinearChainCTC.cpp
layers/PrintLayer.cpp)
list(REMOVE_ITEM GSERVER_SOURCES
layers/OuterProdLayer.cpp
layers/SumToOneNormLayer.cpp
layers/ConvShiftLayer.cpp
layers/InterpolationLayer.cpp
layers/AgentLayer.cpp
layers/DotMulOperator.cpp
layers/GruStepLayer.cpp
layers/LstmStepLayer.cpp
layers/ConvexCombinationLayer.cpp
layers/Conv3DLayer.cpp
layers/DeConv3DLayer.cpp
layers/CropLayer.cpp
layers/CrossEntropyOverBeam.cpp
layers/DataNormLayer.cpp
layers/FeatureMapExpandLayer.cpp
layers/HierarchicalSigmoidLayer.cpp
layers/MultinomialSampler.cpp
layers/NCELayer.cpp
layers/KmaxSeqScoreLayer.cpp
layers/MDLstmLayer.cpp
layers/MultiplexLayer.cpp
layers/PadLayer.cpp
layers/Pool3DLayer.cpp
layers/ResizeLayer.cpp
layers/RotateLayer.cpp
layers/RowConvLayer.cpp
layers/RowL2NormLayer.cpp
layers/SamplingIdLayer.cpp
layers/ScaleShiftLayer.cpp
layers/SelectiveFullyConnectedLayer.cpp
layers/SpatialPyramidPoolLayer.cpp
layers/BilinearInterpLayer.cpp
layers/ClipLayer.cpp)
endif()
if(WITH_GPU)
......
......@@ -16,7 +16,6 @@ limitations under the License. */
#include "NeuralNetwork.h"
#include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/CustomStackTrace.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
......@@ -28,6 +27,7 @@ limitations under the License. */
#ifndef PADDLE_MOBILE_INFERENCE
#include "MultiNetwork.h"
#include "RecurrentGradientMachine.h"
#include "paddle/gserver/layers/AgentLayer.h"
#endif
namespace paddle {
......@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config,
void NeuralNetwork::connect(LayerPtr agentLayer,
LayerPtr realLayer,
int height) {
#ifndef PADDLE_MOBILE_INFERENCE
AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get());
CHECK_NOTNULL(agent);
agent->setRealLayer(realLayer, height);
#endif
}
void NeuralNetwork::connect(std::string agentLayerName,
......
......@@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
std::vector<size_t> dilations = {(size_t)dilationY_[i],
(size_t)dilation_[i]};
bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1);
// Convolution Layer uses the GemmConv function by default.
convType = "GemmConv";
......@@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
if ((filterSize_[i] == filterSizeY_[i]) &&
(filterSize_[i] == 3 || filterSize_[i] == 4) &&
(stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) {
(stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) &&
!useDilation) {
convType = "NeonDepthwiseConv";
}
#endif
}
if (FLAGS_use_nnpack && !isDeconv_) {
if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) {
createFunction(forward_,
"NNPACKConv",
FuncConfig()
......@@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
......@@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
......@@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i]));
}
}
......
......@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_;
LayerPtr Layer::create(const LayerConfig& config) {
std::string type = config.type();
#ifndef PADDLE_MOBILE_INFERENCE
// NOTE: As following types have illegal character '-',
// they can not use REGISTER_LAYER to registrar.
// Besides, to fit with old training models,
......@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) {
return LayerPtr(new MultiClassCrossEntropy(config));
else if (type == "rank-cost")
return LayerPtr(new RankingCost(config));
#ifndef PADDLE_MOBILE_INFERENCE
else if (type == "auc-validation")
return LayerPtr(new AucValidation(config));
else if (type == "pnpair-validation")
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MaxPoolWithMaskLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
bool MaxPoolWithMaskLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
PoolLayer::init(layerMap, parameterMap);
setOutput("mask", &mask_);
return true;
}
size_t MaxPoolWithMaskLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
outputY_ = outputSize(imgSizeY_,
sizeY_,
confPaddingY_,
strideY_,
/* caffeMode */ false);
outputX_ = outputSize(imgSize_,
sizeX_,
confPadding_,
stride_,
/* caffeMode */ false);
layerSize = outputX_ * outputY_ * channels_;
getOutput().setFrameHeight(outputY_);
getOutput().setFrameWidth(outputX_);
return layerSize;
}
void MaxPoolWithMaskLayer::forward(PassType passType) {
size_t size = getSize();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
int batchSize = inputV->getHeight();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
CHECK_EQ(size, outV->getWidth());
resetSpecifyOutput(mask_,
batchSize,
size,
/* isValueClean */ false,
/* isGradClean */ true);
MatrixPtr maskV = mask_.value;
outV->maxPoolForward(*inputV,
imgSizeY_,
imgSize_,
channels_,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
confPaddingY_,
confPadding_,
maskV);
}
void MaxPoolWithMaskLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
MatrixPtr outGrad = getOutputGrad();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
MatrixPtr outV = getOutputValue();
MatrixPtr inputGrad = inputLayers_[0]->getOutputGrad();
inputGrad->maxPoolBackward(*inputV,
imgSizeY_,
imgSize_,
*outGrad,
*outV,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
1,
1,
confPaddingY_,
confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "PoolLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief Basic parent layer of different kinds of pooling
*/
class MaxPoolWithMaskLayer : public PoolLayer {
protected:
Argument mask_;
public:
explicit MaxPoolWithMaskLayer(const LayerConfig& config)
: PoolLayer(config) {}
size_t getSize();
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
};
} // namespace paddle
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "PoolLayer.h"
#include "MaxPoolWithMaskLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_CUDA
......@@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride();
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
return true;
}
......@@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) {
} else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config);
#endif
} else if (pool == "max-pool-with-mask") {
return new MaxPoolWithMaskLayer(config);
} else {
LOG(FATAL) << "Unknown pool type: " << pool;
return nullptr;
......
# gserver pacakge unittests
add_simple_unittest(test_LinearChainCRF)
add_simple_unittest(test_MultinomialSampler)
add_simple_unittest(test_RecurrentLayer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_MultinomialSampler)
endif()
function(gserver_test TARGET)
add_unittest_without_exec(${TARGET}
${TARGET}.cpp
......@@ -24,6 +27,7 @@ gserver_test(test_ConvUnify)
gserver_test(test_BatchNorm)
gserver_test(test_KmaxSeqScore)
gserver_test(test_Expand)
gserver_test(test_MaxPoolingWithMaskOutput)
########## test_Mkldnn layers and activations ##########
if(WITH_MKLDNN)
......@@ -48,7 +52,7 @@ if(WITH_PYTHON)
endif()
############### test_WarpCTCLayer #######################
if(NOT WITH_DOUBLE)
if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
add_unittest_without_exec(test_WarpCTCLayer
test_WarpCTCLayer.cpp)
......
......@@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
int dilation = 1;
int dilation = 2;
if (type == "cudnn_conv") {
#if CUDNN_VERSION >= 6000
dilation = 2;
......@@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
TEST(Layer, PoolLayer) {
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false);
#ifdef PADDLE_WITH_CUDA
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
......@@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true);
#endif
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle;
void setPoolConfig(TestConfig* config,
PoolConfig* pool,
const string& poolType) {
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(1);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(1);
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow);
pool->set_output_y(oh);
}
void doOneMaxPoolingWithMaskOutputTest(MatrixPtr& inputMat,
const string& poolType,
bool use_gpu,
MatrixPtr& maskMat) {
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 25, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
pool->set_img_size(5);
pool->set_img_size_y(5);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
config.layerConfig.set_name("MaxPoolWithMask");
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(config,
&dataLayers,
&datas,
&layerMap,
"MaxPoolWithMask",
1,
false,
use_gpu);
dataLayers[0]->getOutputValue()->copyFrom(*inputMat);
FLAGS_use_gpu = use_gpu;
std::vector<ParameterPtr> parameters;
LayerPtr maxPoolingWithMaskOutputLayer;
initTestLayer(config, &layerMap, &parameters, &maxPoolingWithMaskOutputLayer);
maxPoolingWithMaskOutputLayer->forward(PASS_GC);
checkMatrixEqual(maxPoolingWithMaskOutputLayer->getOutput("mask").value,
maskMat);
}
TEST(Layer, maxPoolingWithMaskOutputLayerFwd) {
bool useGpu = false;
MatrixPtr inputMat;
MatrixPtr maskMat;
real inputData[] = {0.1, 0.1, 0.5, 0.5, 1.1, 0.2, 0.2, 0.6, 0.1,
0.1, 0.3, 0.3, 0.7, 0.1, 0.1, 0.4, 0.4, 0.8,
0.8, 0.1, 1.0, 2.0, 3.0, 0.0, 9.0};
real maskData[] = {12, 4, 22, 24};
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->setData(inputData);
maskMat->setData(maskData);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#ifdef PADDLE_WITH_CUDA
useGpu = true;
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->copyFrom(inputData, 25);
maskMat->copyFrom(maskData, 4);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#endif
}
......@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b,
}
template class BaseMatrixT<real>;
#ifndef PADDLE_MOBILE_INFERENCE
template class BaseMatrixT<int>;
#else
template <>
void BaseMatrixT<int>::zero() {
applyUnary(unary::Zero<int>());
}
template <>
void BaseMatrixT<int>::assign(int p) {
applyUnary(unary::Assign<int>(p));
}
template <>
void BaseMatrixT<int>::isEqualTo(BaseMatrixT& b, int value) {
applyBinary(binary::IsEqual<int>(value), b);
}
template <>
void BaseMatrixT<int>::neg() {
applyUnary(unary::Neg<int>());
}
template <>
void BaseMatrixT<int>::abs2() {
applyUnary(unary::Abs<int>());
}
template <>
void BaseMatrixT<int>::add(int p) {
applyUnary(unary::Add<int>(p));
}
template <>
void BaseMatrixT<int>::add(int p1, int p2) {
applyUnary(unary::Add2<int>(p1, p2));
}
template <>
void BaseMatrixT<int>::applyL1(int learningRate, int decayRate) {
applyUnary(unary::ApplyL1<int>(learningRate * decayRate));
}
#endif
} // namespace paddle
......@@ -25,6 +25,19 @@ else()
message(STATUS "Compile with MKLDNNMatrix")
endif()
if(MOBILE_INFERENCE)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/SIMDFunctions.cpp)
# Remove sparse
list(REMOVE_ITEM MATH_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.h)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp)
endif()
set(MATH_SOURCES
"${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu"
"${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu"
......
......@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef>
#include "Matrix.h"
......@@ -309,3 +312,57 @@ private:
using Matrix::subMatrix;
};
} // namespace paddle
#else
#include "Matrix.h"
namespace paddle {
class CpuSparseMatrix : public Matrix {
public:
CpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
CpuSparseMatrix(real* data,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, false) {}
real* getValue() const { return nullptr; }
size_t getColStartIdx(size_t i) const { return 0; }
size_t getRowStartIdx(size_t i) const { return 0; }
size_t getColNum(size_t i) const { return 0; }
int* getRowCols(size_t i) const { return nullptr; }
CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width) {
return nullptr;
}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
......@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) {
}
void GpuMatrix::collectBias(Matrix& a, real scale) {
#ifdef PADDLE_WITH_CUDA
CHECK_EQ(getHeight(), (size_t)1);
CHECK_EQ(width_, a.getWidth());
GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
......@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale);
}
#endif
}
void GpuMatrix::collectSharedBias(Matrix& a, real scale) {
......@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
const GpuMatrix& b,
real scaleAB,
real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous());
CHECK(b.isContiguous());
CHECK(b.useGpu_ == true) << "Matrix type are not equal";
......@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
b.height_,
scaleAB,
scaleT);
#endif
}
void GpuMatrix::mul(const GpuMatrix& a,
const GpuSparseMatrix& b,
real scaleAB,
real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous());
CHECK(a.isContiguous());
CHECK(a.useGpu_ == true) << "Matrix type are not equal";
......@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a,
scaleAB,
scaleT);
}
#endif
}
/* this = a*b */
......@@ -1028,15 +1034,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW) {
size_t paddingW,
MatrixPtr maskMatP) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData();
real* maskData = NULL;
size_t frameNum = inputMat.getHeight();
CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels);
if (maskMatP != NULL) {
CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal";
CHECK(outputH * outputW * channels == maskMatP->getWidth());
maskData = maskMatP->getData();
}
hl_maxpool_forward(frameNum,
inputData,
channels,
......@@ -1051,7 +1065,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
paddingH,
paddingW,
data_,
getStride());
getStride(),
maskData);
}
void GpuMatrix::maxPoolBackward(Matrix& inputMat,
......@@ -1548,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
}
void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
......@@ -1563,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy(
output_d, entropy_d, mat_d, height_, outputPtr->width_);
#endif
}
void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
......@@ -1581,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy_bp(
output_d, grad_d, mat_d, height_, width_);
#endif
}
void GpuMatrix::vol2Col(real* dataSrc,
......@@ -1973,9 +1992,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW) {
size_t paddingW,
MatrixPtr maskMatP) {
real* inputData = inputMat.getData();
real* outData = data_;
real* maskData = NULL;
size_t num = inputMat.getHeight();
size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW;
......@@ -1984,6 +2005,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride();
if (maskMatP != NULL) {
maskData = maskMatP->getData();
CHECK_EQ(channels * outLength, maskMatP->getWidth());
}
/* initialize the data_ */
for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) {
......@@ -2005,10 +2031,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(
outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
if (maskData == NULL) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(
outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
}
}
} else {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (outData[ph * outputW + pw] < inputData[h * imgSizeW + w]) {
outData[ph * outputW + pw] = inputData[h * imgSizeW + w];
maskData[ph * outputW + pw] = h * imgSizeW + w;
}
}
}
}
}
......@@ -2016,6 +2053,8 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
// compute offset
inputData += inLength;
outData += outLength;
if (maskData != NULL) maskData += outLength;
}
}
}
......@@ -3226,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a,
real scaleAB,
real scaleT);
#ifndef PADDLE_MOBILE_INFERENCE
void SharedCpuMatrix::mul(CpuSparseMatrix* a,
CpuMatrix* b,
real scaleAB,
......@@ -3354,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) {
}
}
#endif
/* Add a (column) vector b to matrix a, column by column */
void CpuMatrix::addColumnVector(const Matrix& b) {
BaseMatrix::addColVector(const_cast<Matrix&>(b));
......
......@@ -861,7 +861,8 @@ public:
/**
* Pooling forward operation, pick out the largest element
* in the sizeX of value
* in the sizeX of value, if the maskMatP is not NULL, it will
* also caculate the location indices.
*/
virtual void maxPoolForward(Matrix& inputMat,
size_t imgSizeH,
......@@ -874,7 +875,8 @@ public:
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW) {
size_t paddingW,
MatrixPtr maskMatP = NULL) {
LOG(FATAL) << "Not implemeted";
}
......@@ -1426,7 +1428,8 @@ public:
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW);
size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image,
size_t imgSizeH,
......@@ -1697,7 +1700,8 @@ public:
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW);
size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image,
size_t imgSizeH,
......@@ -2066,6 +2070,7 @@ public:
class SharedCpuMatrix : public CpuMatrix {
public:
#ifndef PADDLE_MOBILE_INFERENCE
/* blockNum is number of partitions of the matrix */
SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false)
: CpuMatrix(height, width, trans) {
......@@ -2111,6 +2116,7 @@ private:
ThreadLocal<CpuMatrixPtr> localBuf_;
ThreadLocal<std::vector<int>> localBufRows_;
ThreadLocal<std::vector<int>> blockSeq_;
#endif
};
typedef struct { unsigned int col; } sparse_non_value_t;
......
......@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef>
#include "CpuSparseMatrix.h"
#include "Matrix.h"
......@@ -237,3 +240,47 @@ private:
};
} // namespace paddle
#else
#include "CpuSparseMatrix.h"
namespace paddle {
class GpuSparseMatrix : public Matrix {
public:
GpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format_ = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
GpuSparseMatrix(real* value,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, true) {}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
......@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <gflags/gflags.h>
#include <string.h>
#include <algorithm>
......@@ -313,3 +315,27 @@ private:
};
} // namespace paddle
#else
namespace paddle {
class SparseRowCpuMatrix : public CpuMatrix {
public:
void reserveStore() {}
void clearIndices() {}
};
class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix {
public:
void setupIndices() {}
void addRows(MatrixPtr input) {}
void addRows(IVectorPtr ids) {}
};
class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix {};
class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix {};
class SparseRowIdsCpuMatrix : public CpuMatrix {};
} // namespace paddle
#endif
......@@ -3,8 +3,10 @@
add_simple_unittest(test_ExecViaCpu)
add_simple_unittest(test_SIMDFunctions)
add_simple_unittest(test_TrainingAlgorithm)
add_simple_unittest(test_SparseMatrix)
add_simple_unittest(test_RowBuffer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_SparseMatrix)
endif()
# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
add_unittest(test_matrixCompare
......
......@@ -30,6 +30,10 @@ class AccuracyOp : public framework::OperatorWithKernel {
"Input (Label) of accuracy op should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Accuracy"),
"Output (Accuracy) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Correct"),
"Output (Correct) of AccuracyOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Total"),
"Output (Total) of AccuracyOp should not be null.");
auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label");
......@@ -43,6 +47,8 @@ class AccuracyOp : public framework::OperatorWithKernel {
" the same as label.");
ctx->SetOutputDim("Accuracy", {1});
ctx->SetOutputDim("Correct", {1});
ctx->SetOutputDim("Total", {1});
ctx->ShareLoD("Out", /*->*/ "Accuracy");
}
......@@ -66,6 +72,8 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch");
AddOutput("Correct", "The correct samples count of current batch");
AddOutput("Total", "The samples count of current batch");
AddComment(R"DOC(
Accuracy Operator.
......
......@@ -24,7 +24,8 @@ using platform::PADDLE_CUDA_NUM_THREADS;
template <int BlockSize>
__global__ void AccuracyCudaKernel(const int N, const int D,
const int64_t* Xdata,
const int64_t* labeldata, float* accuracy) {
const int64_t* labeldata, int* correct_data,
float* accuracy) {
int count = 0;
__shared__ int total[BlockSize];
......@@ -43,6 +44,7 @@ __global__ void AccuracyCudaKernel(const int N, const int D,
// reduce the count with init value 0, and output accuracy.
int result = thrust::reduce(thrust::device, total, total + BlockSize, 0);
if (threadIdx.x == 0) {
*correct_data = result;
*accuracy = static_cast<float>(result) / static_cast<float>(N);
}
}
......@@ -56,31 +58,48 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
auto* inference = ctx.Input<Tensor>("Out");
auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
// FIXME(typhoonzero): only support indices currently
// if add support for output values, how to detect the data type?
const int64_t* indices_data = indices->data<int64_t>();
const int64_t* label_data = label->data<int64_t>();
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
size_t num_samples = inference->dims()[0];
int num_samples = static_cast<int>(inference->dims()[0]);
size_t infer_width = inference->dims()[1];
PADDLE_ENFORCE(cudaMemset(accuracy_data, 0, sizeof(float)));
// cudaMemset((void**)&correct_data, 0, sizeof(float));
if (num_samples == 0) {
return;
}
cudaMemcpy(total_data, &num_samples, sizeof(int), cudaMemcpyHostToDevice);
AccuracyCudaKernel<PADDLE_CUDA_NUM_THREADS><<<
1, PADDLE_CUDA_NUM_THREADS, 0, ctx.cuda_device_context().stream()>>>(
num_samples, infer_width, indices_data, label_data, accuracy_data);
num_samples, infer_width, indices_data, label_data, correct_data,
accuracy_data);
int d_num_samples, d_num_correct;
float d_accuracy;
cudaMemcpy(&d_num_correct, correct_data, sizeof(int),
cudaMemcpyDeviceToHost);
cudaMemcpy(&d_num_samples, total_data, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(&d_accuracy, accuracy_data, sizeof(float),
cudaMemcpyDeviceToHost);
}
};
} // namespace operators
} // namespace paddle
// FIXME(typhoonzero): types of T is for infernece data.
// label data is always int
// FIXME(typhoonzero): types of T is for inference data.
// label data is always int64
REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>,
paddle::operators::AccuracyOpCUDAKernel<double>);
......@@ -29,7 +29,11 @@ class AccuracyKernel : public framework::OpKernel<T> {
auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
const int64_t* indices_data = indices->data<int64_t>();
......@@ -55,7 +59,8 @@ class AccuracyKernel : public framework::OpKernel<T> {
}
}
// FIXME(typhoonzero): we don't accumulate the accuracy for now.
*correct_data = num_correct;
*total_data = num_samples;
*accuracy_data =
static_cast<float>(num_correct) / static_cast<float>(num_samples);
}
......
......@@ -27,6 +27,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
framework::ExecutionContext ctx(*this, scope, dev_ctx);
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size();
......
......@@ -94,5 +94,13 @@ class CompareOp : public framework::OperatorWithKernel {
REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
REGISTER_LOGICAL_KERNEL(greater_than, CPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_OP(equal, "Out = X == Y");
REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
......@@ -15,4 +15,9 @@
#include "paddle/operators/compare_op.h"
REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_KERNEL(less_equal, GPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_KERNEL(greater_than, GPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_KERNEL(greater_equal, GPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor);
......@@ -27,6 +27,24 @@ struct LessThanFunctor {
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; }
};
template <typename T>
struct LessEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
};
template <typename T>
struct GreaterThanFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
};
template <typename T>
struct GreaterEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
};
template <typename T>
struct EqualFunctor {
using ELEM_TYPE = T;
......
......@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_add, ops::ElementwiseOp, ops::ElementwiseAddOpMaker,
elementwise_add_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_add,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseAddKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_add_grad,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseAddGradKernel<paddle::platform::CPUPlace, int64_t>);
......@@ -35,7 +35,13 @@ REGISTER_OP(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker,
elementwise_div_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_div,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseDivKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_div_grad,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseDivGradKernel<paddle::platform::CPUPlace, int64_t>);
......@@ -37,8 +37,12 @@ REGISTER_OP(elementwise_mul, ops::ElementwiseOp, ops::ElementwiseMulOpMaker,
REGISTER_OP_CPU_KERNEL(
elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, double>);
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseMulKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_mul_grad,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, double>);
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseMulGradKernel<paddle::platform::CPUPlace, int64_t>);
......@@ -34,7 +34,13 @@ REGISTER_OP(elementwise_sub, ops::ElementwiseOp, ops::ElementwiseSubOpMaker,
elementwise_sub_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_sub,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseSubKernel<paddle::platform::CPUPlace, int64_t>);
REGISTER_OP_CPU_KERNEL(
elementwise_sub_grad,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>);
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, float>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, double>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, int>,
ops::ElementwiseSubGradKernel<paddle::platform::CPUPlace, int64_t>);
......@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_process) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
......@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -87,11 +87,12 @@ template <typename PoolProcess, class T>
class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) {
PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
......@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -154,10 +155,11 @@ template <class T>
class MaxPool2dGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
......@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -240,17 +242,17 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_process) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -315,11 +317,12 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) {
PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
......@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -398,10 +401,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
......@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -498,15 +502,15 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
......@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -563,13 +567,13 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const int batch_size = input_grad.dims()[0];
const int input_height = input_grad.dims()[2];
const int input_width = input_grad.dims()[3];
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0];
const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_channels = output_grad.dims()[1];
const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3];
......@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) {
......@@ -612,17 +616,17 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -691,14 +695,14 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const int batch_size = input_grad.dims()[0];
const int input_depth = input_grad.dims()[2];
const int input_height = input_grad.dims()[3];
const int input_width = input_grad.dims()[4];
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0];
const int input_depth = input_grad->dims()[2];
const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_channels = output_grad.dims()[1];
const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3];
......@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) {
......
......@@ -21,13 +21,13 @@ namespace math {
template <typename PoolProcess, typename T>
__global__ void KernelPool2D(const int nthreads, const T* input_data,
T* output_data, const int channels,
const int input_height, const int input_width,
const int output_height, const int output_width,
const int ksize_height, const int ksize_width,
const int stride_height, const int stride_width,
const int padding_height, const int padding_width,
PoolProcess pool_process) {
const int channels, const int input_height,
const int input_width, const int output_height,
const int output_width, const int ksize_height,
const int ksize_width, const int stride_height,
const int stride_width, const int padding_height,
const int padding_width, PoolProcess pool_process,
T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
......@@ -59,11 +59,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
template <typename PoolProcess, typename T>
__global__ void KernelPool2DGrad(
const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels,
const int input_height, const int input_width, const int output_height,
const int output_width, const int ksize_height, const int ksize_width,
const int stride_height, const int stride_width, const int padding_height,
const int padding_width, PoolProcess pool_process) {
const T* output_grad, const int channels, const int input_height,
const int input_width, const int output_height, const int output_width,
const int ksize_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int padding_width,
PoolProcess pool_process, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width;
......@@ -107,11 +107,11 @@ __global__ void KernelPool2DGrad(
template <typename T>
__global__ void KernelMaxPool2DGrad(
const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels,
const int input_height, const int input_width, const int output_height,
const int output_width, const int ksize_height, const int ksize_width,
const int stride_height, const int stride_width, const int padding_height,
const int padding_width) {
const T* output_grad, const int channels, const int input_height,
const int input_width, const int output_height, const int output_width,
const int ksize_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int padding_width,
T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
......@@ -158,16 +158,16 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_process) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
......@@ -176,7 +176,7 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
......@@ -187,11 +187,10 @@ class Pool2dFunctor<platform::GPUPlace, PoolProcess, T> {
PoolProcess,
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, output_data, input_channels,
input_height, input_width, output_height,
output_width, ksize_height, ksize_width,
stride_height, stride_width, padding_height,
padding_width, pool_process);
.stream()>>>(
nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width, pool_process, output_data);
}
};
......@@ -204,11 +203,11 @@ template <typename PoolProcess, typename T>
class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process) {
PoolProcess pool_process, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
......@@ -225,7 +224,7 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024;
......@@ -237,10 +236,10 @@ class Pool2dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data,
input_channels, input_height, input_width, output_height, output_width,
ksize_height, ksize_width, stride_height, stride_width, padding_height,
padding_width, pool_process);
nthreads, input_data, output_data, output_grad_data, input_channels,
input_height, input_width, output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width, padding_height, padding_width,
pool_process, input_grad_data);
}
};
......@@ -253,10 +252,11 @@ template <typename T>
class MaxPool2dGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
......@@ -274,7 +274,7 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
......@@ -285,10 +285,10 @@ class MaxPool2dGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data,
input_channels, input_height, input_width, output_height, output_width,
ksize_height, ksize_width, stride_height, stride_width, padding_height,
padding_width);
nthreads, input_data, output_data, output_grad_data, input_channels,
input_height, input_width, output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width, padding_height, padding_width,
input_grad_data);
}
};
......@@ -313,14 +313,16 @@ template class Pool2dGradFunctor<
platform::GPUPlace, paddle::operators::math::AvgPoolGrad<double>, double>;
template <typename PoolProcess, typename T>
__global__ void KernelPool3D(
const int nthreads, const T* input_data, T* output_data, const int channels,
const int input_depth, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width,
const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width,
PoolProcess pool_process) {
__global__ void KernelPool3D(const int nthreads, const T* input_data,
const int channels, const int input_depth,
const int input_height, const int input_width,
const int output_depth, const int output_height,
const int output_width, const int ksize_depth,
const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height,
const int stride_width, const int padding_depth,
const int padding_height, const int padding_width,
PoolProcess pool_process, T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
......@@ -358,13 +360,13 @@ __global__ void KernelPool3D(
template <typename PoolProcess, typename T>
__global__ void KernelPool3DGrad(
const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels,
const int input_depth, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width,
const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width,
PoolProcess pool_process) {
const T* output_grad, const int channels, const int input_depth,
const int input_height, const int input_width, const int output_depth,
const int output_height, const int output_width, const int ksize_depth,
const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_height, const int stride_width, const int padding_depth,
const int padding_height, const int padding_width, PoolProcess pool_process,
T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width;
......@@ -422,13 +424,12 @@ __global__ void KernelPool3DGrad(
template <typename T>
__global__ void KernelMaxPool3DGrad(
const int nthreads, const T* input_data, const T* output_data,
const T* output_grad, T* input_grad, const int channels,
const int input_depth, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width,
const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height,
const int padding_width) {
const T* output_grad, const int channels, const int input_depth,
const int input_height, const int input_width, const int output_depth,
const int output_height, const int output_width, const int ksize_depth,
const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_height, const int stride_width, const int padding_depth,
const int padding_height, const int padding_width, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
......@@ -480,18 +481,18 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_process) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -503,7 +504,7 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height *
output_width;
......@@ -516,11 +517,11 @@ class Pool3dFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, input_channels, input_depth,
input_height, input_width, output_depth, output_height, output_width,
ksize_depth, ksize_height, ksize_width, stride_depth, stride_height,
stride_width, padding_depth, padding_height, padding_width,
pool_process);
nthreads, input_data, input_channels, input_depth, input_height,
input_width, output_depth, output_height, output_width, ksize_depth,
ksize_height, ksize_width, stride_depth, stride_height, stride_width,
padding_depth, padding_height, padding_width, pool_process,
output_data);
}
};
......@@ -533,11 +534,11 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process) {
PoolProcess pool_process, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2];
......@@ -560,7 +561,7 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads =
batch_size * input_channels * input_depth * input_height * input_width;
......@@ -573,11 +574,11 @@ class Pool3dGradFunctor<platform::GPUPlace, PoolProcess, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data,
input_channels, input_depth, input_height, input_width, output_depth,
output_height, output_width, ksize_depth, ksize_height, ksize_width,
stride_depth, stride_height, stride_width, padding_depth,
padding_height, padding_width, pool_process);
nthreads, input_data, output_data, output_grad_data, input_channels,
input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height,
padding_width, pool_process, input_grad_data);
}
};
......@@ -590,10 +591,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2];
......@@ -616,7 +618,7 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height *
output_width;
......@@ -628,11 +630,11 @@ class MaxPool3dGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_grad_data,
input_channels, input_depth, input_height, input_width, output_depth,
output_height, output_width, ksize_depth, ksize_height, ksize_width,
stride_depth, stride_height, stride_width, padding_depth,
padding_height, padding_width);
nthreads, input_data, output_data, output_grad_data, input_channels,
input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height,
padding_width, input_grad_data);
}
};
......@@ -658,11 +660,11 @@ template class Pool3dGradFunctor<
template <typename T>
__global__ void KernelMaxPool2dWithIdx(
const int nthreads, const T* input_data, T* output_data, T* mask_data,
const int channels, const int input_height, const int input_width,
const int output_height, const int output_width, const int ksize_height,
const int ksize_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width) {
const int nthreads, const T* input_data, const int channels,
const int input_height, const int input_width, const int output_height,
const int output_width, const int ksize_height, const int ksize_width,
const int stride_height, const int stride_width, const int padding_height,
const int padding_width, T* output_data, T* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
......@@ -697,11 +699,11 @@ __global__ void KernelMaxPool2dWithIdx(
template <typename T>
__global__ void KernelMaxPool2DWithIdxGrad(
const int nthreads, T* input_grad, const T* output_grad, const T* mask_data,
const int nthreads, const T* output_grad, const T* mask_data,
const int channels, const int input_height, const int input_width,
const int output_height, const int output_width, const int ksize_height,
const int ksize_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width) {
const int padding_height, const int padding_width, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int w_offset = index % input_width;
......@@ -748,16 +750,16 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
......@@ -766,8 +768,8 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
......@@ -777,11 +779,10 @@ class MaxPool2dWithIndexFunctor<platform::GPUPlace, T> {
KernelMaxPool2dWithIdx<
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_data, output_data, mask_data,
input_channels, input_height, input_width,
output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width,
padding_height, padding_width);
.stream()>>>(
nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width, output_data, mask_data);
}
};
......@@ -794,14 +795,14 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const int batch_size = input_grad.dims()[0];
const int input_channels = input_grad.dims()[1];
const int input_height = input_grad.dims()[2];
const int input_width = input_grad.dims()[3];
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0];
const int input_channels = input_grad->dims()[1];
const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3];
const int ksize_height = ksize[0];
......@@ -813,7 +814,7 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024;
......@@ -823,11 +824,11 @@ class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, T> {
KernelMaxPool2DWithIdxGrad<
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(nthreads, input_grad_data, output_grad_data,
mask_data, input_channels, input_height,
input_width, output_height, output_width,
ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width);
.stream()>>>(nthreads, output_grad_data, mask_data,
input_channels, input_height, input_width,
output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width,
padding_height, padding_width, input_grad_data);
}
};
......@@ -838,13 +839,13 @@ template class MaxPool2dWithIndexGradFunctor<platform::GPUPlace, double>;
template <typename T>
__global__ void KernelMaxPool3DWithIdx(
const int nthreads, const T* input_data, T* output_data, T* mask_data,
const int channels, const int input_depth, const int input_height,
const int input_width, const int output_depth, const int output_height,
const int output_width, const int ksize_depth, const int ksize_height,
const int ksize_width, const int stride_depth, const int stride_height,
const int stride_width, const int padding_depth, const int padding_height,
const int padding_width) {
const int nthreads, const T* input_data, const int channels,
const int input_depth, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width,
const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width,
T* output_data, T* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
......@@ -886,13 +887,13 @@ __global__ void KernelMaxPool3DWithIdx(
template <typename T>
__global__ void KernelMaxPool3DWithIdxGrad(
const int nthreads, T* input_grad, const T* output_grad, const T* mask,
const int channels, const int input_depth, const int input_height,
const int input_width, const int output_depth, const int output_height,
const int output_width, const int ksize_depth, const int ksize_height,
const int ksize_width, const int stride_depth, const int stride_height,
const int stride_width, const int padding_depth, const int padding_height,
const int padding_width) {
const int nthreads, const T* output_grad, const T* mask, const int channels,
const int input_depth, const int input_height, const int input_width,
const int output_depth, const int output_height, const int output_width,
const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width,
T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int w_offset = index % input_width;
......@@ -952,18 +953,18 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -975,8 +976,8 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height *
output_width;
......@@ -988,11 +989,10 @@ class MaxPool3dWithIndexFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_data, output_data, mask_data, input_channels,
input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height,
padding_width);
nthreads, input_data, input_channels, input_depth, input_height,
input_width, output_depth, output_height, output_width, ksize_depth,
ksize_height, ksize_width, stride_depth, stride_height, stride_width,
padding_depth, padding_height, padding_width, output_data, mask_data);
}
};
......@@ -1005,15 +1005,15 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const int batch_size = input_grad.dims()[0];
const int input_channels = input_grad.dims()[1];
const int input_depth = input_grad.dims()[2];
const int input_height = input_grad.dims()[3];
const int input_width = input_grad.dims()[4];
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0];
const int input_channels = input_grad->dims()[1];
const int input_depth = input_grad->dims()[2];
const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3];
const int output_width = output_grad.dims()[4];
......@@ -1029,7 +1029,7 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
const T* output_grad_data = output_grad.data<T>();
const T* mask_data = mask.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads =
batch_size * input_channels * input_depth * input_height * input_width;
......@@ -1041,11 +1041,11 @@ class MaxPool3dWithIndexGradFunctor<platform::GPUPlace, T> {
T><<<grid, threads, 0,
reinterpret_cast<const platform::CUDADeviceContext&>(context)
.stream()>>>(
nthreads, input_grad_data, output_grad_data, mask_data, input_channels,
input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height,
padding_width);
nthreads, output_grad_data, mask_data, input_channels, input_depth,
input_height, input_width, output_depth, output_height, output_width,
ksize_depth, ksize_height, ksize_width, stride_depth, stride_height,
stride_width, padding_depth, padding_height, padding_width,
input_grad_data);
}
};
......
......@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T>
class Pool2dFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_compute);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute, framework::Tensor* output);
};
template <typename Place, typename PoolProcess, typename T>
class Pool2dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute);
PoolProcess pool_compute, framework::Tensor* input_grad);
};
template <typename Place, class T>
class MaxPool2dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
template <typename Place, typename PoolProcess, typename T>
class Pool3dFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_compute);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute, framework::Tensor* output);
};
template <typename Place, typename PoolProcess, typename T>
class Pool3dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute);
PoolProcess pool_compute, framework::Tensor* input_grad);
};
template <typename Place, class T>
class MaxPool3dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
/*
......@@ -155,38 +157,38 @@ template <typename Place, typename T>
class MaxPool2dWithIndexFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask);
};
template <typename Place, typename T>
class MaxPool2dWithIndexGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
template <typename Place, typename T>
class MaxPool3dWithIndexFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask);
};
template <typename Place, typename T>
class MaxPool3dWithIndexGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
} // namespace math
......
......@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T>
pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dFunctor<
Place, paddle::operators::math::AvgPool<T>, T>
pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
}
} break;
case 3: {
......@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T>
pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor<
Place, paddle::operators::math::AvgPool<T>, T>
pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
}
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> {
if (pooling_type == "max") {
paddle::operators::math::MaxPool2dGradFunctor<Place, T>
pool2d_backward;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings);
pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings, pool_process);
pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, pool_process, in_x_grad);
}
} break;
case 3: {
if (pooling_type == "max") {
paddle::operators::math::MaxPool3dGradFunctor<Place, T>
pool3d_backward;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings);
pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings, pool_process);
pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, pool_process, in_x_grad);
}
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......
......@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
case 2: {
paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T>
pool2d_forward;
pool2d_forward(context.device_context(), *in_x, *out, *mask, ksize,
strides, paddings);
pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, out, mask);
} break;
case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T>
pool3d_forward;
pool3d_forward(context.device_context(), *in_x, *out, *mask, ksize,
strides, paddings);
pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, out, mask);
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
}
......@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
case 2: {
paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T>
pool2d_backward;
pool2d_backward(context.device_context(), *in_x_grad, *out_grad,
*mask, ksize, strides, paddings);
pool2d_backward(context.device_context(), *out_grad, *mask, ksize,
strides, paddings, in_x_grad);
} break;
case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T>
pool3d_backward;
pool3d_backward(context.device_context(), *in_x_grad, *out_grad,
*mask, ksize, strides, paddings);
pool3d_backward(context.device_context(), *out_grad, *mask, ksize,
strides, paddings, in_x_grad);
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
}
......
......@@ -14,6 +14,7 @@
#pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
......@@ -26,6 +27,10 @@ template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
struct SumFunctor {
template <typename Place, typename X, typename Y, typename Dim>
void operator()(const Place& place, X& x, Y& y, const Dim& dim) {
......@@ -133,10 +138,17 @@ class ReduceKernel : public framework::OpKernel<T> {
dims_vector.erase(dims_vector.begin() + dim);
dims = framework::make_ddim(dims_vector);
}
auto out = EigenTensor < T, D == 1 ? 1 : (D - 1) > ::From(*output, dims);
auto& place = context.GetEigenDevice<Place>();
Functor functor;
functor(place, x, out, reduce_dim);
if (D == 1) {
auto out = EigenScalar<T>::From(*output);
functor(place, x, out, reduce_dim);
} else {
auto out = EigenTensor<T, (D - 1)>::From(*output, dims);
functor(place, x, out, reduce_dim);
}
}
};
......@@ -186,13 +198,13 @@ class ReduceGradKernel : public framework::OpKernel<T> {
auto x_reduce = EigenTensor<T, D>::From(*input1, dims);
auto x_reduce_grad = EigenTensor<T, D>::From(*input2, dims);
Eigen::array<int, D> braodcast_dim;
for (size_t i = 0; i < D; ++i) braodcast_dim[i] = 1;
braodcast_dim[dim] = input0->dims()[dim];
Eigen::array<int, D> broadcast_dim;
for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1;
broadcast_dim[dim] = input0->dims()[dim];
auto& place = context.GetEigenDevice<Place>();
Functor functor;
functor(place, x, x_reduce, x_grad, x_reduce_grad, braodcast_dim,
braodcast_dim[dim]);
functor(place, x, x_reduce, x_grad, x_reduce_grad, broadcast_dim,
broadcast_dim[dim]);
}
};
......
......@@ -200,7 +200,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
false,
useGpu_);
}
} else if (matType == MAT_NORMAL_SHARED) {
}
#ifndef PADDLE_MOBILE_INFERENCE
// NOLINTNEXTLINE
else if (matType == MAT_NORMAL_SHARED) {
CHECK_EQ(height * width, bufs_[pType]->getSize());
size_t blockNum = 0;
CHECK(isGradShared(&blockNum));
......@@ -259,7 +262,10 @@ void Parameter::setMat(ParameterType pType, int matType) {
} else if (matType == MAT_SPARSE_ROW_AUTO_GROW) {
CHECK(isGradSparseUpdate());
mats_[pType] = std::make_shared<SparseAutoGrowRowCpuMatrix>(height, width);
} else {
}
#endif
// NOLINTNEXTLINE
else {
LOG(FATAL) << "Unsupported mat type" << matType;
}
}
......
......@@ -33,6 +33,7 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
bool withValue,
bool useGpu,
bool equalNnzPerSample) {
#ifndef PADDLE_MOBILE_INFERENCE
std::vector<int64_t> ids(height);
std::vector<int64_t> indices(height + 1);
indices[0] = 0;
......@@ -84,6 +85,8 @@ MatrixPtr makeRandomSparseMatrix(size_t height,
}
return mat;
}
#endif
return nullptr;
}
void generateSequenceStartPositions(size_t batchSize,
......
......@@ -37,10 +37,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so
add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so
DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/core.so)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
......@@ -66,7 +66,7 @@ if (WITH_TESTING)
add_subdirectory(paddle/v2/tests)
add_subdirectory(paddle/v2/reader/tests)
add_subdirectory(paddle/v2/plot/tests)
add_subdirectory(paddle/v2/framework/tests)
add_subdirectory(paddle/v2/fluid/tests)
endif()
endif()
install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
......
......@@ -1200,8 +1200,14 @@ def TestData(data_config, async_load_data=None):
#caffe_mode: compute the output size using floor instead of ceil,
# which is consistent of caffe and CuDNN's convention.
def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
output = (2 * padding + img_size - filter_size) / float(stride)
def cnn_output_size(img_size,
filter_size,
padding,
stride,
caffe_mode,
dilation=1):
filter_s = (filter_size - 1) * dilation + 1
output = (2 * padding + img_size - filter_s) / float(stride)
if caffe_mode:
return 1 + int(math.floor(output))
else:
......@@ -1210,8 +1216,14 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode):
#calcualte image_size based on output_size for de-convolution (ConvTransLayer).
#It is the reverse function of cnn_output_size
def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode):
img_size = (output_size - 1) * stride + filter_size - 2 * padding
def cnn_image_size(output_size,
filter_size,
padding,
stride,
caffe_mode,
dilation=1):
filter_s = (filter_size - 1) * dilation + 1
img_size = (output_size - 1) * stride + filter_s - 2 * padding
if not caffe_mode:
img_size = img_size + 1
return img_size
......@@ -1253,9 +1265,9 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
def parse_pool(pool, input_layer_name, pool_conf, ceil_mode):
pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in [
'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool'
], "pool-type %s is not in "
"['max-projection', 'avg-projection', "
'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool'
], "pool-type %s is not in " \
"['max-projection', 'avg-projection', 'max-pool-with-mask'," \
"'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type)
pool_conf.channels = pool.channels
......@@ -1376,6 +1388,12 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
conv_conf.stride_y = conv.stride_y
conv_conf.groups = conv.groups
conv_conf.caffe_mode = conv.caffe_mode
if not conv.dilation:
conv.dilation = 1
conv.dilation_y = 1
else:
conv_conf.dilation = conv.dilation
conv_conf.dilation_y = conv.dilation_y
if not trans:
conv_conf.filter_channels = conv.channels / conv.groups
......@@ -1383,20 +1401,20 @@ def parse_conv(conv, input_layer_name, conv_conf, num_filters, trans=False):
get_img_size(input_layer_name, conv.channels)
conv_conf.output_x = cnn_output_size(
conv_conf.img_size, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode)
conv_conf.stride, conv_conf.caffe_mode, conv.dilation)
conv_conf.output_y = cnn_output_size(
conv_conf.img_size_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode)
conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y)
else:
conv_conf.filter_channels = num_filters / conv.groups
conv_conf.output_x, conv_conf.output_y = \
get_img_size(input_layer_name, conv.channels)
conv_conf.img_size = cnn_image_size(
conv_conf.output_x, conv_conf.filter_size, conv_conf.padding,
conv_conf.stride, conv_conf.caffe_mode)
conv_conf.stride, conv_conf.caffe_mode, conv.dilation)
conv_conf.img_size_y = cnn_image_size(
conv_conf.output_y, conv_conf.filter_size_y, conv_conf.padding_y,
conv_conf.stride_y, conv_conf.caffe_mode)
conv_conf.stride_y, conv_conf.caffe_mode, conv.dilation_y)
#caffe_mode: compute the output size using floor instead of ceil,
......
......@@ -20,7 +20,7 @@ from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType, \
from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \
CudnnAvgPooling, CudnnMaxPooling
from .attrs import *
from .default_decorators import *
......@@ -888,7 +888,7 @@ def mixed_layer(size=0,
:type size: int
:param input: The input of this layer. It is an optional parameter. If set,
then this function will just return layer's name.
:param act: Activation Type. LinearActivation is the default.
:param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
......@@ -1030,7 +1030,7 @@ def fc_layer(input,
:type input: LayerOutput | list | tuple
:param size: The layer dimension.
:type size: int
:param act: Activation Type. TanhActivation is the default.
:param act: Activation Type. TanhActivation is the default activation.
:type act: BaseActivation
:param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute
......@@ -1527,7 +1527,7 @@ def lstmemory(input,
:type input: LayerOutput
:param reverse: is sequence process reversed or not.
:type reverse: bool
:param act: Activation type. TanhActivation is the default. :math:`h_t`
:param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation
:param gate_act: gate activation type, SigmoidActivation by default.
:type gate_act: BaseActivation
......@@ -1920,7 +1920,7 @@ def repeat_layer(input,
False for treating input as column vector and repeating
in the row direction.
:type as_row_vector: bool
:param act: Activation type. IdentityActivation is the default.
:param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation
:type name: basestring
:param layer_attr: extra layer attributes.
......@@ -1974,7 +1974,7 @@ def seq_reshape_layer(input,
:type reshape_size: int
:param name: The name of this layer. It is optional.
:type name: basestring
:param act: Activation type. IdentityActivation is the default.
:param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
......@@ -2487,7 +2487,7 @@ def img_conv_layer(input,
shape will be (filter_size, filter_size_y).
:type filter_size_y: int | None
:param num_filters: Each filter group's number of filter
:param act: Activation type. ReluActivation is the default.
:param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation
:param groups: Group size of filters.
:type groups: int
......@@ -2571,7 +2571,9 @@ def img_conv_layer(input,
if layer_type:
if dilation > 1 or dilation_y > 1:
assert layer_type in ["cudnn_conv", "cudnn_convt"]
assert layer_type in [
"cudnn_conv", "cudnn_convt", "exconv", "exconvt"
]
if trans:
assert layer_type in ["exconvt", "cudnn_convt"]
else:
......@@ -2699,9 +2701,9 @@ def img_pool_layer(input,
elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg'
assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling,
assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling,
CudnnMaxPooling], \
"only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported"
"only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling are supported"
type_name = pool_type.name + '-projection' \
if (
......@@ -3253,7 +3255,7 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
:param input: Input layers. It could be a LayerOutput or list/tuple of
LayerOutput.
:type input: LayerOutput | list | tuple
:param act: Activation Type. LinearActivation is the default.
:param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
......@@ -3311,7 +3313,7 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
:type name: basestring
:param input: input layers or projections
:type input: list | tuple | collections.Sequence
:param act: Activation type. IdentityActivation is the default.
:param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
......@@ -3406,7 +3408,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
:type a: LayerOutput
:param b: input sequence layer
:type b: LayerOutput
:param act: Activation type. IdentityActivation is the default.
:param act: Activation type. IdentityActivation is the default activation.
:type act: BaseActivation
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
......@@ -3572,31 +3574,32 @@ def lstm_step_layer(input,
...
This layer has two outputs. Default output is :math:`h_t`. The other
output is :math:`o_t`, whose name is 'state' and can use
This layer has two outputs. The default output is :math:`h_t`. The other
output is :math:`o_t`, whose name is 'state' and users can use
:code:`get_output_layer` to extract this output.
:param name: The name of this layer. It is optional.
:type name: basestring
:param size: Layer's size. NOTE: lstm layer's size, should be equal to
:code:`input.size/4`, and should be equal to
:code:`state.size`.
:param size: The dimension of this layer's output, which must be
equal to the dimension of the state.
:type size: int
:param input: input layer. :math:`Wx_t + Wh_{t-1}`
:param input: The input of this layer.
:type input: LayerOutput
:param state: State Layer. :math:`c_{t-1}`
:param state: The state of the LSTM unit.
:type state: LayerOutput
:param act: Activation type. TanhActivation is the default.
:param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation
:param gate_act: Gate Activation Type. SigmoidActivation is the default.
:param gate_act: Activation type of the gate. SigmoidActivation is the
default activation.
:type gate_act: BaseActivation
:param state_act: State Activation Type. TanhActivation is the default.
:param state_act: Activation type of the state. TanhActivation is the
default activation.
:type state_act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: layer's extra attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -3641,22 +3644,31 @@ def gru_step_layer(input,
layer_attr=None):
"""
:param input:
:param input: The input of this layer, whose dimension can be divided by 3.
:type input: LayerOutput
:param output_mem:
:param size:
:param act:
:param output_mem: A memory which memorizes the output of this layer at previous
time step.
:type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation
:param name: The name of this layer. It is optional.
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid.
:type name: basestring
:param gate_act: Activation type of this layer's two gates. SigmoidActivation is
the default activation.
:type gate_act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero.
:param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, no bias
is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: the parameter_attribute for transforming the output_mem
from previous step.
:param layer_attr:
:param param_attr: The parameter attribute. See ParameterAttribute for details.
:type param_attr: ParameterAttribute
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -3701,24 +3713,34 @@ def gru_step_naive_layer(input,
param_attr=None,
layer_attr=None):
"""
GRU Step Layer, but using MixedLayer to generate. It support ERROR_CLIPPING
GRU Step Layer, which is realized using PaddlePaddle API. It supports ERROR_CLIPPING
and DROPOUT.
:param input:
:param output_mem:
:param size:
:param input: The input of this layer, whose dimensionality can be divided by 3.
:param output_mem: A memory which memorizes the output of this layer at previous
time step.
:type output_mem: LayerOutput
:param size: The dimension of this layer's output. If it is not set or set to None,
it will be set to one-third of the dimension of the input automatically.
:type size: int
:param name: The name of this layer. It is optional.
:param act:
:type name: basestring
:param act: Activation type of this layer's output. TanhActivation
is the default activation.
:type act: BaseActivation
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid.
:param gate_act: Activation type of this layer's two gates. SigmoidActivation
is the default activation.
:type gate_act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero.
:param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute, no bias
is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr:
:param layer_attr:
:return:
:param param_attr: The parameter attribute. See ParameterAttribute for details.
:type param_attr: ParameterAttribute
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
if input.size % 3 != 0:
......@@ -3780,12 +3802,13 @@ def get_output_layer(input, arg_name, name=None, layer_attr=None):
:param name: The name of this layer. It is optional.
:type name: basestring
:param input: get output layer's input. And this layer should contains
:param input: The input layer. And this layer should contain
multiple outputs.
:type input: LayerOutput
:param arg_name: Output name from input.
:param arg_name: The name of the output to be extracted from the input layer.
:type arg_name: basestring
:param layer_attr: Layer's extra attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -3842,17 +3865,20 @@ def recurrent_layer(input,
:param input: The input of this layer.
:type input: LayerOutput
:param act: Activation type. TanhActivation is the default.
:param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero.
:param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute,
no bias is defined. If the parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: parameter attribute.
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional.
:type name: basestring
:param layer_attr: Layer Attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -3877,7 +3903,7 @@ def recurrent_layer(input,
class StaticInput(object):
"""
StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence.
and can be a sequence or non-sequence.
:param size: DEPRECATED
:param is_seq: DEPRECATED
"""
......@@ -3910,8 +3936,8 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
Recurrent layer group is an extremely flexible recurrent unit in
PaddlePaddle. As long as the user defines the calculation done within a
time step, PaddlePaddle will iterate such a recurrent calculation over
sequence input. This is extremely usefull for attention based model, or
Neural Turning Machine like models.
sequence input. This is useful for attention-based models, or Neural
Turning Machine like models.
The basic usage (time steps) is:
......@@ -3933,18 +3959,17 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
demo/seqToseq/seqToseq_net.py
- sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf
:param step: recurrent one time step function.The input of this function is
input of the group. The return of this function will be
recurrent group's return value.
:param step: A step function which takes the input of recurrent_group as its own
input and returns values as recurrent_group's output every time step.
The recurrent group scatter a sequence into time steps. And
for each time step, will invoke step function, and return
a time step result. Then gather each time step of output into
The recurrent group scatters a sequence into time steps. And
for each time step, it will invoke step function, and return
a time step result. Then gather outputs of each time step into
layer group's output.
:type step: callable
:param name: recurrent_group's name.
:param name: The recurrent_group's name. It is optional.
:type name: basestring
:param input: Input links array.
......@@ -3952,11 +3977,11 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
LayerOutput will be scattered into time steps.
SubsequenceInput will be scattered into sequence steps.
StaticInput will be imported to each time step, and doesn't change
through time. It's a mechanism to access layer outside step function.
over time. It's a mechanism to access layer outside step function.
:type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple
:param reverse: If reverse is set true, the recurrent unit will process the
:param reverse: If reverse is set to True, the recurrent unit will process the
input sequence in a reverse order.
:type reverse: bool
......@@ -4091,7 +4116,8 @@ def maxid_layer(input, name=None, layer_attr=None):
:type input: LayerOutput
:param name: The name of this layer. It is optional.
:type name: basestring
:param layer_attr: extra layer attributes.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4124,11 +4150,12 @@ def out_prod_layer(input1, input2, name=None, layer_attr=None):
:param name: The name of this layer. It is optional.
:type name: basestring
:param input1: The first input layer name.
:param input1: The first input layer.
:type input: LayerOutput
:param input2: The second input layer name.
:param input2: The second input layer.
:type input2: LayerOutput
:param layer_attr: extra layer attributes.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4167,9 +4194,10 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:type name: basestring
:param input: The input of this layer.
:type input: LayerOutput
:param eos_id: end id of sequence
:param eos_id: End id of sequence
:type eos_id: int
:param layer_attr: extra layer attributes.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4230,8 +4258,9 @@ def beam_search(step,
- machine translation : demo/seqToseq/translation/gen.conf \
demo/seqToseq/seqToseq_net.py
:param name: Name of the recurrent unit that generates sequences.
:type name: base string
:param name: The name of the recurrent unit that is responsible for
generating sequences. It is optional.
:type name: basestring
:param step: A callable function that defines the calculation in a time
step, and it is applied to sequences with arbitrary length by
sharing a same set of weights.
......@@ -4356,16 +4385,18 @@ def square_error_cost(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param input: Network prediction.
:param input: The first input layer.
:type input: LayerOutput
:param label: Data label.
:param label: The input label.
:type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost.
It is an optional argument.
:param weight: The weight layer defines a weight for each sample in the
mini-batch. It is optional.
:type weight: LayerOutput
:param coeff: The coefficient affects the gradient in the backward.
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float
:param layer_attr: layer's extra attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4398,17 +4429,20 @@ def classification_cost(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param input: input layer name. network output.
:param input: The first input layer.
:type input: LayerOutput
:param label: label layer name. data_layer often.
:param label: The input label.
:type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost.
It is an optional argument.
:param weight: The weight layer defines a weight for each sample in the
mini-batch. It is optional.
:type weight: LayerOutput
:param evaluator: Evaluator method.
:param layer_attr: layer's extra attribute.
:param evaluator: Evaluator method. classification_error_evaluator is the default.
:type evaluator: Evaluator method
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:param coeff: The coefficient affects the gradient in the backward.
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4461,7 +4495,7 @@ def conv_operator(img,
Different from img_conv_layer, conv_op is an Operator, which can be used
in mixed_layer. And conv_op takes two inputs to perform convolution.
The first input is the image and the second is filter kernel. It only
support GPU mode.
supports GPU mode.
The example usage is:
......@@ -4473,27 +4507,31 @@ def conv_operator(img,
num_filters=64,
num_channels=64)
:param img: input image
:param img: The input image.
:type img: LayerOutput
:param filter: input filter
:param filter: The input filter.
:type filter: LayerOutput
:param filter_size: The x dimension of a filter kernel.
:param filter_size: The dimension of the filter kernel on the x axis.
:type filter_size: int
:param filter_size_y: The y dimension of a filter kernel. Since
PaddlePaddle now supports rectangular filters,
the filter's shape can be (filter_size, filter_size_y).
:param filter_size_y: The dimension of the filter kernel on the y axis.
If the parameter is not set or set to None, it will
set to 'filter_size' automatically.
:type filter_size_y: int
:param num_filters: channel of output data.
:param num_filters: The number of the output channels.
:type num_filters: int
:param num_channels: channel of input data.
:param num_channels: The number of the input channels. If the parameter is not set
or set to None, it will be automatically set to the channel
number of the 'img'.
:type num_channels: int
:param stride: The x dimension of the stride.
:param stride: The stride on the x axis.
:type stride: int
:param stride_y: The y dimension of the stride.
:param stride_y: The stride on the y axis. If the parameter is not set or
set to None, it will be set to 'stride' automatically.
:type stride_y: int
:param padding: The x dimension of padding.
:param padding: The padding size on the x axis.
:type padding: int
:param padding_y: The y dimension of padding.
:param padding_y: The padding size on the y axis. If the parameter is not set
or set to None, it will be set to 'padding' automatically.
:type padding_y: int
:return: A ConvOperator Object.
:rtype: ConvOperator
......@@ -4544,9 +4582,9 @@ def conv_projection(input,
param_attr=None,
trans=False):
"""
Different from img_conv_layer and conv_op, conv_projection is an Projection,
which can be used in mixed_layer and conat_layer. It use cudnn to implement
conv and only support GPU mode.
Different from img_conv_layer and conv_op, conv_projection is a Projection,
which can be used in mixed_layer and concat_layer. It uses cudnn to implement
convolution and only supports GPU mode.
The example usage is:
......@@ -4559,32 +4597,45 @@ def conv_projection(input,
:param input: The input of this layer.
:type input: LayerOutput
:param filter_size: The x dimension of a filter kernel.
:type filter_size: int
:param filter_size_y: The y dimension of a filter kernel. Since
PaddlePaddle now supports rectangular filters,
the filter's shape can be (filter_size, filter_size_y).
:param filter_size: The dimensions of the filter kernel. If the parameter is
set to one integer, the two dimensions on x and y axises
will be same when filter_size_y is not set. If it is set
to a list, the first element indicates the dimension on
the x axis, and the second is used to specify the dimension
on the y axis when filter_size is not provided.
:type filter_size: int | tuple | list
:param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter
is not set, it will be set automatically according to filter_size.
:type filter_size_y: int
:param num_filters: channel of output data.
:param num_filters: The number of filters.
:type num_filters: int
:param num_channels: channel of input data.
:param num_channels: The number of the input channels.
:type num_channels: int
:param stride: The x dimension of the stride.
:type stride: int
:param stride_y: The y dimension of the stride.
:param stride: The strides. If the parameter is set to one integer, the strides
on x and y axises will be same when stride_y is not set. If it is
set to a list, the first element indicates the stride on the x axis,
and the second is used to specify the stride on the y axis when
stride_y is not provided.
:type stride: int | tuple | list
:param stride_y: The stride on the y axis.
:type stride_y: int
:param padding: The x dimension of padding.
:type padding: int
:param padding_y: The y dimension of padding.
:param padding: The padding sizes. If the parameter is set to one integer, the padding
sizes on x and y axises will be same when padding_y is not set. If it
is set to a list, the first element indicates the padding size on the
x axis, and the second is used to specify the padding size on the y axis
when padding_y is not provided.
:type padding: int | tuple | list
:param padding_y: The padding size on the y axis.
:type padding_y: int
:param groups: The group number.
:type groups: int
:param param_attr: Convolution param attribute. None means default attribute
:param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param trans: whether it is convTrans or conv
:param trans: Whether it is ConvTransProjection or ConvProjection
:type trans: bool
:return: A DotMulProjection Object.
:rtype: DotMulProjection
:return: A Projection Object.
:rtype: ConvTransProjection | ConvProjection
"""
if num_channels is None:
assert input.num_filters is not None
......@@ -4649,13 +4700,13 @@ def pad_layer(input,
layer_attr=None):
"""
This operation pads zeros to the input data according to pad_c,pad_h
and pad_w. pad_c, pad_h, pad_w specifies the which dimension and size
of padding. And the input data shape is NCHW.
and pad_w. pad_c, pad_h, pad_w specify the size in the corresponding
dimension. And the input data shape is NCHW.
For example, pad_c=[2,3] means padding 2 zeros before the
input data and 3 zeros after the input data in channel dimension.
pad_h means padding zeros in height dimension. pad_w means padding zeros
in width dimension.
For example, pad_c=[2,3] means padding 2 zeros before the input data
and 3 zeros after the input data in the channel dimension. pad_h means
padding zeros in the height dimension. pad_w means padding zeros in the
width dimension.
For example,
......@@ -4692,13 +4743,14 @@ def pad_layer(input,
:param input: The input of this layer.
:type input: LayerOutput
:param pad_c: padding size in channel dimension.
:param pad_c: The padding size in the channel dimension.
:type pad_c: list | None
:param pad_h: padding size in height dimension.
:param pad_h: The padding size in the height dimension.
:type pad_h: list | None
:param pad_w: padding size in width dimension.
:param pad_w: The padding size in the width dimension.
:type pad_w: list | None
:param layer_attr: Extra Layer Attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:param name: The name of this layer. It is optional.
:type name: basestring
......@@ -4747,7 +4799,7 @@ def pad_layer(input,
@layer_support()
def conv_shift_layer(a, b, name=None, layer_attr=None):
"""
This layer performs cyclic convolution for two input. For example:
This layer performs cyclic convolution on two inputs. For example:
- a[in]: contains M elements.
- b[in]: contains N elements (N should be odd).
- c[out]: contains M elements.
......@@ -4756,7 +4808,7 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
c[i] = \sum_{j=-(N-1)/2}^{(N-1)/2}a_{i+j} * b_{j}
In this formular:
In this formula:
- a's index is computed modulo M. When it is negative, then get item from
the right side (which is the end of array) to the left.
- b's index is computed modulo N. When it is negative, then get item from
......@@ -4770,11 +4822,12 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
:param name: The name of this layer. It is optional.
:type name: basestring
:param a: Input layer a.
:param a: The first input of this layer.
:type a: LayerOutput
:param b: input layer b.
:param b: The second input of this layer.
:type b: LayerOutput
:param layer_attr: layer's extra attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4805,8 +4858,8 @@ def tensor_layer(a,
bias_attr=None,
layer_attr=None):
"""
This layer performs tensor operation for two input.
For example, each sample:
This layer performs tensor operation on two inputs.
For example:
.. math::
y_{i} = a * W_{i} * {b^\mathrm{T}}, i=0,1,...,K-1
......@@ -4826,21 +4879,24 @@ def tensor_layer(a,
:param name: The name of this layer. It is optional.
:type name: basestring
:param a: Input layer a.
:param a: The first input of this layer.
:type a: LayerOutput
:param b: input layer b.
:param b: The second input of this layer.
:type b: LayerOutput
:param size: the layer dimension.
:type size: int.
:param act: Activation type. LinearActivation is the default.
:param size: The dimension of this layer.
:type size: int
:param act: Activation type. LinearActivation is the default activation.
:type act: BaseActivation
:param param_attr: The Parameter Attribute.
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero.
:param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4876,7 +4932,7 @@ def selective_fc_layer(input,
layer_attr=None):
"""
Selectived fully connected layer. Different from fc_layer, the output
of this layer maybe sparse. It requires an additional input to indicate
of this layer can be sparse. It requires an additional input to indicate
several selected columns for output. If the selected columns is not
specified, selective_fc_layer acts exactly like fc_layer.
......@@ -4890,21 +4946,34 @@ def selective_fc_layer(input,
:type name: basestring
:param input: The input of this layer.
:type input: LayerOutput | list | tuple
:param select: The select layer. The output of select layer should be a
sparse binary matrix, and treat as the mask of selective fc.
If is None, acts exactly like fc_layer.
:param select: The layer to select columns to output. It should be a sparse
binary matrix, and is treated as the mask of selective fc. If
it is not set or set to None, selective_fc_layer acts exactly
like fc_layer.
:type select: LayerOutput
:param size: The layer dimension.
:param size: The dimension of this layer, which should be equal to that of
the layer 'select'.
:type size: int
:param act: Activation type. TanhActivation is the default.
:param act: Activation type. TanhActivation is the default activation.
:type act: BaseActivation
:param param_attr: The Parameter Attribute.
:param pass_generation: The flag which indicates whether it is during generation.
:type pass_generation: bool
:param has_selected_colums: The flag which indicates whether the parameter 'select'
has been set. True is the default.
:type has_selected_colums: bool
:param mul_ratio: A ratio helps to judge how sparse the output is and determine
the computation method for speed consideration.
:type mul_ratio: float
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
parameter is set to True, the bias is initialized to zero.
:param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -4955,7 +5024,7 @@ def selective_fc_layer(input,
@layer_support()
def sampling_id_layer(input, name=None, layer_attr=None):
"""
A layer for sampling id from multinomial distribution from the input layer.
A layer for sampling id from a multinomial distribution from the input layer.
Sampling one id for one sample.
The simple usage is:
......@@ -4968,8 +5037,9 @@ def sampling_id_layer(input, name=None, layer_attr=None):
:type input: LayerOutput
:param name: The name of this layer. It is optional.
:type name: basestring
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -4990,8 +5060,7 @@ def slope_intercept_layer(input,
intercept=0.0,
layer_attr=None):
"""
This layer for applying a slope and an intercept to the input
element-wise. There is no activation and weight.
This layer for applying a slope and an intercept to the input.
.. math::
y = slope * x + intercept
......@@ -5006,12 +5075,13 @@ def slope_intercept_layer(input,
:type input: LayerOutput
:param name: The name of this layer. It is optional.
:type name: basestring
:param slope: the scale factor.
:type slope: float.
:param intercept: the offset.
:type intercept: float.
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:param slope: The scale factor.
:type slope: float
:param intercept: The offset.
:type intercept: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5066,12 +5136,13 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
:type weights: LayerOutput
:param vectors: The vector layer.
:type vectors: LayerOutput
:param size: the dimension of this layer.
:param size: The dimension of this layer.
:type size: int
:param name: The name of this layer. It is optional.
:type name: basestring
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5118,11 +5189,11 @@ def block_expand_layer(input,
outputW = 1 + (2 * padding_x + imgSizeW - block_x + stride_x - 1) / stride_x
The expand method is the same with ExpandConvLayer, but saved the transposed
The expanding method is the same with ExpandConvLayer, but saved the transposed
value. After expanding, output.sequenceStartPositions will store timeline.
The number of time steps are outputH * outputW and the dimension of each
The number of time steps is outputH * outputW and the dimension of each
time step is block_y * block_x * num_channels. This layer can be used after
convolution neural network, and before recurrent neural network.
convolutional neural network, and before recurrent neural network.
The simple usage is:
......@@ -5137,8 +5208,10 @@ def block_expand_layer(input,
:param input: The input of this layer.
:type input: LayerOutput
:param num_channels: The channel number of input layer.
:type num_channels: int | None
:param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to
the channels number of the input.
:type num_channels: int
:param block_x: The width of sub block.
:type block_x: int
:param block_y: The width of sub block.
......@@ -5152,9 +5225,10 @@ def block_expand_layer(input,
:param padding_y: The padding size in vertical direction.
:type padding_y: int
:param name: The name of this layer. It is optional.
:type name: None | basestring.
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:type name: basestring.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5184,12 +5258,19 @@ def block_expand_layer(input,
@layer_support()
def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
"""
A layer to do max out on conv layer output.
- Input: output of a conv layer.
- Output: feature map size same as input. Channel is (input channel) / groups.
A layer to do max out on convolutional layer output.
- Input: the output of a convolutional layer.
- Output: feature map size same as the input's, and its channel number is
(input channel) / groups.
So groups should be larger than 1, and the num of channels should be able
to devided by groups.
to be devided by groups.
Reference:
Maxout Networks
http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
Multi-digit Number Recognition from Street View Imagery using Deep Convolutional Neural Networks
https://arxiv.org/pdf/1312.6082v4.pdf
.. math::
y_{si+j} = \max_k x_{gsi + sk + j}
......@@ -5199,12 +5280,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
0 \le j < s
0 \le k < groups
Please refer to Paper:
- Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf
- Multi-digit Number Recognition from Street View \
Imagery using Deep Convolutional Neural Networks: \
https://arxiv.org/pdf/1312.6082v4.pdf
The simple usage is:
.. code-block:: python
......@@ -5215,14 +5290,16 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
:param input: The input of this layer.
:type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set
automatically from previous output.
:type num_channels: int | None
:param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to
the channels number of the input.
:type num_channels: int
:param groups: The group number of input layer.
:type groups: int
:param name: The name of this layer. It is optional.
:type name: None | basestring.
:param layer_attr: Extra Layer attribute.
:type name: basestring
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -5254,20 +5331,20 @@ def ctc_layer(input,
layer_attr=None):
"""
Connectionist Temporal Classification (CTC) is designed for temporal
classication task. That is, for sequence labeling problems where the
classication task. e.g. sequence labeling problems where the
alignment between the inputs and the target labels is unknown.
More details can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/
icml2006_GravesFGS06.pdf>`_
Reference:
Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
with Recurrent Neural Networks
http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note:
Considering the 'blank' label needed by CTC, you need to use
(num_classes + 1) as the input size. num_classes is the category number.
And the 'blank' is the last category index. So the size of 'input' layer, such as
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
should also be num_classes + 1.
Considering the 'blank' label needed by CTC, you need to use (num_classes + 1)
as the size of the input, where num_classes is the category number.
And the 'blank' is the last category index. So the size of 'input' layer (e.g.
fc_layer with softmax activation) should be (num_classes + 1). The size of
ctc_layer should also be (num_classes + 1).
The example usage is:
......@@ -5280,16 +5357,17 @@ def ctc_layer(input,
:param input: The input of this layer.
:type input: LayerOutput
:param label: The data layer of label with variable length.
:param label: The input label.
:type label: LayerOutput
:param size: category numbers + 1.
:param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int
:param name: The name of this layer. It is optional.
:type name: basestring | None
:param norm_by_times: Whether to normalization by times. False by default.
:type name: basestring
:param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5330,20 +5408,19 @@ def warp_ctc_layer(input,
building process, PaddlePaddle will clone the source codes, build and
install it to :code:`third_party/install/warpctc` directory.
More details of CTC can be found by referring to `Connectionist Temporal
Classification: Labelling Unsegmented Sequence Data with Recurrent
Neural Networks <http://machinelearning.wustl.edu/mlpapers/paper_files/
icml2006_GravesFGS06.pdf>`_.
Reference:
Connectionist Temporal Classification: Labelling Unsegmented Sequence Data
with Recurrent Neural Networks
http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf
Note:
- Let num_classes represent the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the input size.
Thus, the size of both warp_ctc layer and 'input' layer should be set to
num_classes + 1.
- Let num_classes represents the category number. Considering the 'blank'
label needed by CTC, you need to use (num_classes + 1) as the size of
warp_ctc layer.
- You can set 'blank' to any value ranged in [0, num_classes], which
should be consistent as that used in your labels.
should be consistent with those used in your labels.
- As a native 'softmax' activation is interated to the warp-ctc library,
'linear' activation is expected instead in the 'input' layer.
'linear' activation is expected to be used instead in the 'input' layer.
The example usage is:
......@@ -5357,18 +5434,19 @@ def warp_ctc_layer(input,
:param input: The input of this layer.
:type input: LayerOutput
:param label: The data layer of label with variable length.
:param label: The input label.
:type label: LayerOutput
:param size: category numbers + 1.
:param size: The dimension of this layer, which must be equal to (category number + 1).
:type size: int
:param name: The name of this layer. It is optional.
:type name: basestring | None
:param blank: the 'blank' label used in ctc
:type name: basestring
:param blank: The 'blank' label used in ctc.
:type blank: int
:param norm_by_times: Whether to normalization by times. False by default.
:param norm_by_times: Whether to do normalization by times. False is the default.
:type norm_by_times: bool
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5414,23 +5492,26 @@ def crf_layer(input,
label=label,
size=label_dim)
:param input: The first input layer is the feature.
:param input: The first input layer.
:type input: LayerOutput
:param label: The second input layer is label.
:param label: The input label.
:type label: LayerOutput
:param size: The category number.
:type size: int
:param weight: The third layer is "weight" of each sample, which is an
optional argument.
:param weight: The weight layer defines a weight for each sample in the
mini-batch. It is optional.
:type weight: LayerOutput
:param param_attr: Parameter attribute. None means default attribute
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional.
:type name: None | basestring
:param coeff: The coefficient affects the gradient in the backward.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5476,9 +5557,9 @@ def crf_decoding_layer(input,
"""
A layer for calculating the decoding sequence of sequential conditional
random field model. The decoding sequence is stored in output.ids.
If a second input is provided, it is treated as the ground-truth label, and
this layer will also calculate error. output.value[i] is 1 for incorrect
decoding or 0 for correct decoding.
If the input 'label' is provided, it is treated as the ground-truth label, and
this layer will also calculate error. output.value[i] is 1 for an incorrect
decoding and 0 for the correct.
The example usage is:
......@@ -5489,16 +5570,18 @@ def crf_decoding_layer(input,
:param input: The first input layer.
:type input: LayerOutput
:param size: size of this layer.
:param size: The dimension of this layer.
:type size: int
:param label: None or ground-truth label.
:type label: LayerOutput or None
:param param_attr: Parameter attribute. None means default attribute
:param label: The input label.
:type label: LayerOutput | None
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional.
:type name: None | basestring
:param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute | None
:type name: basestring
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -5545,8 +5628,7 @@ def nce_layer(input,
bias_attr=None,
layer_attr=None):
"""
Noise-contrastive estimation. This layer implements the method in the
following paper:
Noise-contrastive estimation.
Reference:
A fast and simple algorithm for training neural probabilistic language
......@@ -5562,37 +5644,40 @@ def nce_layer(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param input: The input layers. It should be a LayerOutput or a list/tuple
of LayerOutput.
:param input: The first input of this layer.
:type input: LayerOutput | list | tuple | collections.Sequence
:param label: The ground truth.
:param label: The input label.
:type label: LayerOutput
:param weight: The weight layer defines a weight for each sample in the
mini-batch. The default value is None.
mini-batch. It is optional.
:type weight: LayerOutput
:param num_classes: The class number.
:param num_classes: The number of classes.
:type num_classes: int
:param param_attr: The parameter attributes.
:type param_attr: ParameterAttribute|list
:param num_neg_samples: The number of sampled negative labels. The default
value is 10.
:param act: Activation type. SigmoidActivation is the default activation.
:type act: BaseActivation
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
:type param_attr: ParameterAttribute
:param num_neg_samples: The number of sampled negative labels. 10 is the
default value.
:type num_neg_samples: int
:param neg_distribution: The discrete noisy distribution over the output
space from which num_neg_samples negative labels
are sampled. If this parameter is not set, a
uniform distribution will be used. A user defined
uniform distribution will be used. A user-defined
distribution is a list whose length must be equal
to the num_classes. Each member of the list defines
the probability of a class given input x.
:type neg_distribution: list | tuple | collections.Sequence | None
:param bias_attr: The attribute for bias. If this parameter is set False or
any object whose type is not ParameterAttribute, no bias
is added. If this parameter is set True, the bias is
initialized to zero.
:param bias_attr: The parameter attribute for bias. If this parameter is set to
False or an object whose type is not ParameterAttribute,
no bias is defined. If this parameter is set to True,
the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer Attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: The LayerOutput object.
:return: LayerOutput object.
:rtype: LayerOutput
"""
if isinstance(input, LayerOutput):
......@@ -5659,11 +5744,11 @@ def rank_cost(left,
coeff=1.0,
layer_attr=None):
"""
A cost Layer for learning to rank using gradient descent. Details can refer
to `papers <http://research.microsoft.com/en-us/um/people/cburges/papers/
ICML_ranking.pdf>`_.
This layer contains at least three inputs. The weight is an optional
argument, which affects the cost.
A cost Layer for learning to rank using gradient descent.
Reference:
Learning to Rank using Gradient Descent
http://research.microsoft.com/en-us/um/people/cburges/papers/ICML_ranking.pdf
.. math::
......@@ -5694,14 +5779,16 @@ def rank_cost(left,
:type right: LayerOutput
:param label: Label is 1 or 0, means positive order and reverse order.
:type label: LayerOutput
:param weight: The weight affects the cost, namely the scale of cost.
It is an optional argument.
:param weight: The weight layer defines a weight for each sample in the
mini-batch. It is optional.
:type weight: LayerOutput
:param name: The name of this layer. It is optional.
:type name: None | basestring
:param coeff: The coefficient affects the gradient in the backward.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default value.
:type coeff: float
:param layer_attr: Extra Layer Attribute.
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -5746,25 +5833,25 @@ def lambda_cost(input,
NDCG_num=8,
max_sort_size=-1)
:param input: Samples of the same query should be loaded as sequence.
:param input: The first input of this layer, which is often a document
samples list of the same query and whose type must be sequence.
:type input: LayerOutput
:param score: The 2nd input. Score of each sample.
:param score: The scores of the samples.
:type input: LayerOutput
:param NDCG_num: The size of NDCG (Normalized Discounted Cumulative Gain),
e.g., 5 for NDCG@5. It must be less than or equal to the
minimum size of lists.
minimum size of the list.
:type NDCG_num: int
:param max_sort_size: The size of partial sorting in calculating gradient.
If max_sort_size = -1, then for each list, the
algorithm will sort the entire list to get gradient.
In other cases, max_sort_size must be greater than or
equal to NDCG_num. And if max_sort_size is greater
than the size of a list, the algorithm will sort the
entire list of get gradient.
:param max_sort_size: The size of partial sorting in calculating gradient. If
max_sort_size is equal to -1 or greater than the number
of the samples in the list, then the algorithm will sort
the entire list to compute the gradient. In other cases,
max_sort_size must be greater than or equal to NDCG_num.
:type max_sort_size: int
:param name: The name of this layer. It is optional.
:type name: None | basestring
:param layer_attr: Extra Layer Attribute.
:type name: basestring
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput
......@@ -5809,11 +5896,10 @@ def cross_entropy(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default.
1.0 is the default value.
:type coeff: float
:param weight: The cost of each sample is multiplied with each weight.
The weight should be a layer with size=1. Note that gradient
will not be calculated for weight.
:param weight: The weight layer defines a weight for each sample in the
mini-batch. It is optional.
:type weight: LayerOutout
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
......@@ -5858,7 +5944,7 @@ def cross_entropy_with_selfnorm(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default.
1.0 is the default value.
:type coeff: float
:param softmax_selfnorm_alpha: The scale factor affects the cost.
:type softmax_selfnorm_alpha: float
......@@ -5948,7 +6034,7 @@ def huber_regression_cost(input,
:param delta: The difference between the observed and predicted values.
:type delta: float
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default.
1.0 is the default value.
:type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
......@@ -5998,7 +6084,7 @@ def huber_classification_cost(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default.
1.0 is the default value.
:type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
......@@ -6043,7 +6129,7 @@ def multi_binary_label_cross_entropy(input,
:param name: The name of this layer. It is optional.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default.
1.0 is the default value.
:type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
......@@ -6214,7 +6300,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
:param name: The name of this layer. It is optional.
:type name: basestring
:param coeff: The weight of the gradient in the back propagation.
1.0 is the default.
1.0 is the default value.
:type coeff: float
:param layer_attr: The extra layer attribute. See ExtraLayerAttribute for
details.
......@@ -6366,7 +6452,7 @@ def row_conv_layer(input,
:param context_len: The context length equals the lookahead step number
plus one.
:type context_len: int
:param act: Activation Type. LinearActivation is the default.
:param act: Activation Type. LinearActivation is the default activation.
:type act: BaseActivation
:param param_attr: The parameter attribute. See ParameterAttribute for
details.
......@@ -6488,7 +6574,8 @@ def gated_unit_layer(input,
:type input: LayerOutput
:param size: The dimension of this layer's output.
:type size: int
:param act: Activation type of the projection. LinearActivation is the default.
:param act: Activation type of the projection. LinearActivation is the default
activation.
:type act: BaseActivation
:param name: The name of this layer. It is optional.
:type name: basestring
......@@ -6498,9 +6585,9 @@ def gated_unit_layer(input,
:param gate_param_attr: The parameter attribute of the gate. See ParameterAttribute
for details.
:type gate_param_attr: ParameterAttribute
:param gate_bias_attr: The bias attribute of the gate. If the parameter is set to False or
:param gate_bias_attr: The bias attribute of the gate. If this parameter is set to False or
an object whose type is not ParameterAttribute, no bias is defined.
If the parameter is set to True, the bias is initialized to zero.
If this parameter is set to True, the bias is initialized to zero.
:type gate_bias_attr: ParameterAttribute | bool | None | Any
:param inproj_attr: Extra layer attributes of the projection. See ExtraLayerAttribute for
details.
......@@ -6508,9 +6595,9 @@ def gated_unit_layer(input,
:param inproj_param_attr: The parameter attribute of the projection. See ParameterAttribute
for details.
:type inproj_param_attr: ParameterAttribute
:param inproj_bias_attr: The bias attribute of the projection. If the parameter is set to False
:param inproj_bias_attr: The bias attribute of the projection. If this parameter is set to False
or an object whose type is not ParameterAttribute, no bias is defined.
If the parameter is set to True, the bias is initialized to zero.
If this parameter is set to True, the bias is initialized to zero.
:type inproj_bias_attr: ParameterAttribute | bool | None | Any
:param layer_attr: Extra layer attribute of the product. See ExtraLayerAttribute for
details.
......@@ -6869,7 +6956,7 @@ def img_conv3d_layer(input,
:type filter_size: int | tuple | list
:param num_filters: The number of filters in each group.
:type num_filters: int
:param act: Activation type. ReluActivation is the default.
:param act: Activation type. ReluActivation is the default activation.
:type act: BaseActivation
:param groups: The number of the filter groups.
:type groups: int
......@@ -6884,8 +6971,8 @@ def img_conv3d_layer(input,
parameter is set to True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: The number of input channels. If the parameter is not set or
set to None, its actual value will be automatically set to
the channels number of the input .
set to None, its actual value will be automatically set to
the channels number of the input.
:type num_channels: int
:param param_attr: The parameter attribute of the convolution. See ParameterAttribute for
details.
......@@ -7061,7 +7148,7 @@ def sub_seq_layer(input, offsets, sizes, act=None, bias_attr=None, name=None):
:type offsets: LayerOutput
:param sizes: The sizes of the sub-sequences, which should be sequence type.
:type sizes: LayerOutput
:param act: Activation type, LinearActivation is the default.
:param act: Activation type, LinearActivation is the default activation.
:type act: BaseActivation.
:param bias_attr: The bias attribute. If the parameter is set to False or an object
whose type is not ParameterAttribute, no bias is defined. If the
......
......@@ -681,34 +681,42 @@ def lstmemory_unit(input,
state_act=TanhActivation())
:param input: input layer.
:param input: Input layer.
:type input: LayerOutput
:param out_memory: output of previous time step
:param out_memory: The output of previous time step.
:type out_memory: LayerOutput | None
:param name: lstmemory unit name.
:param name: The lstmemory unit name.
:type name: basestring
:param size: lstmemory unit size.
:param size: The lstmemory unit size.
:type size: int
:param param_attr: parameter attribute, None means default attribute.
:param param_attr: The parameter attribute for the weights in
input to hidden projection.
None means default attribute.
:type param_attr: ParameterAttribute
:param act: last activiation type of lstm.
:param act: The last activiation type of lstm.
:type act: BaseActivation
:param gate_act: gate activiation type of lstm.
:param gate_act: The gate activiation type of lstm.
:type gate_act: BaseActivation
:param state_act: state activiation type of lstm.
:param state_act: The state activiation type of lstm.
:type state_act: BaseActivation
:param input_proj_bias_attr: bias attribute for input to hidden projection.
False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:param input_proj_bias_attr: The parameter attribute for the bias in
input to hidden projection.
False or None means no bias.
If this parameter is set to True,
the bias is initialized to zero.
:type input_proj_bias_attr: ParameterAttribute|bool|None
:param input_proj_layer_attr: The extra layer attribute for
input to hidden projection of the LSTM unit,
such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False|None
:param lstm_layer_attr: extra attribute of lstm layer.
:param lstm_bias_attr: The parameter attribute for the bias in lstm layer.
False or None means no bias.
If this parameter is set to True,
the bias is initialized to zero.
:type lstm_bias_attr: ParameterAttribute|True|None
:param lstm_layer_attr: The extra attribute of lstm layer.
:type lstm_layer_attr: ExtraLayerAttribute
:return: lstmemory unit name.
:return: The lstmemory unit name.
:rtype: LayerOutput
"""
if size is None:
......@@ -786,34 +794,42 @@ def lstmemory_group(input,
gate_act=SigmoidActivation(),
state_act=TanhActivation())
:param input: input layer.
:param input: Input layer.
:type input: LayerOutput
:param size: lstmemory group size.
:param size: The lstmemory group size.
:type size: int
:param name: name of lstmemory group.
:param name: The name of lstmemory group.
:type name: basestring
:param out_memory: output of previous time step.
:param out_memory: The output of previous time step.
:type out_memory: LayerOutput | None
:param reverse: process the input in a reverse order or not.
:param reverse: Process the input in a reverse order or not.
:type reverse: bool
:param param_attr: parameter attribute, None means default attribute.
:param param_attr: The parameter attribute for the weights in
input to hidden projection.
None means default attribute.
:type param_attr: ParameterAttribute
:param act: last activiation type of lstm.
:param act: The last activiation type of lstm.
:type act: BaseActivation
:param gate_act: gate activiation type of lstm.
:param gate_act: The gate activiation type of lstm.
:type gate_act: BaseActivation
:param state_act: state activiation type of lstm.
:param state_act: The state activiation type of lstm.
:type state_act: BaseActivation
:param lstm_bias_attr: bias parameter attribute of lstm layer.
False means no bias, None means default bias.
:type lstm_bias_attr: ParameterAttribute|False|None
:param input_proj_bias_attr: bias attribute for input to hidden projection.
False means no bias, None means default bias.
:type input_proj_bias_attr: ParameterAttribute|False|None
:param input_proj_layer_attr: extra layer attribute for input to hidden
projection of the LSTM unit, such as dropout, error clipping.
:param input_proj_bias_attr: The parameter attribute for the bias in
input to hidden projection.
False or None means no bias.
If this parameter is set to True,
the bias is initialized to zero.
:type input_proj_bias_attr: ParameterAttribute|bool|None
:param input_proj_layer_attr: The extra layer attribute for
input to hidden projection of the LSTM unit,
such as dropout, error clipping.
:type input_proj_layer_attr: ExtraLayerAttribute
:param lstm_layer_attr: lstm layer's extra attribute.
:param lstm_bias_attr: The parameter attribute for the bias in lstm layer.
False or None means no bias.
If this parameter is set to True,
the bias is initialized to zero.
:type lstm_bias_attr: ParameterAttribute|True|None
:param lstm_layer_attr: The extra attribute of lstm layer.
:type lstm_layer_attr: ExtraLayerAttribute
:return: the lstmemory group.
:rtype: LayerOutput
......
......@@ -15,8 +15,8 @@
"""
__all__ = [
"BasePoolingType", "MaxPooling", "AvgPooling", "CudnnMaxPooling",
"CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
"BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling",
"CudnnMaxPooling", "CudnnAvgPooling", "SumPooling", "SquareRootNPooling"
]
......@@ -55,6 +55,19 @@ class MaxPooling(BasePoolingType):
self.output_max_index = output_max_index
class MaxWithMaskPooling(BasePoolingType):
"""
MaxWithMask pooling.
Not only return the very large values for each dimension in sequence or time steps,
but also the location indices of found maxinum values.
"""
def __init__(self):
BasePoolingType.__init__(self, "max-pool-with-mask")
class CudnnMaxPooling(BasePoolingType):
"""
Cudnn max pooling only support GPU. Return the maxinum value in the
......
......@@ -28,6 +28,8 @@ layers {
stride_y: 1
output_y: 227
img_size_y: 256
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
......
......@@ -28,6 +28,8 @@ layers {
stride_y: 1
output_y: 227
img_size_y: 256
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
......
......@@ -28,6 +28,8 @@ layers {
stride_y: 1
output_y: 48
img_size_y: 48
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
......
......@@ -30,6 +30,8 @@ layers {
stride_y: 1
output_y: 48
img_size_y: 48
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
......@@ -105,6 +107,8 @@ layers {
stride_y: 1
output_y: 24
img_size_y: 24
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_1__.wbias"
......
......@@ -30,6 +30,8 @@ layers {
stride_y: 1
output_y: 48
img_size_y: 48
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
......
......@@ -36,6 +36,8 @@ layers {
stride_y: 1
output_y: 14
img_size_y: 14
dilation: 1
dilation_y: 1
}
}
bias_parameter_name: "___conv_0__.wbias"
......
......@@ -37,6 +37,8 @@ import model
import paddle.trainer.config_parser as cp
__all__ = [
'default_startup_program',
'default_main_program',
'optimizer',
'layer',
'activation',
......
from paddle.v2.framework import framework as framework
from paddle.v2.fluid import framework as framework
__all__ = ['append_backward_ops']
......
......@@ -13,7 +13,7 @@ A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope.
"""
import paddle.v2.framework.core
import paddle.v2.fluid.core
import threading
__tl_scope__ = threading.local()
......@@ -27,13 +27,13 @@ __all__ = [
def get_cur_scope():
"""
Get current scope.
:rtype: paddle.v2.framework.core.Scope
:rtype: paddle.v2.fluid.core.Scope
"""
cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None)
if cur_scope_stack is None:
__tl_scope__.cur_scope = list()
if len(__tl_scope__.cur_scope) == 0:
__tl_scope__.cur_scope.append(paddle.v2.framework.core.Scope())
__tl_scope__.cur_scope.append(paddle.v2.fluid.core.Scope())
return __tl_scope__.cur_scope[-1]
......
import numpy as np
from paddle.v2.fluid.framework import Program, g_main_program, unique_name, Variable
import paddle.v2.fluid.core as core
def _clone_var_in_block_(block, var):
assert isinstance(var, Variable)
return block.create_var(
name=var.name,
shape=var.shape,
dtype=var.data_type,
type=var.type,
lod_level=var.lod_level,
persistable=True)
class Evaluator(object):
"""
Evalutor Base class.
create metric states
add mini-batch evaluator caculate operator
add increment operator to accumulate the metric states
"""
def __init__(self, name, **kwargs):
"""
init the global states
"""
self._states = {}
if kwargs.has_key("main_program"):
self._main_program = kwargs.get("main_program")
else:
self._main_program = g_main_program
def _update_ops(self, *args, **kwargs):
"""
append update ops to the global states
"""
raise NotImplementedError()
def reset(self, executor, reset_program=None):
"""
Clear metric states at the begin of each pass/user specified batch
"""
if reset_program == None:
reset_program = Program()
else:
reset_program = program
block = reset_program.global_block()
for k, var in self._states.iteritems():
g_var = _clone_var_in_block_(block, var)
zeros = block.create_var(dtype="float32", persistable=True)
block.append_op(
type="fill_constant",
outputs={"Out": [zeros]},
attrs={
"shape": g_var.shape,
"value": .0,
"data_type": 5,
})
block.append_op(
type="scale", inputs={"X": zeros}, outputs={"Out": g_var})
executor.run(reset_program, fetch_list=self._states.values())
def eval(self, executor, eval_program=None):
"""
Merge the mini-batch statistics to form the evaluation result for multiple mini-batches.
"""
raise NotImplementedError()
class Accuracy(Evaluator):
"""
Accuracy need two state variable Total, Correct
"""
def __init__(self, *args, **kwargs):
super(Accuracy, self).__init__("accuracy", **kwargs)
block = self._main_program.global_block()
g_total = block.create_var(
name=unique_name("Total"),
persistable=True,
dtype="int64",
shape=[1])
g_correct = block.create_var(
name=unique_name("Correct"),
persistable=True,
dtype="int64",
shape=[1])
self._states["Total"] = g_total
self._states["Correct"] = g_correct
def _update_ops(self, input, label, k=1, **kwargs):
block = self._main_program.global_block()
topk_out = block.create_var(dtype=input.data_type)
topk_indices = block.create_var(dtype="int64")
block.append_op(
type="top_k",
inputs={"X": [input]},
outputs={"Out": [topk_out],
"Indices": [topk_indices]},
attrs={"k": k})
acc_out = block.create_var(dtype=kwargs.get("out_dtype", "float32"))
correct = block.create_var(dtype="int64", persistable=True)
total = block.create_var(dtype="int64", persistable=True)
block.append_op(
type="accuracy",
inputs={
"Out": [topk_out],
"Indices": [topk_indices],
"Label": [label]
},
outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
block.append_op(
type="cast",
inputs={"X": [self._states["Total"]]},
outputs={"Out": [self._states["Total"]]},
attrs={
"in_data_type": 5, # float32
"out_data_type": 2, #int32
})
block.append_op(
type="cast",
inputs={"X": [self._states["Correct"]]},
outputs={"Out": [self._states["Correct"]]},
attrs={
"in_data_type": 5,
"out_data_type": 2,
})
block.append_op(
type="elementwise_add",
inputs={"X": [self._states["Total"]],
"Y": [total]},
outputs={"Out": [self._states["Total"]]})
block.append_op(
type="elementwise_add",
inputs={"X": [self._states["Correct"]],
"Y": [correct]},
outputs={"Out": [self._states["Correct"]]})
return acc_out
def eval(self, executor, eval_program=None):
if eval_program != None:
eval_program = eval_program
else:
eval_program = Program()
block = eval_program.global_block()
eval_out = block.create_var(dtype=self._states["Total"].data_type)
e_total = _clone_var_in_block_(block, self._states["Total"])
e_correct = _clone_var_in_block_(block, self._states["Correct"])
block.append_op(
type="cast",
inputs={"X": [e_total]},
outputs={"Out": [e_total]},
attrs={
"in_data_type": 2, #int32
"out_data_type": 5, #float32
})
block.append_op(
type="cast",
inputs={"X": [e_correct]},
outputs={"Out": [e_correct]},
attrs={
"in_data_type": 2,
"out_data_type": 5,
})
block.append_op(
type="elementwise_div",
inputs={"X": e_correct,
"Y": e_total},
outputs={"Out": eval_out})
out = executor.run(eval_program, fetch_list=[eval_out])
return np.array(out[0])
def accuracy(*args, **kwargs):
cls = Accuracy(*args, **kwargs)
out = cls._update_ops(*args, **kwargs)
return cls, out
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import Block, Program, g_main_program
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import Block, Program, g_main_program
g_scope = core.Scope()
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
import paddle.v2.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
import collections
import numpy as np
import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator']
__all__ = ['Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'default_main_program']
def unique_name(prefix):
......@@ -562,3 +562,9 @@ class Parameter(Variable):
# program is a global instance.
g_main_program = Program()
g_startup_program = Program()
def default_startup_program():
return g_startup_program
def default_main_program():
return g_main_program
import paddle.v2.framework.framework as framework
import paddle.v2.fluid.framework as framework
import numpy as np
__all__ = [
......
import os
import cPickle as pickle
from paddle.v2.framework.framework import Program, Parameter, g_main_program, \
from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \
Variable
__all__ = [
......
import copy
import itertools
from paddle.v2.framework.framework import Variable, g_main_program, \
from paddle.v2.fluid.framework import Variable, g_main_program, \
g_startup_program, unique_name, Program
from paddle.v2.framework.initializer import ConstantInitializer, \
from paddle.v2.fluid.initializer import ConstantInitializer, \
UniformInitializer, XavierInitializer
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
import paddle.v2.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \
Operator
from paddle.v2.framework.initializer import ConstantInitializer, \
from paddle.v2.fluid.initializer import ConstantInitializer, \
NormalInitializer
from paddle.v2.framework.layer_helper import LayerHelper, unique_name
from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
import re
import cStringIO
......@@ -574,7 +574,9 @@ def accuracy(input, label, k=1, **kwargs):
"Indices": [topk_indices]},
attrs={"k": k})
acc_out_dtype = kwargs.get("out_dtype", "float32")
acc_out = helper.create_tmp_variable(dtype=acc_out_dtype)
acc_out = helper.create_tmp_variable(dtype="float32")
correct = helper.create_tmp_variable(dtype="int64")
total = helper.create_tmp_variable(dtype="int64")
helper.append_op(
type="accuracy",
inputs={
......@@ -582,7 +584,11 @@ def accuracy(input, label, k=1, **kwargs):
"Indices": [topk_indices],
"Label": [label]
},
outputs={"Accuracy": [acc_out]})
outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
return acc_out
......@@ -839,6 +845,23 @@ def batch_norm(input,
return helper.append_activation(batch_norm_out)
def beam_search_decode(ids, scores, main_program=None, startup_program=None):
helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_tmp_variable(dtype=ids.data_type)
sentence_scores = helper.create_tmp_variable(dtype=ids.data_type)
helper.append_op(
type="beam_search_decode",
inputs={"Ids": ids,
"Scores": scores},
outputs={
"SentenceIds": sentence_ids,
"SentenceScores": sentence_scores
})
return sentence_ids, sentence_scores
class BlockGuard(object):
"""
BlockGuard class.
......
......@@ -3,8 +3,8 @@ import json
import logging
from collections import defaultdict
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
import paddle.v2.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
......
import paddle.v2.framework.layers as layers
import paddle.v2.fluid.layers as layers
__all__ = ["simple_img_conv_pool", "sequence_conv_pool"]
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
import paddle.v2.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
def get_all_op_protos():
......
from collections import defaultdict
import paddle.v2.framework.framework as framework
from paddle.v2.framework.framework import unique_name, Program
from paddle.v2.framework.backward import append_backward_ops
from paddle.v2.framework.initializer import ConstantInitializer
from paddle.v2.framework.regularizer import append_regularization_ops
from paddle.v2.framework.layer_helper import LayerHelper
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import unique_name, Program
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.initializer import ConstantInitializer
from paddle.v2.fluid.regularizer import append_regularization_ops
from paddle.v2.fluid.layer_helper import LayerHelper
__all__ = [
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
......
import paddle.v2.framework.framework as framework
import paddle.v2.fluid.framework as framework
__all__ = [
'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer'
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program
from paddle.v2.framework.io import save_persistables, load_persistables
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.io import save_persistables, load_persistables
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
x = layers.data(
name='x',
shape=[13],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
y_predict = layers.fc(input=x,
size=1,
act=None,
main_program=main_program,
startup_program=startup_program)
act=None)
y = layers.data(
name='y',
shape=[1],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
cost = layers.square_error_cost(
input=y_predict,
label=y,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
label=y)
avg_cost = layers.mean(x=cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program)
opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20
......@@ -52,12 +40,12 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", main_program=main_program)
load_persistables(exe, "./fit_a_line.model/", main_program=main_program)
save_persistables(exe, "./fit_a_line.model/")
load_persistables(exe, "./fit_a_line.model/")
for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32")
......@@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor()
tensor_y.set(y_data, place)
# print tensor_y.get_dims()
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x,
'y': tensor_y},
fetch_list=[avg_cost])
......
import numpy as np
import paddle.v2 as paddle
import paddle.v2.framework.core as core
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_startup_program, g_main_program
from paddle.v2.framework.initializer import XavierInitializer
import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def resnet_cifar10(input, depth=32):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
main_program=None,
startup_program=None):
act='relu'):
tmp = layers.conv2d(
input=input,
filter_size=filter_size,
......@@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
stride=stride,
padding=padding,
act=None,
bias_attr=False,
main_program=main_program,
startup_program=startup_program)
bias_attr=False)
return layers.batch_norm(
input=tmp,
act=act,
main_program=main_program,
startup_program=startup_program)
act=act)
def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out:
......@@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def basicblock(input,
ch_in,
ch_out,
stride,
main_program=main_program,
startup_program=startup_program):
stride):
tmp = conv_bn_layer(
input,
ch_out,
3,
stride,
1,
main_program=main_program,
startup_program=startup_program)
1)
tmp = conv_bn_layer(
tmp,
ch_out,
3,
1,
1,
act=None,
main_program=main_program,
startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
act=None)
short = shortcut(input, ch_in, ch_out, stride)
return layers.elementwise_add(
x=tmp,
y=short,
act='relu',
main_program=main_program,
startup_program=startup_program)
act='relu')
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program,
startup_program):
tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride):
tmp = block_func(input, ch_in, ch_out, stride)
for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program)
tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp
assert (depth - 2) % 6 == 0
......@@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
ch_out=16,
filter_size=3,
stride=1,
padding=1,
main_program=main_program,
startup_program=startup_program)
padding=1)
res1 = layer_warp(
basicblock,
conv1,
16,
16,
n,
1,
main_program=main_program,
startup_program=startup_program)
1)
res2 = layer_warp(
basicblock,
res1,
16,
32,
n,
2,
main_program=main_program,
startup_program=startup_program)
2)
res3 = layer_warp(
basicblock,
res2,
32,
64,
n,
2,
main_program=main_program,
startup_program=startup_program)
2)
pool = layers.pool2d(
input=res3,
pool_size=8,
pool_type='avg',
pool_stride=1,
main_program=main_program,
startup_program=startup_program)
pool_stride=1)
return pool
def vgg16_bn_drop(input, main_program=None, startup_program=None):
def vgg16_bn_drop(input):
def conv_block(input,
num_filter,
groups,
dropouts,
main_program=None,
startup_program=None):
dropouts):
return nets.img_conv_group(
input=input,
pool_size=2,
......@@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None):
conv_act='relu',
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type='max',
main_program=main_program,
startup_program=startup_program)
pool_type='max')
conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program)
conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program)
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = layers.dropout(
x=conv5,
dropout_prob=0.5,
main_program=main_program,
startup_program=startup_program)
dropout_prob=0.5)
fc1 = layers.fc(input=drop,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()},
main_program=main_program,
startup_program=startup_program)
param_attr={"initializer": XavierInitializer()})
reshape1 = layers.reshape(
x=fc1,
shape=list(fc1.shape + (1, 1)),
main_program=main_program,
startup_program=startup_program)
shape=list(fc1.shape + (1, 1)))
bn = layers.batch_norm(
input=reshape1,
act='relu',
main_program=main_program,
startup_program=startup_program)
act='relu')
drop2 = layers.dropout(
x=bn,
dropout_prob=0.5,
main_program=main_program,
startup_program=startup_program)
dropout_prob=0.5)
fc2 = layers.fc(input=drop2,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()},
main_program=main_program,
startup_program=startup_program)
param_attr={"initializer": XavierInitializer()})
return fc2
......@@ -225,7 +180,7 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM):
batch_id = 0
......@@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place)
tensor_y.set(y_data, place)
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img,
"label": tensor_y},
fetch_list=[avg_cost, accuracy])
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.evaluator as evaluator
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
images = layers.data(
name='pixel',
shape=[1, 28, 28],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
label = layers.data(
name='label',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
conv_pool_1 = nets.simple_img_conv_pool(
input=images,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu",
main_program=main_program,
startup_program=startup_program)
act="relu")
conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu",
main_program=main_program,
startup_program=startup_program)
act="relu")
predict = layers.fc(input=conv_pool_2,
size=10,
act="softmax",
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
# optimizer = optimizer.MomentumOptimizer(learning_rate=0.1 / 128.0,
# momentum=0.9)
act="softmax")
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, startup_program)
opts = optimizer.minimize(avg_cost)
accuracy, acc_out = evaluator.accuracy(
input=predict,
label=label)
BATCH_SIZE = 50
PASS_NUM = 3
......@@ -75,10 +54,11 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM):
count = 0
accuracy.reset(exe)
for data in train_reader():
img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
data)).astype("float32")
......@@ -90,14 +70,20 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place)
tensor_y.set(y_data, place)
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img,
"label": tensor_y},
fetch_list=[avg_cost, accuracy])
fetch_list=[avg_cost, acc_out])
loss = np.array(outs[0])
acc = np.array(outs[1])
pass_acc = accuracy.eval(exe)
print "pass id : ", pass_id, pass_acc
# print loss, acc
if loss < 10.0 and acc > 0.9:
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good.
exit(0)
pass_acc = accuracy.eval(exe)
print "pass id : ", pass_id, pass_acc
exit(1)
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.regularizer import L2DecayRegularizer
from paddle.v2.framework.initializer import UniformInitializer
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.fluid.initializer import UniformInitializer
import numpy as np
BATCH_SIZE = 128
startup_program = Program()
main_program = Program()
image = layers.data(
name='x',
shape=[784],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
param_attr = {
'name': None,
......@@ -30,45 +25,30 @@ param_attr = {
hidden1 = layers.fc(input=image,
size=128,
act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr)
hidden2 = layers.fc(input=hidden1,
size=64,
act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr)
predict = layers.fc(input=hidden2,
size=10,
act='softmax',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr)
label = layers.data(
name='y',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
label=label)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, startup_program)
opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -78,7 +58,7 @@ train_reader = paddle.batch(
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM):
......@@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor()
tensor_y.set(y_data, place)
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x,
'y': tensor_y},
fetch_list=[avg_cost, accuracy])
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
is_sparse = True
use_gpu = False
IS_SPARSE = True
USE_GPU = False
BATCH_SIZE = 256
......@@ -25,99 +22,71 @@ def get_usr_combined_features():
uid = layers.data(
name='user_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
usr_emb = layers.embedding(
input=uid,
data_type='float32',
size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
usr_fc = layers.fc(input=usr_emb,
size=32,
main_program=main_program,
startup_program=startup_program)
size=32)
USR_GENDER_DICT_SIZE = 2
usr_gender_id = layers.data(
name='gender_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
usr_gender_emb = layers.embedding(
input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
usr_gender_fc = layers.fc(input=usr_gender_emb,
size=16,
main_program=main_program,
startup_program=startup_program)
size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
usr_age_id = layers.data(
name='age_id',
shape=[1],
data_type="int64",
main_program=main_program,
startup_program=startup_program)
data_type="int64")
usr_age_emb = layers.embedding(
input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16],
is_sparse=is_sparse,
param_attr={'name': 'age_table'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'})
usr_age_fc = layers.fc(input=usr_age_emb,
size=16,
main_program=main_program,
startup_program=startup_program)
size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
usr_job_id = layers.data(
name='job_id',
shape=[1],
data_type="int64",
main_program=main_program,
startup_program=startup_program)
data_type="int64")
usr_job_emb = layers.embedding(
input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
usr_job_fc = layers.fc(input=usr_job_emb,
size=16,
main_program=main_program,
startup_program=startup_program)
size=16)
concat_embed = layers.concat(
input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
axis=1,
main_program=main_program,
startup_program=startup_program)
axis=1)
usr_combined_features = layers.fc(input=concat_embed,
size=200,
act="tanh",
main_program=main_program,
startup_program=startup_program)
act="tanh")
return usr_combined_features
......@@ -129,83 +98,61 @@ def get_mov_combined_features():
mov_id = layers.data(
name='movie_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
mov_emb = layers.embedding(
input=mov_id,
data_type='float32',
size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'},
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
mov_fc = layers.fc(input=mov_emb,
size=32,
main_program=main_program,
startup_program=startup_program)
size=32)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
category_id = layers.data(
name='category_id',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
mov_categories_emb = layers.embedding(
input=category_id,
size=[CATEGORY_DICT_SIZE, 32],
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool(
input=mov_categories_emb,
pool_type="sum",
main_program=main_program,
startup_program=startup_program)
pool_type="sum")
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
mov_title_id = layers.data(
name='movie_title',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
mov_title_emb = layers.embedding(
input=mov_title_id,
size=[MOV_TITLE_DICT_SIZE, 32],
is_sparse=is_sparse,
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb,
num_filters=32,
filter_size=3,
act="tanh",
pool_type="sum",
main_program=main_program,
startup_program=startup_program)
pool_type="sum")
concat_embed = layers.concat(
input=[mov_fc, mov_categories_hidden, mov_title_conv],
axis=1,
main_program=main_program,
startup_program=startup_program)
axis=1)
# FIXME(dzh) : need tanh operator
mov_combined_features = layers.fc(input=concat_embed,
size=200,
act="tanh",
main_program=main_program,
startup_program=startup_program)
act="tanh")
return mov_combined_features
......@@ -217,27 +164,18 @@ def model():
# need cos sim
inference = layers.cos_sim(
X=usr_combined_features,
Y=mov_combined_features,
main_program=main_program,
startup_program=startup_program)
Y=mov_combined_features)
label = layers.data(
name='score',
shape=[1],
data_type='float32',
main_program=main_program,
startup_program=startup_program)
data_type='float32')
square_cost = layers.square_error_cost(
input=inference,
label=label,
main_program=main_program,
startup_program=startup_program)
label=label)
avg_cost = layers.mean(
x=square_cost,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=square_cost)
return avg_cost
......@@ -245,16 +183,15 @@ def model():
def main():
cost = model()
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost, startup_program=startup_program)
block = main_program.block(0)
opts = sgd_optimizer.minimize(cost)
if use_gpu:
if USE_GPU:
place = core.GPUPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[])
exe.run(framework.default_startup_program())
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -303,7 +240,7 @@ def main():
PASS_NUM = 100
for pass_id in range(PASS_NUM):
for data in train_reader():
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed=func_feed(feeding, data),
fetch_list=[cost])
out = np.array(outs[0])
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
......@@ -70,7 +69,7 @@ def main():
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program)
exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM):
for data in train_data():
......@@ -82,7 +81,7 @@ def main():
tensor_label = core.LoDTensor()
tensor_label.set(label, place)
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
......@@ -81,7 +80,7 @@ def main():
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program)
exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM):
for data in train_data():
......@@ -93,7 +92,7 @@ def main():
tensor_label = core.LoDTensor()
tensor_label.set(label, place)
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import g_main_program, g_startup_program
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
......@@ -88,10 +87,10 @@ def main():
place = core.CPUPlace()
tensor_words, tensor_label = prepare_feed_data(data, place)
exe = Executor(place)
exe.run(g_startup_program)
exe.run(framework.default_startup_program())
while True:
outs = exe.run(g_main_program,
outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.executor import Executor
import numpy as np
startup_program = Program()
main_program = Program()
embed_size = 32
hidden_size = 256
PASS_NUM = 100
EMBED_SIZE = 32
HIDDEN_SIZE = 256
N = 5
batch_size = 32
is_sparse = True
BATCH_SIZE = 32
IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
......@@ -23,97 +20,67 @@ dict_size = len(word_dict)
first_word = layers.data(
name='firstw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
second_word = layers.data(
name='secondw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
third_word = layers.data(
name='thirdw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
forth_word = layers.data(
name='forthw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
next_word = layers.data(
name='nextw',
shape=[1],
data_type='int64',
main_program=main_program,
startup_program=startup_program)
data_type='int64')
embed_first = layers.embedding(
input=first_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
embed_second = layers.embedding(
input=second_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
embed_third = layers.embedding(
input=third_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
embed_forth = layers.embedding(
input=forth_word,
size=[dict_size, embed_size],
size=[dict_size, EMBED_SIZE],
data_type='float32',
is_sparse=is_sparse,
param_attr={'name': 'shared_w'},
main_program=main_program,
startup_program=startup_program)
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth],
axis=1,
main_program=main_program,
startup_program=startup_program)
axis=1)
hidden1 = layers.fc(input=concat_embed,
size=hidden_size,
act='sigmoid',
main_program=main_program,
startup_program=startup_program)
size=HIDDEN_SIZE,
act='sigmoid')
predict_word = layers.fc(input=hidden1,
size=dict_size,
act='softmax',
main_program=main_program,
startup_program=startup_program)
act='softmax')
cost = layers.cross_entropy(
input=predict_word,
label=next_word,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
label=next_word)
avg_cost = layers.mean(x=cost)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program)
opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size)
paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = core.CPUPlace()
exe = Executor(place)
......@@ -122,8 +89,8 @@ exe = Executor(place)
# below exit line.
exit(0)
exe.run(startup_program, feed={}, fetch_list=[])
PASS_NUM = 100
exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM):
for data in train_reader():
input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)]
......@@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor()
next_tensor.set(next_data, place)
outs = exe.run(main_program,
outs = exe.run(framework.default_main_program(),
feed={
'firstw': first_tensor,
'secondw': second_tensor,
......
......@@ -2,12 +2,12 @@ import unittest
import numpy as np
import random
import itertools
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import collections
from paddle.v2.framework.backward import append_backward_ops
from paddle.v2.framework.op import Operator
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import Program, OpProtoHolder
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.op import Operator
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import Program, OpProtoHolder
def randomize_probability(batch_size, class_num, dtype='float32'):
......
......@@ -18,7 +18,9 @@ class TestAccuracyOp(OpTest):
num_correct += 1
break
self.outputs = {
'Accuracy': np.array([num_correct / float(n)]).astype("float32")
'Accuracy': np.array([num_correct / float(n)]).astype("float32"),
'Correct': np.array([num_correct]).astype("int32"),
'Total': np.array([n]).astype("int32")
}
def test_check_output(self):
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.framework.layers as layers
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
from paddle.v2.framework.framework import g_main_program
import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.framework import g_main_program
import numpy
......
import unittest
import numpy as np
from op_test import OpTest
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator
def grad_var_name(var_name):
......
import op_test
import unittest
import numpy as np
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
class TestCastOp(op_test.OpTest):
......
......@@ -23,6 +23,9 @@ def create_test_class(op_type, typename, callback):
for _type_name in {'float32', 'float64', 'int32', 'int64'}:
create_test_class('less_than', _type_name, lambda _a, _b: _a < _b)
create_test_class('less_equal', _type_name, lambda _a, _b: _a <= _b)
create_test_class('greater_than', _type_name, lambda _a, _b: _a > _b)
create_test_class('greater_equal', _type_name, lambda _a, _b: _a >= _b)
create_test_class('equal', _type_name, lambda _a, _b: _a == _b)
if __name__ == '__main__':
......
import logging
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
import numpy as np
from paddle.v2.framework.op import Operator, CondOp
from paddle.v2.fluid.op import Operator, CondOp
class PySimpleCond(object):
......
import unittest
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import g_startup_program, g_main_program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import g_startup_program, g_main_program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
import numpy
......
import unittest
import paddle.v2.framework.layers as layers
import paddle.v2.fluid.layers as layers
class TestDocString(unittest.TestCase):
......
from paddle.v2.framework.default_scope_funcs import *
from paddle.v2.fluid.default_scope_funcs import *
import unittest
......
import logging
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
from paddle.v2.framework.op import Operator, DynamicRecurrentOp
from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
import numpy as np
# for siplicity, just one level LoD
......
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
......
import unittest
from paddle.v2.framework.layers import mul, data
import paddle.v2.framework.core as core
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_main_program
from paddle.v2.fluid.layers import mul, data
import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_main_program
import numpy
......
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
import numpy as np
......
import unittest
from paddle.v2.framework.framework import Program
from paddle.v2.fluid.framework import Program
class TestDebugStringFramework(unittest.TestCase):
......
import unittest
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator
import numpy
......
import unittest
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
from paddle.v2.framework.framework import Program
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program
def conv_block(input,
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
class TestInferShape(unittest.TestCase):
......
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.framework.framework import Program
from paddle.v2.framework.io import save_inference_model, load_inference_model
import paddle.v2.framework.executor as executor
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.io import save_inference_model, load_inference_model
import paddle.v2.fluid.executor as executor
import unittest
import numpy as np
......
import numpy as np
import unittest
import paddle.v2.framework.framework as framework
import paddle.v2.framework.initializer as initializer
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.initializer as initializer
DELTA = 0.00001
......
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
from paddle.v2.framework.framework import Program
import paddle.v2.framework.core as core
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program
import paddle.v2.fluid.core as core
import unittest
......
import unittest
import paddle.v2.framework.layers as layers
from paddle.v2.framework.executor import Executor
import paddle.v2.framework.core as core
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.core as core
import numpy
......
from paddle.v2.framework.layers import lod_rank_table, data
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.framework import g_main_program
import paddle.v2.framework.core as core
from paddle.v2.fluid.layers import lod_rank_table, data
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_main_program
import paddle.v2.fluid.core as core
import numpy
import unittest
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import numpy
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import numpy
import paddle.v2.framework.layers as layers
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
class TestCPULoDTensorArrayOps(unittest.TestCase):
......
import unittest, os
import numpy as np
import paddle.v2 as paddle
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
from paddle.v2.fluid.op import Operator
import paddle.v2.fluid.core as core
from op_test import OpTest, create_op, set_input
if not core.is_compile_gpu():
......
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator
import unittest
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
class TestOpSupportGPU(unittest.TestCase):
......
import unittest
import paddle.v2.framework.op as op
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
import paddle.v2.fluid.op as op
import paddle.v2.fluid.core as core
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
class TestGetAllProtos(unittest.TestCase):
......
import unittest
from paddle.v2.framework.framework import Variable, Program, g_main_program
import paddle.v2.framework.core as core
from paddle.v2.fluid.framework import Variable, Program, g_main_program
import paddle.v2.fluid.core as core
class TestOperator(unittest.TestCase):
......
import unittest
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.backward import append_backward_ops
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops
class TestOptimizer(unittest.TestCase):
......
import unittest
from paddle.v2.framework.framework import g_main_program
import paddle.v2.framework.core as core
from paddle.v2.fluid.framework import g_main_program
import paddle.v2.fluid.core as core
class TestParameter(unittest.TestCase):
......
import unittest
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import Program
from paddle.v2.framework.framework import g_main_program
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.framework import g_main_program
class TestProgram(unittest.TestCase):
......
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
import unittest
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
class TestOpDesc(unittest.TestCase):
......
import unittest
import paddle.v2.framework.layers as layers
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
import numpy as np
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
class PyRNNBase(object):
......
import unittest
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.regularizer as regularizer
from paddle.v2.fluid.backward import append_backward_ops
class TestL2DecayRegularizer(unittest.TestCase):
......
import unittest
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
import numpy as np
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
def create_tensor(np_data, place):
......
import paddle.v2.framework.core
import paddle.v2.fluid.core
import unittest
class TestScope(unittest.TestCase):
def test_create_destroy(self):
paddle_c = paddle.v2.framework.core
paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope()
self.assertIsNotNone(scope)
scope_with_parent = scope.new_scope()
self.assertIsNotNone(scope_with_parent)
def test_none_variable(self):
paddle_c = paddle.v2.framework.core
paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope()
self.assertIsNone(scope.find_var("test"))
def test_create_var_get_var(self):
paddle_c = paddle.v2.framework.core
paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope()
var_a = scope.var("var_a")
self.assertIsNotNone(var_a)
......@@ -25,7 +25,7 @@ class TestScope(unittest.TestCase):
self.assertIsNotNone(scope2.find_var('var_a'))
def test_var_get_int(self):
paddle_c = paddle.v2.framework.core
paddle_c = paddle.v2.fluid.core
scope = paddle_c.Scope()
var = scope.var("test_int")
var.set_int(10)
......
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
import numpy as np
......
import unittest
import numpy as np
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator
from op_test import OpTest
......
import unittest
import paddle.v2.framework.core as core
from paddle.v2.framework.executor import Executor
import paddle.v2.framework.layers as layers
from paddle.v2.framework.backward import append_backward_ops
from paddle.v2.framework.framework import g_main_program
import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.framework import g_main_program
import numpy
......
import unittest
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import numpy as np
import paddle.v2.framework.layers as layers
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
class TestCPULoDTensorArrayOps(unittest.TestCase):
......
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
import numpy
......
import logging
import paddle.v2.framework.core as core
import paddle.v2.fluid.core as core
import unittest
import numpy as np
......
import unittest
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
from paddle.v2.fluid.op import Operator
import paddle.v2.fluid.core as core
import numpy
......
import unittest
from paddle.v2.framework.framework import Variable, g_main_program, Program
import paddle.v2.framework.core as core
from paddle.v2.fluid.framework import Variable, g_main_program, Program
import paddle.v2.fluid.core as core
import numpy as np
......
import unittest
import paddle.v2.framework.layers as layers
from paddle.v2.framework.executor import Executor
import paddle.v2.framework.core as core
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.core as core
import numpy
......
import paddle.v2.framework.op as op
import numpy as np
import paddle.v2.framework.core as core
def avg_accumulate(accumulated_var, per_eval, num_batches, place):
t = np.array(accumulated_var.get_tensor())
t[0] += per_eval[0]
accumulated_var.get_tensor().set([t[0] / float(num_batches)], place)
class Evaluator(object):
def __init__(self,
scope,
operator='accuracy',
input='Inference',
label='Label',
output='Output',
place=core.CPUPlace()):
"""
create an evaluator for evaluating the inference.
NOTE: default run on CPUPlace(), running on GPUPlace doesn't improve performance much.
:param scope: the scope instance contains the input.
:type scope: paddle.v2.framework.core.scope
:param operator: operator name for caculating the evaluation for each mini-batch.
:type operator: string
:param input: output variable name of forward network.
:type input: string
:param label: variable name of label
:type label: string
"""
self.scope = scope
self.place = place
self.output_name = output
self.num_batches = 0
# create variable to store accumulated evaluator output
eval_name = ''.join([operator, "@Eval"])
if scope.find_var(eval_name):
raise Exception("evaluator already exist in scope: %s" % eval_name)
self.accumulated_var = scope.var(eval_name)
t = self.accumulated_var.get_tensor()
t.set_dims((1, ))
t.set([0.0], place)
# self.accumulated_var = block.create_var(block, name=eval_name, shape=(1,))
# self.accumulated_var.get_tensor().set([0.0])
# create operator of evaluation
var_map = dict() # var name -> variable
var_map[input] = [input]
var_map[label] = [label]
var_map[output] = [output]
self.op = op.Operator(operator, **var_map)
def evaluate(self, ctx, accumulator=avg_accumulate):
self.op.run(self.scope, ctx)
per_eval = np.array(self.scope.find_var(self.output_name).get_tensor())
self.num_batches += 1
accumulator(self.accumulated_var, per_eval, self.num_batches,
self.place)
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
Operator
import unittest
import numpy as np
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
class TestBeamSearchDecodeOp(unittest.TestCase):
def setUp(self):
self.scope = core.Scope()
self.cpu_place = core.CPUPlace()
def append_lod_tensor(self, tensor_array, lod, data):
lod_tensor = core.LoDTensor()
lod_tensor.set_lod(lod)
lod_tensor.set(data, self.cpu_place)
tensor_array.append(lod_tensor)
def test_get_set(self):
ids = self.scope.var("ids").get_lod_tensor_array()
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]],
np.array(
[1, 2, 3, 4, 5, 6], dtype="int64"))
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]],
np.array(
[0, 1, 2, 3, 4, 5], dtype="int64"))
self.append_lod_tensor(
ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]],
np.array(
[0, 1, 2, 3, 4], dtype="int64"))
scores = self.scope.var("scores").get_lod_tensor_array()
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]],
np.array(
[1, 2, 3, 4, 5, 6], dtype="float32"))
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]],
np.array(
[0, 1, 2, 3, 4, 5], dtype="float32"))
self.append_lod_tensor(
scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]],
np.array(
[0, 1, 2, 3, 4], dtype="float32"))
sentence_ids = self.scope.var("sentence_ids").get_tensor()
sentence_scores = self.scope.var("sentence_scores").get_tensor()
beam_search_decode_op = Operator(
"beam_search_decode",
# inputs
Ids="ids",
Scores="scores",
# outputs
SentenceIds="sentence_ids",
SentenceScores="sentence_scores")
ctx = core.DeviceContext.create(self.cpu_place)
beam_search_decode_op.run(self.scope, ctx)
expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]]
self.assertEqual(sentence_ids.lod(), expected_lod)
self.assertEqual(sentence_scores.lod(), expected_lod)
expected_data = np.array(
[2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64")
self.assertTrue(np.array_equal(np.array(sentence_ids), expected_data))
self.assertTrue(
np.array_equal(np.array(sentence_scores), expected_data))
if __name__ == '__main__':
unittest.main()
from paddle.v2.framework.evaluator import Evaluator
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
import unittest
import op_test
import numpy as np
class TestEvaluator(unittest.TestCase):
def setup(self, scope, inputs, outputs):
def __create_var__(var_name, arr):
np_arr = np.array(arr)
scope.var(var_name)
# tensor = var.get_tensor()
# tensor.set_dims(np_arr.shape)
for var_name, arr in inputs.iteritems():
__create_var__(var_name, arr)
for var_name, arr in outputs.iteritems():
__create_var__(var_name, arr)
def test_evaluator(self):
inputs = {
'Inference': np.array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 1]]).T,
'Label': np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
}
outputs = {'Accuracy': np.array([0.9])}
out_name = 'Accuracy'
places = [core.CPUPlace()]
if core.is_compile_gpu():
places.append(core.GPUPlace(0))
for place in places:
scope = core.Scope()
self.setup(scope, inputs, outputs)
evaluator = Evaluator(
scope,
operator='accuracy',
input='Inference',
label='Label',
output=out_name,
place=place)
op_test.set_input(scope, evaluator.op, inputs, place)
ctx = core.DeviceContext.create(place)
for i in range(10): # simulate 10 mini-batches
evaluator.evaluate(ctx)
actual = np.array(scope.find_var(out_name).get_tensor())
print actual
self.assertTrue(
np.allclose(
actual, outputs[out_name], atol=1e-5),
"output name: " + out_name + " has diff.")
if __name__ == '__main__':
exit(0)
unittest.main()
......@@ -13,8 +13,8 @@ packages=['paddle',
'paddle.v2.reader',
'paddle.v2.master',
'paddle.v2.plot',
'paddle.v2.framework',
'paddle.v2.framework.proto',
'paddle.v2.fluid',
'paddle.v2.fluid.proto',
'py_paddle']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
......@@ -44,14 +44,14 @@ setup(name='paddlepaddle',
ext_modules=[Extension('_foo', ['stub.cc'])],
package_data={
'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so'],
'paddle.v2.fluid': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so']
},
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling.
# The paddle.v2.fluid.proto will be generated while compiling.
# So that package points to other directory.
'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
'paddle.v2.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/framework',
'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle'
},
scripts=paddle_bins,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册