提交 60232d81 编写于 作者: D Dong Zhihong

Merge remote-tracking branch 'origin/develop' into fix/reduce_op

......@@ -21,7 +21,7 @@ third_party/
cmake-build-*
# generated while compiling
python/paddle/v2/framework/core.so
python/paddle/v2/fluid/core.so
paddle/pybind/pybind.h
CMakeFiles
cmake_install.cmake
......
......@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat,
paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) {
#ifndef PADDLE_MOBILE_INFERENCE
auto ptr = new paddle::capi::CMatrix();
ptr->mat = paddle::Matrix::createSparseMatrix(
height,
......@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse(
false,
useGpu);
return ptr;
#else
return nullptr;
#endif
}
paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
......@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
uint64_t colSize,
float* valueArray,
uint64_t valueSize) {
#ifndef PADDLE_MOBILE_INFERENCE
if (mat == nullptr) return kPD_NULLPTR;
auto ptr = cast(mat);
if (rowArray == nullptr || colArray == nullptr ||
......@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
} else {
return kPD_NOT_SUPPORTED;
}
#else
return kPD_NOT_SUPPORTED;
#endif
}
......@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height,
* @param isBinary is binary (either 1 or 0 in matrix) or not.
* @param useGpu is using GPU or not.
* @return paddle_matrix.
* @note Mobile inference does not support this interface.
*/
PD_API paddle_matrix paddle_matrix_create_sparse(
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
......@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat,
* NULL if the matrix is binary.
* @param [in] valueSize length of value array. Zero if the matrix is binary.
* @return paddle_error
* @note Mobile inference does not support this interface.
*/
PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
int* rowArray,
......
......@@ -27,7 +27,9 @@ if(WITH_GPU)
set_source_files_properties(${CUDA_CXX_SOURCES}
PROPERTIES COMPILE_FLAGS "-D__NVCC__")
else()
if (NOT MOBILE_INFERENCE)
set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
endif()
endif()
set(CUDA_CU_SOURCES
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "hl_base.h"
/**
* @brief Maximum pool forward.
* @brief Maximum pool forward with Mask output.
*
* @param[in] frameCnt batch size of input image.
* @param[in] inputData input data.
......@@ -35,7 +35,7 @@ limitations under the License. */
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
* @param[in] tgtStride stride between output data samples.
*
* @param[out] maskData the location indices of select max data.
*/
extern void hl_maxpool_forward(const int frameCnt,
const real* inputData,
......@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride);
const int tgtStride,
real* maskData = NULL);
/**
* @brief Maximum pool backward.
......
......@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride) {}
const int tgtStride,
real* MaskData) {}
inline void hl_maxpool_backward(const int frameCnt,
const real* inputData,
......
......@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads,
const int offsetH,
const int offsetW,
real* tgtData,
const int tgtStride) {
const int tgtStride,
real* maskData) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
......@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads,
hstart = max(hstart, 0);
wstart = max(wstart, 0);
real maxval = -FLT_MAX;
int max_index = -1;
inputData += (frameNum * channels + c) * height * width;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (maxval < inputData[h * width + w])
maxval = inputData[h * width + w];
if (maxval < inputData[h * width + w]) {
max_index = h * width + w;
maxval = inputData[max_index];
}
}
}
int tgtIndex =
index % (pooledW * pooledH * channels) + frameNum * tgtStride;
tgtData[tgtIndex] = maxval;
if (maskData != NULL) {
maskData[tgtIndex] = max_index;
}
}
}
......@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt,
const int paddingH,
const int paddingW,
real* tgtData,
const int tgtStride) {
const int tgtStride,
real* maskData) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
dim3 threads(1024, 1);
......@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt,
paddingH,
paddingW,
tgtData,
tgtStride);
tgtStride,
maskData);
CHECK_SYNC("hl_maxpool_forward failed");
}
......
......@@ -377,6 +377,12 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
return grad_op_descs;
}
static BlockDescBind* CreateStepBlock(
ProgramDescBind& program_desc,
std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var,
int step_block_idx);
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx,
std::unordered_set<std::string>* no_grad_vars,
......@@ -392,13 +398,13 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
if ((*it)->Type() == "recurrent") {
int step_block_idx = (*it)->GetBlockAttr("step_block");
auto backward_block_op_descs = MakeBlockBackward(
program_desc, step_block_idx, no_grad_vars, grad_to_var);
BlockDescBind* backward_block = CreateStepBlock(
program_desc, no_grad_vars, grad_to_var, step_block_idx);
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
} else if ((*it)->Type() == "conditional_block") {
BlockDescBind* backward_block =
program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
for (auto& ptr : backward_block_op_descs) {
backward_block->AppendAllocatedOp(std::move(ptr));
}
CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
(*it)->GetBlockAttr("block"));
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
} else {
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
......@@ -449,6 +455,21 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
return backward_descs;
}
static BlockDescBind* CreateStepBlock(
ProgramDescBind& program_desc,
std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var,
int step_block_idx) {
auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
no_grad_vars, grad_to_var);
BlockDescBind* backward_block =
program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
for (auto& ptr : backward_block_op_descs) {
backward_block->AppendAllocatedOp(move(ptr));
}
return backward_block;
}
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars) {
......
......@@ -27,10 +27,32 @@ inline VarDesc::VarType ToVarType(std::type_index type) {
return VarDesc_VarType_LOD_RANK_TABLE;
} else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) {
return VarDesc_VarType_LOD_TENSOR_ARRAY;
} else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
return VarDesc_VarType_SELECTED_ROWS;
} else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
}
}
template <typename Visitor>
inline void VisitVarType(const Variable& var, Visitor visitor) {
switch (ToVarType(var.Type())) {
case VarDesc_VarType_LOD_TENSOR:
visitor(var.Get<framework::LoDTensor>());
return;
case VarDesc_VarType_LOD_RANK_TABLE:
visitor(var.Get<LoDRankTable>());
return;
case VarDesc_VarType_LOD_TENSOR_ARRAY:
visitor(var.Get<LoDTensorArray>());
return;
case VarDesc_VarType_SELECTED_ROWS:
visitor(var.Get<SelectedRows>());
return;
default:
PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
}
}
} // namespace framework
} // namespace paddle
......@@ -61,6 +61,7 @@ public:
// function arguments
strides_ = config.get<std::vector<size_t>>("strides");
paddings_ = config.get<std::vector<size_t>>("paddings");
dilations_ = config.get<std::vector<size_t>>("dilations");
groups_ = config.get<size_t>("groups");
// number of inputs and outputs
......@@ -118,6 +119,7 @@ protected:
std::vector<size_t> strides_;
std::vector<size_t> paddings_;
std::vector<size_t> dilations_;
/// Group size, refer to grouped convolution in
/// Alex Krizhevsky's paper: when group=2, the first half of the
......@@ -133,6 +135,10 @@ protected:
inline int paddingW() const { return paddings_[1]; }
inline int dilationH() const { return dilations_[0]; }
inline int dilationW() const { return dilations_[1]; }
// A temporary memory in convolution calculation.
MemoryHandlePtr memory_;
......
......@@ -79,45 +79,59 @@ void Convolution(const std::string& conv1,
if (outputChannels < inputChannels) continue;
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (padding >= filterSize) break;
for (size_t dilation : {1, 3}) {
if (padding >= filterSize) break;
size_t filterS = (filterSize - 1) * dilation + 1;
// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
if (inputSize + 2 * padding < filterS) break;
size_t outputSize =
(inputSize - filterSize + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize << " stride=" << stride
<< " padding=" << padding;
if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" ||
conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
dilation > 1)
break;
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
// NNPACK only supports stride = 1 if batchSize > 1
if ((conv1 == "NNPACKConv-CPU" ||
conv2 == "NNPACKConv-CPU") &&
batchSize > 1 && stride > 1)
break;
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
size_t outputSize =
(inputSize - filterS + 2 * padding + stride) / stride;
VLOG(3) << " batchSize=" << batchSize
<< " inputChannels=" << inputChannels
<< " inputHeight=" << inputSize
<< " inputWidth=" << inputSize
<< " outputChannels=" << outputChannels
<< " filterHeight=" << filterSize
<< " filterWidth=" << filterSize
<< " outputHeight=" << outputSize
<< " outputWidth=" << outputSize
<< " stride=" << stride << " padding=" << padding;
function(test, input, filter, output);
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)1)
.set("algo", (std::string) "auto"));
TensorShape input{
batchSize, inputChannels, inputSize, inputSize};
TensorShape filter{
outputChannels, inputChannels, filterSize, filterSize};
TensorShape output{
batchSize, outputChannels, outputSize, outputSize};
function(test, input, filter, output);
}
}
}
}
......@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1,
for (size_t outputChannels : {7}) {
size_t stride = 1;
size_t padding = 0;
size_t dilation = 1;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
......@@ -162,6 +177,7 @@ void Convolution2(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {dilation, dilation};
Compare2Function<DType1, DType2> test(
conv1,
conv2,
......@@ -169,6 +185,7 @@ void Convolution2(const std::string& conv1,
.set("paddings", paddings)
.set("strides", strides)
.set("groups", (size_t)1)
.set("dilations", dilations)
.set("algo", (std::string) "auto"));
TensorShape input{
......@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1,
std::vector<size_t> paddings = {padding, padding};
std::vector<size_t> strides = {stride, stride};
std::vector<size_t> dilations = {1, 1};
size_t groups = inputChannels;
Compare2Function<DType1, DType2> test(
conv1,
......@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1,
.set("paddings", paddings)
.set("strides", strides)
.set("groups", groups)
.set("dilations", dilations)
.set("algo", (std::string) "auto"));
TensorShape input{
......
......@@ -100,7 +100,9 @@ public:
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
} else {
colData = inputData + g * inputOffset;
}
......@@ -223,7 +225,9 @@ public:
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
}
}
inputGrad += inputChannels * inputHeight * inputWidth;
......@@ -310,7 +314,9 @@ public:
strideH(),
strideW(),
paddingH(),
paddingW());
paddingW(),
dilationH(),
dilationW());
} else {
colData = inputData + g * inputOffset;
}
......
......@@ -78,7 +78,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth);
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
};
template <ColFormat Format, DeviceType Device, class T>
......@@ -91,7 +93,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth);
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1);
};
} // namespace paddle
......@@ -31,7 +31,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -47,8 +49,8 @@ public:
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) < 0 ||
(imRowIdx - paddingHeight) >= inputHeight ||
(imColIdx - paddingWidth) < 0 ||
......@@ -81,7 +83,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -97,8 +101,8 @@ public:
int c_im = c / filterWidth / filterHeight;
for (int h = 0; h < outputHeight; ++h) {
for (int w = 0; w < outputWidth; ++w) {
int imRowIdx = h * strideHeight + hOffset;
int imColIdx = w * strideWidth + wOffset;
int imRowIdx = h * strideHeight + hOffset * dilationHeight;
int imColIdx = w * strideWidth + wOffset * dilationWidth;
if ((imRowIdx - paddingHeight) >= 0 &&
(imRowIdx - paddingHeight) < inputHeight &&
(imColIdx - paddingWidth) >= 0 &&
......@@ -134,7 +138,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -147,9 +153,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset =
outputH * strideHeight + filterH - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth;
int imRowOffset = outputH * strideHeight +
filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels +
channel) *
......@@ -189,7 +196,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight = 1,
int dilationWidth = 1) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -202,9 +211,10 @@ public:
for (int channel = 0; channel < inputChannels; ++channel) {
for (int filterH = 0; filterH < filterHeight; ++filterH) {
for (int filterW = 0; filterW < filterWidth; ++filterW) {
int imRowOffset =
outputH * strideHeight + filterH - paddingHeight;
int imColOffset = outputW * strideWidth + filterW - paddingWidth;
int imRowOffset = outputH * strideHeight +
filterH * dilationHeight - paddingHeight;
int imColOffset = outputW * strideWidth +
filterW * dilationWidth - paddingWidth;
int colDataOffset =
(((outputH * outputWidth + outputW) * inputChannels +
channel) *
......
......@@ -28,6 +28,8 @@ __global__ void im2col(const T* data_im,
int strideW,
int paddingH,
int paddingW,
int dilationH,
int dilationW,
int height_col,
int width_col,
T* data_col) {
......@@ -44,8 +46,8 @@ __global__ void im2col(const T* data_im,
data_col += (channel_out * height_col + h_out) * width_col + w_out;
for (int i = 0; i < blockH; ++i) {
for (int j = 0; j < blockW; ++j) {
int rIdx = int(h_in + i);
int cIdx = int(w_in + j);
int rIdx = int(h_in + i * dilationH);
int cIdx = int(w_in + j * dilationW);
if ((rIdx - (int)paddingH) >= (int)height ||
(rIdx - (int)paddingH) < 0 ||
(cIdx - (int)paddingW) >= (int)width ||
......@@ -77,7 +79,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -102,6 +106,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth,
colData);
......@@ -121,6 +127,8 @@ __global__ void col2im(size_t n,
size_t strideW,
size_t paddingH,
size_t paddingW,
size_t dilationH,
size_t dilationW,
size_t height_col,
size_t width_col,
T* data_im) {
......@@ -131,23 +139,34 @@ __global__ void col2im(size_t n,
int w = int(index % width);
int h = int((index / width) % height);
int c = int(index / (width * height));
int filterH = (blockH - 1) * dilationH + 1;
int filterW = (blockW - 1) * dilationW + 1;
if ((w - (int)paddingW) >= 0 &&
(w - (int)paddingW) < (width - 2 * paddingW) &&
(h - (int)paddingH) >= 0 && (h - paddingH) < (height - 2 * paddingH)) {
// compute the start and end of the output
int w_col_start =
(w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1;
(w < (int)filterW) ? 0 : (w - int(filterW)) / (int)strideW + 1;
int w_col_end = min((int)(w / (int)strideW + 1), (int)(width_col));
int h_col_start =
(h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1;
(h < (int)filterH) ? 0 : (h - (int)filterH) / (int)strideH + 1;
int h_col_end = min(int(h / strideH + 1), int(height_col));
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// the col location: [c * width * height + h_out, w_out]
int c_col = int(c * blockH * blockW) +
(h - h_col * (int)strideH) * (int)blockW +
(w - w_col * (int)strideW);
val += data_col[(c_col * height_col + h_col) * width_col + w_col];
int h_k = (h - h_col * strideH);
int w_k = (w - w_col * strideW);
if (h_k % dilationH == 0 && w_k % dilationW == 0) {
h_k /= dilationH;
w_k /= dilationW;
int c_col =
(((c * blockH + h_k) * blockW + w_k) * height_col + h_col) *
width_col +
w_col;
val += data_col[c_col];
}
}
}
h -= paddingH;
......@@ -173,7 +192,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -205,6 +226,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth,
imData);
......@@ -229,6 +252,8 @@ __global__ void im2colOCF(const T* imData,
int strideWidth,
int paddingHeight,
int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight,
int outputWidth) {
int swId = blockIdx.x;
......@@ -237,8 +262,10 @@ __global__ void im2colOCF(const T* imData,
channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth;
int heightOffset = idy + shId * strideHeight - paddingHeight;
int widthOffset =
idx * dilationHeight + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationWidth + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth;
......@@ -273,7 +300,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -312,6 +341,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth);
CHECK_SYNC("Im2ColFunctor GPU failed");
......@@ -330,6 +361,8 @@ __global__ void col2imOCF(T* imData,
int strideWidth,
int paddingHeight,
int paddingWidth,
int dilationHeight,
int dilationWidth,
int outputHeight,
int outputWidth) {
int swId = blockIdx.x;
......@@ -338,8 +371,10 @@ __global__ void col2imOCF(T* imData,
channelId += blockDim.z) {
for (int idy = threadIdx.y; idy < filterHeight; idy += blockDim.y) {
for (int idx = threadIdx.x; idx < filterWidth; idx += blockDim.x) {
int widthOffset = idx + swId * strideWidth - paddingWidth;
int heightOffset = idy + shId * strideHeight - paddingHeight;
int widthOffset =
idx * dilationWidth + swId * strideWidth - paddingWidth;
int heightOffset =
idy * dilationHeight + shId * strideHeight - paddingHeight;
int imOffset = widthOffset + heightOffset * inputWidth +
channelId * inputHeight * inputWidth;
......@@ -372,7 +407,9 @@ public:
int strideHeight,
int strideWidth,
int paddingHeight,
int paddingWidth) {
int paddingWidth,
int dilationHeight,
int dilationWidth) {
int inputChannels = imShape[0];
int inputHeight = imShape[1];
int inputWidth = imShape[2];
......@@ -411,6 +448,8 @@ public:
strideWidth,
paddingHeight,
paddingWidth,
dilationHeight,
dilationWidth,
outputHeight,
outputWidth);
CHECK_SYNC("Col2ImFunctor GPU failed");
......
......@@ -29,82 +29,98 @@ void TestIm2ColFunctor() {
for (size_t filterWidth : {3, 7}) {
for (size_t stride : {1, 2}) {
for (size_t padding : {0, 1}) {
if (inputHeight <= filterHeight || inputWidth <= filterWidth)
break;
if (padding >= filterHeight || padding >= filterWidth) break;
size_t outputHeight =
(inputHeight - filterHeight + 2 * padding + stride) /
stride;
size_t outputWidth =
(inputWidth - filterWidth + 2 * padding + stride) / stride;
TensorShape imShape =
TensorShape({channels, inputHeight, inputWidth});
TensorShape colShape1 = TensorShape({channels,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
TensorShape colShape2 = TensorShape({outputHeight,
outputWidth,
channels,
filterHeight,
filterWidth});
size_t height = channels * filterHeight * filterWidth;
size_t width = outputHeight * outputWidth;
VectorPtr input1 = Vector::create(imShape.getElements(), false);
VectorPtr input2 = Vector::create(imShape.getElements(), false);
MatrixPtr output1 = Matrix::create(height, width, false, false);
MatrixPtr output2 = Matrix::create(width, height, false, false);
input1->uniform(0.001, 1);
input2->copyFrom(*input1);
Im2ColFunctor<kCFO, Device, T> im2Col1;
Im2ColFunctor<kOCF, Device, T> im2Col2;
im2Col1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding);
im2Col2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding);
// The transposition of the result of ColFormat == kCFO
// is equal to the result of ColFormat == kOCF.
MatrixPtr test;
output2->transpose(test, true);
autotest::TensorCheckErr(*output1, *test);
Col2ImFunctor<kCFO, Device, T> col2Im1;
Col2ImFunctor<kOCF, Device, T> col2Im2;
col2Im1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding);
col2Im2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding);
autotest::TensorCheckErr(*input1, *input2);
for (size_t dilation : {1, 3}) {
size_t filterSizeH = (filterHeight - 1) * dilation + 1;
size_t filterSizeW = (filterWidth - 1) * dilation + 1;
if (inputHeight + 2 * padding < filterSizeH ||
inputWidth + 2 * padding < filterSizeW)
break;
if (padding >= filterSizeH || padding >= filterSizeW) break;
size_t outputHeight =
(inputHeight - filterSizeH + 2 * padding) / stride + 1;
size_t outputWidth =
(inputWidth - filterSizeW + 2 * padding) / stride + 1;
TensorShape imShape =
TensorShape({channels, inputHeight, inputWidth});
TensorShape colShape1 = TensorShape({channels,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
TensorShape colShape2 = TensorShape({outputHeight,
outputWidth,
channels,
filterHeight,
filterWidth});
size_t height = channels * filterHeight * filterWidth;
size_t width = outputHeight * outputWidth;
VectorPtr input1 =
Vector::create(imShape.getElements(), false);
VectorPtr input2 =
Vector::create(imShape.getElements(), false);
MatrixPtr output1 =
Matrix::create(height, width, false, false);
MatrixPtr output2 =
Matrix::create(width, height, false, false);
input1->uniform(0.001, 1);
input2->copyFrom(*input1);
Im2ColFunctor<kCFO, Device, T> im2Col1;
Im2ColFunctor<kOCF, Device, T> im2Col2;
im2Col1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding,
dilation,
dilation);
im2Col2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding,
dilation,
dilation);
// The transposition of the result of ColFormat == kCFO
// is equal to the result of ColFormat == kOCF.
MatrixPtr test;
output2->transpose(test, true);
autotest::TensorCheckErr(*output1, *test);
Col2ImFunctor<kCFO, Device, T> col2Im1;
Col2ImFunctor<kOCF, Device, T> col2Im2;
col2Im1(input1->getData(),
imShape,
output1->getData(),
colShape1,
stride,
stride,
padding,
padding,
dilation,
dilation);
col2Im2(input2->getData(),
imShape,
output2->getData(),
colShape2,
stride,
stride,
padding,
padding,
dilation,
dilation);
autotest::TensorCheckErr(*input1, *input2);
}
}
}
}
......
......@@ -85,9 +85,49 @@ if(MOBILE_INFERENCE)
gradientmachines/GradientMachineMode.cpp
gradientmachines/MultiGradientMachine.cpp)
# Remove useless layers
# Remove layers that used in training
list(REMOVE_ITEM GSERVER_SOURCES
layers/RecurrentLayerGroup.cpp)
layers/RecurrentLayerGroup.cpp
layers/CostLayer.cpp
layers/MultiBoxLossLayer.cpp
layers/WarpCTCLayer.cpp
layers/CTCLayer.cpp
layers/LinearChainCTC.cpp
layers/PrintLayer.cpp)
list(REMOVE_ITEM GSERVER_SOURCES
layers/OuterProdLayer.cpp
layers/SumToOneNormLayer.cpp
layers/ConvShiftLayer.cpp
layers/InterpolationLayer.cpp
layers/AgentLayer.cpp
layers/DotMulOperator.cpp
layers/GruStepLayer.cpp
layers/LstmStepLayer.cpp
layers/ConvexCombinationLayer.cpp
layers/Conv3DLayer.cpp
layers/DeConv3DLayer.cpp
layers/CropLayer.cpp
layers/CrossEntropyOverBeam.cpp
layers/DataNormLayer.cpp
layers/FeatureMapExpandLayer.cpp
layers/HierarchicalSigmoidLayer.cpp
layers/MultinomialSampler.cpp
layers/NCELayer.cpp
layers/KmaxSeqScoreLayer.cpp
layers/MDLstmLayer.cpp
layers/MultiplexLayer.cpp
layers/PadLayer.cpp
layers/Pool3DLayer.cpp
layers/ResizeLayer.cpp
layers/RotateLayer.cpp
layers/RowConvLayer.cpp
layers/RowL2NormLayer.cpp
layers/SamplingIdLayer.cpp
layers/ScaleShiftLayer.cpp
layers/SelectiveFullyConnectedLayer.cpp
layers/SpatialPyramidPoolLayer.cpp
layers/BilinearInterpLayer.cpp
layers/ClipLayer.cpp)
endif()
if(WITH_GPU)
......
......@@ -16,7 +16,6 @@ limitations under the License. */
#include "NeuralNetwork.h"
#include "hl_gpu.h"
#include "paddle/gserver/layers/AgentLayer.h"
#include "paddle/utils/CustomStackTrace.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
......@@ -28,6 +27,7 @@ limitations under the License. */
#ifndef PADDLE_MOBILE_INFERENCE
#include "MultiNetwork.h"
#include "RecurrentGradientMachine.h"
#include "paddle/gserver/layers/AgentLayer.h"
#endif
namespace paddle {
......@@ -192,9 +192,11 @@ void NeuralNetwork::init(const ModelConfig& config,
void NeuralNetwork::connect(LayerPtr agentLayer,
LayerPtr realLayer,
int height) {
#ifndef PADDLE_MOBILE_INFERENCE
AgentLayer* agent = dynamic_cast<AgentLayer*>(agentLayer.get());
CHECK_NOTNULL(agent);
agent->setRealLayer(realLayer, height);
#endif
}
void NeuralNetwork::connect(std::string agentLayerName,
......
......@@ -79,6 +79,10 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
for (int i = 0; i < config_.inputs_size(); i++) {
std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]};
std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]};
std::vector<size_t> dilations = {(size_t)dilationY_[i],
(size_t)dilation_[i]};
bool useDilation = ((size_t)dilationY_[i] > 1 || (size_t)dilation_[i] > 1);
// Convolution Layer uses the GemmConv function by default.
convType = "GemmConv";
......@@ -97,13 +101,14 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
if ((filterSize_[i] == filterSizeY_[i]) &&
(filterSize_[i] == 3 || filterSize_[i] == 4) &&
(stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2)) {
(stride_[i] == strideY_[i]) && (stride_[i] == 1 || stride_[i] == 2) &&
!useDilation) {
convType = "NeonDepthwiseConv";
}
#endif
}
if (FLAGS_use_nnpack && !isDeconv_) {
if (FLAGS_use_nnpack && !isDeconv_ && !useDilation) {
createFunction(forward_,
"NNPACKConv",
FuncConfig()
......@@ -117,6 +122,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
......@@ -124,6 +130,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i]));
createFunction(backward_,
......@@ -131,6 +138,7 @@ bool ExpandConvLayer::init(const LayerMap &layerMap,
FuncConfig()
.set("paddings", paddings)
.set("strides", strides)
.set("dilations", dilations)
.set("groups", (size_t)groups_[i]));
}
}
......
......@@ -98,6 +98,7 @@ ClassRegistrar<Layer, LayerConfig> Layer::registrar_;
LayerPtr Layer::create(const LayerConfig& config) {
std::string type = config.type();
#ifndef PADDLE_MOBILE_INFERENCE
// NOTE: As following types have illegal character '-',
// they can not use REGISTER_LAYER to registrar.
// Besides, to fit with old training models,
......@@ -106,7 +107,6 @@ LayerPtr Layer::create(const LayerConfig& config) {
return LayerPtr(new MultiClassCrossEntropy(config));
else if (type == "rank-cost")
return LayerPtr(new RankingCost(config));
#ifndef PADDLE_MOBILE_INFERENCE
else if (type == "auc-validation")
return LayerPtr(new AucValidation(config));
else if (type == "pnpair-validation")
......
......@@ -54,7 +54,6 @@ void MKLDNNAddtoLayer::reshape(
ow = iw;
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -125,7 +125,6 @@ void MKLDNNBatchNormLayer::reshape(
<< "Input channel can not be changed";
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -102,8 +102,6 @@ void MKLDNNConvLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -92,7 +92,7 @@ public:
void printSizeInfo() override {
MKLDNNLayer::printSizeInfo();
VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
<< ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
<< ", ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
<< ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
}
......
......@@ -84,8 +84,6 @@ void MKLDNNFcLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc);
printSizeInfo();
}
void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -71,8 +71,6 @@ void MKLDNNPoolLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MaxPoolWithMaskLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
bool MaxPoolWithMaskLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
PoolLayer::init(layerMap, parameterMap);
setOutput("mask", &mask_);
return true;
}
size_t MaxPoolWithMaskLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
outputY_ = outputSize(imgSizeY_,
sizeY_,
confPaddingY_,
strideY_,
/* caffeMode */ false);
outputX_ = outputSize(imgSize_,
sizeX_,
confPadding_,
stride_,
/* caffeMode */ false);
layerSize = outputX_ * outputY_ * channels_;
getOutput().setFrameHeight(outputY_);
getOutput().setFrameWidth(outputX_);
return layerSize;
}
void MaxPoolWithMaskLayer::forward(PassType passType) {
size_t size = getSize();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
int batchSize = inputV->getHeight();
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
CHECK_EQ(size, outV->getWidth());
resetSpecifyOutput(mask_,
batchSize,
size,
/* isValueClean */ false,
/* isGradClean */ true);
MatrixPtr maskV = mask_.value;
outV->maxPoolForward(*inputV,
imgSizeY_,
imgSize_,
channels_,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
confPaddingY_,
confPadding_,
maskV);
}
void MaxPoolWithMaskLayer::backward(const UpdateCallback& callback) {
(void)callback;
if (NULL == getInputGrad(0)) {
return;
}
MatrixPtr outGrad = getOutputGrad();
MatrixPtr inputV = inputLayers_[0]->getOutputValue();
MatrixPtr outV = getOutputValue();
MatrixPtr inputGrad = inputLayers_[0]->getOutputGrad();
inputGrad->maxPoolBackward(*inputV,
imgSizeY_,
imgSize_,
*outGrad,
*outV,
sizeX_,
sizeY_,
strideY_,
stride_,
outputY_,
outputX_,
1,
1,
confPaddingY_,
confPadding_);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "PoolLayer.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
* @brief Basic parent layer of different kinds of pooling
*/
class MaxPoolWithMaskLayer : public PoolLayer {
protected:
Argument mask_;
public:
explicit MaxPoolWithMaskLayer(const LayerConfig& config)
: PoolLayer(config) {}
size_t getSize();
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
};
} // namespace paddle
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "PoolLayer.h"
#include "MaxPoolWithMaskLayer.h"
#include "PoolProjectionLayer.h"
#include "paddle/utils/Logging.h"
#ifdef PADDLE_WITH_CUDA
......@@ -44,7 +45,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride();
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
return true;
}
......@@ -57,6 +57,8 @@ Layer* PoolLayer::create(const LayerConfig& config) {
} else if (CudnnPoolLayer::typeCheck(pool)) {
return new CudnnPoolLayer(config);
#endif
} else if (pool == "max-pool-with-mask") {
return new MaxPoolWithMaskLayer(config);
} else {
LOG(FATAL) << "Unknown pool type: " << pool;
return nullptr;
......
......@@ -98,7 +98,7 @@ void ROIPoolLayer::forward(PassType passType) {
size_t roiStartH = round(bottomROIs[2] * spatialScale_);
size_t roiEndW = round(bottomROIs[3] * spatialScale_);
size_t roiEndH = round(bottomROIs[4] * spatialScale_);
CHECK_GE(roiBatchIdx, 0);
CHECK_GE(roiBatchIdx, 0UL);
CHECK_LT(roiBatchIdx, batchSize);
size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL);
size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL);
......
# gserver pacakge unittests
add_simple_unittest(test_LinearChainCRF)
add_simple_unittest(test_MultinomialSampler)
add_simple_unittest(test_RecurrentLayer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_MultinomialSampler)
endif()
function(gserver_test TARGET)
add_unittest_without_exec(${TARGET}
${TARGET}.cpp
......@@ -24,6 +27,7 @@ gserver_test(test_ConvUnify)
gserver_test(test_BatchNorm)
gserver_test(test_KmaxSeqScore)
gserver_test(test_Expand)
gserver_test(test_MaxPoolingWithMaskOutput)
########## test_Mkldnn layers and activations ##########
if(WITH_MKLDNN)
......@@ -48,7 +52,7 @@ if(WITH_PYTHON)
endif()
############### test_WarpCTCLayer #######################
if(NOT WITH_DOUBLE)
if(NOT WITH_DOUBLE AND NOT MOBILE_INFERENCE)
add_unittest_without_exec(test_WarpCTCLayer
test_WarpCTCLayer.cpp)
......
......@@ -434,7 +434,7 @@ void testConvLayer(const string& type, bool trans, bool useGpu) {
config.layerConfig.set_partial_sum(1);
config.layerConfig.set_shared_biases(true);
int dilation = 1;
int dilation = 2;
if (type == "cudnn_conv") {
#if CUDNN_VERSION >= 6000
dilation = 2;
......@@ -1234,6 +1234,7 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
TEST(Layer, PoolLayer) {
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false);
#ifdef PADDLE_WITH_CUDA
testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true);
......@@ -1242,6 +1243,7 @@ TEST(Layer, PoolLayer) {
testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true);
testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true);
#endif
}
......
......@@ -297,7 +297,7 @@ static void getAddtoConfig(TestConfig& cfg,
}
void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) {
CHECK_GE(nInputs, 1);
CHECK_GE(nInputs, 1UL);
TestConfig dnnConfig;
getAddtoConfig(dnnConfig, pm, nInputs);
dnnConfig.layerConfig.set_type("mkldnn_addto");
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/math/MathUtils.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle;
void setPoolConfig(TestConfig* config,
PoolConfig* pool,
const string& poolType) {
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(1);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(1);
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false);
int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false);
pool->set_output_x(ow);
pool->set_output_y(oh);
}
void doOneMaxPoolingWithMaskOutputTest(MatrixPtr& inputMat,
const string& poolType,
bool use_gpu,
MatrixPtr& maskMat) {
TestConfig config;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 25, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
pool->set_img_size(5);
pool->set_img_size_y(5);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
config.layerConfig.set_name("MaxPoolWithMask");
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(config,
&dataLayers,
&datas,
&layerMap,
"MaxPoolWithMask",
1,
false,
use_gpu);
dataLayers[0]->getOutputValue()->copyFrom(*inputMat);
FLAGS_use_gpu = use_gpu;
std::vector<ParameterPtr> parameters;
LayerPtr maxPoolingWithMaskOutputLayer;
initTestLayer(config, &layerMap, &parameters, &maxPoolingWithMaskOutputLayer);
maxPoolingWithMaskOutputLayer->forward(PASS_GC);
checkMatrixEqual(maxPoolingWithMaskOutputLayer->getOutput("mask").value,
maskMat);
}
TEST(Layer, maxPoolingWithMaskOutputLayerFwd) {
bool useGpu = false;
MatrixPtr inputMat;
MatrixPtr maskMat;
real inputData[] = {0.1, 0.1, 0.5, 0.5, 1.1, 0.2, 0.2, 0.6, 0.1,
0.1, 0.3, 0.3, 0.7, 0.1, 0.1, 0.4, 0.4, 0.8,
0.8, 0.1, 1.0, 2.0, 3.0, 0.0, 9.0};
real maskData[] = {12, 4, 22, 24};
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->setData(inputData);
maskMat->setData(maskData);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#ifdef PADDLE_WITH_CUDA
useGpu = true;
inputMat = Matrix::create(1, 25, false, useGpu);
maskMat = Matrix::create(1, 4, false, useGpu);
inputMat->copyFrom(inputData, 25);
maskMat->copyFrom(maskData, 4);
doOneMaxPoolingWithMaskOutputTest(
inputMat, "max-pool-with-mask", useGpu, maskMat);
#endif
}
......@@ -1902,5 +1902,52 @@ void BaseMatrixT<real>::sumOfProducts(BaseMatrixT& b,
}
template class BaseMatrixT<real>;
#ifndef PADDLE_MOBILE_INFERENCE
template class BaseMatrixT<int>;
#else
template <>
void BaseMatrixT<int>::zero() {
applyUnary(unary::Zero<int>());
}
template <>
void BaseMatrixT<int>::assign(int p) {
applyUnary(unary::Assign<int>(p));
}
template <>
void BaseMatrixT<int>::isEqualTo(BaseMatrixT& b, int value) {
applyBinary(binary::IsEqual<int>(value), b);
}
template <>
void BaseMatrixT<int>::neg() {
applyUnary(unary::Neg<int>());
}
template <>
void BaseMatrixT<int>::abs2() {
applyUnary(unary::Abs<int>());
}
template <>
void BaseMatrixT<int>::add(int p) {
applyUnary(unary::Add<int>(p));
}
template <>
void BaseMatrixT<int>::add(int p1, int p2) {
applyUnary(unary::Add2<int>(p1, p2));
}
template <>
void BaseMatrixT<int>::applyL1(int learningRate, int decayRate) {
applyUnary(unary::ApplyL1<int>(learningRate * decayRate));
}
#endif
} // namespace paddle
......@@ -25,6 +25,19 @@ else()
message(STATUS "Compile with MKLDNNMatrix")
endif()
if(MOBILE_INFERENCE)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/SIMDFunctions.cpp)
# Remove sparse
list(REMOVE_ITEM MATH_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.h
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.h)
list(REMOVE_ITEM MATH_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/CpuSparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseMatrix.cpp
${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp)
endif()
set(MATH_SOURCES
"${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu"
"${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu"
......
......@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef>
#include "Matrix.h"
......@@ -309,3 +312,57 @@ private:
using Matrix::subMatrix;
};
} // namespace paddle
#else
#include "Matrix.h"
namespace paddle {
class CpuSparseMatrix : public Matrix {
public:
CpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
CpuSparseMatrix(real* data,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, false) {}
real* getValue() const { return nullptr; }
size_t getColStartIdx(size_t i) const { return 0; }
size_t getRowStartIdx(size_t i) const { return 0; }
size_t getColNum(size_t i) const { return 0; }
int* getRowCols(size_t i) const { return nullptr; }
CpuSparseMatrixPtr getTmpSparseMatrix(size_t height, size_t width) {
return nullptr;
}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
......@@ -152,12 +152,7 @@ void MKLDNNMatrix::downSpatial() {
}
memory::desc md = memory::desc(dstDims, getDtype(), dstFmt);
memory::primitive_desc pd = memory::primitive_desc(md, getEngine());
mkldnn_primitive_t result;
mkldnn::error::wrap_c_api(
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(data_);
resetMKLDNNMemory(pd, data_);
}
} // namespace paddle
......@@ -145,6 +145,27 @@ public:
m_.reset();
}
/**
* override the CpuMatrix::resize
*/
void resize(size_t newHeight, size_t newWidth) override {
m_->resize(newHeight, newWidth);
if (data_ == m_->getData() && elementCnt_ == newHeight * newWidth) {
return;
}
CpuMatrix::setData(data_);
height_ = newHeight;
width_ = newWidth;
elementCnt_ = newHeight * newWidth;
stride_ = width_;
auto pd = mkldnn::memory::primitive_desc(
mkldnn::memory::desc({(int)newHeight, (int)newWidth},
getDtype(),
mkldnn::memory::format::nc),
getEngine());
resetMKLDNNMemory(pd, data_);
}
/**
* override Matrix::getData
* check data before return
......@@ -215,6 +236,17 @@ protected:
memory::format srcFmt,
memory::format dstFmt,
memory::dims dm);
/**
* reset this MKLDNN Memory from primitve desc
*/
void resetMKLDNNMemory(memory::primitive_desc pd, real* data) {
mkldnn_primitive_t result;
mkldnn::error::wrap_c_api(
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(data);
}
private:
// save the CpuMatrixPtr in case the buffer released outside
......
......@@ -451,6 +451,7 @@ void GpuMatrix::addSharedBias(Matrix& b, real scale) {
}
void GpuMatrix::collectBias(Matrix& a, real scale) {
#ifdef PADDLE_WITH_CUDA
CHECK_EQ(getHeight(), (size_t)1);
CHECK_EQ(width_, a.getWidth());
GpuSparseMatrix* sMatPtr = dynamic_cast<GpuSparseMatrix*>(&a);
......@@ -461,6 +462,7 @@ void GpuMatrix::collectBias(Matrix& a, real scale) {
hl_sparse_matrix_s A_d = sMatPtr->sMatrix_.get();
hl_sparse_matrix_column_sum(data, A_d, sMatPtr->getHeight(), width_, scale);
}
#endif
}
void GpuMatrix::collectSharedBias(Matrix& a, real scale) {
......@@ -552,6 +554,7 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
const GpuMatrix& b,
real scaleAB,
real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous());
CHECK(b.isContiguous());
CHECK(b.useGpu_ == true) << "Matrix type are not equal";
......@@ -578,12 +581,14 @@ void GpuMatrix::mul(const GpuSparseMatrix& a,
b.height_,
scaleAB,
scaleT);
#endif
}
void GpuMatrix::mul(const GpuMatrix& a,
const GpuSparseMatrix& b,
real scaleAB,
real scaleT) {
#ifdef PADDLE_WITH_CUDA
CHECK(isContiguous());
CHECK(a.isContiguous());
CHECK(a.useGpu_ == true) << "Matrix type are not equal";
......@@ -622,6 +627,7 @@ void GpuMatrix::mul(const GpuMatrix& a,
scaleAB,
scaleT);
}
#endif
}
/* this = a*b */
......@@ -1028,15 +1034,23 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW) {
size_t paddingW,
MatrixPtr maskMatP) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData();
real* maskData = NULL;
size_t frameNum = inputMat.getHeight();
CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth());
CHECK(height_ == inputMat.getHeight());
CHECK(width_ == outputH * outputW * channels);
if (maskMatP != NULL) {
CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal";
CHECK(outputH * outputW * channels == maskMatP->getWidth());
maskData = maskMatP->getData();
}
hl_maxpool_forward(frameNum,
inputData,
channels,
......@@ -1051,7 +1065,8 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat,
paddingH,
paddingW,
data_,
getStride());
getStride(),
maskData);
}
void GpuMatrix::maxPoolBackward(Matrix& inputMat,
......@@ -1548,6 +1563,7 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
}
void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
......@@ -1563,9 +1579,11 @@ void GpuMatrix::multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy(
output_d, entropy_d, mat_d, height_, outputPtr->width_);
#endif
}
void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
#ifdef PADDLE_WITH_CUDA
GpuMatrix* outputPtr = dynamic_cast<GpuMatrix*>(&output);
auto labelPtr = dynamic_cast<GpuSparseMatrix*>(&label);
......@@ -1581,6 +1599,7 @@ void GpuMatrix::multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) {
hl_sparse_matrix_s mat_d = labelPtr->sMatrix_.get();
hl_matrix_multi_binary_cross_entropy_bp(
output_d, grad_d, mat_d, height_, width_);
#endif
}
void GpuMatrix::vol2Col(real* dataSrc,
......@@ -1973,9 +1992,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW) {
size_t paddingW,
MatrixPtr maskMatP) {
real* inputData = inputMat.getData();
real* outData = data_;
real* maskData = NULL;
size_t num = inputMat.getHeight();
size_t inLength = imgSizeH * imgSizeW;
size_t outLength = outputH * outputW;
......@@ -1984,6 +2005,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
CHECK_EQ(channels * outLength, this->getWidth());
size_t outStride = getStride();
if (maskMatP != NULL) {
maskData = maskMatP->getData();
CHECK_EQ(channels * outLength, maskMatP->getWidth());
}
/* initialize the data_ */
for (size_t i = 0; i < height_; i++) {
for (size_t j = 0; j < width_; j++) {
......@@ -2005,10 +2031,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
int wstart = pw * strideW - paddingW;
int wend = std::min(wstart + sizeX, imgSizeW);
wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(
outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
if (maskData == NULL) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(
outData[ph * outputW + pw], inputData[h * imgSizeW + w]);
}
}
} else {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (outData[ph * outputW + pw] < inputData[h * imgSizeW + w]) {
outData[ph * outputW + pw] = inputData[h * imgSizeW + w];
maskData[ph * outputW + pw] = h * imgSizeW + w;
}
}
}
}
}
......@@ -2016,6 +2053,8 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat,
// compute offset
inputData += inLength;
outData += outLength;
if (maskData != NULL) maskData += outLength;
}
}
}
......@@ -3226,6 +3265,7 @@ template void CpuMatrix::mul<CpuMatrix, CacheRowCpuMatrix>(CpuSparseMatrix* a,
real scaleAB,
real scaleT);
#ifndef PADDLE_MOBILE_INFERENCE
void SharedCpuMatrix::mul(CpuSparseMatrix* a,
CpuMatrix* b,
real scaleAB,
......@@ -3354,6 +3394,7 @@ void SharedCpuMatrix::initBlock(int blockNum) {
}
}
#endif
/* Add a (column) vector b to matrix a, column by column */
void CpuMatrix::addColumnVector(const Matrix& b) {
BaseMatrix::addColVector(const_cast<Matrix&>(b));
......
......@@ -861,7 +861,8 @@ public:
/**
* Pooling forward operation, pick out the largest element
* in the sizeX of value
* in the sizeX of value, if the maskMatP is not NULL, it will
* also caculate the location indices.
*/
virtual void maxPoolForward(Matrix& inputMat,
size_t imgSizeH,
......@@ -874,7 +875,8 @@ public:
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW) {
size_t paddingW,
MatrixPtr maskMatP = NULL) {
LOG(FATAL) << "Not implemeted";
}
......@@ -1426,7 +1428,8 @@ public:
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW);
size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image,
size_t imgSizeH,
......@@ -1697,7 +1700,8 @@ public:
size_t outputH,
size_t outputW,
size_t paddingH,
size_t paddingW);
size_t paddingW,
MatrixPtr maskMatP);
void maxPoolBackward(Matrix& image,
size_t imgSizeH,
......@@ -2066,6 +2070,7 @@ public:
class SharedCpuMatrix : public CpuMatrix {
public:
#ifndef PADDLE_MOBILE_INFERENCE
/* blockNum is number of partitions of the matrix */
SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false)
: CpuMatrix(height, width, trans) {
......@@ -2111,6 +2116,7 @@ private:
ThreadLocal<CpuMatrixPtr> localBuf_;
ThreadLocal<std::vector<int>> localBufRows_;
ThreadLocal<std::vector<int>> blockSeq_;
#endif
};
typedef struct { unsigned int col; } sparse_non_value_t;
......
......@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <cstddef>
#include "CpuSparseMatrix.h"
#include "Matrix.h"
......@@ -237,3 +240,47 @@ private:
};
} // namespace paddle
#else
#include "CpuSparseMatrix.h"
namespace paddle {
class GpuSparseMatrix : public Matrix {
public:
GpuSparseMatrix(size_t height,
size_t width,
size_t nnz, /* used to allocate space */
SparseValueType valueType = FLOAT_VALUE,
SparseFormat format_ = SPARSE_CSR,
bool trans = false)
: Matrix(NULL, height, width, trans, false) {}
GpuSparseMatrix(real* value,
int* rows,
int* cols,
size_t height,
size_t width,
size_t nnz,
SparseValueType valueType,
SparseFormat format,
bool trans)
: Matrix(NULL, height, width, trans, true) {}
void resize(size_t newHeight,
size_t newWidth,
size_t newNnz, /* used to allocate space */
SparseValueType valueType,
SparseFormat format) {}
void resize(size_t newHeight, size_t newWidth) {}
MatrixPtr getTranspose() { return nullptr; }
void setRow(size_t row,
size_t colNum,
const unsigned int* cols,
const real* values) {}
};
} // namespace paddle
#endif
......@@ -14,6 +14,8 @@ limitations under the License. */
#pragma once
#ifndef PADDLE_MOBILE_INFERENCE
#include <gflags/gflags.h>
#include <string.h>
#include <algorithm>
......@@ -313,3 +315,27 @@ private:
};
} // namespace paddle
#else
namespace paddle {
class SparseRowCpuMatrix : public CpuMatrix {
public:
void reserveStore() {}
void clearIndices() {}
};
class SparsePrefetchRowCpuMatrix : public SparseRowCpuMatrix {
public:
void setupIndices() {}
void addRows(MatrixPtr input) {}
void addRows(IVectorPtr ids) {}
};
class SparseAutoGrowRowCpuMatrix : public SparseRowCpuMatrix {};
class CacheRowCpuMatrix : public SparseAutoGrowRowCpuMatrix {};
class SparseRowIdsCpuMatrix : public CpuMatrix {};
} // namespace paddle
#endif
......@@ -3,8 +3,10 @@
add_simple_unittest(test_ExecViaCpu)
add_simple_unittest(test_SIMDFunctions)
add_simple_unittest(test_TrainingAlgorithm)
add_simple_unittest(test_SparseMatrix)
add_simple_unittest(test_RowBuffer)
if(NOT MOBILE_INFERENCE)
add_simple_unittest(test_SparseMatrix)
endif()
# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
add_unittest(test_matrixCompare
......
......@@ -214,6 +214,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc
rnn/recurrent_op_utils.cc
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/data_type.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/var_type.h"
namespace paddle {
namespace operators {
class AssignFunctor {
public:
AssignFunctor(framework::Variable *out,
const platform::DeviceContext &dev_ctx)
: out_(out), dev_ctx_(dev_ctx) {}
void operator()(const framework::LoDTensor &lod_tensor) const {
auto &out_tensor = *out_->GetMutable<framework::LoDTensor>();
copy_tensor(lod_tensor, &out_tensor);
}
void operator()(const framework::LoDTensorArray &array) const {
auto &out_array = *out_->GetMutable<framework::LoDTensorArray>();
out_array.resize(array.size());
for (size_t i = 0; i < array.size(); ++i) {
copy_tensor(array[i], &out_array[i]);
}
}
void operator()(const framework::SelectedRows &rows) const {
framework::SelectedRows &out_rows =
*out_->GetMutable<framework::SelectedRows>();
out_rows.set_rows(rows.rows());
out_rows.set_height(rows.height());
auto &t = rows.value();
out_rows.mutable_value()->CopyFrom(t, t.place(), dev_ctx_);
}
template <typename T>
void operator()(const T &v) const {
PADDLE_THROW("Not support type for assign op %s", typeid(T).name());
}
private:
void copy_tensor(const framework::LoDTensor &lod_tensor,
framework::LoDTensor *out) const {
auto &out_tensor = *out;
out_tensor.CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_);
out_tensor.set_lod(lod_tensor.lod());
}
framework::Variable *out_;
const platform::DeviceContext &dev_ctx_;
};
class AssignOp : public framework::OperatorBase {
public:
AssignOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto *x = scope.FindVar(Input("X"));
if (x == nullptr) {
return;
}
auto *out = scope.FindVar(Output("Out"));
PADDLE_ENFORCE(
out != nullptr,
"The Output(Out) should not be null if the Input(X) is set.");
framework::VisitVarType(*x, AssignFunctor(out, dev_ctx));
}
};
class AssignOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
AssignOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(LoDTensor, SelectedRows or LoDTensorArray) The input variable "
"could be LoDTensor, SelectedRows or LoDTensorArray.")
.AsDispensable();
AddOutput("Out",
"(LoDTensor, SelectedRows or LoDTensorArray) The type of output "
"is the same as input X.");
AddComment(R"DOC(Assign Operator
Out = X, when type in [LoDTensor/SelectedRows/LoDTensorArray]
raise error if the type is not listed above.
)DOC");
}
};
class AssignInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
if (context->HasInput("X")) {
auto type = context->GetInputsVarType("X")[0];
if (type == framework::VarDesc_VarType_SELECTED_ROWS ||
type == framework::VarDesc_VarType_LOD_TENSOR) {
context->SetOutputDim("Out", context->GetInputDim("X"));
}
}
}
};
class AssignGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *op = new framework::OpDescBind();
op->SetType("assign");
op->SetInput("X", OutputGrad("Out"));
op->SetOutput("Out", InputGrad("X"));
return std::unique_ptr<framework::OpDescBind>(op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(assign, ops::AssignOp, ops::AssignGradMaker,
ops::AssignInferShape, ops::AssignOpProtoMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_decode_op.h"
namespace paddle {
namespace operators {
class BeamSearchDecodeOp : public framework::OperatorBase {
public:
BeamSearchDecodeOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
framework::ExecutionContext ctx(*this, scope, dev_ctx);
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size();
PADDLE_ENFORCE_GT(step_num, 0UL,
"beam search steps should be larger than 0");
const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0");
for (size_t i = 0; i < step_num; ++i) {
PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL,
"Level of LodTensor should be 2");
}
// prepare output
LoDTensor* sentenceIds = ctx.Output<LoDTensor>("SentenceIds");
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
BeamSearchDecoder<float> beam_search_decoder;
beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds,
sentenceScores);
}
};
class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
BeamSearchDecodeOpProtoMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Ids",
"(LodTensorArray)"
"score of the candidate words in each step");
AddInput("Scores",
"(LodTensorArray)"
"score of the candidate words in each step");
AddOutput("SentenceIds",
"(LodTensor)"
"All possible result sentences of word ids");
AddOutput("SentenceScores",
"(LodTensor)"
"All possible result sentences of word scores");
AddComment(R"DOC(
Pack the result of Beam search op into SentenceIds and SentenceScores.
)DOC");
}
};
class BeamSearchDecodeInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* context) const override {
PADDLE_ENFORCE(context->HasInput("Ids"),
"BeamSearchDecodeOp must has input Ids");
PADDLE_ENFORCE(context->HasInput("Scores"),
"BeamSearchDecodeOp must has input Scores");
PADDLE_ENFORCE(context->HasOutput("SentenceIds"),
"BeamSearchDecodeOp must has output SentenceIds");
PADDLE_ENFORCE(context->HasOutput("SentenceScores"),
"BeamSearchDecodeOp must has output SentenceScores");
}
};
class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
for (auto& o : op_desc.Output("SentenceIds")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
for (auto& o : op_desc.Output("SentenceScores")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp,
paddle::operators::BeamSearchDecodeOpProtoMaker,
paddle::operators::BeamSearchDecodeInferShape,
paddle::operators::BeamSearchDecodeInferVarType,
paddle::framework::EmptyGradOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
using LoDTensorArray = framework::LoDTensorArray;
// all the lod have 2 levels.
// The First is source level, the second is sentence level.
// source level describe how many candidate words for this source.
// sentence level describe these candidates belong to which prefix
const size_t kSourceLevel = 0;
const size_t kSentenceLevel = 1;
template <typename T>
struct BeamNode {
BeamNode(int64_t word_id, T score) : word_id_(word_id), score_(score) {}
~BeamNode() {
if (parent_) {
parent_->DropKid(this);
if (parent_->kids_.size() == 0UL) {
delete parent_;
}
}
VLOG(3) << "Delete BeamNode root with word_id:" << this->word_id_;
}
void AppendTo(BeamNode* parent) {
parent_ = parent;
parent->kids_.insert(this);
}
void DropKid(BeamNode* kid) { kids_.erase(kid); }
BeamNode* parent_ = nullptr;
std::unordered_set<BeamNode*> kids_;
int64_t word_id_;
T score_;
};
template <typename T>
using BeamNodeVector = std::vector<std::unique_ptr<BeamNode<T>>>;
template <typename T>
struct Sentence {
std::vector<int64_t> word_ids;
std::vector<T> scores;
};
template <typename T>
using SentenceVector = std::vector<Sentence<T>>;
template <typename T>
struct BeamSearchDecoder {
/**
* make a BeamNode and all it's related prefix BeanNode into a Sentence.
*/
Sentence<T> MakeSentence(const BeamNode<T>* node) const;
/**
* Param:
* cur_ids: LoDTensor of One step for word ID
* cur_scores: LoDTensor of One Step for word score
* prefixes_list: prefixes for each source sentence.
* sentence_vector_list: result sentence_vector for each source sentence.
* Return:
* a new prefixes list for each source of current step
*/
std::vector<BeamNodeVector<T>> PackTwoSteps(
const LoDTensor& cur_ids, const LoDTensor& cur_scores,
std::vector<BeamNodeVector<T>>& prefixes_list,
std::vector<SentenceVector<T>>* sentence_vector_list) const;
/**
* convert the result sentence_vector for each source sentence into two
* LodTensor.
* One is all candidate sentences with word id, one is all candidate sentences
* with word score.
* Param:
* sentence_vector_list: sentence_vector for each source sentence.
* id_tensor: result LoDTensor for sentences of id.
* score_tensor: result LoDTensor for sentences of score.
*/
void ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
/**
* Pack all steps of id/score LodTensor into sentence LoDTensor
* it's main logic is:
* ```python
* prefix
* result_sentence
* result_lod_tensor
*
* for (step in steps):
* prefix = PackTwoSteps(prefix, step, &result_sentence)
* ConvertSentenceVector<T>ToLodTensor(result_sentence, &result_lod_tensor)
* ```
*/
void PackAllSteps(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
};
template <typename T>
Sentence<T> BeamSearchDecoder<T>::MakeSentence(const BeamNode<T>* node) const {
Sentence<T> sentence;
while (node != nullptr) {
sentence.word_ids.emplace_back(node->word_id_);
sentence.scores.emplace_back(node->score_);
node = node->parent_;
}
std::reverse(std::begin(sentence.word_ids), std::end(sentence.word_ids));
std::reverse(std::begin(sentence.scores), std::end(sentence.scores));
return sentence;
}
template <typename T>
std::vector<BeamNodeVector<T>> BeamSearchDecoder<T>::PackTwoSteps(
const LoDTensor& cur_ids, const LoDTensor& cur_scores,
std::vector<BeamNodeVector<T>>& prefixes_list,
std::vector<SentenceVector<T>>* sentence_vector_list) const {
std::vector<BeamNodeVector<T>> result;
for (size_t src_idx = 0; src_idx < cur_ids.lod()[kSourceLevel].size() - 1;
++src_idx) {
size_t src_start = cur_ids.lod().at(kSourceLevel)[src_idx];
size_t src_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1];
BeamNodeVector<T> beam_nodes;
// if prefixes size is 0, it means this is the first step. In this step,
// all candidate id is the start of candidate sentences.
if (prefixes_list.empty()) {
PADDLE_ENFORCE_EQ(cur_ids.lod().at(kSourceLevel).back(),
cur_ids.lod().at(kSentenceLevel).back(),
"in the first step");
for (size_t id_idx = src_start; id_idx < src_end; ++id_idx) {
beam_nodes.push_back(std::unique_ptr<BeamNode<T>>(new BeamNode<T>(
cur_ids.data<int64_t>()[id_idx], cur_scores.data<T>()[id_idx])));
}
} else {
BeamNodeVector<T>& prefixes = prefixes_list[src_idx];
SentenceVector<T>& sentence_vector = (*sentence_vector_list)[src_idx];
PADDLE_ENFORCE_EQ(src_end - src_start, prefixes.size(),
"prefix and candidate set number should be the same");
auto candidate_offset = cur_ids.lod()[kSentenceLevel];
for (size_t prefix_idx = 0; prefix_idx < prefixes.size(); ++prefix_idx) {
std::unique_ptr<BeamNode<T>>& prefix = prefixes[prefix_idx];
size_t candidate_start = candidate_offset[src_start + prefix_idx];
size_t candidate_end = candidate_offset[src_start + prefix_idx + 1];
if (candidate_start == candidate_end) {
VLOG(3) << "this sentence has no more candidate, "
"add to result sentence and rm it from beam tree";
sentence_vector.push_back(MakeSentence(prefix.get()));
prefix.reset();
} else {
for (size_t candidate_idx = candidate_start;
candidate_idx < candidate_end; ++candidate_idx) {
auto* candidate =
new BeamNode<T>(cur_ids.data<int64_t>()[candidate_idx],
cur_scores.data<T>()[candidate_idx]);
candidate->AppendTo(prefix.get());
beam_nodes.push_back(std::unique_ptr<BeamNode<T>>(candidate));
}
prefix.release();
}
}
}
result.push_back(std::move(beam_nodes));
}
return result;
}
template <typename T>
void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
size_t src_num = sentence_vector_list.size();
PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0");
std::vector<size_t> source_level_lod = {0};
std::vector<size_t> sentence_level_lod = {0};
std::vector<int64_t> id_data;
std::vector<T> score_data;
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
for (Sentence<T>& sentence : sentence_vector_list[src_idx]) {
id_data.insert(id_data.end(), sentence.word_ids.begin(),
sentence.word_ids.end());
score_data.insert(score_data.end(), sentence.scores.begin(),
sentence.scores.end());
sentence_level_lod.push_back(sentence_level_lod.back() +
sentence.word_ids.size());
}
source_level_lod.push_back(source_level_lod.back() +
sentence_vector_list[src_idx].size());
}
auto cpu_place = new paddle::platform::CPUPlace();
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);
framework::LoD lod;
lod.push_back(source_level_lod);
lod.push_back(sentence_level_lod);
id_tensor->set_lod(lod);
id_tensor->Resize({static_cast<int64_t>(id_data.size())});
id_tensor->mutable_data<int64_t>(paddle::platform::CPUPlace());
id_tensor->CopyFromVector<int64_t>(id_data, cpu_ctx);
score_tensor->set_lod(lod);
score_tensor->Resize({static_cast<int64_t>(score_data.size())});
score_tensor->mutable_data<T>(paddle::platform::CPUPlace());
score_tensor->CopyFromVector<T>(score_data, cpu_ctx);
}
template <typename T>
void BeamSearchDecoder<T>::PackAllSteps(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0");
PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(),
"step_ids and step_scores should be the same");
const size_t step_num = step_ids.size();
const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;
PADDLE_ENFORCE_GT(src_num, 0UL, "source num should be larger than 0");
// previous prefixes for each step,
// the init length is 0, means this is the first step.
std::vector<BeamNodeVector<T>> beamnode_vector_list(0);
std::vector<SentenceVector<T>> sentence_vector_list(src_num);
// pack all steps for one batch first, then another batch
for (size_t step_id = 0; step_id < step_num; ++step_id) {
beamnode_vector_list =
PackTwoSteps(step_ids.at(step_id), step_scores.at(step_id),
beamnode_vector_list, &sentence_vector_list);
}
// append last beam_node to result
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
for (auto& beam_node : beamnode_vector_list.at(src_idx)) {
sentence_vector_list[src_idx].push_back(MakeSentence(beam_node.get()));
beam_node.reset();
}
}
ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor,
score_tensor);
}
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_decode_op.h"
#include "gtest/gtest.h"
using CPUPlace = paddle::platform::CPUPlace;
using LoD = paddle::framework::LoD;
using LoDTensor = paddle::framework::LoDTensor;
using LoDTensorArray = paddle::framework::LoDTensorArray;
template <typename T>
using BeamNode = paddle::operators::BeamNode<T>;
template <typename T>
using BeamSearchDecoder = paddle::operators::BeamSearchDecoder<T>;
template <typename T>
using Sentence = paddle::operators::Sentence<T>;
template <typename T>
using BeamNodeVector = paddle::operators::BeamNodeVector<T>;
template <typename T>
using SentenceVector = paddle::operators::SentenceVector<T>;
namespace paddle {
namespace test {
void GenerateExample(const std::vector<size_t>& level_0,
const std::vector<size_t>& level_1,
const std::vector<int>& data, LoDTensorArray* ids,
LoDTensorArray* scores) {
PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1,
"source level is used to describe candidate set");
PADDLE_ENFORCE_EQ(level_1.back(), data.size(),
"the lowest level is used to describe data"
", so it's last element should be data length");
CPUPlace place;
LoD lod;
lod.push_back(level_0);
lod.push_back(level_1);
// Ids
LoDTensor tensor_id;
tensor_id.set_lod(lod);
tensor_id.Resize({static_cast<int64_t>(data.size())});
// malloc memory
int64_t* id_ptr = tensor_id.mutable_data<int64_t>(place);
for (size_t i = 0; i < data.size(); ++i) {
id_ptr[i] = static_cast<int64_t>(data.at(i));
}
// Scores
LoDTensor tensor_score;
tensor_score.set_lod(lod);
tensor_score.Resize({static_cast<int64_t>(data.size())});
// malloc memory
float* score_ptr = tensor_score.mutable_data<float>(place);
for (size_t i = 0; i < data.size(); ++i) {
score_ptr[i] = static_cast<float>(data.at(i));
}
ids->push_back(tensor_id);
scores->push_back(tensor_score);
}
} // namespace test
} // namespace paddle
TEST(BeamSearchDecodeOp, DeleteBeamNode) {
auto* root = new BeamNode<float>(0, 0);
auto* b1 = new BeamNode<float>(1, 1);
auto* b2 = new BeamNode<float>(2, 2);
auto* b3 = new BeamNode<float>(3, 3);
b1->AppendTo(root);
b2->AppendTo(root);
b3->AppendTo(b1);
delete b3;
delete b2;
}
TEST(BeamSearchDecodeOp, MakeSentence) {
auto* root = new BeamNode<float>(0, 0);
auto* b1 = new BeamNode<float>(1, 1);
auto* end = new BeamNode<float>(2, 2);
b1->AppendTo(root);
end->AppendTo(b1);
BeamSearchDecoder<float> helper;
Sentence<float> sentence = helper.MakeSentence(end);
delete end;
std::vector<int64_t> expect_ids = {0, 1, 2};
ASSERT_EQ(sentence.word_ids, expect_ids);
std::vector<float> expect_scores = {0, 1, 2};
ASSERT_EQ(sentence.scores, expect_scores);
}
TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) {
CPUPlace place;
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(
std::vector<size_t>{0, 2, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
std::vector<BeamNodeVector<float>> beamnode_vector_list;
std::vector<SentenceVector<float>> sentence_vector_list(
2, SentenceVector<float>());
BeamSearchDecoder<float> helper;
beamnode_vector_list = helper.PackTwoSteps(
ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
ASSERT_EQ(beamnode_vector_list.size(), 2UL);
ASSERT_EQ(beamnode_vector_list[0].size(), 2UL);
ASSERT_EQ(beamnode_vector_list[1].size(), 4UL);
}
TEST(BeamSearchDecodeOp, PackTwoSteps) {
CPUPlace place;
// first source has three prefix
BeamNodeVector<float> source0_prefixes;
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(1, 1)));
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(0, 0)));
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(3, 3)));
// second source has two prefix
BeamNodeVector<float> source1_prefixes;
source1_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(4, 4)));
source1_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(5, 5)));
std::vector<BeamNodeVector<float>> beamnode_vector_list;
std::vector<SentenceVector<float>> sentence_vector_list(
2, SentenceVector<float>());
beamnode_vector_list.push_back(std::move(source0_prefixes));
beamnode_vector_list.push_back(std::move(source1_prefixes));
// generate data for one step
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(std::vector<size_t>{0, 3, 5},
std::vector<size_t>{0, 1, 1, 3, 4, 5},
std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
BeamSearchDecoder<float> helper1;
beamnode_vector_list = helper1.PackTwoSteps(
ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
ASSERT_EQ(sentence_vector_list[0].size(), 1UL);
ASSERT_EQ(sentence_vector_list[1].size(), 0UL);
ASSERT_EQ(beamnode_vector_list[0].size(), 3UL);
ASSERT_EQ(beamnode_vector_list[1].size(), 2UL);
}
TEST(BeamSearchDecodeOp, PackAllSteps) {
CPUPlace place;
// we will constuct a sample data with 3 steps and 2 source sentences
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(
std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
paddle::test::GenerateExample(
std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 1, 3, 5, 5, 6},
std::vector<int>{0, 1, 2, 3, 4, 5}, &ids, &scores);
paddle::test::GenerateExample(std::vector<size_t>{0, 3, 6},
std::vector<size_t>{0, 0, 1, 2, 3, 4, 5},
std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
ASSERT_EQ(ids.size(), 3UL);
ASSERT_EQ(scores.size(), 3UL);
BeamSearchDecoder<float> helper;
LoDTensor id_tensor;
LoDTensor score_tensor;
helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor);
LoD lod = id_tensor.lod();
std::vector<size_t> expect_source_lod = {0, 4, 8};
EXPECT_EQ(lod[0], expect_source_lod);
std::vector<size_t> expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19};
EXPECT_EQ(lod[1], expect_sentence_lod);
// 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4
std::vector<int> expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5,
4, 3, 2, 4, 4, 3, 6, 5, 4};
ASSERT_EQ(id_tensor.dims()[0], static_cast<int64_t>(expect_data.size()));
for (size_t i = 0; i < expect_data.size(); ++i) {
ASSERT_EQ(id_tensor.data<int64_t>()[i],
static_cast<int64_t>(expect_data[i]));
}
for (int64_t i = 0; i < id_tensor.dims()[0]; ++i) {
ASSERT_EQ(score_tensor.data<float>()[i],
static_cast<float>(id_tensor.data<int64_t>()[i]));
}
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/bilinear_tensor_product_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class BilinearTensorProductOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(Weight) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
auto weight_dims = ctx->GetInputDim("Weight");
PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input(X) must be a 2D Tensor.");
PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input(Y) must be a 2D Tensor.");
PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL,
"The input(Weight) must be a 3D tensor.");
PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
"The first dimension(batch_size) of input(X) must be "
"equal to the first dimension of the input(Y).");
PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1],
"The second dimension of input(X) must be equal to "
"the second dimension of the input(Weight).");
PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2],
"The second dimension of input(Y) must be equal to "
"the third dimension of the input(Weight).");
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
PADDLE_ENFORCE(bias_dims.size() == 2UL && bias_dims[0] == 1UL,
"The Input(Bias) must be a 2-D tensor with "
"the 2nd dimension fixed to 1 (a row vector).");
PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0],
"The second dimension of input(Bias) must be equal "
"to the first dimension of the input(Weight).");
}
ctx->SetOutputDim("Out", {x_dims[0], weight_dims[0]});
ctx->ShareLoD("X", /*->*/ "Out");
}
};
class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
public:
BilinearTensorProductOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of bilinear_tensor_product operator.");
AddInput("Y", "The second input of bilinear_tensor_product operator.");
AddInput("Weight",
"The learnable parameters of bilinear_tensor_product operator.");
AddInput("Bias", "The learnable bias of bilinear_tensor_product operator.")
.AsDispensable();
AddOutput("Out", "The output of bilinear_tensor_product operator.");
AddComment(R"DOC(
Bilinear Tensor Product operator.
Given input X and Y, a 3D tensor weight, and bias. Each column of the
output is computed by one slice i = 1, . . . , k of the tensor:
M = (X W_i) \cdot Y
Out_i = \sum_i {M_i} + Bias_i
)DOC");
}
};
class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(Weight) should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null.");
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
auto weight_dims = ctx->GetInputDim("Weight");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
PADDLE_ENFORCE_EQ(out_dims.size(), 2UL,
"The input(Out@GRAD) must be a 2D Tensor.");
PADDLE_ENFORCE_EQ(
x_dims[0], out_dims[0],
"The first dimension(batch_size) of input(Out@GRAD) must be "
"equal to the first dimension of the Input(X).");
PADDLE_ENFORCE_EQ(
weight_dims[0], out_dims[1],
"The second dimension of input(Out@GRAD) must be equal to "
"the third dimension of the Input(Weight).");
if (ctx->HasInput("Bias")) {
auto bias_dims = ctx->GetInputDim("Bias");
PADDLE_ENFORCE_EQ(
bias_dims[1], out_dims[1],
"The second dimension of input(Out@GRAD) must be equal to "
"the second dimension of the Input(Bias).");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto x_grad_name = framework::GradVarName("X");
auto y_grad_name = framework::GradVarName("Y");
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims);
}
if (ctx->HasOutput(y_grad_name)) {
ctx->SetOutputDim(y_grad_name, y_dims);
}
if (ctx->HasOutput(weight_grad_name)) {
ctx->SetOutputDim(weight_grad_name, weight_dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp,
ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad,
ops::BilinearTensorProductOpGrad);
REGISTER_OP_CPU_KERNEL(
bilinear_tensor_product,
ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, float>,
ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
bilinear_tensor_product_grad,
ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, float>,
ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/bilinear_tensor_product_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
bilinear_tensor_product,
ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, float>,
ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(
bilinear_tensor_product_grad,
ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, float>,
ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T>
class BilinearTensorProductKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* weight = ctx.Input<Tensor>("Weight");
auto* bias = ctx.Input<Tensor>("Bias");
auto* out = ctx.Output<Tensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
auto y_mat = EigenMatrix<T>::From(*y);
auto output_mat = EigenMatrix<T>::From(*out);
auto batch_size = x->dims()[0];
auto weight_dims = weight->dims();
int out_dim = weight_dims[0];
auto x_dim = weight_dims[1];
auto y_dim = weight_dims[2];
auto place = ctx.GetEigenDevice<Place>();
// Create the intermediate variable to caculate the result of
// Input(X) multiplied by Input(Weight_i), the formula is:
// left_mul = X Weight_i.
Tensor left_mul;
left_mul.mutable_data<T>(framework::make_ddim({batch_size, y_dim}),
ctx.GetPlace());
auto left_mul_mat = EigenMatrix<T>::From(left_mul);
for (int i = 0; i < out_dim; ++i) {
auto output_col_vec = output_mat.chip(i, 1);
Tensor weight_mat =
weight->Slice(i, i + 1).Resize(framework::make_ddim({x_dim, y_dim}));
math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
batch_size, y_dim, x_dim, 1, x->data<T>(),
weight_mat.data<T>(), 0, left_mul.data<T>());
output_col_vec.device(place) =
(left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
}
if (bias) {
auto bias_vec = EigenMatrix<T>::From(*bias);
Eigen::DSizes<int, 2> bcast(batch_size, 1);
output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat;
}
}
};
template <typename Place, typename T>
class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const Tensor* x = ctx.Input<Tensor>("X");
const Tensor* y = ctx.Input<Tensor>("Y");
const Tensor* weight = ctx.Input<Tensor>("Weight");
Tensor* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
Tensor* d_y = ctx.Output<Tensor>(framework::GradVarName("Y"));
Tensor* d_weight = ctx.Output<Tensor>(framework::GradVarName("Weight"));
Tensor* d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
const Tensor* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto batch_size = x->dims()[0];
auto weight_dims = weight->dims();
int out_dim = weight_dims[0];
auto x_dim = weight_dims[1];
auto y_dim = weight_dims[2];
auto x_mat = EigenMatrix<T>::From(*x);
auto y_mat = EigenMatrix<T>::From(*y);
auto d_out_mat = EigenMatrix<T>::From(*d_out);
auto place = ctx.GetEigenDevice<Place>();
// Create the intermediate variable to caculate the Output(Y@Grad).
Tensor x_scale;
x_scale.mutable_data<T>(framework::make_ddim({batch_size, x_dim}),
ctx.GetPlace());
auto x_scale_mat = EigenMatrix<T>::From(x_scale);
// Create the intermediate variable to caculate the Output(X@Grad).
Tensor y_scale;
y_scale.mutable_data<T>(framework::make_ddim({batch_size, y_dim}),
ctx.GetPlace());
auto y_scale_mat = EigenMatrix<T>::From(y_scale);
math::SetConstant<Place, T> set_zero;
// Set Output(X@Grad) be zero.
if (d_x) {
d_x->mutable_data<T>(ctx.GetPlace());
set_zero(ctx.device_context(), d_x, static_cast<T>(0));
}
// Set Output(Y@Grad) be zero.
if (d_y) {
d_y->mutable_data<T>(ctx.GetPlace());
set_zero(ctx.device_context(), d_y, static_cast<T>(0));
}
// Caculate the Output(X@Grad) and Output(Y@Grad).
if (d_x || d_y) {
Eigen::DSizes<int, 2> bcast_for_x(1, y_dim);
Eigen::DSizes<int, 2> bcast_for_y(1, x_dim);
for (int i = 0; i < out_dim; ++i) {
Tensor weight_i = weight->Slice(i, i + 1).Resize(
framework::make_ddim({x_dim, y_dim}));
auto output_vec = d_out_mat.chip(i, 1);
if (d_x) {
y_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
.broadcast(bcast_for_x) *
y_mat;
math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasTrans,
batch_size, x_dim, y_dim, 1, y_scale.data<T>(),
weight_i.data<T>(), 1, d_x->data<T>());
}
if (d_y) {
x_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
.broadcast(bcast_for_y) *
x_mat;
math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
batch_size, y_dim, x_dim, 1, x_scale.data<T>(),
weight_i.data<T>(), 1, d_y->data<T>());
}
}
}
// Caculate the gradient of Input(Weight).
if (d_weight) {
d_weight->mutable_data<T>(ctx.GetPlace());
Eigen::DSizes<int, 2> bcast_for_weight(1, x_dim);
for (int i = 0; i < out_dim; ++i) {
Tensor d_weight_i = d_weight->Slice(i, i + 1).Resize(
framework::make_ddim({x_dim, y_dim}));
auto output_vec = d_out_mat.chip(i, 1);
x_scale_mat.device(place) =
output_vec.reshape(Eigen::DSizes<int, 2>(batch_size, 1))
.broadcast(bcast_for_weight) *
x_mat;
math::gemm<Place, T>(ctx.device_context(), CblasTrans, CblasNoTrans,
x_dim, y_dim, batch_size, 1, x_scale.data<T>(),
y->data<T>(), 0, d_weight_i.data<T>());
}
}
// Caculate the gradient of Input(Bias).
if (d_bias) {
d_bias->mutable_data<T>(ctx.GetPlace());
auto d_bias_mat = EigenMatrix<T>::From(*d_bias);
d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes<int, 1>(0));
}
}
};
} // namespace operators
} // namespace paddle
......@@ -94,5 +94,13 @@ class CompareOp : public framework::OperatorWithKernel {
REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
REGISTER_LOGICAL_KERNEL(greater_than, CPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_OP(equal, "Out = X == Y");
REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);
......@@ -15,4 +15,9 @@
#include "paddle/operators/compare_op.h"
REGISTER_LOGICAL_KERNEL(less_than, GPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_KERNEL(less_equal, GPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_KERNEL(greater_than, GPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_KERNEL(greater_equal, GPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_KERNEL(equal, GPU, paddle::operators::EqualFunctor);
......@@ -27,6 +27,24 @@ struct LessThanFunctor {
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a < b; }
};
template <typename T>
struct LessEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
};
template <typename T>
struct GreaterThanFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
};
template <typename T>
struct GreaterEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
};
template <typename T>
struct EqualFunctor {
using ELEM_TYPE = T;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/framework/executor.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
class ConditionalOp : public framework::OperatorBase {
public:
ConditionalOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
protected:
std::vector<const framework::LoDTensor *> InputTensors(
const framework::Scope &scope) const {
std::vector<const framework::LoDTensor *> retv;
auto xs = Inputs("X");
retv.resize(xs.size(), nullptr);
std::transform(
xs.begin(), xs.end(), retv.begin(),
[&scope](const std::string &var_name) -> const framework::LoDTensor * {
auto *var = scope.FindVar(var_name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", var_name);
return &var->Get<framework::LoDTensor>();
});
return retv;
}
};
class ConditionalBlockOp : public ConditionalOp {
public:
ConditionalBlockOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: ConditionalOp(type, inputs, outputs, attrs) {}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto xs = InputTensors(scope);
bool need_run = std::all_of(
xs.begin(), xs.end(),
[](const framework::LoDTensor *t) { return t->numel() != 0; });
if (need_run) {
auto *scope_var = scope.FindVar(Output("Scope"));
PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
auto *scopes = scope_var->GetMutable<std::vector<framework::Scope *>>();
scopes->resize(1);
scopes->front() = &scope.NewScope();
auto &cur_scope = *scopes->front();
auto *block = Attr<framework::BlockDescBind *>("block");
framework::Executor exec(dev_ctx);
exec.Run(*block->Program(), &cur_scope, block->ID(), false);
}
}
};
class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
ConditionalBlockOpProtoMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The conditional variable of this operator. If X is empty, the "
"whole sub-block will not be executed.")
.AsDuplicable();
AddInput("Params", "The input variables of the sub-block.").AsDuplicable();
AddOutput("Out", "The output variables of the sub-block.").AsDuplicable();
AddOutput("Scope",
"(std::vector<Scope*>) The step scope of conditional block. To "
"unify the conditional block, rnn and while op, the type of "
"scope is std::vector<Scope*>");
AddAttr<framework::BlockDescBind *>(
"block", "The step block of conditional block operator");
AddComment(R"DOC(Conditional block operator
Run the sub-block if X is not empty. Params is the other inputs and Out is the
outputs of the sub-block.
)DOC");
}
};
class ConditionalBlockGradOp : public ConditionalOp {
public:
ConditionalBlockGradOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: ConditionalOp(type, inputs, outputs, attrs) {}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto xs = this->InputTensors(scope);
bool need_run = std::all_of(
xs.begin(), xs.end(),
[](const framework::LoDTensor *t) { return t->numel() != 0; });
if (need_run) {
auto *scope_var = scope.FindVar(Input("Scope"));
PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
auto &scopes = scope_var->Get<std::vector<framework::Scope *>>();
framework::Scope &cur_scope = *scopes[0];
auto *block = Attr<framework::BlockDescBind *>("block");
framework::Executor exec(dev_ctx);
exec.Run(*block->Program(), &cur_scope, block->ID(), false);
AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("Params"),
Outputs(framework::GradVarName("Params")));
AssignLocalGradientToGlobal(dev_ctx, cur_scope, Inputs("X"),
Outputs(framework::GradVarName("X")));
}
}
private:
void AssignLocalGradientToGlobal(
const platform::DeviceContext &dev_ctx, const framework::Scope &cur_scope,
const std::vector<std::string> &p_names,
const std::vector<std::string> &pg_names) const {
for (size_t i = 0; i < p_names.size(); ++i) {
auto out_grad_name = pg_names[i];
auto in_grad_name = framework::GradVarName(p_names[i]);
auto *in_var = cur_scope.FindVar(in_grad_name);
if (in_var == nullptr) {
continue;
}
auto new_in_grad_name = cur_scope.Rename(in_grad_name);
auto assign =
framework::OpRegistry::CreateOp("assign", {{"X", {new_in_grad_name}}},
{{"Out", {out_grad_name}}}, {});
assign->Run(cur_scope, dev_ctx);
cur_scope.Rename(new_in_grad_name, in_grad_name);
}
}
};
class ConditionalBlockGradInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
PADDLE_ENFORCE(context->HasInputs("X"));
if (context->HasInputs("Params")) {
PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("Params")));
context->SetOutputsDim(framework::GradVarName("Params"),
context->GetInputsDim("Params"));
}
PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("X")));
context->SetOutputsDim(framework::GradVarName("X"),
context->GetInputsDim("X"));
}
};
class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto grad_op = new framework::OpDescBind();
grad_op->SetType("conditional_block_grad");
grad_op->SetInput("X", Input("X"));
grad_op->SetInput("Params", Input("Params"));
grad_op->SetInput("Out", Output("Out"));
grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
grad_op->SetInput("Scope", Output("Scope"));
grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params"));
grad_op->SetBlockAttr("block", *this->grad_block_[0]);
return std::unique_ptr<framework::OpDescBind>(grad_op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(conditional_block, ops::ConditionalBlockOp,
ops::ConditionalBlockOpProtoMaker,
ops::ConditionalBlockGradMaker);
REGISTER_OPERATOR(conditional_block_grad, ops::ConditionalBlockGradOp,
ops::ConditionalBlockGradInferShape);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/expand_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class ExpandOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
std::vector<int> expand_times =
ctx->Attrs().Get<std::vector<int>>("expand_times");
auto x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(static_cast<size_t>(x_dims.size()), expand_times.size(),
"The number of Attr(expand_times)'s value must be equal "
"to the rank of Input(X).");
PADDLE_ENFORCE_LE(x_dims.size(), 6,
"The rank of Input(X) must not be greater than 6.");
std::vector<int64_t> out_shape(x_dims.size());
for (size_t i = 0; i < expand_times.size(); ++i) {
PADDLE_ENFORCE_GE(expand_times[i], 1,
"Each value of Attr(expand_times) should not be "
"less than 1.");
out_shape[i] = x_dims[i] * expand_times[i];
}
ctx->SetOutputDim("Out", framework::make_ddim(out_shape));
if (out_shape[0] == x_dims[0]) {
ctx->ShareLoD("X", "Out");
}
}
};
class ExpandOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ExpandOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
"X is the input tensor to be expanded.");
AddOutput("Out",
"(Tensor, default Tensor<float>) A tensor with rank in [1, 6]."
"The rank of Output(Out) is same as Input(X) except that each "
"dimension size of Output(Out) is equal to corresponding "
"dimension size of Input(X) multiplying corresponding value of "
"Attr(expand_times).");
AddAttr<std::vector<int>>("expand_times",
"Expand times number for each dimension.");
AddComment(R"DOC(
Expand operator tiles the input by given times number. You should set times
number for each dimension by providing attribute 'expand_times'. The rank of X
should be in [1, 6]. Please notice that size of 'expand_times' must be same with
X's rank. Following is a using case:
Input(X) is a 3-D tensor with shape [2, 3, 1]:
[
[[1], [2], [3]],
[[4], [5], [6]]
]
Attr(expand_times): [1, 2, 2]
Output(Out) is a 3-D tensor with shape [2, 6, 2]:
[
[[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
[[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
]
)DOC");
}
};
class ExpandGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null.");
auto x_dims = ctx->GetInputDim("X");
std::vector<int> expand_times =
ctx->Attrs().Get<std::vector<int>>("expand_times");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
for (size_t i = 0; i < expand_times.size(); ++i) {
PADDLE_ENFORCE_EQ(x_dims[i] * expand_times[i], out_dims[i],
"Each dimension size of Input(Out@GRAD) should be "
"equal to multiplication of crroresponding dimension "
"size of Input(X) and Attr(expand_times) value.");
}
auto x_grad_name = framework::GradVarName("X");
if (ctx->HasOutput(x_grad_name)) {
ctx->SetOutputDim(x_grad_name, x_dims);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(expand, ops::ExpandOp, ops::ExpandOpMaker, expand_grad,
ops::ExpandGradOp);
REGISTER_OP_CPU_KERNEL(expand,
ops::ExpandKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
expand_grad, ops::ExpandGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/expand_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(expand,
ops::ExpandKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
expand_grad, ops::ExpandGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <boost/preprocessor/arithmetic/div.hpp>
#include <boost/preprocessor/arithmetic/mod.hpp>
#include <boost/preprocessor/comparison/greater.hpp>
#include <boost/preprocessor/comparison/greater_equal.hpp>
#include <boost/preprocessor/control/if.hpp>
#include <boost/preprocessor/repetition/repeat.hpp>
#include <iostream>
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#define MAX_RANK_SUPPORTED 6
#define EXPAND_TEMPLATE(z, n, data) \
case n + 1: { \
Expand<n + 1>(context); \
break; \
}
#define REP_EXPAND_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_TEMPLATE, ~)
#define COND(n) \
BOOST_PP_GREATER_EQUAL(BOOST_PP_DIV(n, MAX_RANK_SUPPORTED), \
BOOST_PP_MOD(n, MAX_RANK_SUPPORTED))
#define EXPAND_GRAD_CASE(n) \
case n: { \
ExpandBackward<n>(context, reshape_dims_vec, reduce_dims_vec); \
break; \
}
#define EXPAND_GRAD_TEMPLATE(z, n, data) \
BOOST_PP_IF(COND(n), EXPAND_GRAD_CASE(n), )
#define REP_EXPAND_GRAD_TEMPLATE(n) BOOST_PP_REPEAT(n, EXPAND_GRAD_TEMPLATE, ~)
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
template <typename Place, typename T>
class ExpandKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto rank = context.Input<Tensor>("X")->dims().size();
switch (rank) {
REP_EXPAND_TEMPLATE(MAX_RANK_SUPPORTED)
default:
PADDLE_ENFORCE(false,
"Only support tensor with rank being between 1 and 6.");
}
}
protected:
template <int Rank>
void Expand(const framework::ExecutionContext& context) const {
auto* in0 = context.Input<Tensor>("X");
auto& expand_times = context.Attr<std::vector<int>>("expand_times");
auto* out0 = context.Output<Tensor>("Out");
Eigen::DSizes<int, Rank> bcast_dims;
auto x_dims = in0->dims();
for (size_t i = 0; i < expand_times.size(); ++i) {
bcast_dims[i] = expand_times[i];
}
auto x = EigenTensor<T, Rank>::From(*in0);
out0->mutable_data<T>(context.GetPlace());
auto y = EigenTensor<T, Rank>::From(*out0);
auto place = context.GetEigenDevice<Place>();
y.device(place) = x.broadcast(bcast_dims);
}
};
template <typename Place, typename T>
class ExpandGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* in0 = context.Input<Tensor>("X");
auto& expand_times = context.Attr<std::vector<int>>("expand_times");
auto x_dims = in0->dims();
// 1. reshape_dims_vec is the broadcast parameter. For each dimension i,
// if expand_times[i] > 1 and x_dims[i] > 1, i will be splitted to two
// dimensions [expand_times[i], x_dims[i]].
// 2. reduce_dims_vec is the dimension parameter to compute gradients. For
// each dimension expanded, the gradients should be summed to original
// size.
std::vector<int> reshape_dims_vec;
std::vector<int> reduce_dims_vec;
for (size_t i = 0; i < expand_times.size(); ++i) {
if (expand_times[i] == 1) {
reshape_dims_vec.push_back(x_dims[i]);
} else {
if (x_dims[i] == 1) {
reduce_dims_vec.push_back(reshape_dims_vec.size());
reshape_dims_vec.push_back(expand_times[i]);
} else {
reduce_dims_vec.push_back(reshape_dims_vec.size());
reshape_dims_vec.push_back(expand_times[i]);
reshape_dims_vec.push_back(x_dims[i]);
}
}
}
int dims = reshape_dims_vec.size() * MAX_RANK_SUPPORTED +
reduce_dims_vec.size() - MAX_RANK_SUPPORTED - 1;
// no need reduce, just copy
if (reduce_dims_vec.size() == 0) {
auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
out0->mutable_data<T>(context.GetPlace());
out0->CopyFrom(*in0, context.GetPlace(), context.device_context());
} else {
switch (dims) {
REP_EXPAND_GRAD_TEMPLATE(72)
default:
PADDLE_ENFORCE(
false, "Only support tensor with rank being between 1 and 6.");
}
}
}
protected:
template <int Dims>
void ExpandBackward(const framework::ExecutionContext& context,
const std::vector<int>& reshape_dims_vec,
const std::vector<int>& reduce_dims_vec) const {
size_t reshape_size = Dims / MAX_RANK_SUPPORTED + 1;
size_t reduce_size = Dims % MAX_RANK_SUPPORTED + 1;
PADDLE_ENFORCE_EQ(reshape_size, reshape_dims_vec.size(),
"Inconsistent size between template Dims and "
"reshape dimensions.");
PADDLE_ENFORCE_EQ(reduce_size, reduce_dims_vec.size(),
"Inconsistent size between template Dims and "
"reduce dimensions.");
auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
auto x = EigenVector<T>::Flatten(*(context.Input<Tensor>("X")));
out0->mutable_data<T>(context.GetPlace());
auto x_grad = EigenVector<T>::Flatten(*out0);
Eigen::DSizes<int, Dims / MAX_RANK_SUPPORTED + 1> reshape_dims;
for (size_t i = 0; i < reshape_size; ++i) {
reshape_dims[i] = reshape_dims_vec[i];
}
Eigen::DSizes<int, Dims % MAX_RANK_SUPPORTED + 1> reduce_dims;
for (size_t i = 0; i < reduce_size; ++i) {
reduce_dims[i] = reduce_dims_vec[i];
}
auto out_grad = EigenVector<T>::Flatten(*in0);
x_grad.device(context.GetEigenDevice<Place>()) =
out_grad.reshape(reshape_dims).sum(reduce_dims).reshape(x.dimensions());
}
};
} // namespace operators
} // namespace paddle
......@@ -29,7 +29,7 @@ class L1NormKernel : public framework::OpKernel<T> {
Out->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto out = framework::EigenVector<T>::Flatten(*Out);
auto out = framework::EigenScalar<T>::From(*Out);
auto place = context.GetEigenDevice<Place>();
out.device(place) = x.abs().sum();
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/lod_reset_op.h"
namespace paddle {
namespace operators {
class LoDResetOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
// input check
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of LoDResetOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of LoDResetOp should not be null.");
// If target LoD is not set form Input(), then it must be set from Attr().
if (!ctx->HasInput("TargetLoD")) {
auto level0 = ctx->Attrs().Get<std::vector<int>>("target_lod");
PADDLE_ENFORCE(level0.size() > 1,
"Target LoD is not found, should be set to be a valid one "
"through Input() or Attr().");
}
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
}
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker {
public:
LoDResetOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor) The input tensor of lod_reset operator.");
AddInput("TargetLoD",
"(Tensor, optional) The target level 0 LoD from Input().")
.AsDispensable();
AddOutput("Out", "(LoDTensor) The output tensor of lod_reset operator.");
AddAttr<std::vector<int>>("target_lod",
"The target level 0 LoD from Attr().")
.SetDefault(std::vector<int>{});
AddComment(R"DOC(LoDReset operator
Reset LoD of Input(X) into a new one specified by Input(TargetLoD) or
Attr(target_lod), or set LoD for Input(X) if it doesn't have one.
Currently the lod_reset operator only supports the reset of level 0 LoD.
At least one of Input(TargetLoD) and Attr(target_lod) must be set,
and if both of them are set, Input(TargetLoD) will be chosen as the
target LoD.
An example:
Given a float LoDTensor X with shape (6, 1), its transpose form represents
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
with LoD = [[0, 2, 5, 6]] and the three (transposed) sequences look like
[1.0, 2.0], [3.0, 4.0, 5.0], [6.0].
If target LoD = [0, 4, 6], the lod_reset operator will reset the LoD and
the sequences that the LoDTensor Output(Out) contains becomes:
[1.0, 2.0, 3.0, 4.0], [5.0, 6.0].
)DOC");
}
};
class LoDResetGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Input(Out@GRAD) shouldn't be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, lod_reset_grad,
ops::LoDResetGradOp);
REGISTER_OP_CPU_KERNEL(lod_reset,
ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
ops::LoDResetKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(
lod_reset_grad, ops::LoDResetGradKernel<paddle::platform::CPUPlace, float>,
ops::LoDResetGradKernel<paddle::platform::CPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/lod_reset_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(lod_reset,
ops::LoDResetKernel<paddle::platform::GPUPlace, float>,
ops::LoDResetKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(
lod_reset_grad, ops::LoDResetGradKernel<paddle::platform::GPUPlace, float>,
ops::LoDResetGradKernel<paddle::platform::GPUPlace, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class LoDResetKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto* out = ctx.Output<framework::LoDTensor>("Out");
auto* in = ctx.Input<framework::LoDTensor>("X");
auto* lod_t = ctx.Input<framework::Tensor>("TargetLoD");
std::vector<int> level0;
if (lod_t) {
auto* lod = lod_t->data<int>();
if (platform::is_gpu_place(ctx.GetPlace())) {
framework::Tensor lod_cpu;
lod_cpu.CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context());
lod = lod_cpu.data<int>();
}
level0 = std::vector<int>(lod, lod + lod_t->numel());
} else {
level0 = ctx.Attr<std::vector<int>>("target_lod");
}
PADDLE_ENFORCE(level0.size() > 1UL,
"The size of target LoD should be greater than 1.");
PADDLE_ENFORCE(level0[0] == 0,
"Target LoD should be a vector starting from 0.");
PADDLE_ENFORCE(level0.back() == in->dims()[0],
"Target LoD should be a vector end with the "
"first dimension of Input(X).");
for (size_t i = 0; i < level0.size() - 1; ++i) {
PADDLE_ENFORCE(level0[i + 1] > level0[i],
"Target LoD should be an ascending vector.");
}
out->ShareDataWith(*in);
// cast level0 to size_t
std::vector<size_t> ulevel0(level0.size(), 0);
std::transform(level0.begin(), level0.end(), ulevel0.begin(),
[](int a) { return static_cast<size_t>(a); });
framework::LoD target_lod;
target_lod.push_back(ulevel0);
out->set_lod(target_lod);
}
};
template <typename Place, typename T>
class LoDResetGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->ShareDataWith(*d_out);
}
};
} // namespace operators
} // namespace paddle
......@@ -13,7 +13,7 @@ if(WITH_GPU)
nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context)
nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context)
nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions)
nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function)
else()
cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator)
cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
......
......@@ -27,15 +27,15 @@ template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_process) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
......@@ -47,7 +47,7 @@ class Pool2dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -87,11 +87,12 @@ template <typename PoolProcess, class T>
class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) {
PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
......@@ -110,7 +111,7 @@ class Pool2dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -154,10 +155,11 @@ template <class T>
class MaxPool2dGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
......@@ -176,7 +178,7 @@ class MaxPool2dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -240,17 +242,17 @@ template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_process) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_process, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -265,7 +267,7 @@ class Pool3dFunctor<platform::CPUPlace, PoolProcess, T> {
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -315,11 +317,12 @@ template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_grad_process) {
PoolProcess pool_grad_process,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
......@@ -343,7 +346,7 @@ class Pool3dGradFunctor<platform::CPUPlace, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -398,10 +401,11 @@ template <class T>
class MaxPool3dGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
......@@ -425,7 +429,7 @@ class MaxPool3dGradFunctor<platform::CPUPlace, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -498,15 +502,15 @@ template <typename T>
class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0];
const int input_height = input.dims()[2];
const int input_width = input.dims()[3];
const int output_channels = output.dims()[1];
const int output_height = output.dims()[2];
const int output_width = output.dims()[3];
const int output_channels = output->dims()[1];
const int output_height = output->dims()[2];
const int output_width = output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
......@@ -517,8 +521,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -563,13 +567,13 @@ template <typename T>
class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const int batch_size = input_grad.dims()[0];
const int input_height = input_grad.dims()[2];
const int input_width = input_grad.dims()[3];
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0];
const int input_height = input_grad->dims()[2];
const int input_width = input_grad->dims()[3];
const int output_channels = output_grad.dims()[1];
const int output_height = output_grad.dims()[2];
const int output_width = output_grad.dims()[3];
......@@ -578,7 +582,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) {
......@@ -612,17 +616,17 @@ template <typename T>
class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -636,8 +640,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUPlace, T> {
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output.mutable_data<T>(context.GetPlace());
T* mask_data = mask.mutable_data<T>(context.GetPlace());
T* output_data = output->mutable_data<T>(context.GetPlace());
T* mask_data = mask->mutable_data<T>(context.GetPlace());
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -691,14 +695,14 @@ template <typename T>
class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings) {
const int batch_size = input_grad.dims()[0];
const int input_depth = input_grad.dims()[2];
const int input_height = input_grad.dims()[3];
const int input_width = input_grad.dims()[4];
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0];
const int input_depth = input_grad->dims()[2];
const int input_height = input_grad->dims()[3];
const int input_width = input_grad->dims()[4];
const int output_channels = output_grad.dims()[1];
const int output_depth = output_grad.dims()[2];
const int output_height = output_grad.dims()[3];
......@@ -708,7 +712,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUPlace, T> {
const T* mask_data = mask.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad.mutable_data<T>(context.GetPlace());
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < output_channels; ++c) {
......
此差异已折叠。
......@@ -88,60 +88,62 @@ template <typename Place, typename PoolProcess, typename T>
class Pool2dFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_compute);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute, framework::Tensor* output);
};
template <typename Place, typename PoolProcess, typename T>
class Pool2dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute);
PoolProcess pool_compute, framework::Tensor* input_grad);
};
template <typename Place, class T>
class MaxPool2dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
template <typename Place, typename PoolProcess, typename T>
class Pool3dFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
std::vector<int>& ksize, std::vector<int>& strides,
std::vector<int>& paddings, PoolProcess pool_compute);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute, framework::Tensor* output);
};
template <typename Place, typename PoolProcess, typename T>
class Pool3dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
PoolProcess pool_compute);
PoolProcess pool_compute, framework::Tensor* input_grad);
};
template <typename Place, class T>
class MaxPool3dGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& input_grad,
const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
/*
......@@ -155,38 +157,38 @@ template <typename Place, typename T>
class MaxPool2dWithIndexFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask);
};
template <typename Place, typename T>
class MaxPool2dWithIndexGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
template <typename Place, typename T>
class MaxPool3dWithIndexFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& input, framework::Tensor& output,
framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
const framework::Tensor& input, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* output, framework::Tensor* mask);
};
template <typename Place, typename T>
class MaxPool3dWithIndexGradFunctor {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& input_grad,
const framework::Tensor& output_grad,
const framework::Tensor& mask, std::vector<int>& ksize,
std::vector<int>& strides, std::vector<int>& paddings);
std::vector<int>& strides, std::vector<int>& paddings,
framework::Tensor* input_grad);
};
} // namespace math
......
......@@ -74,11 +74,10 @@ Tensor CombineBatchAndN(const framework::ExecutionContext& context,
Tensor output;
auto in_dims = input.dims();
if (in_dims.size() == 3) {
output.Resize(in_dims);
output.Resize({in_dims[1], in_dims[0], in_dims[2]});
output.mutable_data<T>(context.GetPlace());
EigenTranspose<Place, T, 3>(context, input, output, {1, 0, 2});
std::vector<int64_t> out_dims = {in_dims[1], in_dims[0] * in_dims[2]};
output.Resize(make_ddim(out_dims));
output.Resize({in_dims[1], in_dims[0] * in_dims[2]});
} else {
output.ShareDataWith(input);
}
......
此差异已折叠。
......@@ -75,16 +75,16 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T>
pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dFunctor<
Place, paddle::operators::math::AvgPool<T>, T>
pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
}
} break;
case 3: {
......@@ -93,15 +93,15 @@ class PoolKernel : public framework::OpKernel<T> {
Place, paddle::operators::math::MaxPool<T>, T>
pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor<
Place, paddle::operators::math::AvgPool<T>, T>
pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(context.device_context(), *in_x, *out, ksize, strides,
paddings, pool_process);
pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, pool_process, out);
}
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......@@ -142,30 +142,30 @@ class PoolGradKernel : public framework::OpKernel<T> {
if (pooling_type == "max") {
paddle::operators::math::MaxPool2dGradFunctor<Place, T>
pool2d_backward;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings);
pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings, pool_process);
pool2d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, pool_process, in_x_grad);
}
} break;
case 3: {
if (pooling_type == "max") {
paddle::operators::math::MaxPool3dGradFunctor<Place, T>
pool3d_backward;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings);
pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dGradFunctor<
Place, paddle::operators::math::AvgPoolGrad<T>, T>
pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(context.device_context(), *in_x, *in_x_grad, *out,
*out_grad, ksize, strides, paddings, pool_process);
pool3d_backward(context.device_context(), *in_x, *out, *out_grad,
ksize, strides, paddings, pool_process, in_x_grad);
}
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......
......@@ -46,14 +46,14 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T> {
case 2: {
paddle::operators::math::MaxPool2dWithIndexFunctor<Place, T>
pool2d_forward;
pool2d_forward(context.device_context(), *in_x, *out, *mask, ksize,
strides, paddings);
pool2d_forward(context.device_context(), *in_x, ksize, strides,
paddings, out, mask);
} break;
case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<Place, T>
pool3d_forward;
pool3d_forward(context.device_context(), *in_x, *out, *mask, ksize,
strides, paddings);
pool3d_forward(context.device_context(), *in_x, ksize, strides,
paddings, out, mask);
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
}
......@@ -89,14 +89,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T> {
case 2: {
paddle::operators::math::MaxPool2dWithIndexGradFunctor<Place, T>
pool2d_backward;
pool2d_backward(context.device_context(), *in_x_grad, *out_grad,
*mask, ksize, strides, paddings);
pool2d_backward(context.device_context(), *out_grad, *mask, ksize,
strides, paddings, in_x_grad);
} break;
case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<Place, T>
pool3d_backward;
pool3d_backward(context.device_context(), *in_x_grad, *out_grad,
*mask, ksize, strides, paddings);
pool3d_backward(context.device_context(), *out_grad, *mask, ksize,
strides, paddings, in_x_grad);
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
}
......
......@@ -47,7 +47,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(vector<LoDTensor>) Input is a vector of LoDTensor, "
"(LodTensorArray) Input is a vector of LoDTensor, "
"each of which is a variable-length sequence or nested sequence.")
.AsDuplicable();
AddOutput("Out",
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
from paddle.v2.framework import framework as framework
from paddle.v2.fluid import framework as framework
__all__ = ['append_backward_ops']
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册