Commit 191fafe3 authored by qingqing01, committed by hedaoyuan

Support rectangular padding, stride, window and input for PoolProjection (#115)

* Support rectangular padding, stride, window and input for PoolProjection

* Follow review comments:
1. Remove start
2. Refine img_pool_a/b.conf for test_NetworkCompare
3. Split the unit tests

* Modify the test in img_layers.py
Parent 8a044d2e
......@@ -84,16 +84,23 @@ extern void hl_expand_feature2col(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
*
*/
extern void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData);
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
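For orientation, a minimal call sketch against the widened signature (illustrative values only; inputData and tgtData are assumed to be device buffers allocated elsewhere):
// Output extents follow the usual rule:
// (image - window + 2 * padding + stride - 1) / stride + 1.
const int frameCnt = 2, channels = 8, height = 16, width = 16;
const int sizeX = 3, sizeY = 5, strideW = 1, strideH = 2;
const int paddingW = 1, paddingH = 2;
const int pooledH = (height - sizeY + 2 * paddingH + strideH - 1) / strideH + 1;  // 9
const int pooledW = (width - sizeX + 2 * paddingW + strideW - 1) / strideW + 1;   // 16
hl_maxpool_forward(frameCnt, inputData, channels, height, width,
                   pooledH, pooledW, sizeX, sizeY, strideH, strideW,
                   paddingH, paddingW, tgtData);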
/**
* @brief Maximum pool backward.
......@@ -107,21 +114,28 @@ extern void hl_maxpool_forward(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[out] targetGrad output grad.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] scaleA scale applied to the existing target gradient.
* @param[in] scaleB scale applied to the newly computed gradient.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
*
*/
extern void hl_maxpool_backward(
int frameCnt, const real* inputData,
const int frameCnt, const real* inputData,
const real* outData, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* targetGrad,
real scaleA, real scaleB);
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad);
/**
* @brief Average pool forward.
......@@ -133,16 +147,23 @@ extern void hl_maxpool_backward(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
*
*/
extern void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData);
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/**
* @brief Average pool backward.
......@@ -154,20 +175,27 @@ extern void hl_avgpool_forward(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[out] backGrad output grad.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[in] scaleA scale applied to the existing output gradient.
* @param[in] scaleB scale applied to the newly computed gradient.
* @param[out] backGrad output grad.
*
*/
extern void hl_avgpool_backward(
int frameCnt, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* backGrad,
real scaleA, real scaleB);
const int frameCnt, const real* outGrad,
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad);
/**
* @brief Cross-map-response normalize forward.
......
......@@ -38,29 +38,45 @@ inline void hl_expand_feature2col(
real* dataCol) {}
inline void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData) {}
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_maxpool_backward(
int frameCnt, const real* inputData,
const int frameCnt, const real* inputData,
const real* outData, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* targetGrad,
real scaleA, real scaleB) {}
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {}
inline void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData) {}
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_avgpool_backward(
int frameCnt, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* backGrad,
real scaleA, real scaleB) {}
const int frameCnt, const real* outGrad,
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad) {}
inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out,
......
......@@ -145,24 +145,28 @@ void hl_shrink_col2feature(const real * dataCol, size_t channels,
CHECK_SYNC("hl_shrink_col2feature failed");
}
__global__ void KeMaxPoolForward(int nthreads, const real* inputData,
int channels, int height, int width,
int pooledH, int pooledW,
int ksize, int stride, int start,
__global__ void KeMaxPoolForward(const int nthreads, const real* inputData,
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int ksizeW, const int ksizeH,
const int strideH, const int strideW,
const int offsetH, const int offsetW,
real* tgtData) {
int index = blockIdx.y * blockDim.x + threadIdx.x;
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
int ph = (index / pooledW) % pooledH;
int c = (index / pooledW / pooledH) % channels;
int frameNum = blockIdx.x;
int hstart = ph * stride + start;
int hend = min(hstart + ksize, height);
int wstart = pw * stride + start;
int wend = min(wstart + ksize, width);
int frameNum = index / pooledW / pooledH / channels;
int hstart = ph * strideH - offsetH;
int wstart = pw * strideW - offsetW;
int hend = min(hstart + ksizeH, height);
int wend = min(wstart + ksizeW, width);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
real maxval = -FLT_MAX;
inputData += (frameNum * channels + c) * height * width;
tgtData += (frameNum * channels) * pooledW * pooledH;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
if (maxval < inputData[h * width + w])
......@@ -173,44 +177,54 @@ __global__ void KeMaxPoolForward(int nthreads, const real* inputData,
}
}
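With the rewritten 1-D launch, each of the frameCnt * channels * pooledH * pooledW threads decodes its coordinates from a single flat index instead of reading the frame from blockIdx.x. A minimal host-side sketch of that decoding (illustrative only; decodePoolIndex is a hypothetical helper mirroring the kernel above):
// Hypothetical helper: decode a flat thread index into
// (frame, channel, output row, output column), as KeMaxPoolForward now does.
void decodePoolIndex(int index, int channels, int pooledH, int pooledW,
                     int* frameNum, int* c, int* ph, int* pw) {
  *pw = index % pooledW;
  *ph = (index / pooledW) % pooledH;
  *c = (index / pooledW / pooledH) % channels;
  *frameNum = index / pooledW / pooledH / channels;
}
// The launch then needs only a 1-D grid of (num_kernels + 1024 - 1) / 1024 blocks.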
void hl_maxpool_forward(int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData) {
int num_kernels = pooledH * pooledW * channels;
int blocksX = frameCnt;
int blocksY = (num_kernels + 1024 -1) / 1024;
void hl_maxpool_forward(const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real* tgtData) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocksX, blocksY);
dim3 grid(blocks, 1);
KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels, height, width,
pooledH, pooledW, sizeX, stride, start, tgtData);
pooledH, pooledW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData);
CHECK_SYNC("hl_maxpool_forward failed");
}
__global__ void KeMaxPoolBackward(int nthreads, const real* inputData,
__global__ void KeMaxPoolBackward(const int nthreads, const real* inputData,
const real* outData, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* targetGrad,
real scaleA, real scaleB) {
int index = blockIdx.y * blockDim.x + threadIdx.x;
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
real* targetGrad) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
// find out the local index
// find out the local offset
int offsetW = index % width + start;
int offsetH = (index / width) % height + start;
int offsetW = index % width + padW;
int offsetH = (index / width) % height + padH;
int offsetC = (index / width / height) % channels;
int frameNum = blockIdx.x;
int phstart = (offsetH < sizeX) ? 0 : (offsetH - sizeX) / stride + 1;
int phend = min(offsetH / stride + 1, pooledH);
int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / stride + 1;
int pwend = min(offsetW / stride + 1, pooledW);
int frameNum = index / width / height / channels;
int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1;
int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1;
int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
inputData += (frameNum * channels) * height * width;
real input = inputData[index];
outData += (frameNum * channels + offsetC) * pooledH * pooledW;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
targetGrad += (frameNum * channels) * height * width;
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
if (input == outData[ph * pooledW + pw]) {
......@@ -223,90 +237,114 @@ __global__ void KeMaxPoolBackward(int nthreads, const real* inputData,
}
}
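The backward kernels invert the forward window mapping: an input pixel at padded row offsetH = h + padH is covered by exactly those output rows ph with ph * strideH <= offsetH < ph * strideH + sizeY, which yields the phstart/phend bounds above. A standalone check of that inversion for one axis (illustrative values only, not part of the diff):
#include <algorithm>
#include <cassert>

int main() {
  // Illustrative values: window 5, stride 2, 9 output rows.
  const int sizeY = 5, strideH = 2, pooledH = 9;
  for (int offsetH = 0; offsetH < 20; ++offsetH) {
    int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1;
    int phend = std::min(offsetH / strideH + 1, pooledH);
    for (int ph = phstart; ph < phend; ++ph) {
      // Every output row in [phstart, phend) must cover offsetH.
      assert(ph * strideH <= offsetH && offsetH < ph * strideH + sizeY);
    }
  }
  return 0;
}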
void hl_maxpool_backward(int frameCnt, const real* inputData,
void hl_maxpool_backward(const int frameCnt, const real* inputData,
const real* outData, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* targetGrad,
real scaleA, real scaleB) {
int num_kernels = (height - start) * (width - start) * channels;
int blocksX = frameCnt;
int blocksY = (num_kernels + 1024 -1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocksX, blocksY);
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {
KeMaxPoolBackward<<< grid, threads, 0, STREAM_DEFAULT >>>
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
KeMaxPoolBackward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, outData, outGrad, channels,
height, width, pooledH, pooledW, sizeX, stride, start,
targetGrad, scaleA, scaleB);
height, width, pooledH, pooledW, sizeX, sizeY,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
targetGrad);
CHECK_SYNC("hl_maxpool_backward");
}
__global__ void KeAvePoolForward(int nthreads, const real* inputData,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* tgtData) {
int index = blockIdx.y * blockDim.x + threadIdx.x;
__global__ void KeAvgPoolForward(const int nthreads, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int padH, const int padW,
real* tgtData) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int pw = index % pooledW;
int ph = (index / pooledW) % pooledH;
int c = (index / pooledW / pooledH) % channels;
int frameNum = blockIdx.x;
int hstart = ph * stride + start;
int hend = min(hstart + sizeX, height);
int wstart = pw * stride + start;
int wend = min(wstart + sizeX, width);
int frameNum = index / pooledW / pooledH / channels;
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW;
int hend = min(hstart + sizeY, height + padH);
int wend = min(wstart + sizeX, width + padW);
int pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
hend = min(hend, height);
wend = min(wend, width);
real aveval = 0;
inputData += (frameNum * channels + c) * height * width;
tgtData += (frameNum * channels) * pooledH * pooledW;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
aveval += inputData[h * width + w];
}
}
tgtData[index] = aveval / ((hend - hstart) * (wend - wstart));
tgtData[index] = aveval / pool_size;
}
}
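Note that pool_size is computed from the unclamped window, so padded cells count toward the divisor (the include-padding averaging convention). For example, with sizeX = sizeY = 3 and paddingH = paddingW = 1, the top-left window spans padded rows and columns [-1, 2); only the 2x2 in-image cells are summed, but the sum is still divided by 3 * 3 = 9.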
void hl_avgpool_forward(int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData) {
int num_kernels = pooledH * pooledW * channels;
int blocksX = frameCnt;
int blocksY = (num_kernels + 1024 -1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocksX, blocksY);
KeAvePoolForward<<< grid, threads, 0, STREAM_DEFAULT >>>
void hl_avgpool_forward(const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {
int num_kernels = pooledH * pooledW * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, inputData, channels,
height, width, pooledH, pooledW,
sizeX, stride, start, tgtData);
sizeX, sizeY, strideH, strideW,
paddingH, paddingW, tgtData);
CHECK_SYNC("hl_avgpool_forward failed");
}
__global__ void KeAvgPoolBackward(int nthreads, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* tgtGrad,
real scaleA, real scaleB) {
int index = blockIdx.y * blockDim.x + threadIdx.x;
__global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad,
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int padH, const int padW,
real scaleA, real scaleB,
real* tgtGrad) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index < nthreads) {
int offsetW = index % width + start;
int offsetH = (index / width) % height + start;
int offsetW = index % width + padW;
int offsetH = (index / width) % height + padH;
int offsetC = (index / width / height) % channels;
int frameNum = blockIdx.x;
int phstart = (offsetH < sizeX) ? 0 : (offsetH - sizeX) / stride + 1;
int phend = min(offsetH / stride + 1, pooledH);
int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / stride + 1;
int pwend = min(offsetW / stride + 1, pooledW);
int frameNum = index / width / height / channels;
int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1;
int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1;
int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0;
int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0;
real gradient = 0;
outGrad += (frameNum * channels + offsetC) * pooledH * pooledW;
tgtGrad += (frameNum * channels) * height * width;
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
// figure out the pooling size
int poolsize = (min(ph * stride + sizeX, height) - ph * stride) *
(min(pw * stride + sizeX, width) - pw * stride);
int hstart = ph * strideH - padH;
int wstart = pw * strideW - padW;
int hend = min(hstart + sizeY, height + padH);
int wend = min(wstart + sizeX, width + padW);
int poolsize = (hend - hstart) * (wend - wstart);
gradient += outGrad[ph * pooledW + pw]/poolsize;
}
}
......@@ -314,20 +352,25 @@ __global__ void KeAvgPoolBackward(int nthreads, const real* outGrad,
}
}
void hl_avgpool_backward(int frameCnt, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* backGrad,
real scaleA, real scaleB) {
int num_kernels = (height - start) * (width - start) * channels;
int blocksX = frameCnt;
int blocksY = (num_kernels + 1024 -1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocksX, blocksY);
void hl_avgpool_backward(const int frameCnt, const real* outGrad,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* backGrad) {
int num_kernels = height * width * channels * frameCnt;
int blocks = (num_kernels + 1024 - 1) / 1024;
KeAvgPoolBackward <<< grid, threads, 0, STREAM_DEFAULT >>>
KeAvgPoolBackward <<< blocks, 1024, 0, STREAM_DEFAULT >>>
(num_kernels, outGrad, channels, height, width,
pooledH, pooledW, sizeX, stride, start, backGrad, scaleA, scaleB);
pooledH, pooledW, sizeX, sizeY,
strideH, strideW,
paddingH, paddingW,
scaleA, scaleB,
backGrad);
CHECK_SYNC("hl_avgpool_backward failed");
}
......
......@@ -51,7 +51,6 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
PoolLayer::init(layerMap, parameterMap);
CHECK(useGpu_) << "CudnnPoolLayer only support gpu";
CHECK_EQ(start_, 0) << poolType_ << " does not support 'start'";
hl_create_tensor_descriptor(&inputDesc_);
hl_create_tensor_descriptor(&outputDesc_);
......
......@@ -56,16 +56,6 @@ public:
void reshape(int batchSize);
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
/**
* Calculate output size according window size of pooling.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
};
} // namespace paddle
......@@ -35,7 +35,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
poolType_ = conf.pool_type();
channels_ = conf.channels();
sizeX_ = conf.size_x();
start_ = conf.start();
stride_ = conf.stride();
outputX_ = conf.output_x();
imgSize_ = conf.img_size();
......@@ -47,22 +46,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
bool cudnnTypeCheck = true;
#ifndef PADDLE_ONLY_CPU
cudnnTypeCheck = !CudnnPoolLayer::typeCheck(poolType_);
#endif
if ((sizeY_ != sizeX_ || imgSizeY_ != imgSize_ || strideY_ != stride_ ||
confPaddingY_ != confPadding_ || outputY_ != outputX_) &&
cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not support non-square "
"filter, image, stride or padding";
}
if (confPadding_ != 0 && cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not support 'padding'";
}
return true;
}
......
......@@ -28,7 +28,7 @@ namespace paddle {
class PoolLayer : public Layer {
protected:
size_t channels_, sizeX_, stride_, outputX_, imgSize_;
int start_, confPadding_;
int confPadding_;
size_t sizeY_;
size_t imgSizeY_;
......@@ -47,6 +47,16 @@ public:
static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
/**
* Calculate output size according window size and padding size.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
};
} // namespace paddle
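A quick check of this helper against the unit tests below (img_size = 14, window 3, padding 0, stride 2): outputSize(14, 3, 0, 2) = (14 - 3 + 0 + 2 - 1) / 2 + 1 = 7, so the pooled output is 7x7.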
......@@ -25,13 +25,15 @@ size_t PoolProjectionLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = imgSize_;
imgSizeH_ = imgSizeY_;
}
if (imgSizeW_ == 0) {
imgSizeW_ = imgSize_;
}
outputH_ = 1 + (imgSizeH_ - start_ - sizeX_ + stride_ - 1) / stride_;
outputW_ = 1 + (imgSizeW_ - start_ - sizeX_ + stride_ - 1) / stride_;
outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_);
layerSize = outputH_ * outputW_ * channels_;
getOutput().setFrameHeight(outputH_);
......@@ -51,8 +53,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_,
stride_, outputH_, outputW_);
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
}
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
......@@ -69,7 +72,9 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, start_, stride_, outputH_, outputW_, 1, 1);
sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::forward(PassType passType) {
......@@ -84,8 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_,
stride_, outputH_, outputW_);
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
......@@ -97,7 +103,9 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
/* Do derivation */
MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, start_,
stride_, outputH_, outputW_, 1, 1);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
}
} // namespace paddle
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=8,stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=MaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=AvgPooling(),
)
outputs([maxpool, avgpool])
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8, num_filters=8, stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnMaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnAvgPooling(),
)
outputs([maxpool, avgpool])
......@@ -791,21 +791,24 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(16);
(*config).layerConfig.set_partial_sum(1);
(*config).layerConfig.set_shared_biases(true);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(16);
pool->set_size_x(3);
if (poolType == "cudnn-max-pool" || poolType == "cudnn-avg-pool") {
pool->set_padding(0);
} else {
pool->set_start(0);
}
pool->set_stride(2);
pool->set_output_x((pool->img_size() - pool->start() - pool->size_x()) /
((float)pool->stride()) +
1.5);
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1;
int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1;
pool->set_output_x(ow);
pool->set_output_y(oh);
}
void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
......@@ -814,9 +817,10 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_img_size(14);
config.layerConfig.set_size(pool->output_x() * pool->output_x() *
pool->set_img_size_y(14);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
testLayerGrad(config, "pool", 100, trans, useGpu);
......@@ -829,11 +833,11 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_size_y(4);
pool->set_stride_y(3);
pool->set_img_size(10);
pool->set_img_size_y(20);
setPoolConfig(&config, pool, poolType);
pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) /
((float)pool->stride_y()) +
1.5);
......@@ -1252,8 +1256,6 @@ TEST(Layer, MultiplexLayer) {
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
......@@ -116,6 +116,8 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) {
gradientMachine->start(trainer.getConfig(), nullptr);
gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN);
for (size_t i = 0; i < in.outGrads.size(); i++) {
// If all the layers in the config have no parameters and do not
// set NeedGradient(), outArgs[i] will be nullptr.
outArgs[i].grad->copyFrom(*in.outGrads[i]);
}
gradientMachine->backward();
......@@ -225,6 +227,18 @@ TEST(Compare, concat_table) {
compareNetwork(config_file_a, config_file_b);
}
#ifndef PADDLE_ONLY_CPU
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
std::string config_file_b = "./gserver/tests/img_pool_b.conf";
bool useGpu = FLAGS_use_gpu;
FLAGS_use_gpu = true;
compareNetwork(config_file_a, config_file_b);
FLAGS_use_gpu = useGpu;
}
#endif
P_DEFINE_string(config_file_a, "", "config of one network to compare");
P_DEFINE_string(config_file_b, "", "config of another network to compare");
TEST(Compare, network) {
......
......@@ -860,9 +860,11 @@ void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
}
void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX,
int start, size_t stride, size_t outputH,
size_t outputW) {
size_t imgSizeW, size_t channels,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData();
......@@ -874,14 +876,17 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
CHECK(width_ == outputH * outputW * channels);
hl_maxpool_forward(frameNum, inputData, channels, height, width,
outputH, outputW, sizeX, stride, start, data_);
outputH, outputW, sizeX, sizeY, strideH, strideW,
paddingH, paddingW, data_);
}
void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, Matrix& outGrad, Matrix& outV,
size_t sizeX, int start, size_t stride,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) {
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true &&
outV.useGpu_ == true)
<< "Matrix type are not equal";
......@@ -899,15 +904,19 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH,
CHECK(outGrad.getHeight() == outV.getHeight() &&
outGrad.getWidth() == outV.getWidth());
hl_maxpool_backward(frameNum, inputData, outData, outDiff, channels,
height, width, outputH, outputW, sizeX, stride,
start, data_, scaleTargets, scaleOutput);
height, width, outputH, outputW, sizeX, sizeY,
strideH, strideW, paddingH, paddingW,
scaleTargets, scaleOutput, data_);
}
void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX,
int start, size_t stride, size_t outputH,
size_t outputW) {
size_t imgSizeW, size_t channels,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal";
real* inputData = inputMat.getData();
......@@ -919,13 +928,17 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH,
CHECK(width_ == outputH * outputW * channels);
hl_avgpool_forward(frameNum, inputData, channels, height, width,
outputH, outputW, sizeX, stride, start, data_);
outputH, outputW, sizeX, sizeY,
strideH, strideW,
paddingH, paddingW, data_);
}
void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH,
size_t imgSizeW, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) {
size_t imgSizeW, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal";
real* outDiff = outGrad.getData();
......@@ -938,8 +951,10 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH,
CHECK(outGrad.getWidth() == outputH * outputW * channels);
hl_avgpool_backward(frameNum, outDiff, channels, height, width,
outputH, outputW, sizeX, stride, start, data_,
scaleTargets, scaleOutput);
outputH, outputW, sizeX, sizeY,
strideH, strideW, paddingH, paddingW,
scaleTargets, scaleOutput,
data_);
}
void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH,
......@@ -1450,19 +1465,23 @@ void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight,
}
void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX,
int start, size_t stride, size_t outputH,
size_t outputW) {
size_t imgSizeW, size_t channels,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
real* inputData = inputMat.getData();
real* outData = data_;
size_t num = inputMat.getHeight();
size_t inWidth = imgSizeW;
size_t inHeight = imgSizeH;
CHECK(inHeight * inWidth == inputMat.getWidth() / channels);
CHECK_EQ(num, this->getHeight());
CHECK_EQ(channels*outputH*outputW, this->getWidth());
/* initialize the data_ */
for (size_t i = 0; i < height_ * width_; i++) {
data_[i] = -FLT_MAX;
outData[i] = -(real)FLT_MAX;
}
/* pool max one by one */
......@@ -1470,12 +1489,14 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
for (size_t c = 0; c < channels; ++c) { // channel by channel
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
size_t hstart = ph * stride + start;
size_t wstart = pw * stride + start;
size_t hend = std::min(hstart + sizeX, inHeight);
size_t wend = std::min(wstart + sizeX, inWidth);
for (size_t h = hstart; h < hend; ++h) {
for (size_t w = wstart; w < wend; ++w) {
int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW;
int hend = std::min(hstart + sizeY, inHeight);
int wend = std::min(wstart + sizeX, inWidth);
hstart = std::max(hstart, 0);
wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
outData[ph * outputW + pw] = std::max(outData[ph * outputW + pw],
inputData[h * inWidth + w]);
}
......@@ -1491,9 +1512,10 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX,
int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets,
real scaleOutput) {
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
size_t num = image.getHeight();
size_t channels = size_t(width_ / imgSizeH / imgSizeW);
CHECK(image.getWidth() == imgSizeH * imgSizeW * channels);
......@@ -1509,32 +1531,36 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
size_t hstart = ph * stride + start;
size_t wstart = pw * stride + start;
size_t hend = std::min(hstart + sizeX, imgSizeH);
size_t wend = std::min(wstart + sizeX, imgSizeW);
for (size_t h = hstart; h < hend; ++h) {
for (size_t w = wstart; w < wend; ++w) {
int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW;
int hend = std::min(hstart + sizeY, imgSizeH);
int wend = std::min(wstart + sizeX, imgSizeW);
hstart = std::max(hstart, 0);
wstart = std::max(wstart, 0);
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
tgtGrad[h * imgSizeW + w] =
scaleTargets * tgtGrad[h * imgSizeW + w] +
scaleOutput * otGrad[ph * outputW + pw] *
(inData[h * imgSizeW + w] == otData[ph * outputH + pw]);
(inData[h * imgSizeW + w] == otData[ph * outputW + pw]);
}
}
}
}
// offset
inData += imgSizeH * imgSizeW;
otData += outputH * outputW;
tgtGrad += imgSizeH * imgSizeW;
otData += outputH * outputW;
otGrad += outputH * outputW;
}
}
}
void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW) {
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
// The main loop
size_t num = input.getHeight();
size_t inHeight = imgSizeH;
......@@ -1548,17 +1574,24 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
size_t hstart = ph * stride + start;
size_t wstart = pw * stride + start;
size_t hend = std::min(hstart + sizeX, inHeight);
size_t wend = std::min(wstart + sizeX, inWidth);
int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW;
int hend = std::min(hstart + sizeY, inHeight + paddingH);
int wend = std::min(wstart + sizeX, inWidth + paddingW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = std::max(hstart, 0);
wstart = std::max(wstart, 0);
hend = std::min(hend, static_cast<int>(inHeight));
wend = std::min(wend, static_cast<int>(inWidth));
CHECK(poolSize);
tgtData[ph * outputW + pw] = 0; // clear
for (size_t h = hstart; h < hend; ++h) {
for (size_t w = wstart; w < wend; ++w) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
tgtData[ph * outputW + pw] += inData[h * inWidth + w];
}
}
tgtData[ph * outputW + pw] /= (hend - hstart) * (wend - wstart);
tgtData[ph * outputW + pw] /= poolSize;
}
}
// compute offset
......@@ -1569,9 +1602,11 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
}
void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) {
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
size_t num = input.getHeight();
size_t channels = input.getWidth() / outputH / outputW;
CHECK(imgSizeH * imgSizeW * channels == getWidth());
......@@ -1582,14 +1617,20 @@ void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
for (size_t c = 0; c < channels; ++c) {
for (size_t ph = 0; ph < outputH; ++ph) {
for (size_t pw = 0; pw < outputW; ++pw) {
size_t hstart = ph * stride + start;
size_t wstart = pw * stride + start;
size_t hend = std::min(hstart + sizeX, imgSizeH);
size_t wend = std::min(wstart + sizeX, imgSizeW);
size_t poolsize = (hend - hstart) * (wend - wstart);
for (size_t h = hstart; h < hend; ++h) {
for (size_t w = wstart; w < wend; ++w) {
outData[h * imgSizeW + w] += inData[ph * outputW + pw] / poolsize;
int hstart = ph * strideH - paddingH;
int wstart = pw * strideW - paddingW;
int hend = std::min(hstart + sizeY, imgSizeH + paddingH);
int wend = std::min(wstart + sizeX, imgSizeW + paddingW);
int poolSize = (hend - hstart) * (wend - wstart);
hstart = std::max(hstart, 0);
wstart = std::max(wstart, 0);
hend = std::min(hend, static_cast<int>(imgSizeH));
wend = std::min(wend, static_cast<int>(imgSizeW));
CHECK(poolSize);
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
outData[h * imgSizeW + w] += inData[ph * outputW + pw] / poolSize;
}
}
}
......
......@@ -742,31 +742,37 @@ public:
*/
virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX,
int start_, size_t stride, size_t outputH,
size_t outputW) {
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
/// Pooling backward operation.
virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX,
int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets,
real scaleOutput) {
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
/// Pooling forward operation, calculates the average over each pooling window.
virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW) {
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) {
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
......@@ -1131,21 +1137,30 @@ public:
real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput);
Matrix& outGrad, Matrix& outV, size_t sizeX,
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets, real scaleOutput);
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX,
......@@ -1242,21 +1257,31 @@ public:
real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput);
Matrix& outGrad, Matrix& outV,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets, real scaleOutput);
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX,
......
......@@ -1846,6 +1846,159 @@ TEST(Matrix, classificationError) {
}
}
void testMaxPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->maxPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->maxPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
checkMatrixEqual(target, targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->maxPoolBackward(*input, imgSizeH, imgSizeW,
*targetGrad, *target,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->maxPoolBackward(*inputGpu, imgSizeH, imgSizeW,
*targetGpuGrad, *targetGpu,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
checkMatrixEqual(inputGrad, targetBwdCheck);
}
void testAvgPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->avgPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->avgPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
MatrixCheckErr(*target, *targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->avgPoolBackward(*targetGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->avgPoolBackward(*targetGpuGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
MatrixCheckErr(*inputGrad, *targetBwdCheck);
}
TEST(Matrix, PoolFwdBwd) {
for (auto numSamples : {5, 32}) {
for (auto channels : {1, 9, 32}) {
for (auto imgSizeH : {14, 28}) {
for (auto imgSizeW : {16, 30}) {
for (auto sizeX : {2, 5}) {
for (auto sizeY : {2, 5}) {
for (auto sH : {1, 2}) {
for (auto sW : {1, 2}) {
for (auto pH : {0, (sizeY - 1)/2}) {
for (auto pW : {0, (sizeX - 1)/2}) {
VLOG(3) << " numSamples=" << numSamples
<< " channels=" << channels
<< " imgSizeH=" << imgSizeH
<< " imgSizeW=" << imgSizeW
<< " sizeX=" << sizeX
<< " sizeY=" << sizeY
<< " strideH=" << sH
<< " strideW=" << sW
<< " padingH=" << pH
<< " padingW=" << pW;
testMaxPoolFwdBwd(numSamples, channels, imgSizeH,
imgSizeW, sizeX, sizeY, sH, sW, pH, pW);
testAvgPoolFwdBwd(numSamples, channels, imgSizeH,
imgSizeW, sizeX, sizeY, sH, sW, pH, pW);
}
}
}
}
}
}
}
}
}
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
......@@ -88,7 +88,8 @@ message PoolConfig {
required uint32 size_x = 3;
// Tell the net where in the input image to start the pooling.
required uint32 start = 4;
// start is deprecated now.
optional uint32 start = 4;
// Defines the stride size between successive pooling squares.
required uint32 stride = 5;
......
......@@ -961,10 +961,6 @@ def parse_pool(pool, input_layer_name, pool_conf):
"['max-projection', 'avg-projection', "
"'cudnn-max-pool', 'cudnn-avg-pool']"
% pool.pool_type)
if pool.size_y or pool.stride_y or pool.img_width or pool.padding_y:
config_assert(pool.pool_type.startswith('cudnn'),
"'size_y', 'stride_y' and 'img_width' and 'padding_y'"
"can only be used for cudnn")
pool_conf.channels = pool.channels
pool_conf.size_x = pool.size_x
......@@ -974,36 +970,25 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.stride_y = default(pool.stride_y, pool_conf.stride);
img_pixels = g_layer_map[input_layer_name].size / pool.channels
# img_width may be removed later,
# since it can then be calculated automatically.
pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5))
pool_conf.img_size_y = img_pixels / pool_conf.img_size
config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels,
"Incorrect input image size %d for input image pixels %d"
% (pool_conf.img_size, img_pixels))
if pool.start is not None:
config_assert(pool.padding is None,
'At most one of start and padding can be set.')
pool_conf.start = pool.start
pool_conf.padding = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size - \
pool_conf.start - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
config_assert(not pool.start, "start is deprecated in pooling.")
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y - \
pool_conf.start - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
elif pool.padding is not None:
if pool.padding is not None:
pool_conf.padding = pool.padding
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.start = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size + \
2*pool_conf.padding - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \
2*pool_conf.padding_y - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
else:
raise ValueError('At least one of start and padding should be set.')
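For example, with the img_pool_a.conf settings above (img_size = img_size_y = 16, size_x = 3, padding = 1, stride = 1, size_y = 5, padding_y = 2, stride_y = 2), this gives output_x = ceil((16 + 2 - 3) / 1) + 1 = 16 and output_y = ceil((16 + 4 - 5) / 2) + 1 = 9.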
def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels
......@@ -1603,7 +1588,7 @@ class PoolLayer(LayerBase):
pool_conf = self.config.inputs[input_index].pool_conf
print("output size for %s is %d*%d " % (
name, pool_conf.output_y, pool_conf.output_x))
self.set_layer_size((pool_conf.output_x ** 2) * pool_conf.channels)
self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
@config_layer('batch_norm')
class BatchNormLayer(LayerBase):
......
......@@ -210,7 +210,7 @@ DEVICE = 'device'
def layer_support(*attrs):
attrs_list = list(attrs)
attrs_list.append(DEVICE)
def decorator(method):
@functools.wraps(method)
......@@ -1627,7 +1627,9 @@ def img_conv_layer(input, filter_size, num_filters,
@layer_support()
def img_pool_layer(input, pool_size, name=None,
num_channels=None, pool_type=None,
stride=1, start=None, padding=0, layer_attr=None):
stride=1, start=None, padding=0, layer_attr=None,
pool_size_y=None, stride_y=None, padding_y=None,
img_width=None):
"""
Image pooling Layer.
......@@ -1635,25 +1637,34 @@ def img_pool_layer(input, pool_size, name=None,
.. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
:param padding: pooling padding
:param padding: pooling padding width.
:type padding: int
:param padding_y: pooling padding height. It's equal to padding by default.
:type padding_y: int|None
:param name: name of pooling layer
:type name: basestring.
:param input: layer's input
:type input: LayerOutput
:param pool_size: pooling size
:param pool_size: pooling window width
:type pool_size: int
:param pool_size_y: pooling window height. It's equal to pool_size by default.
:type pool_size_y: int|None
:param num_channels: number of input channel.
:type num_channels: int
:param pool_type: pooling type. MaxPooling or AvgPooling. Default is
MaxPooling.
:type pool_type: BasePoolingType
:param stride: stride of pooling.
:param stride: stride width of pooling.
:type stride: int
:param start: start position of pooling operation.
:type start: int
:param stride_y: stride height of pooling. It is equal to stride by default.
:type stride_y: int|None
:param start: start position of pooling operation. Note it is deprecated now.
:type start: int|None
:param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute
:param img_width: the width of the input feature map. If it is None, the input feature
map should be square.
:type img_width: int|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -1666,17 +1677,29 @@ def img_pool_layer(input, pool_size, name=None,
elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y
padding_y = padding if padding_y is None else padding_y
Layer(
name=name,
type=LayerType.POOL_LAYER,
inputs=[Input(input.name,
pool=Pool(
pool_type=''.join([pool_type.name, '-projection']),
pool_type=type_name,
channels=num_channels,
size_x=pool_size,
start=start,
stride=stride,
padding=padding
padding=padding,
size_y=pool_size_y,
stride_y=stride_y,
padding_y=padding_y,
img_width=img_width
))],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)
......@@ -2751,7 +2774,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False,
name=name)
return tmp
......
......@@ -170,13 +170,13 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None,
:type shared_bias: bool
:param conv_layer_attr: see img_conv_layer for details
:type conv_layer_attr: ExtraLayerAttribute
:param pool_stride: see img_conv_layer for details
:param pool_stride: see img_pool_layer for details
:type pool_stride: int
:param pool_start: see img_conv_layer for details
:param pool_start: see img_pool_layer for details. It is deprecated now.
:type pool_start: int
:param pool_padding: see img_conv_layer for details
:param pool_padding: see img_pool_layer for details
:type pool_padding: int
:param pool_layer_attr: see img_conv_layer for details
:param pool_layer_attr: see img_pool_layer for details
:type pool_layer_attr: ExtraLayerAttribute
:return: Layer's output
:rtype: LayerOutput
......@@ -243,7 +243,7 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None,
:param bn_layer_attr: ParameterAttribute.
:param pool_stride: see img_pool_layer's document.
:type pool_stride: int
:param pool_start: see img_pool_layer's document.
:param pool_start: see img_pool_layer's document. It is deprecated now.
:type pool_start: int
:param pool_padding: see img_pool_layer's document.
:type pool_padding: int
......@@ -555,7 +555,7 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
:type gate_act: BaseActivation
:param state_act: lstm state activiation type.
:type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer.
......
......@@ -19,6 +19,8 @@ __all__ = [
"BasePoolingType",
"MaxPooling",
"AvgPooling",
"CudnnMaxPooling",
"CudnnAvgPooling",
"SumPooling",
"SquareRootNPooling"
]
......@@ -26,7 +28,7 @@ __all__ = [
class BasePoolingType(object):
"""
Base Pooling Type.
Note these pooling types are used for sequence input, not for images.
Each PoolingType contains one parameter:
......@@ -55,7 +57,24 @@ class MaxPooling(BasePoolingType):
def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index
class CudnnMaxPooling(BasePoolingType):
"""
Cudnn max pooling only supports GPU. Returns the maximum value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool")
class CudnnAvgPooling(BasePoolingType):
"""
Cudnn average pooling only supports GPU. Returns the average value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool")
class AvgPooling(BasePoolingType):
"""
......
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr
86c0815275a9d5eb902e23c6a592f58a img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr
......
......@@ -7,8 +7,10 @@ settings(
img = data_layer(name='image', size=256*256)
# The parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square, so set a square filter_size here.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
filter_size=(32, 64), padding=(1, 0), stride=(1, 1),
filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
......@@ -17,4 +19,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm)
\ No newline at end of file
outputs(img_pool, img_norm)