diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index dcae62d06b26d1ac3875f9a393da989ed923cfea..5d750333e1e35d6097d33d905a02d647c3919eb1 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -84,16 +84,23 @@ extern void hl_expand_feature2col( * @param[in] width image width. * @param[in] pooledH output image height. * @param[in] pooledW output image width. - * @param[in] sizeX size of pooling window. - * @param[in] stride pooling stride. - * @param[in] start pooling start. + * @param[in] sizeX width of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. * @param[out] tgtData output data. * */ extern void hl_maxpool_forward( - int frameCnt, const real* inputData, int channels, - int height, int width, int pooledH, int pooledW, - int sizeX, int stride, int start, real* tgtData); + const int frameCnt, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, real* tgtData); /** * @brief Maximum pool backward. @@ -107,21 +114,28 @@ extern void hl_maxpool_forward( * @param[in] width image width. * @param[in] pooledH output image height. * @param[in] pooledW output image width. - * @param[in] sizeX size of pooling window. - * @param[in] stride pooling stride. - * @param[in] start pooling start. - * @param[out] targetGrad output grad. + * @param[in] sizeX width of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. * @param[in] scaleA scale. * @param[in] scaleB scale. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. 
+ * @param[out] targetGrad output grad. * */ extern void hl_maxpool_backward( - int frameCnt, const real* inputData, + const int frameCnt, const real* inputData, const real* outData, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* targetGrad, - real scaleA, real scaleB); + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, + real scaleA, real scaleB, + real* targetGrad); /** * @brief Averge pool forward. @@ -133,16 +147,23 @@ extern void hl_maxpool_backward( * @param[in] width image width. * @param[in] pooledH output image height. * @param[in] pooledW output image width. - * @param[in] sizeX size of pooling window. - * @param[in] stride pooling stride. - * @param[in] start pooling start. + * @param[in] sizeX width of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. * @param[out] tgtData output data. * */ extern void hl_avgpool_forward( - int frameCnt, const real* inputData, int channels, - int height, int width, int pooledH, int pooledW, - int sizeX, int stride, int start, real* tgtData); + const int frameCnt, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, real* tgtData); /** * @brief Maximum pool backward. @@ -154,20 +175,27 @@ extern void hl_avgpool_forward( * @param[in] width image width. * @param[in] pooledH output image height. * @param[in] pooledW output image width. - * @param[in] sizeX size of pooling window. - * @param[in] stride pooling stride. 
- * @param[in] start pooling start. - * @param[out] backGrad output grad. + * @param[in] sizeX width of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. * @param[in] scaleA scale. * @param[in] scaleB scale. + * @param[out] backGrad output grad. * */ extern void hl_avgpool_backward( - int frameCnt, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* backGrad, - real scaleA, real scaleB); + const int frameCnt, const real* outGrad, + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + int paddingH, int paddingW, + real scaleA, real scaleB, + real* backGrad); /** * @brief Cross-map-respose normalize forward. diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index e4d46e4fb186ee0357555a67ce222d65e02f9b5d..38e359c3eb2f34e5874187f4b06280a3df901c8e 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -38,29 +38,45 @@ inline void hl_expand_feature2col( real* dataCol) {} inline void hl_maxpool_forward( - int frameCnt, const real* inputData, int channels, - int height, int width, int pooledH, int pooledW, - int sizeX, int stride, int start, real* tgtData) {} + const int frameCnt, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, real* tgtData) {} inline void hl_maxpool_backward( - int frameCnt, const real* inputData, + const int frameCnt, const real* inputData, const real* outData, const real* outGrad, - int channels, int 
height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* targetGrad, - real scaleA, real scaleB) {} + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, + real scaleA, real scaleB, + real* targetGrad) {} inline void hl_avgpool_forward( - int frameCnt, const real* inputData, int channels, - int height, int width, int pooledH, int pooledW, - int sizeX, int stride, int start, real* tgtData) {} + const int frameCnt, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, real* tgtData) {} inline void hl_avgpool_backward( - int frameCnt, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* backGrad, - real scaleA, real scaleB) {} + const int frameCnt, const real* outGrad, + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + int paddingH, int paddingW, + real scaleA, real scaleB, + real* backGrad) {} inline void hl_CMRNorm_forward( size_t frameCnt, const real* in, real* scale, real* out, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index b3695a2c7f88ee8e4bbd176b3df014d77ef0a1bd..abac83a3e04472fe25bdbe662427aea56c096ad4 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -145,24 +145,28 @@ void hl_shrink_col2feature(const real * dataCol, size_t channels, CHECK_SYNC("hl_shrink_col2feature failed"); } -__global__ void KeMaxPoolForward(int nthreads, const real* inputData, - int channels, int height, int width, - int pooledH, int pooledW, - 
int ksize, int stride, int start, +__global__ void KeMaxPoolForward(const int nthreads, const real* inputData, + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int ksizeW, const int ksizeH, + const int strideH, const int strideW, + const int offsetH, const int offsetW, real* tgtData) { - int index = blockIdx.y * blockDim.x + threadIdx.x; + int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; int ph = (index / pooledW) % pooledH; int c = (index / pooledW / pooledH) % channels; - int frameNum = blockIdx.x; - int hstart = ph * stride + start; - int hend = min(hstart + ksize, height); - int wstart = pw * stride + start; - int wend = min(wstart + ksize, width); + int frameNum = index / pooledW / pooledH / channels; + int hstart = ph * strideH - offsetH; + int wstart = pw * strideW - offsetW; + int hend = min(hstart + ksizeH, height); + int wend = min(wstart + ksizeW, width); + hstart = max(hstart, 0); + wstart = max(wstart, 0); real maxval = -FLT_MAX; inputData += (frameNum * channels + c) * height * width; - tgtData += (frameNum * channels) * pooledW * pooledH; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { if (maxval < inputData[h * width + w]) @@ -173,44 +177,54 @@ __global__ void KeMaxPoolForward(int nthreads, const real* inputData, } } -void hl_maxpool_forward(int frameCnt, const real* inputData, int channels, - int height, int width, int pooledH, int pooledW, - int sizeX, int stride, int start, real* tgtData) { - int num_kernels = pooledH * pooledW * channels; - int blocksX = frameCnt; - int blocksY = (num_kernels + 1024 -1) / 1024; +void hl_maxpool_forward(const int frameCnt, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, + real* tgtData) { 
+ + int num_kernels = pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; dim3 threads(1024, 1); - dim3 grid(blocksX, blocksY); + dim3 grid(blocks, 1); + KeMaxPoolForward<<< grid, threads, 0, STREAM_DEFAULT >>> (num_kernels, inputData, channels, height, width, - pooledH, pooledW, sizeX, stride, start, tgtData); + pooledH, pooledW, sizeX, sizeY, strideH, strideW, + paddingH, paddingW, tgtData); CHECK_SYNC("hl_maxpool_forward failed"); } -__global__ void KeMaxPoolBackward(int nthreads, const real* inputData, +__global__ void KeMaxPoolBackward(const int nthreads, const real* inputData, const real* outData, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* targetGrad, - real scaleA, real scaleB) { - int index = blockIdx.y * blockDim.x + threadIdx.x; + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int padH, const int padW, + real scaleA, real scaleB, + real* targetGrad) { + int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { // find out the local index // find out the local offset - int offsetW = index % width + start; - int offsetH = (index / width) % height + start; + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; int offsetC = (index / width / height) % channels; - int frameNum = blockIdx.x; - int phstart = (offsetH < sizeX) ? 0 : (offsetH - sizeX) / stride + 1; - int phend = min(offsetH / stride + 1, pooledH); - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / stride + 1; - int pwend = min(offsetW / stride + 1, pooledW); + + int frameNum = index / width / height / channels; + int phstart = (offsetH < sizeY) ? 0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; + int phend = offsetH >= 0 ? 
min(offsetH / strideH + 1, pooledH) : 0; + int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; real gradient = 0; - inputData += (frameNum * channels) * height * width; real input = inputData[index]; outData += (frameNum * channels + offsetC) * pooledH * pooledW; outGrad += (frameNum * channels + offsetC) * pooledH * pooledW; - targetGrad += (frameNum * channels) * height * width; for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { if (input == outData[ph * pooledW + pw]) { @@ -223,90 +237,114 @@ __global__ void KeMaxPoolBackward(int nthreads, const real* inputData, } } -void hl_maxpool_backward(int frameCnt, const real* inputData, +void hl_maxpool_backward(const int frameCnt, const real* inputData, const real* outData, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* targetGrad, - real scaleA, real scaleB) { - int num_kernels = (height - start) * (width - start) * channels; - int blocksX = frameCnt; - int blocksY = (num_kernels + 1024 -1) / 1024; - dim3 threads(1024, 1); - dim3 grid(blocksX, blocksY); + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, + real scaleA, real scaleB, + real* targetGrad) { - KeMaxPoolBackward<<< grid, threads, 0, STREAM_DEFAULT >>> + int num_kernels = height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + + KeMaxPoolBackward<<< blocks, 1024, 0, STREAM_DEFAULT >>> (num_kernels, inputData, outData, outGrad, channels, - height, width, pooledH, pooledW, sizeX, stride, start, - targetGrad, scaleA, scaleB); + height, width, pooledH, pooledW, sizeX, sizeY, + strideH, strideW, + paddingH, paddingW, + scaleA, scaleB, + targetGrad); CHECK_SYNC("hl_maxpool_backward"); } -__global__ void KeAvePoolForward(int nthreads, const 
real* inputData, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* tgtData) { - int index = blockIdx.y * blockDim.x + threadIdx.x; +__global__ void KeAvgPoolForward(const int nthreads, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int padH, const int padW, + real* tgtData) { + int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; int ph = (index / pooledW) % pooledH; int c = (index / pooledW / pooledH) % channels; - int frameNum = blockIdx.x; - int hstart = ph * stride + start; - int hend = min(hstart + sizeX, height); - int wstart = pw * stride + start; - int wend = min(wstart + sizeX, width); + int frameNum = index / pooledW / pooledH / channels; + + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int pool_size = (hend - hstart) * (wend - wstart); + hstart = max(hstart, 0); + wstart = max(wstart, 0); + hend = min(hend, height); + wend = min(wend, width); + real aveval = 0; inputData += (frameNum * channels + c) * height * width; - tgtData += (frameNum * channels) * pooledH * pooledW; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { aveval += inputData[h * width + w]; } } - tgtData[index] = aveval / ((hend - hstart) * (wend - wstart)); + tgtData[index] = aveval / pool_size; } } -void hl_avgpool_forward(int frameCnt, const real* inputData, int channels, - int height, int width, int pooledH, int pooledW, - int sizeX, int stride, int start, real* tgtData) { - int num_kernels = pooledH * pooledW * channels; - int blocksX = frameCnt; - int blocksY = (num_kernels + 1024 -1) / 1024; - dim3 threads(1024, 1); - dim3 grid(blocksX, blocksY); - KeAvePoolForward<<< grid, 
threads, 0, STREAM_DEFAULT >>> +void hl_avgpool_forward(const int frameCnt, const real* inputData, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, real* tgtData) { + int num_kernels = pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + KeAvgPoolForward<<< blocks, 1024, 0, STREAM_DEFAULT >>> (num_kernels, inputData, channels, height, width, pooledH, pooledW, - sizeX, stride, start, tgtData); + sizeX, sizeY, strideH, strideW, + paddingH, paddingW, tgtData); CHECK_SYNC("hl_avgpool_forward failed"); } -__global__ void KeAvgPoolBackward(int nthreads, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* tgtGrad, - real scaleA, real scaleB) { - int index = blockIdx.y * blockDim.x + threadIdx.x; +__global__ void KeAvgPoolBackward(const int nthreads, const real* outGrad, + const int channels, const int height, + const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int padH, const int padW, + real scaleA, real scaleB, + real* tgtGrad) { + int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { - int offsetW = index % width + start; - int offsetH = (index / width) % height + start; + int offsetW = index % width + padW; + int offsetH = (index / width) % height + padH; int offsetC = (index / width / height) % channels; - int frameNum = blockIdx.x; - int phstart = (offsetH < sizeX) ? 0 : (offsetH - sizeX) / stride + 1; - int phend = min(offsetH / stride + 1, pooledH); - int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / stride + 1; - int pwend = min(offsetW / stride + 1, pooledW); + int frameNum = index / width / height / channels; + + int phstart = (offsetH < sizeY) ? 
0 : (offsetH - sizeY) / strideH + 1; + int pwstart = (offsetW < sizeX) ? 0 : (offsetW - sizeX) / strideW + 1; + int phend = offsetH >= 0 ? min(offsetH / strideH + 1, pooledH) : 0; + int pwend = offsetW >= 0 ? min(offsetW / strideW + 1, pooledW) : 0; real gradient = 0; outGrad += (frameNum * channels + offsetC) * pooledH * pooledW; - tgtGrad += (frameNum * channels) * height * width; for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { // figure out the pooling size - int poolsize = (min(ph * stride + sizeX, height) - ph * stride) * - (min(pw * stride + sizeX, width) - pw * stride); + int hstart = ph * strideH - padH; + int wstart = pw * strideW - padW; + int hend = min(hstart + sizeY, height + padH); + int wend = min(wstart + sizeX, width + padW); + int poolsize = (hend - hstart) * (wend - wstart); gradient += outGrad[ph * pooledW + pw]/poolsize; } } @@ -314,20 +352,25 @@ __global__ void KeAvgPoolBackward(int nthreads, const real* outGrad, } } -void hl_avgpool_backward(int frameCnt, const real* outGrad, - int channels, int height, int width, - int pooledH, int pooledW, int sizeX, - int stride, int start, real* backGrad, - real scaleA, real scaleB) { - int num_kernels = (height - start) * (width - start) * channels; - int blocksX = frameCnt; - int blocksY = (num_kernels + 1024 -1) / 1024; - dim3 threads(1024, 1); - dim3 grid(blocksX, blocksY); +void hl_avgpool_backward(const int frameCnt, const real* outGrad, + const int channels, + const int height, const int width, + const int pooledH, const int pooledW, + const int sizeX, const int sizeY, + const int strideH, const int strideW, + const int paddingH, const int paddingW, + real scaleA, real scaleB, + real* backGrad) { + int num_kernels = height * width * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; - KeAvgPoolBackward <<< grid, threads, 0, STREAM_DEFAULT >>> + KeAvgPoolBackward <<< blocks, 1024, 0, STREAM_DEFAULT >>> (num_kernels, outGrad, channels, height, 
width, - pooledH, pooledW, sizeX, stride, start, backGrad, scaleA, scaleB); + pooledH, pooledW, sizeX, sizeY, + strideH, strideW, + paddingH, paddingW, + scaleA, scaleB, + backGrad); CHECK_SYNC("hl_avgpool_backward failed"); } diff --git a/paddle/gserver/layers/CudnnPoolLayer.cpp b/paddle/gserver/layers/CudnnPoolLayer.cpp index 86c056ef5692a7337b3d832bfab0d43e1ec867a8..4c733591b3779f2502c308a965cb731466b464f0 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.cpp +++ b/paddle/gserver/layers/CudnnPoolLayer.cpp @@ -51,7 +51,6 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap, PoolLayer::init(layerMap, parameterMap); CHECK(useGpu_) << "CudnnPoolLayer only support gpu"; - CHECK_EQ(start_, 0) << poolType_ << " dose not support 'start'"; hl_create_tensor_descriptor(&inputDesc_); hl_create_tensor_descriptor(&outputDesc_); diff --git a/paddle/gserver/layers/CudnnPoolLayer.h b/paddle/gserver/layers/CudnnPoolLayer.h index df97ef2edfd0125399505b69ae41708fb492308a..2ef94720d2b9f13597cb0fb546726a2c2a67cb36 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.h +++ b/paddle/gserver/layers/CudnnPoolLayer.h @@ -56,16 +56,6 @@ public: void reshape(int batchSize); virtual void forward(PassType passType); virtual void backward(const UpdateCallback& callback = nullptr); - - /** - * Calculate output size according window size of pooling. 
- */ - int outputSize(int imageSize, int windowSize, int padding, int stride) { - int outputSize; - outputSize = - (imageSize - windowSize + 2 * padding + stride - 1) / stride + 1; - return outputSize; - } }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolLayer.cpp b/paddle/gserver/layers/PoolLayer.cpp index 0ff7f374abb4beafb848c0e7ae734edabe86a038..7fc27ac0bd8e05246d87bac0e9692d8496f6601f 100644 --- a/paddle/gserver/layers/PoolLayer.cpp +++ b/paddle/gserver/layers/PoolLayer.cpp @@ -35,7 +35,6 @@ bool PoolLayer::init(const LayerMap& layerMap, poolType_ = conf.pool_type(); channels_ = conf.channels(); sizeX_ = conf.size_x(); - start_ = conf.start(); stride_ = conf.stride(); outputX_ = conf.output_x(); imgSize_ = conf.img_size(); @@ -47,22 +46,6 @@ bool PoolLayer::init(const LayerMap& layerMap, confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); - bool cudnnTypeCheck = true; -#ifndef PADDLE_ONLY_CPU - cudnnTypeCheck = !CudnnPoolLayer::typeCheck(poolType_); -#endif - - if ((sizeY_ != sizeX_ || imgSizeY_ != imgSize_ || strideY_ != stride_ || - confPaddingY_ != confPadding_ || outputY_ != outputX_) && - cudnnTypeCheck) { - LOG(FATAL) << poolType_ << " does not supported non-square " - "filter, image, stride or padding"; - } - - if (confPadding_ != 0 && cudnnTypeCheck) { - LOG(FATAL) << poolType_ << " does not supported 'padding'"; - } - return true; } diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h index b7a1dfd7632f91bc3935f0f19ebfbd44258dcf7b..bde1f5b8dcbfdc4301266fa758278486fe930daf 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/gserver/layers/PoolLayer.h @@ -28,7 +28,7 @@ namespace paddle { class PoolLayer : public Layer { protected: size_t channels_, sizeX_, stride_, outputX_, imgSize_; - int start_, confPadding_; + int confPadding_; size_t sizeY_; size_t imgSizeY_; @@ -47,6 +47,16 @@ public: static Layer* 
create(const LayerConfig& config); virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + /** + * Calculate output size according to the window size, padding size and stride. + */ + int outputSize(int imageSize, int windowSize, int padding, int stride) { + int outputSize; + outputSize = + (imageSize - windowSize + 2 * padding + stride - 1) / stride + 1; + return outputSize; + } }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.cpp b/paddle/gserver/layers/PoolProjectionLayer.cpp index 9c2d6d2164a3f54e85373b001255aec136d76095..5a2e9afb6e1640b0fcf7937adc5e64f4666bd789 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.cpp +++ b/paddle/gserver/layers/PoolProjectionLayer.cpp @@ -25,13 +25,15 @@ size_t PoolProjectionLayer::getSize() { imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); if (imgSizeH_ == 0) { - imgSizeH_ = imgSize_; + imgSizeH_ = imgSizeY_; } if (imgSizeW_ == 0) { imgSizeW_ = imgSize_; } - outputH_ = 1 + (imgSizeH_ - start_ - sizeX_ + stride_ - 1) / stride_; - outputW_ = 1 + (imgSizeW_ - start_ - sizeX_ + stride_ - 1) / stride_; + + outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_); + outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_); + layerSize = outputH_ * outputW_ * channels_; getOutput().setFrameHeight(outputH_); @@ -51,8 +53,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) { MatrixPtr outV = getOutputValue(); - outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_, - stride_, outputH_, outputW_); + outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, + sizeX_, sizeY_, strideY_, stride_, + outputH_, outputW_, confPaddingY_, confPadding_); } void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { @@ -69,7 +72,9 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { MatrixPtr inputGrad = getInputGrad(0); 
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV, - sizeX_, start_, stride_, outputH_, outputW_, 1, 1); + sizeX_, sizeY_, + strideY_, stride_, outputH_, outputW_, 1, 1, + confPaddingY_, confPadding_); } void AvgPoolProjectionLayer::forward(PassType passType) { @@ -84,8 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) { MatrixPtr outV = getOutputValue(); - outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_, - stride_, outputH_, outputW_); + outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, + sizeX_, sizeY_, strideY_, stride_, + outputH_, outputW_, confPaddingY_, confPadding_); } void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { @@ -97,7 +103,9 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { /* Do derivation */ MatrixPtr outputGrad = getOutputGrad(); MatrixPtr inputGrad = getInputGrad(0); - inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, start_, - stride_, outputH_, outputW_, 1, 1); + inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, + sizeX_, sizeY_, strideY_, stride_, + outputH_, outputW_, 1, 1, + confPaddingY_, confPadding_); } } // namespace paddle diff --git a/paddle/gserver/tests/img_pool_a.conf b/paddle/gserver/tests/img_pool_a.conf new file mode 100644 index 0000000000000000000000000000000000000000..5938e7611201c9a4e3b44ca8aae2f39a80b1ff3b --- /dev/null +++ b/paddle/gserver/tests/img_pool_a.conf @@ -0,0 +1,46 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer_config_helpers import * + +settings(batch_size=10) +data = data_layer(name ="input", size=8*16*16) +conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, + num_channels=8, + num_filters=8,stride=1) +maxpool = img_pool_layer(input=conv, + pool_size=3, + pool_size_y=5, + num_channels=8, + stride=1, + stride_y=2, + padding=1, + padding_y=2, + img_width=16, + pool_type=MaxPooling(), +) +avgpool = img_pool_layer(input=conv, + pool_size=3, + pool_size_y=5, + num_channels=8, + stride=1, + stride_y=2, + padding=1, + padding_y=2, + img_width=16, + pool_type=AvgPooling(), +) + +outputs([maxpool, avgpool]) diff --git a/paddle/gserver/tests/img_pool_b.conf b/paddle/gserver/tests/img_pool_b.conf new file mode 100644 index 0000000000000000000000000000000000000000..6ea9649b3f1eaf72686fcf8a157ef9d75c662e46 --- /dev/null +++ b/paddle/gserver/tests/img_pool_b.conf @@ -0,0 +1,44 @@ +#edit-mode: -*- python -*- +# Copyright (c) 2016 Baidu, Inc. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle.trainer_config_helpers import * + +settings(batch_size=10) +data = data_layer(name ="input", size=8*16*16) +conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, + num_channels=8, num_filters=8, stride=1) +maxpool = img_pool_layer(input=conv, + pool_size=3, + pool_size_y=5, + num_channels=8, + stride=1, + stride_y=2, + padding=1, + padding_y=2, + pool_type=CudnnMaxPooling(), +) + +avgpool = img_pool_layer(input=conv, + pool_size=3, + pool_size_y=5, + num_channels=8, + stride=1, + stride_y=2, + padding=1, + padding_y=2, + pool_type=CudnnAvgPooling(), +) + +outputs([maxpool, avgpool]) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 3150c31e4900c3b09f9f49a19e65b1b8c25d19e6..c5723f8574ab3d7a15bfe7c8db8a9d03951f08b1 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -791,21 +791,24 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool, (*config).biasSize = 0; (*config).layerConfig.set_type("pool"); (*config).layerConfig.set_num_filters(16); - (*config).layerConfig.set_partial_sum(1); - (*config).layerConfig.set_shared_biases(true); + int kw = 3, kh = 3; + int pw = 0, ph = 0; + int sw = 2, sh = 2; pool->set_pool_type(poolType); pool->set_channels(16); - pool->set_size_x(3); - if (poolType == "cudnn-max-pool" || poolType == "cudnn-avg-pool") { - pool->set_padding(0); - } else { - pool->set_start(0); - } - pool->set_stride(2); - pool->set_output_x((pool->img_size() - pool->start() - pool->size_x()) / - ((float)pool->stride()) + - 1.5); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_start(0); + pool->set_padding(pw); + pool->set_padding_y(ph); + pool->set_stride(sw); + pool->set_stride_y(sh); + + int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1; + int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1; + pool->set_output_x(ow); + pool->set_output_y(oh); } void testPoolLayer(const string& poolType, bool trans, 
bool useGpu) { @@ -814,9 +817,10 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) { LayerInputConfig* input = config.layerConfig.add_inputs(); PoolConfig* pool = input->mutable_pool_conf(); - setPoolConfig(&config, pool, poolType); pool->set_img_size(14); - config.layerConfig.set_size(pool->output_x() * pool->output_x() * + pool->set_img_size_y(14); + setPoolConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * pool->channels()); testLayerGrad(config, "pool", 100, trans, useGpu); @@ -829,11 +833,11 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { LayerInputConfig* input = config.layerConfig.add_inputs(); PoolConfig* pool = input->mutable_pool_conf(); - setPoolConfig(&config, pool, poolType); pool->set_size_y(4); pool->set_stride_y(3); pool->set_img_size(10); pool->set_img_size_y(20); + setPoolConfig(&config, pool, poolType); pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / ((float)pool->stride_y()) + 1.5); @@ -1252,8 +1256,6 @@ TEST(Layer, MultiplexLayer) { } } - - int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/gserver/tests/test_NetworkCompare.cpp b/paddle/gserver/tests/test_NetworkCompare.cpp index 1c6a8b0017fc9152802bebe98a10f8c4043c13fd..b3ef53067301b4f7f50ba799a035a80fa1c39e65 100644 --- a/paddle/gserver/tests/test_NetworkCompare.cpp +++ b/paddle/gserver/tests/test_NetworkCompare.cpp @@ -116,6 +116,8 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) { gradientMachine->start(trainer.getConfig(), nullptr); gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN); for (size_t i = 0; i < in.outGrads.size(); i++) { + // If all the layers in the config have no parameters and + // NeedGradient() is not set, the outArgs[i] will be nullptr. 
outArgs[i].grad->copyFrom(*in.outGrads[i]); } gradientMachine->backward(); @@ -225,6 +227,18 @@ TEST(Compare, concat_table) { compareNetwork(config_file_a, config_file_b); } +#ifndef PADDLE_ONLY_CPU +TEST(Compare, img_pool) { + std::string config_file_a = "./gserver/tests/img_pool_a.conf"; + std::string config_file_b = "./gserver/tests/img_pool_b.conf"; + bool useGpu = FLAGS_use_gpu; + FLAGS_use_gpu = true; + compareNetwork(config_file_a, config_file_b); + FLAGS_use_gpu = useGpu; +} +#endif + + P_DEFINE_string(config_file_a, "", "config of one network to compare"); P_DEFINE_string(config_file_b, "", "config of another network to compare"); TEST(Compare, network) { diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index e351bede724ac3cef941f50b9418af5d438d6f77..a6ff2f3b35d04783c718a7ce5c42cd586f3eadae 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -860,9 +860,11 @@ void GpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight, } void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, - size_t imgSizeW, size_t channels, size_t sizeX, - int start, size_t stride, size_t outputH, - size_t outputW) { + size_t imgSizeW, size_t channels, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -874,14 +876,17 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, CHECK(width_ == outputH * outputW * channels); hl_maxpool_forward(frameNum, inputData, channels, height, width, - outputH, outputW, sizeX, stride, start, data_); + outputH, outputW, sizeX, sizeY, strideH, strideW, + paddingH, paddingW, data_); } void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, Matrix& outGrad, Matrix& outV, - size_t sizeX, int start, size_t stride, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, size_t 
outputH, size_t outputW, - real scaleTargets, real scaleOutput) { + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW) { CHECK(inputMat.useGpu_ == true && outGrad.useGpu_ == true && outV.useGpu_ == true) << "Matrix type are not equal"; @@ -899,15 +904,19 @@ void GpuMatrix::maxPoolBackward(Matrix& inputMat, size_t imgSizeH, CHECK(outGrad.getHeight() == outV.getHeight() && outGrad.getWidth() == outV.getWidth()); + hl_maxpool_backward(frameNum, inputData, outData, outDiff, channels, - height, width, outputH, outputW, sizeX, stride, - start, data_, scaleTargets, scaleOutput); + height, width, outputH, outputW, sizeX, sizeY, + strideH, strideW, paddingH, paddingW, + scaleTargets, scaleOutput, data_); } void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH, - size_t imgSizeW, size_t channels, size_t sizeX, - int start, size_t stride, size_t outputH, - size_t outputW) { + size_t imgSizeW, size_t channels, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -919,13 +928,17 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t imgSizeH, CHECK(width_ == outputH * outputW * channels); hl_avgpool_forward(frameNum, inputData, channels, height, width, - outputH, outputW, sizeX, stride, start, data_); + outputH, outputW, sizeX, sizeY, + strideH, strideW, + paddingH, paddingW, data_); } void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH, - size_t imgSizeW, size_t sizeX, int start, - size_t stride, size_t outputH, size_t outputW, - real scaleTargets, real scaleOutput) { + size_t imgSizeW, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW) { CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; real* outDiff = 
outGrad.getData(); @@ -938,8 +951,10 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, size_t imgSizeH, CHECK(outGrad.getWidth() == outputH * outputW * channels); hl_avgpool_backward(frameNum, outDiff, channels, height, width, - outputH, outputW, sizeX, stride, start, data_, - scaleTargets, scaleOutput); + outputH, outputW, sizeX, sizeY, + strideH, strideW, paddingH, paddingW, + scaleTargets, scaleOutput, + data_); } void GpuMatrix::crossMapNormalFwd(Matrix& input, size_t imgSizeH, @@ -1450,19 +1465,23 @@ void CpuMatrix::convShrink(Matrix& expandFeat, int thisImgHeight, } void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, - size_t imgSizeW, size_t channels, size_t sizeX, - int start, size_t stride, size_t outputH, - size_t outputW) { + size_t imgSizeW, size_t channels, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW) { real* inputData = inputMat.getData(); real* outData = data_; size_t num = inputMat.getHeight(); size_t inWidth = imgSizeW; size_t inHeight = imgSizeH; CHECK(inHeight * inWidth == inputMat.getWidth() / channels); + CHECK_EQ(num, this->getHeight()); + CHECK_EQ(channels*outputH*outputW, this->getWidth()); /* initialize the data_ */ for (size_t i = 0; i < height_ * width_; i++) { - data_[i] = -FLT_MAX; + outData[i] = -(real)FLT_MAX; } /* pool max one by one */ @@ -1470,12 +1489,14 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, for (size_t c = 0; c < channels; ++c) { // channel by channel for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - size_t hstart = ph * stride + start; - size_t wstart = pw * stride + start; - size_t hend = std::min(hstart + sizeX, inHeight); - size_t wend = std::min(wstart + sizeX, inWidth); - for (size_t h = hstart; h < hend; ++h) { - for (size_t w = wstart; w < wend; ++w) { + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int hend = 
std::min(hstart + sizeY, inHeight); + int wend = std::min(wstart + sizeX, inWidth); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { outData[ph * outputW + pw] = std::max(outData[ph * outputW + pw], inputData[h * inWidth + w]); } @@ -1491,9 +1512,10 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, Matrix& outGrad, Matrix& outV, size_t sizeX, - int start, size_t stride, size_t outputH, - size_t outputW, real scaleTargets, - real scaleOutput) { + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW) { size_t num = image.getHeight(); size_t channels = size_t(width_ / imgSizeH / imgSizeW); CHECK(image.getWidth() == imgSizeH * imgSizeW * channels); @@ -1509,32 +1531,36 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, for (size_t c = 0; c < channels; ++c) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - size_t hstart = ph * stride + start; - size_t wstart = pw * stride + start; - size_t hend = std::min(hstart + sizeX, imgSizeH); - size_t wend = std::min(wstart + sizeX, imgSizeW); - for (size_t h = hstart; h < hend; ++h) { - for (size_t w = wstart; w < wend; ++w) { + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int hend = std::min(hstart + sizeY, imgSizeH); + int wend = std::min(wstart + sizeX, imgSizeW); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { tgtGrad[h * imgSizeW + w] = scaleTargets * tgtGrad[h * imgSizeW + w] + scaleOutput * otGrad[ph * outputW + pw] * - (inData[h * imgSizeW + w] == otData[ph * outputH + pw]); + (inData[h * imgSizeW + w] == otData[ph * outputW + pw]); 
} } } } // offset inData += imgSizeH * imgSizeW; - otData += outputH * outputW; tgtGrad += imgSizeH * imgSizeW; + otData += outputH * outputW; otGrad += outputH * outputW; } } } void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t channels, size_t sizeX, int start, - size_t stride, size_t outputH, size_t outputW) { + size_t channels, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW) { // The main loop size_t num = input.getHeight(); size_t inHeight = imgSizeH; @@ -1548,17 +1574,24 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, for (size_t c = 0; c < channels; ++c) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - size_t hstart = ph * stride + start; - size_t wstart = pw * stride + start; - size_t hend = std::min(hstart + sizeX, inHeight); - size_t wend = std::min(wstart + sizeX, inWidth); + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int hend = std::min(hstart + sizeY, inHeight + paddingH); + int wend = std::min(wstart + sizeX, inWidth + paddingW); + int poolSize = (hend - hstart) * (wend - wstart); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + hend = std::min(hend, static_cast(inHeight)); + wend = std::min(wend, static_cast(inWidth)); + + CHECK(poolSize); tgtData[ph * outputW + pw] = 0; // clear - for (size_t h = hstart; h < hend; ++h) { - for (size_t w = wstart; w < wend; ++w) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { tgtData[ph * outputW + pw] += inData[h * inWidth + w]; } } - tgtData[ph * outputW + pw] /= (hend - hstart) * (wend - wstart); + tgtData[ph * outputW + pw] /= poolSize; } } // compute offset @@ -1569,9 +1602,11 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, } void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t 
imgSizeW, - size_t sizeX, int start, size_t stride, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, size_t outputH, size_t outputW, - real scaleTargets, real scaleOutput) { + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW) { size_t num = input.getHeight(); size_t channels = input.getWidth() / outputH / outputW; CHECK(imgSizeH * imgSizeW * channels == getWidth()); @@ -1582,14 +1617,20 @@ void CpuMatrix::avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, for (size_t c = 0; c < channels; ++c) { for (size_t ph = 0; ph < outputH; ++ph) { for (size_t pw = 0; pw < outputW; ++pw) { - size_t hstart = ph * stride + start; - size_t wstart = pw * stride + start; - size_t hend = std::min(hstart + sizeX, imgSizeH); - size_t wend = std::min(wstart + sizeX, imgSizeW); - size_t poolsize = (hend - hstart) * (wend - wstart); - for (size_t h = hstart; h < hend; ++h) { - for (size_t w = wstart; w < wend; ++w) { - outData[h * imgSizeW + w] += inData[ph * outputW + pw] / poolsize; + int hstart = ph * strideH - paddingH; + int wstart = pw * strideW - paddingW; + int hend = std::min(hstart + sizeY, imgSizeH + paddingH); + int wend = std::min(wstart + sizeX, imgSizeW + paddingW); + int poolSize = (hend - hstart) * (wend - wstart); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + hend = std::min(hend, static_cast(imgSizeH)); + wend = std::min(wend, static_cast(imgSizeW)); + CHECK(poolSize); + + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[h * imgSizeW + w] += inData[ph * outputW + pw] / poolSize; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index cfb30797fcf1bcafe8080fcc0679171b2386f217..5c15c94012816eee6234298142c7c8baa53afae7 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -742,31 +742,37 @@ public: */ virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, size_t channels, size_t sizeX, - int start_, size_t stride, 
size_t outputH, - size_t outputW) { + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW) { LOG(FATAL) << "Not implemeted"; } /// Pooling backward operation. virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, Matrix& outGrad, Matrix& outV, size_t sizeX, - int start, size_t stride, size_t outputH, - size_t outputW, real scaleTargets, - real scaleOutput) { + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW) { LOG(FATAL) << "Not implemeted"; } /// Pooling forward operation, caculate the average of sizeX elements. virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t channels, size_t sizeX, int start, - size_t stride, size_t outputH, size_t outputW) { + size_t channels, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW) { LOG(FATAL) << "Not implemeted"; } virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t sizeX, int start, size_t stride, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, size_t outputH, size_t outputW, - real scaleTargets, real scaleOutput) { + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW) { LOG(FATAL) << "Not implemeted"; } @@ -1131,21 +1137,30 @@ public: real alpha = 1.0f, real beta = 0.0f); void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, - size_t channels, size_t sizeX, int start_, size_t stride, - size_t outputH, size_t outputW); + size_t channels, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW); void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, Matrix& outV, size_t sizeX, int start, - size_t stride, size_t outputH, size_t 
outputW, - real scaleTargets, real scaleOutput); + Matrix& outGrad, Matrix& outV, size_t sizeX, + size_t sizeY, size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW); void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t channels, size_t sizeX, int start, size_t stride, - size_t outputH, size_t outputW); + size_t channels, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW); void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t sizeX, int start, size_t stride, size_t outputH, - size_t outputW, real scaleTargets, real scaleOutput); + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW); void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW, Matrix& denoms, size_t channels, size_t sizeX, @@ -1242,21 +1257,31 @@ public: real alpha = 1.0f, real beta = 0.0f); void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, - size_t channels, size_t sizeX, int start_, size_t stride, - size_t outputH, size_t outputW); + size_t channels, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW); void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, - Matrix& outGrad, Matrix& outV, size_t sizeX, int start, - size_t stride, size_t outputH, size_t outputW, - real scaleTargets, real scaleOutput); + Matrix& outGrad, Matrix& outV, + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW); void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t channels, size_t sizeX, int start, size_t stride, - size_t 
outputH, size_t outputW); + size_t channels, size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + size_t paddingH, size_t paddingW); void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, - size_t sizeX, int start, size_t stride, size_t outputH, - size_t outputW, real scaleTargets, real scaleOutput); + size_t sizeX, size_t sizeY, + size_t strideH, size_t strideW, + size_t outputH, size_t outputW, + real scaleTargets, real scaleOutput, + size_t paddingH, size_t paddingW); void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW, Matrix& denoms, size_t channels, size_t sizeX, diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index fe8eacc2efbc5446e63ed40f914b62c70189010e..e1bda79a8acb16ffb9025ff92afa2bb24d76c4fe 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1846,6 +1846,159 @@ TEST(Matrix, classificationError) { } } +void testMaxPoolFwdBwd(int numSamples, int channels, + int imgSizeH, int imgSizeW, + int ksizeH, int ksizeW, + int strideH, int strideW, + int padH, int padW) { + int outH = 0, outW = 0; + outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1; + outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1; + + int inWidth = imgSizeH * imgSizeW * channels; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->maxPoolForward(*input, imgSizeH, imgSizeW, + channels, ksizeW, ksizeH, + strideH, strideW, outH, outW, padH, padW); + 
targetGpu->maxPoolForward(*inputGpu, imgSizeH, imgSizeW, + channels, ksizeW, ksizeH, + strideH, strideW, outH, outW, padH, padW); + MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); + targetCheck->copyFrom(*targetGpu); + checkMatrixEqual(target, targetCheck); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth, + false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->maxPoolBackward(*input, imgSizeH, imgSizeW, + *targetGrad, *target, + ksizeW, ksizeH, + strideH, strideW, + outH, outW, 1.0, 1.0, padH, padW); + inputGpuGrad->maxPoolBackward(*inputGpu, imgSizeH, imgSizeW, + *targetGpuGrad, *targetGpu, + ksizeW, ksizeH, + strideH, strideW, + outH, outW, 1.0, 1.0, padH, padW); + MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth, + false, false); + targetBwdCheck->copyFrom(*inputGpuGrad); + checkMatrixEqual(inputGrad, targetBwdCheck); +} + +void testAvgPoolFwdBwd(int numSamples, int channels, + int imgSizeH, int imgSizeW, + int ksizeH, int ksizeW, + int strideH, int strideW, + int padH, int padW) { + int outH = 0, outW = 0; + outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1; + outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1; + + int inWidth = imgSizeH * imgSizeW * channels; + MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true); + + int outWidth = channels * outH * outW; + MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true); + + 
input->randomizeUniform(); + target->randomizeUniform(); + inputGpu->copyFrom(*input); + targetGpu->copyFrom(*target); + + target->avgPoolForward(*input, imgSizeH, imgSizeW, + channels, ksizeW, ksizeH, + strideH, strideW, outH, outW, padH, padW); + targetGpu->avgPoolForward(*inputGpu, imgSizeH, imgSizeW, + channels, ksizeW, ksizeH, + strideH, strideW, outH, outW, padH, padW); + MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false); + targetCheck->copyFrom(*targetGpu); + MatrixCheckErr(*target, *targetCheck); + + MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false); + MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true); + MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false); + MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth, + false, true); + + inputGrad->randomizeUniform(); + targetGrad->randomizeUniform(); + inputGpuGrad->copyFrom(*inputGrad); + targetGpuGrad->copyFrom(*targetGrad); + + inputGrad->avgPoolBackward(*targetGrad, imgSizeH, imgSizeW, + ksizeW, ksizeH, + strideH, strideW, + outH, outW, 1.0, 1.0, padH, padW); + inputGpuGrad->avgPoolBackward(*targetGpuGrad, imgSizeH, imgSizeW, + ksizeW, ksizeH, + strideH, strideW, + outH, outW, 1.0, 1.0, padH, padW); + MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth, + false, false); + targetBwdCheck->copyFrom(*inputGpuGrad); + MatrixCheckErr(*inputGrad, *targetBwdCheck); +} + +TEST(Matrix, PoolFwdBwd) { + for (auto numSamples : {5, 32}) { + for (auto channels : {1, 9, 32}) { + for (auto imgSizeH : {14, 28}) { + for (auto imgSizeW : {16, 30}) { + for (auto sizeX : {2, 5}) { + for (auto sizeY : {2, 5}) { + for (auto sH : {1, 2}) { + for (auto sW : {1, 2}) { + for (auto pH : {0, (sizeY - 1)/2}) { + for (auto pW : {0, (sizeX - 1)/2}) { + VLOG(3) << " numSamples=" << numSamples + << " channels=" << channels + << " imgSizeH=" << imgSizeH + << " imgSizeW=" << imgSizeW + << " sizeX=" << sizeX + << " 
sizeY=" << sizeY + << " strideH=" << sH + << " strideW=" << sW + << " padingH=" << pH + << " padingW=" << pW; + testMaxPoolFwdBwd(numSamples, channels, imgSizeH, + imgSizeW, sizeX, sizeY, sH, sW, pH, pW); + testAvgPoolFwdBwd(numSamples, channels, imgSizeH, + imgSizeW, sizeX, sizeY, sH, sW, pH, pW); + } + } + } + } + } + } + } + } + } + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/proto/ModelConfig.proto.m4 b/proto/ModelConfig.proto.m4 index b32f8b1ee90723e7bfdd4cbd5d93a35ac22b6b6d..25e36f9c4c1687aec46ca7202d1ba8a6e0088fec 100644 --- a/proto/ModelConfig.proto.m4 +++ b/proto/ModelConfig.proto.m4 @@ -88,7 +88,8 @@ message PoolConfig { required uint32 size_x = 3; // Tell the net where in the input image to start the pooling. - required uint32 start = 4; + // start is deprecated now. + optional uint32 start = 4; // Defines the stride size between successive pooling squares. required uint32 stride = 5; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 1f55298f24f0742203adf6b332f86193d3ffc732..fb47fd0c6f0c35fa62f89c2a8730f5df69b27b7e 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -961,10 +961,6 @@ def parse_pool(pool, input_layer_name, pool_conf): "['max-projection', 'avg-projection', " "'cudnn-max-pool', 'cudnn-avg-pool']" % pool.pool_type) - if pool.size_y or pool.stride_y or pool.img_width or pool.padding_y: - config_assert(pool.pool_type.startswith('cudnn'), - "'size_y', 'stride_y' and 'img_width' and 'padding_y'" - "can only be used for cudnn") pool_conf.channels = pool.channels pool_conf.size_x = pool.size_x @@ -974,36 +970,25 @@ def parse_pool(pool, input_layer_name, pool_conf): pool_conf.stride_y = default(pool.stride_y, pool_conf.stride); img_pixels = g_layer_map[input_layer_name].size / pool.channels + # the img_width may be removed, + # and it can be calculated automatically later. 
pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5)) pool_conf.img_size_y = img_pixels / pool_conf.img_size config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels, "Incorrect input image size %d for input image pixels %d" % (pool_conf.img_size, img_pixels)) - if pool.start is not None: - config_assert(pool.padding is None, - 'At most one of start and padding can be set.') - pool_conf.start = pool.start - pool_conf.padding = 0 - pool_conf.output_x = int(math.ceil((pool_conf.img_size - \ - pool_conf.start - pool_conf.size_x) / \ - float(pool_conf.stride))) + 1 + config_assert(not pool.start, "start is deprecated in pooling.") - pool_conf.output_y = int(math.ceil((pool_conf.img_size_y - \ - pool_conf.start - pool_conf.size_y) / \ - float(pool_conf.stride_y))) + 1 - elif pool.padding is not None: + if pool.padding is not None: pool_conf.padding = pool.padding pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) - pool_conf.start = 0 pool_conf.output_x = int(math.ceil((pool_conf.img_size + \ 2*pool_conf.padding - pool_conf.size_x) / \ float(pool_conf.stride))) + 1 pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \ 2*pool_conf.padding_y - pool_conf.size_y) / \ float(pool_conf.stride_y))) + 1 - else: - raise ValueError('At least one of start and padding should be set.') def parse_image(image, input_layer_name, image_conf): image_conf.channels = image.channels @@ -1603,7 +1588,7 @@ class PoolLayer(LayerBase): pool_conf = self.config.inputs[input_index].pool_conf print("output size for %s is %d*%d " % ( name, pool_conf.output_y, pool_conf.output_x)) - self.set_layer_size((pool_conf.output_x ** 2) * pool_conf.channels) + self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels) @config_layer('batch_norm') class BatchNormLayer(LayerBase): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 
47db197f422eae9b3d7e3517ccec91bc8f916f77..5e7e66a908ee00f1524240fb154cd9bc5f8aa31c 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -210,7 +210,7 @@ DEVICE = 'device' def layer_support(*attrs): - attrs_list = list(attrs) + attrs_list = list(attrs) attrs_list.append(DEVICE) def decorator(method): @functools.wraps(method) @@ -1627,7 +1627,9 @@ def img_conv_layer(input, filter_size, num_filters, @layer_support() def img_pool_layer(input, pool_size, name=None, num_channels=None, pool_type=None, - stride=1, start=None, padding=0, layer_attr=None): + stride=1, start=None, padding=0, layer_attr=None, + pool_size_y=None, stride_y=None, padding_y=None, + img_width=None): """ Image pooling Layer. @@ -1635,25 +1637,34 @@ def img_pool_layer(input, pool_size, name=None, .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ - :param padding: pooling padding + :param padding: pooling padding width. :type padding: int + :param padding_y: pooling padding height. It's equal to padding by default. + :type padding_y: int|None :param name: name of pooling layer :type name: basestring. :param input: layer's input :type input: LayerOutput - :param pool_size: pooling size + :param pool_size: pooling window width :type pool_size: int + :param pool_size_y: pooling window height. It's equal to pool_size by default. + :type pool_size_y: int|None :param num_channels: number of input channel. :type num_channels: int :param pool_type: pooling type. MaxPooling or AveragePooling. Default is MaxPooling. :type pool_type: BasePoolingType - :param stride: stride of pooling. + :param stride: stride width of pooling. :type stride: int - :param start: start position of pooling operation. - :type start: int + :param stride_y: stride height of pooling. It is equal to stride by default. + :type stride_y: int|None + :param start: start position of pooling operation. Note it is deprecated now. 
+ :type start: int|None :param layer_attr: Extra Layer attribute. :type layer_attr: ExtraLayerAttribute + :param img_width: the width of input feature map. If it is None, the input feature + map should be square. + :type img_width: int|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -1666,17 +1677,29 @@ def img_pool_layer(input, pool_size, name=None, elif isinstance(pool_type, AvgPooling): pool_type.name = 'avg' + type_name = pool_type.name + '-projection' \ + if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ + else pool_type.name + + pool_size_y = pool_size if pool_size_y is None else pool_size_y + stride_y = stride if stride_y is None else stride_y + padding_y = padding if padding_y is None else padding_y + Layer( name=name, type=LayerType.POOL_LAYER, inputs=[Input(input.name, pool=Pool( - pool_type=''.join([pool_type.name, '-projection']), + pool_type=type_name, channels=num_channels, size_x=pool_size, start=start, stride=stride, - padding=padding + padding=padding, + size_y=pool_size_y, + stride_y=stride_y, + padding_y=padding_y, + img_width=img_width ))], **ExtraLayerAttribute.to_kwargs(layer_attr) ) @@ -2751,7 +2774,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size, tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False, name=name) - + return tmp diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index e59e93acbe33ab354e820fbc0a34069399bf6f86..ab4057d9d6c6b98e78764f390dfa453b8c0d4e10 100644 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -170,13 +170,13 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None, :type shared_bias: bool :param conv_layer_attr: see img_conv_layer for details :type conv_layer_attr: ExtraLayerAttribute - :param pool_stride: see img_conv_layer for details + :param pool_stride: see img_pool_layer for details :type 
pool_stride: int - :param pool_start: see img_conv_layer for details + :param pool_start: see img_pool_layer for details. It is deprecated now. :type pool_start: int - :param pool_padding: see img_conv_layer for details + :param pool_padding: see img_pool_layer for details :type pool_padding: int - :param pool_layer_attr: see img_conv_layer for details + :param pool_layer_attr: see img_pool_layer for details :type pool_layer_attr: ExtraLayerAttribute :return: Layer's output :rtype: LayerOutput @@ -243,7 +243,7 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None, :param bn_layer_attr: ParameterAttribute. :param pool_stride: see img_pool_layer's document. :type pool_stride: int - :param pool_start: see img_pool_layer's document. + :param pool_start: see img_pool_layer's document. It is deprecated now. :type pool_start: int :param pool_padding: see img_pool_layer's document. :type pool_padding: int @@ -555,7 +555,7 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None, :type gate_act: BaseActivation :param state_act: lstm state activiation type. :type state_act: BaseActivation - :param mixed_bias_attr: bias parameter attribute of mixed layer. + :param mixed_bias_attr: bias parameter attribute of mixed layer. False means no bias, None means default bias. :type mixed_bias_attr: ParameterAttribute|False :param lstm_bias_attr: bias parameter attribute of lstm layer. diff --git a/python/paddle/trainer_config_helpers/poolings.py b/python/paddle/trainer_config_helpers/poolings.py index d627daab0c496d4fa465c0d3afda3cec2b98c3f9..3d2320f3ffc42e08add9874da8665b21c184f376 100644 --- a/python/paddle/trainer_config_helpers/poolings.py +++ b/python/paddle/trainer_config_helpers/poolings.py @@ -19,6 +19,8 @@ __all__ = [ "BasePoolingType", "MaxPooling", "AvgPooling", + "CudnnMaxPooling", + "CudnnAvgPooling", "SumPooling", "SquareRootNPooling" ] @@ -26,7 +28,7 @@ __all__ = [ class BasePoolingType(object): """ - Base Pooling Type. 
+ Base Pooling Type. Note these pooling types are used for sequence input, not for images. Each PoolingType contains one parameter: @@ -55,7 +57,24 @@ class MaxPooling(BasePoolingType): def __init__(self, output_max_index=None): BasePoolingType.__init__(self, "max") self.output_max_index = output_max_index - + + +class CudnnMaxPooling(BasePoolingType): + """ + Cudnn max pooling only supports GPU. Return the maximum value in the + pooling window. + """ + def __init__(self): + BasePoolingType.__init__(self, "cudnn-max-pool") + + +class CudnnAvgPooling(BasePoolingType): + """ + Cudnn average pooling only supports GPU. Return the average value in the + pooling window. + """ + def __init__(self): + BasePoolingType.__init__(self, "cudnn-avg-pool") class AvgPooling(BasePoolingType): """ diff --git a/python/paddle/trainer_config_helpers/tests/configs/check.md5 b/python/paddle/trainer_config_helpers/tests/configs/check.md5 index 29928b6f7b4239a0240b9fc035b6e1568427a9aa..359652f3d09c7fe701d194869bba038d2503c48a 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/check.md5 +++ b/python/paddle/trainer_config_helpers/tests/configs/check.md5 @@ -1,4 +1,4 @@ -7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr +86c0815275a9d5eb902e23c6a592f58a img_layers.protostr a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr 9c038249ec8ff719753a746cdb04c026 layer_activations.protostr 5913f87b39cee3b2701fa158270aca26 projections.protostr diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py index 6c8ba8be846e5d943a5b1f034e2dabaaf001cede..f33357c3906fdbbebb1b4995e84115ff4edef581 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py @@ -7,8 +7,10 @@ settings( img = data_layer(name='image', size=256*256) +# the parse_conv in config_parse.py is not strictly accurate when filter_size +# is not 
square. So a square filter_size is used here. img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, - filter_size=(32, 64), padding=(1, 0), stride=(1, 1), + filter_size=(32, 32), padding=(1, 1), stride=(1, 1), act=LinearActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) @@ -17,4 +19,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32) img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) -outputs(img_pool, img_norm) \ No newline at end of file +outputs(img_pool, img_norm)