diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 6b56d9ec8d3daae96aaaa04ed79cb637331e2281..62a761cd700d592a62b029baa9ccd25a28b53b45 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "hl_base.h" /** - * @brief Maximum pool forward. + * @brief Maximum pool forward with Mask output. * * @param[in] frameCnt batch size of input image. * @param[in] inputData input data. @@ -35,7 +35,47 @@ limitations under the License. */ * @param[in] paddingW padding width. * @param[out] tgtData output data. * @param[in] tgtStride stride between output data samples. + * @param[out] maskData the location indices of select max data + * @param[in] withMask set true if output maskData + */ +extern void hl_maxpool_forward(const int frameCnt, + const real* inputData, + const int channels, + const int height, + const int width, + const int pooledH, + const int pooledW, + const int sizeX, + const int sizeY, + const int strideH, + const int strideW, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride, + real* maskData, + bool withMask); + +/** + * @brief Maximum pool forward. * + * @param[in] frameCnt batch size of input image. + * @param[in] inputData input data. + * @param[in] channels number of channel. + * @param[in] height image height. + * @param[in] width image width. + * @param[in] pooledH output image height. + * @param[in] pooledW output image width. + * @param[in] sizeX width of pooling window. + * @param[in] sizeY height of pooling window. + * @param[in] strideH pooling stride height. + * @param[in] strideW pooling stride width. + * @param[in] paddingH padding height. + * @param[in] paddingW padding width. + * @param[out] tgtData output data. + * @param[in] tgtStride stride between output data samples. + * @param[out] maskData the location indices of select max data + * @param[in] withMask set true if output maskData */ extern void hl_maxpool_forward(const int frameCnt, const real* inputData, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index a76dbf0b6578de0606702ad1af227fbf6e1cd62e..d6e659d8422d83dc7aab0bd7cff483c7c107eeeb 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -33,6 +33,24 @@ inline void hl_maxpool_forward(const int frameCnt, real* tgtData, const int tgtStride) {} +inline void hl_maxpool_forward(const int frameCnt, + const real* inputData, + const int channels, + const int height, + const int width, + const int pooledH, + const int pooledW, + const int sizeX, + const int sizeY, + const int strideH, + const int strideW, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride, + real* MaskData, + bool withMask) {} + inline void hl_maxpool_backward(const int frameCnt, const real* inputData, const real* outData, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 58674febdc4a094c95ff03701e4586c32729847d..f2a762f1089387b10026974087ea30b488620e12 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -31,7 +31,9 @@ __global__ void KeMaxPoolForward(const int nthreads, const int offsetH, const int offsetW, real* tgtData, - const int tgtStride) { + const int tgtStride, + real* maskData, + bool withMask) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; @@ -45,16 +47,22 @@ __global__ void KeMaxPoolForward(const int nthreads, hstart = max(hstart, 0); wstart = max(wstart, 0); real maxval = -FLT_MAX; + int max_index = -1; inputData += (frameNum * channels + c) * height * width; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - if (maxval < inputData[h * width + w]) + if (maxval < inputData[h * width + w]) { maxval = inputData[h * width + w]; + max_index = h * width + w; + } } } int tgtIndex = index % (pooledW * pooledH * channels) + frameNum * tgtStride; tgtData[tgtIndex] = maxval; + if (withMask) { + maskData[tgtIndex] = max_index; + } } } @@ -92,7 +100,51 @@ void hl_maxpool_forward(const int frameCnt, paddingH, paddingW, tgtData, - tgtStride); + tgtStride, + NULL, + false); + CHECK_SYNC("hl_maxpool_forward failed"); +} + +void hl_maxpool_forward(const int frameCnt, + const real* inputData, + const int channels, + const int height, + const int width, + const int pooledH, + const int pooledW, + const int sizeX, + const int sizeY, + const int strideH, + const int strideW, + const int paddingH, + const int paddingW, + real* tgtData, + const int tgtStride, + real* maskData, + bool withMask) { + int num_kernels = pooledH * pooledW * channels * frameCnt; + int blocks = (num_kernels + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KeMaxPoolForward<<>>(num_kernels, + inputData, + channels, + height, + width, + pooledH, + pooledW, + sizeX, + sizeY, + strideH, + strideW, + paddingH, + paddingW, + tgtData, + tgtStride, + maskData, + withMask); CHECK_SYNC("hl_maxpool_forward failed"); } diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index c3e34d5309d9ca8a32d7b0a8043e668cdb5be54b..607e53074cb49923925cdf8eaa2872b2180c144c 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1029,14 +1029,51 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t outputW, size_t paddingH, size_t paddingW) { + maxPoolForward(inputMat, + imgSizeH, + imgSizeW, + channels, + sizeX, + sizeY, + strideH, + strideW, + outputH, + outputW, + paddingH, + paddingW, + NULL, + false); +} + +void GpuMatrix::maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW, + MatrixPtr maskMatP, + bool withMask) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); + real* maskData = NULL; size_t frameNum = inputMat.getHeight(); CHECK(imgSizeH * imgSizeW * channels == inputMat.getWidth()); CHECK(height_ == inputMat.getHeight()); CHECK(width_ == outputH * outputW * channels); + if (withMask) { + CHECK(maskMatP->useGpu_ == true) << "Matrix type are not equal"; + CHECK(outputH * outputW * channels == maskMatP->getWidth()); + maskData = maskMatP->getData(); + } + hl_maxpool_forward(frameNum, inputData, channels, @@ -1051,7 +1088,9 @@ void GpuMatrix::maxPoolForward(Matrix& inputMat, paddingH, paddingW, data_, - getStride()); + getStride(), + maskData, + withMask); } void GpuMatrix::maxPoolBackward(Matrix& inputMat, @@ -1974,8 +2013,39 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t outputW, size_t paddingH, size_t paddingW) { + maxPoolForward(inputMat, + imgSizeH, + imgSizeW, + channels, + sizeX, + sizeY, + strideH, + strideW, + outputH, + outputW, + paddingH, + paddingW, + NULL, + false); +} + +void CpuMatrix::maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW, + MatrixPtr maskMatP, + bool withMask) { real* inputData = inputMat.getData(); real* outData = data_; + real* maskData = NULL; size_t num = inputMat.getHeight(); size_t inLength = imgSizeH * imgSizeW; size_t outLength = outputH * outputW; @@ -1984,6 +2054,11 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, CHECK_EQ(channels * outLength, this->getWidth()); size_t outStride = getStride(); + if (withMask) { + maskData = maskMatP->getData(); + CHECK_EQ(channels * outLength, maskMatP->getWidth()); + } + /* initialize the data_ */ for (size_t i = 0; i < height_; i++) { for (size_t j = 0; j < width_; j++) { @@ -2005,10 +2080,21 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, int wstart = pw * strideW - paddingW; int wend = std::min(wstart + sizeX, imgSizeW); wstart = std::max(wstart, 0); - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - outData[ph * outputW + pw] = std::max( - outData[ph * outputW + pw], inputData[h * imgSizeW + w]); + if (!withMask) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + outData[ph * outputW + pw] = std::max( + outData[ph * outputW + pw], inputData[h * imgSizeW + w]); + } + } + } else { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + if (outData[ph * outputW + pw] < inputData[h * imgSizeW + w]) { + outData[ph * outputW + pw] = inputData[h * imgSizeW + w]; + maskData[ph * outputW + pw] = h * imgSizeW + w; + } + } } } } @@ -2016,6 +2102,8 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, // compute offset inputData += inLength; outData += outLength; + + if (withMask) maskData += outLength; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 44180bca8bca53e74d71ce7bed3516399c01c81d..87a14a0af35cb55805c76797ea7237923aadc177 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -861,7 +861,7 @@ public: /** * Pooling forward operation, pick out the largest element - * in the sizeX of value + * in the sizeX of value. */ virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH, @@ -878,6 +878,28 @@ public: LOG(FATAL) << "Not implemeted"; } + /** + * Pooling forward operation, pick out the largest element + * in the sizeX of value, if set withMask true, it will + * also caculate the location indices. + */ + virtual void maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW, + MatrixPtr maskMatP, + bool withMask) { + LOG(FATAL) << "Not implemeted"; + } + /// Pooling backward operation. virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, @@ -1428,6 +1450,21 @@ public: size_t paddingH, size_t paddingW); + void maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW, + MatrixPtr maskMatP, + bool withMask); + void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, @@ -1699,6 +1736,21 @@ public: size_t paddingH, size_t paddingW); + void maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW, + MatrixPtr maskMatP, + bool withMask); + void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,