diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index f55197c8c9ebb4a0f67ab915abfefd6a45cd13aa..9f84db72da24b0e678520b077f9cba7ffc2d589a 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -17,73 +17,6 @@ limitations under the License. */ #include "hl_base.h" -/** - * @brief Shrink column to feature. - * - * @param[in] dataCol expand data. - * @param[in] channels number of channel. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] blockH filter height. - * @param[in] blockW filter width. - * @param[in] strideH stride height. - * @param[in] strideW stride width. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[in] outputH output height. - * @param[in] outputW output width. - * @param[out] dataIm output image data. - * @param[in] alpha - * @param[in] beta - */ -extern void hl_shrink_col2feature(const real* dataCol, - size_t channels, - size_t height, - size_t width, - size_t blockH, - size_t blockW, - size_t strideH, - size_t strideW, - size_t paddingH, - size_t paddingW, - size_t outputH, - size_t outputW, - real* dataIm, - real alpha = 1.0f, - real beta = 0.0f); - -/** - * @brief Expand feature to column. - * - * @param[in] dataIm input image data. - * @param[in] channels number of channel. - * @param[in] height image height. - * @param[in] width image width. - * @param[in] blockH filter height. - * @param[in] blockW filter width. - * @param[in] strideH stride height. - * @param[in] strideW stride width. - * @param[in] paddingH padding height. - * @param[in] paddingW padding width. - * @param[in] outputH output height. - * @param[in] outputW output width. - * @param[out] dataCol expand data. - * - */ -extern void hl_expand_feature2col(const real* dataIm, - size_t channels, - size_t height, - size_t width, - size_t blockH, - size_t blockW, - size_t strideH, - size_t strideW, - size_t paddingH, - size_t paddingW, - size_t outputH, - size_t outputW, - real* dataCol); - /** * @brief Maximum pool forward. * diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 039551c6cc69525e71c8c311f78fb6dec07d7fed..2bbb9fa8dfd5eeac9d55aa67a28ebfbffa2acd46 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -17,36 +17,6 @@ limitations under the License. */ #include "hl_cnn.h" -inline void hl_shrink_col2feature(const real* dataCol, - size_t channels, - size_t height, - size_t width, - size_t blockH, - size_t blockW, - size_t strideH, - size_t strideW, - size_t paddingH, - size_t paddingW, - size_t outputH, - size_t outputW, - real* dataIm, - real alpha, - real beta) {} - -inline void hl_expand_feature2col(const real* dataIm, - size_t channels, - size_t height, - size_t width, - size_t blockH, - size_t blockW, - size_t strideH, - size_t strideW, - size_t paddingH, - size_t paddingW, - size_t outputH, - size_t outputW, - real* dataCol) {} - inline void hl_maxpool_forward(const int frameCnt, const real* inputData, const int channels, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index b94f4d8fe4a251750c527d4b686fcc8f452d4606..b6e3e63a4f52261e49467bd82fdabd063e81460e 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -18,134 +18,6 @@ limitations under the License. */ #include "hl_cnn.h" #include "hl_device_functions.cuh" -__global__ void KeFeature2col(size_t n, size_t height, const real* data_im, - size_t blockH, size_t blockW, size_t width, - size_t strideH, size_t strideW, - size_t paddingH, size_t paddingW, - size_t height_col, size_t width_col, - real* data_col) { - size_t index = - (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < n) { - size_t w_out = index % width_col; - index /= width_col; - size_t h_out = index % height_col; - size_t channel_in = index / height_col; - size_t channel_out = channel_in * blockH * blockW; - size_t h_in = h_out * strideH; - size_t w_in = w_out * strideW; - - data_col += (channel_out * height_col + h_out) * width_col + w_out; - for (size_t i = 0; i < blockH; ++i) { - for (size_t j = 0; j < blockW; ++j) { - int rIdx = int(h_in+i); - int cIdx = int(w_in+j); - if ((rIdx-(int)paddingH) >= (int)height || - (rIdx-(int)paddingH) < 0 || - (cIdx-(int)paddingW) >= (int)width || - (cIdx-(int)paddingW) < 0) { - *data_col = 0; - } else { - rIdx = rIdx + channel_in*height - paddingH; - cIdx = cIdx - paddingW; - *data_col = data_im[rIdx* width + cIdx]; - } - data_col += height_col * width_col; - } - } - } -} - -void hl_expand_feature2col(const real* dataIm, size_t channels, - size_t height, size_t width, - size_t blockH, size_t blockW, - size_t strideH, size_t strideW, - size_t paddingH, size_t paddingW, - size_t outputH, size_t outputW, - real* dataCol) { - size_t numKernels = channels * outputH * outputW; - - size_t blocks = (numKernels + 1024 -1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks+512-1)/512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - KeFeature2col<<< grid, threads, 0, STREAM_DEFAULT >>> - (numKernels, height, dataIm, blockH, blockW, width, - strideH, strideW, paddingH, paddingW, - outputH, outputW, dataCol); - CHECK_SYNC("hl_expand_feature2col failed"); -} - -__global__ void KeCol2Feature(size_t n, const real* data_col, size_t height, - size_t width, size_t channels, - size_t blockH, size_t blockW, - size_t strideH, size_t strideW, - size_t paddingH, size_t paddingW, - size_t height_col, size_t width_col, - real* data_im, real alpha, real beta) { - size_t index = - (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; - if (index < n) { - real val = 0; - int w = int(index % width); - int h = int((index / width) % height); - int c = int(index / (width * height)); - if ((w - (int)paddingW) >= 0 && - (w - (int)paddingW) < (width-2 * paddingW) && - (h - (int)paddingH) >= 0 && - (h - paddingH) < (height - 2 * paddingH)) { - // compute the start and end of the output - int w_col_start = - (w < (int)blockW) ? 0 : (w - int(blockW)) / (int)strideW + 1; - int w_col_end = - min((int)(w / (int)strideW + 1), (int)(width_col)); - int h_col_start = - (h < (int)blockH) ? 0 : (h - (int)blockH) / (int)strideH + 1; - int h_col_end = min(int(h / strideH + 1), int(height_col)); - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - // the col location: [c * width * height + h_out, w_out] - int c_col = int(c * blockH* blockW) + \ - (h - h_col * (int)strideH) * (int)blockW + - (w - w_col * (int)strideW); - val += data_col[(c_col * height_col + h_col) * width_col + w_col]; - } - } - h -= paddingH; - w -= paddingW; - real tD = data_im[c*((width-2*paddingW) * (height-2*paddingH)) + - h*(width-2*paddingW) + w]; - data_im[c*((width-2*paddingW) * (height-2*paddingH)) + - h*(width-2*paddingW) + w] = alpha * val + beta*tD; - } - } -} - -void hl_shrink_col2feature(const real * dataCol, size_t channels, - size_t height, size_t width, - size_t blockH, size_t blockW, - size_t strideH, size_t strideW, - size_t paddingH, size_t paddingW, - size_t outputH, size_t outputW, - real* dataIm, real alpha, real beta) { - size_t numKernels = channels * (height + 2*paddingH) * (width + 2*paddingW); - - size_t blocks = (numKernels + 1024 -1) / 1024; - size_t blockX = 512; - size_t blockY = (blocks+512-1)/512; - dim3 threads(1024, 1); - dim3 grid(blockX, blockY); - - // To avoid involving atomic operations, we will launch one kernel per - // bottom dimension, and then in the kernel add up the top dimensions. - KeCol2Feature<<< grid, threads, 0, STREAM_DEFAULT >>> - (numKernels, dataCol, height + 2*paddingH, width + 2*paddingW, - channels, blockH, blockW, strideH, strideW, paddingH, paddingW, - outputH, outputW, dataIm, alpha, beta); - CHECK_SYNC("hl_shrink_col2feature failed"); -} - __global__ void KeMaxPoolForward(const int nthreads, const real* inputData, const int channels, const int height, const int width, diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index c910146164ebfb0737583c72c48ce6dbc5b49939..a3ad9d46e4988baf59cacade712e207cf9ba6ff2 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1016,81 +1016,6 @@ void GpuMatrix::check(std::ostream& os, Matrix& refMat, bool printDiff) { LOG(INFO) << "the diffCnt is " << diffCnt; } -void GpuMatrix::convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW) { - CHECK(feature.useGpu_ == true) << "Matrix type are not equal"; - - CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels), - feature.getHeight() * feature.getWidth()) - << "Matrix dimensions are not equal"; - - size_t elemCnt = outputH * outputW * blockH * blockW * channels; - CHECK_EQ(elemCnt, height_ * width_) << "Matrix dimensions are not equal"; - - hl_expand_feature2col(feature.getData(), - channels, - feaImgHeight, - feaImgWidth, - blockH, - blockW, - strideH, - strideW, - paddingH, - paddingW, - outputH, - outputW, - getData()); -} - -void GpuMatrix::convShrink(Matrix& expandFeat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha, - real beta) { - CHECK(expandFeat.useGpu_ == true) << "Matrix type are not equal"; - CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels), - getHeight() * getWidth()) - << "Matrix dimensions are not equal"; - - size_t elemCnt = outputH * outputW * blockW * blockH * channels; - CHECK(elemCnt == expandFeat.getHeight() * expandFeat.getWidth()) - << "Matrix dimensions are not equal"; - hl_shrink_col2feature(expandFeat.getData(), - channels, - thisImgHeight, - thisImgWidth, - blockH, - blockW, - strideH, - strideW, - paddingH, - paddingW, - outputH, - outputW, - getData(), - alpha, - beta); -} - void GpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, @@ -1775,103 +1700,6 @@ void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) { CHECK_EQ(info, 0); } -void CpuMatrix::convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW) { - CHECK(feature.useGpu_ == false) << "Matrix type are not equal"; - - CHECK_EQ(size_t(feaImgHeight * feaImgWidth * channels), - feature.getHeight() * feature.getWidth()) - << "Matrix dimensions are not equal"; - - size_t elemCnt = outputH * outputW * blockH * blockW * channels; - CHECK_EQ(elemCnt, height_ * width_) << "Matrix dimensions are not equal"; - - int channelsCol = channels * blockH * blockW; - real* srcData = feature.getData(); - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % blockW; - int hOffset = (c / blockW) % blockH; - int c_im = c / blockH / blockW; - for (int h = 0; h < outputH; ++h) { - for (int w = 0; w < outputW; ++w) { - // no c_im*height to Exclude the channel number - int imgRowIdx = h * strideH + hOffset; - int imgColIdx = w * strideW + wOffset; - if ((imgRowIdx - paddingH) < 0 || - (imgRowIdx - paddingH) >= feaImgHeight || - (imgColIdx - paddingW) < 0 || - (imgColIdx - paddingW) >= feaImgWidth) { - data_[(c * outputH + h) * outputW + w] = 0; - } else { - imgRowIdx += c_im * feaImgHeight - paddingH; - imgColIdx -= paddingW; - data_[(c * outputH + h) * outputW + w] = - srcData[imgRowIdx * feaImgWidth + imgColIdx]; - } - } - } - } -} - -void CpuMatrix::convShrink(Matrix& expandFeat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha, - real beta) { - CHECK(expandFeat.useGpu_ == false) << "Matrix type are not equal"; - CHECK_EQ(size_t(thisImgHeight * thisImgWidth * channels), - getHeight() * getWidth()) - << "Matrix dimensions are not equal"; - - size_t elemCnt = outputH * outputW * blockH * blockW * channels; - - CHECK(elemCnt == expandFeat.getHeight() * expandFeat.getWidth()) - << "Matrix dimensions are not equal"; - - real* expandData = expandFeat.getData(); - int channelsCol = channels * blockH * blockW; - for (int c = 0; c < channelsCol; ++c) { - int wOffset = c % blockW; - int hOffset = (c / blockW) % blockH; - int c_im = c / blockW / blockH; - for (int h = 0; h < outputH; ++h) { - for (int w = 0; w < outputW; ++w) { - int imRowIdx = h * strideH + hOffset; - int imColIdx = w * strideW + wOffset; - if ((imRowIdx - paddingH) >= 0 && - (imRowIdx - paddingH) < thisImgHeight && - (imColIdx - paddingW) >= 0 && - (imColIdx - paddingW) < thisImgWidth) { - imRowIdx += c_im * thisImgHeight - paddingH; - imColIdx -= paddingW; - data_[imRowIdx * thisImgWidth + imColIdx] = - alpha * expandData[(c * outputH + h) * outputW + w] + - beta * data_[imRowIdx * thisImgWidth + imColIdx]; - } - } - } - } -} - void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 748be850b4c902d1b48c1dafbb0d5ea2bf197e6e..bbf98a609c63eeb14e66775960a5b5fc460d7ca6 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -858,49 +858,6 @@ public: LOG(FATAL) << "Not implemented"; } - /** - * This function is used to calculate the convolution: - * - * It will expand a feature matrix according to the - * convolution filters - */ - virtual void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * This function is the reverse implementation of convExpand: - * - * Its function is to restore a expanded-matrix into a feature matrix - */ - virtual void convShrink(Matrix& expandColMat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f) { - LOG(FATAL) << "Not implemeted"; - } - /** * Pooling forward operation, pick out the largest element * in the sizeX of value @@ -1334,34 +1291,6 @@ public: void classificationError(Matrix& output, IVector& label, size_t topkSize = 1); - void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW); - - void convShrink(Matrix& expandColMat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blochW, - int strideH, - int strideW, - int paddingH, - int paddingWreal, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f); - void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, @@ -1521,34 +1450,6 @@ public: MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blcokH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW); - - void convShrink(Matrix& expandFeat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f); - void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,