From 2558c3f15a9d4bd340d0d50492a185231bb2627a Mon Sep 17 00:00:00 2001 From: Haonan Date: Wed, 1 Feb 2017 14:37:54 -0800 Subject: [PATCH] revisions according to reviews --- paddle/cuda/include/hl_matrix.h | 12 ++++ paddle/cuda/include/stub/hl_matrix_stub.h | 4 ++ paddle/cuda/src/hl_cuda_matrix.cu | 25 ++++++++ paddle/gserver/layers/RotateLayer.cpp | 77 ++++++++++++----------- paddle/gserver/layers/RotateLayer.h | 16 ++--- paddle/gserver/tests/test_LayerGrad.cpp | 7 ++- paddle/math/Matrix.cpp | 24 +++---- paddle/math/Matrix.h | 14 ++++- paddle/math/tests/test_matrixCompare.cpp | 21 ++++++- 9 files changed, 142 insertions(+), 58 deletions(-) diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index abd5eb3a0c..40828dd5cc 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -267,4 +267,16 @@ extern void hl_matrix_collect_shared_bias(real* B_d, const int dimN, real scale); +/** + * @brief Matrix rotation in 90 degrees + * + * @param[in] mat input matrix (M x N). + * @param[out] matRot output matrix (N x M). + * @param[in] dimM input matrix height. + * @param[in] dimN input matrix width. 
+ * @param[in] clockWise rotation direction + */ +extern void hl_matrix_rotate( + real* mat, real* matRot, int dimM, int dimN, bool clockWise); + #endif /* HL_MATRIX_H_ */ diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index 0b669f6735..1309c5974f 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -106,4 +106,8 @@ inline void hl_matrix_collect_shared_bias(real* B_d, const int dimM, const int dimN, real scale) {} + +inline void hl_matrix_rotate( + real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} + #endif // HL_MATRIX_STUB_H_ diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index 2b4c6f7c39..cd23bd3105 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -840,3 +840,28 @@ void hl_matrix_collect_shared_bias(real* B_d, (B_d, A_d, channel, dimM, dimN, dim, limit, scale); CHECK_SYNC("hl_matrix_collect_shared_bias failed"); } + +__global__ void keMatrixRotate(real* mat, real* matRot, + int dimM, int dimN, bool clockWise) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < dimM * dimN) { + int i = idx / dimN; + int j = idx % dimN; + if (clockWise) { + matRot[j * dimM + i] = mat[(dimM - i - 1) * dimN + j]; + } else { + matRot[j * dimM + i] = mat[i * dimN + (dimN - j - 1)]; + } + } +} + +void hl_matrix_rotate(real *mat, real* matRot, + int dimM, int dimN, bool clockWise) { + CHECK_NOTNULL(mat); + CHECK_NOTNULL(matRot); + const int threads = 512; + const int blocks = DIVUP(dimM * dimN, threads); + keMatrixRotate<<< blocks, threads, 0, STREAM_DEFAULT >>> + (mat, matRot, dimM, dimN, clockWise); + CHECK_SYNC("hl_matrix_rotate failed"); +} diff --git a/paddle/gserver/layers/RotateLayer.cpp b/paddle/gserver/layers/RotateLayer.cpp index 269ad43b31..1cdd8366d5 100644 --- a/paddle/gserver/layers/RotateLayer.cpp +++ b/paddle/gserver/layers/RotateLayer.cpp @@ -23,7 +23,8 @@ bool
RotateLayer::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); CHECK_EQ(inputLayers_.size(), 1UL); - sampleHeight_ = config_.height(); + height_ = config_.height(); + width_ = config_.width(); return true; } @@ -32,26 +33,31 @@ void RotateLayer::forward(PassType passType) { MatrixPtr input = getInputValue(0); batchSize_ = input->getHeight(); - sampleSize_ = input->getWidth(); - sampleWidth_ = sampleSize_ / sampleHeight_; - CHECK_EQ(sampleSize_ % sampleHeight_, 0); + size_ = input->getWidth(); + CHECK_GE(size_, height_ * width_); + CHECK_EQ(size_ % (height_ * width_), 0) + << "The input's depth should be an int"; + channels_ = size_ / (height_ * width_); - resizeOutput(batchSize_, sampleSize_); + resizeOutput(batchSize_, size_); MatrixPtr outV = getOutputValue(); - - for (int b = 0; b < batchSize_; b++) { - MatrixPtr inputSample = Matrix::create(input->getData() + b * sampleSize_, - sampleHeight_, - sampleWidth_, - false, - useGpu_); - MatrixPtr outputSample = Matrix::create(outV->getData() + b * sampleSize_, - sampleWidth_, - sampleHeight_, - false, - useGpu_); - inputSample->rotate(outputSample, false, true); + for (int b = 0; b < batchSize_; b++) { // for each input feat map + for (int c = 0; c < channels_; c++) { // for each feat channel + MatrixPtr inputSample = + Matrix::create(input->getData() + b * size_ + c * height_ * width_, + height_, + width_, + false, + useGpu_); + MatrixPtr outputSample = + Matrix::create(outV->getData() + b * size_ + c * height_ * width_, + width_, + height_, + false, + useGpu_); + inputSample->rotate(outputSample, false, true /* clock-wise */); + } } if (getInputGrad(0)) { @@ -69,23 +75,24 @@ void RotateLayer::backward(const UpdateCallback& callback) { // the grad should be rotated in the reverse direction MatrixPtr preGrad = getInputGrad(0); - for (int b = 0; b < batchSize_; b++) { - MatrixPtr inputSampleGrad = - Matrix::create(preGrad->getData() + b * sampleSize_, - sampleHeight_, - sampleWidth_, - false, - 
useGpu_); - MatrixPtr outputSampleGrad = - Matrix::create(outputGrad->getData() + b * sampleSize_, - sampleWidth_, - sampleHeight_, - false, - useGpu_); - MatrixPtr tmpGrad = - Matrix::create(sampleHeight_, sampleWidth_, false, useGpu_); - outputSampleGrad->rotate(tmpGrad, false, false); - inputSampleGrad->add(*tmpGrad); + for (int b = 0; b < batchSize_; b++) { // for each input feat map + for (int c = 0; c < channels_; c++) { // for each feat channel + MatrixPtr inputSampleGrad = + Matrix::create(preGrad->getData() + b * size_ + c * height_ * width_, + height_, + width_, + false, + useGpu_); + MatrixPtr outputSampleGrad = Matrix::create( + outputGrad->getData() + b * size_ + c * height_ * width_, + width_, + height_, + false, + useGpu_); + MatrixPtr tmpGrad = nullptr; + outputSampleGrad->rotate(tmpGrad, true, false /* anti clock-wise */); + inputSampleGrad->add(*tmpGrad); + } } } diff --git a/paddle/gserver/layers/RotateLayer.h b/paddle/gserver/layers/RotateLayer.h index c8eca7c506..a6f565a7e5 100644 --- a/paddle/gserver/layers/RotateLayer.h +++ b/paddle/gserver/layers/RotateLayer.h @@ -19,12 +19,13 @@ limitations under the License. */ namespace paddle { /** - * A layer for rotating an input sample (assume it's a matrix) - * The rotation is in clock-wise + * A layer for rotating a multi-channel feature map (M x N x C) in the spatial + * domain + * The rotation is 90 degrees in clock-wise * \f[ - * y(j,i) = x(M-i-1,j) + * y(j,i,:) = x(M-i-1,j,:) * \f] - * where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output. + * where \f$x\f$ is (M x N x C) input, and \f$y\f$ is (N x M x C) output. 
* * The config file api is rotate_layer * @@ -41,9 +42,10 @@ public: private: int batchSize_; - int sampleSize_; - int sampleHeight_; - int sampleWidth_; + int size_; + int height_; + int width_; + int channels_; }; } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index f7a6c672de..685d2ca542 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1320,9 +1320,12 @@ TEST(Layer, RotateLayer) { TestConfig config; config.biasSize = 0; config.layerConfig.set_type("rotate"); - const int INPUT_SIZE = 64; // height * width + const int INPUT_SIZE = 64; // height * width * depth + const int HEIGHT = 8; + const int WIDTH = 4; config.layerConfig.set_size(INPUT_SIZE); - config.layerConfig.set_height(32); + config.layerConfig.set_height(HEIGHT); + config.layerConfig.set_width(WIDTH); config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); config.layerConfig.add_inputs(); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 3c09f010bb..a8b53e2105 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -388,6 +388,8 @@ void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { matTrans = std::make_shared(width_, height_); } else { CHECK(matTrans != NULL); + CHECK_EQ(matTrans->getHeight(), width_); + CHECK_EQ(matTrans->getWidth(), height_); } real* dataTrans = matTrans->getData(); real* data = getData(); @@ -402,15 +404,13 @@ void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { matRot = std::make_shared(width_, height_); } else { CHECK(matRot != NULL); + CHECK_EQ(matRot->getHeight(), width_); + CHECK_EQ(matRot->getWidth(), height_); } - MatrixPtr cpuMat = std::make_shared(height_, width_); - cpuMat->copyFrom(*this); - - MatrixPtr cpuMatRot = std::make_shared(width_, height_); - cpuMat->rotate(cpuMatRot, false, clockWise); - - matRot->copyFrom(*cpuMatRot); + real* dataRot = matRot->getData(); + real* data 
= getData(); + hl_matrix_rotate(data, dataRot, height_, width_, clockWise); } MatrixPtr GpuMatrix::getInverse() { @@ -1723,6 +1723,8 @@ void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { matTrans = std::make_shared(width_, height_); } else { CHECK(matTrans != NULL); + CHECK_EQ(matTrans->getHeight(), width_); + CHECK_EQ(matTrans->getWidth(), height_); } real* dataTrans = matTrans->getData(); real* data = getData(); @@ -1741,18 +1743,18 @@ void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { matRot = std::make_shared(width_, height_); } else { CHECK(matRot != NULL); + CHECK_EQ(matRot->getHeight(), width_); + CHECK_EQ(matRot->getWidth(), height_); } real* dataRot = matRot->getData(); real* data = getData(); - int lda = getStride(); - int ldc = matRot->getStride(); for (size_t i = 0; i < height_; i++) { for (size_t j = 0; j < width_; j++) { if (clockWise) { - dataRot[j * ldc + i] = data[(height_ - i - 1) * lda + j]; + dataRot[j * height_ + i] = data[(height_ - i - 1) * width_ + j]; } else { - dataRot[j * ldc + i] = data[i * lda + (width_ - j - 1)]; + dataRot[j * height_ + i] = data[i * width_ + (width_ - j - 1)]; } } } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index d123f827c9..c92c0a272d 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -377,9 +377,19 @@ public: } /** - * @brief rotate clock-wise. + * @brief rotate 90 degrees in clock-wise if clockWise=true; + * otherwise rotate in anti clock-wise + * clock-wise: + * \f[ + * y(j,i) = x(M-i-1,j) + * \f] + * anti clock-wise: + * \f[ + * y(j,i) = x(i, N-1-j) + * \f] + * where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output. * - * allocate matTrans' memory outside, then set memAlloc as false; + * allocate matRot' memory outside, then set memAlloc as false; * else set as true. 
*/ virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index c062ca1080..e024f2cf1b 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -176,11 +176,29 @@ void testMatrixTranspose(int height, int width) { cpu->randomizeUniform(); gpu->copyFrom(*cpu); cpu->transpose(cpuT, false); - gpu->transpose(gpuT, false); + gpu->transpose(gpuT, true); TensorCheckEqual(*cpuT, *gpuT); } +void testMatrixRotate(int height, int width) { + MatrixPtr cpu = std::make_shared(height, width); + MatrixPtr gpu = std::make_shared(height, width); + MatrixPtr cpuR = std::make_shared(width, height); + MatrixPtr gpuR = std::make_shared(width, height); + + cpu->randomizeUniform(); + gpu->copyFrom(*cpu); + + cpu->rotate(cpuR, false, true); + gpu->rotate(gpuR, true, true); + TensorCheckEqual(*cpuR, *gpuR); + + cpu->rotate(cpuR, true, false); + gpu->rotate(gpuR, false, false); + TensorCheckEqual(*cpuR, *gpuR); +} + void testMatrixInverse(int height) { MatrixPtr cpu = std::make_shared(height, height); MatrixPtr gpu = std::make_shared(height, height); @@ -215,6 +233,7 @@ TEST(Matrix, unary) { testMatrixZeroAtOffset(height, width); testMatrixGetSum(height, width); testMatrixTranspose(height, width); + testMatrixRotate(height, width); } // inverse testMatrixInverse(height); -- GitLab