From db1757556e0712ab74b23c5f048768c952bf59a9 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Tue, 8 Nov 2016 17:20:02 +0800
Subject: [PATCH] Follow comments

---
 paddle/cuda/src/hl_cuda_cnn.cu                | 16 ++---
 paddle/gserver/layers/BilinearInterpLayer.cpp |  9 ++-
 paddle/gserver/layers/BilinearInterpLayer.h   |  1 +
 paddle/gserver/tests/test_LayerGrad.cpp       | 10 +++
 paddle/math/Matrix.cpp                        | 69 ++++++++-----------
 paddle/math/Matrix.h                          | 24 +++++--
 paddle/math/tests/test_matrixCompare.cpp      | 11 +--
 .../paddle/trainer_config_helpers/layers.py   | 17 +++--
 8 files changed, 88 insertions(+), 69 deletions(-)

diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu
index 49c09334e0..9eec44f77f 100644
--- a/paddle/cuda/src/hl_cuda_cnn.cu
+++ b/paddle/cuda/src/hl_cuda_cnn.cu
@@ -532,8 +532,7 @@ void hl_CMRNorm_backward(size_t frameCnt, const real* inV,
   CHECK_SYNC("hl_CMRNorm_backward");
 }
 
-__global__ void KeBilinearInterpFw(const size_t nthreads,
-                                   const real* in,
+__global__ void KeBilinearInterpFw(const real* in,
                                    const size_t inImgH,
                                    const size_t inImgW,
                                    const size_t inputH,
@@ -546,6 +545,7 @@ __global__ void KeBilinearInterpFw(const size_t nthreads,
                                    const size_t numChannels,
                                    const real ratioH,
                                    const real ratioW) {
+  int nthreads = outputH * outputW;
   int tid = blockIdx.x * blockDim.x + threadIdx.x;
   if (tid < nthreads) {
     int outIdH = tid / outputW;
@@ -593,13 +593,12 @@ void hl_bilinear_forward(const real* inData,
   int blocks = (threadNum + 1024 - 1) / 1024;
 
   KeBilinearInterpFw<<< blocks, 1024, 0, STREAM_DEFAULT>>>(
-    threadNum, inData, inImgH, inImgW, inputH, inputW, outData,
-    outImgH, outImgW, outputH, outputW, numChannels, ratioH, ratioW);
+    inData, inImgH, inImgW, inputH, inputW, outData, outImgH,
+    outImgW, outputH, outputW, numChannels, ratioH, ratioW);
   CHECK_SYNC("hl_bilinear_forward failed");
 }
 
-__global__ void KeBilinearInterpBw(const size_t nthreads,
-                                   real* in,
+__global__ void KeBilinearInterpBw(real* in,
                                    const size_t inImgH,
                                    const size_t inImgW,
                                    const size_t inputH,
@@ -612,6 +611,7 @@ __global__ void KeBilinearInterpBw(const size_t nthreads,
                                    const size_t numChannels,
                                    const real ratioH,
                                    const real ratioW) {
+  int nthreads = outputH * outputW;
   int tid = blockIdx.x * blockDim.x + threadIdx.x;
   if (tid < nthreads) {
     int outIdH = tid / outputW;
@@ -659,8 +659,8 @@ void hl_bilinear_backward(real* inGrad,
   int blocks = (threadNum + 1024 - 1) / 1024;
 
   KeBilinearInterpBw<<< blocks, 1024, 0, STREAM_DEFAULT>>>(
-    threadNum, inGrad, inImgH, inImgW, inputH, inputW, outGrad,
-    outImgH, outImgW, outputH, outputW, numChannels, ratioH, ratioW);
+    inGrad, inImgH, inImgW, inputH, inputW, outGrad, outImgH,
+    outImgW, outputH, outputW, numChannels, ratioH, ratioW);
   CHECK_SYNC("hl_bilinear_backward failed");
 }
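Note on the kernel change above: `nthreads` is now derived inside the kernel
from `outputH * outputW` instead of being passed in, so the host side only has
to size the grid. A minimal standalone sketch of the matching launch
arithmetic (the sizes are made up for illustration, not taken from the patch):

    #include <cstdio>

    int main() {
      // Illustrative matrix shape: rows x width of the flattened output.
      const int outputH = 10;
      const int outputW = 64 * 64 * 3;
      const int threadNum = outputH * outputW;  // same product the kernel computes
      // Ceil division: enough 1024-thread blocks to cover every element.
      const int blocks = (threadNum + 1024 - 1) / 1024;
      // The final block is only partially full, which is why the kernel
      // still guards with `if (tid < nthreads)`.
      printf("threads=%d blocks=%d idle=%d\n", threadNum, blocks,
             blocks * 1024 - threadNum);
      return 0;
    }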
diff --git a/paddle/gserver/layers/BilinearInterpLayer.cpp b/paddle/gserver/layers/BilinearInterpLayer.cpp
index f37efc824a..ac5f87be7a 100644
--- a/paddle/gserver/layers/BilinearInterpLayer.cpp
+++ b/paddle/gserver/layers/BilinearInterpLayer.cpp
@@ -40,6 +40,11 @@ size_t BilinearInterpLayer::getSize() {
   CHECK(inImgH_ > 0 && inImgW_ > 0);
   CHECK(numChannels_);
 
+  ratioH_ = (outImgH_ > 1) ?
+    static_cast<real>(inImgH_ - 1) / (outImgH_ - 1) : 0.f;
+  ratioW_ = (outImgW_ > 1) ?
+    static_cast<real>(inImgW_ - 1) / (outImgW_ - 1) : 0.f;
+
   getOutput().setFrameHeight(outImgH_);
   getOutput().setFrameWidth(outImgW_);
   return outImgH_ * outImgW_ * numChannels_;
@@ -70,7 +75,7 @@ void BilinearInterpLayer::forward(PassType passType) {
   {
     REGISTER_TIMER_INFO("FwBilinearInterpTimer", getName().c_str());
     outV->bilinearForward(*inV, inImgH_, inImgW_, outImgH_, outImgW_,
-                          numChannels_);
+                          numChannels_, ratioH_, ratioW_);
   }
 }
 
@@ -83,7 +88,7 @@ void BilinearInterpLayer::backward(const UpdateCallback& callback) {
     REGISTER_TIMER_INFO("BwBilinearInterpTimer", getName().c_str());
     if (inputG) {
       inputG->bilinearBackward(*outG, outImgH_, outImgW_, inImgH_, inImgW_,
-                               numChannels_);
+                               numChannels_, ratioH_, ratioW_);
     }
   }
 }
diff --git a/paddle/gserver/layers/BilinearInterpLayer.h b/paddle/gserver/layers/BilinearInterpLayer.h
index 33e0cb1220..eba3c054fa 100644
--- a/paddle/gserver/layers/BilinearInterpLayer.h
+++ b/paddle/gserver/layers/BilinearInterpLayer.h
@@ -29,6 +29,7 @@ class BilinearInterpLayer : public Layer {
 protected:
   size_t outImgH_, outImgW_;
   size_t inImgH_, inImgW_;
+  real ratioH_, ratioW_;
   size_t numChannels_;
 
 public:
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index c001904499..4d4e439dc6 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -50,6 +50,16 @@ TEST(Layer, BilinearInterpLayer) {
   for (auto useGpu : {false, true}) {
     testLayerGrad(config, "bilinear_interp", 10, false, useGpu);
   }
+
+  bilinear->set_img_size_x(32);
+  bilinear->set_img_size_y(32);
+  bilinear->set_out_size_x(32);
+  bilinear->set_out_size_y(32);
+  bilinear->set_num_channels(4);
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "bilinear_interp", 10, false, useGpu);
+  }
 }
 
 TEST(Operator, dot_mul) {
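The ratio precomputation hoisted into BilinearInterpLayer::getSize() above is
corner-aligned scaling: with more than one output pixel, input pixel 0 maps to
output pixel 0 and input pixel (in - 1) maps to output pixel (out - 1). A
small standalone sketch of the mapping (the ratio() helper and the 32 -> 64
sizes are illustrative, not part of the patch):

    #include <cstdio>

    // Corner-aligned scale factor, same formula as getSize() above.
    static float ratio(int in, int out) {
      return (out > 1) ? static_cast<float>(in - 1) / (out - 1) : 0.f;
    }

    int main() {
      const int inImgH = 32, outImgH = 64;
      const float ratioH = ratio(inImgH, outImgH);  // 31/63, about 0.492
      // Every output row i samples input row ratioH * i; the integer part
      // picks the top neighbour, the fractional part becomes the h1lambda
      // weight used in Matrix.cpp below.
      for (int i : {0, 1, 63}) {
        printf("out row %2d -> in row %6.3f\n", i, ratioH * i);
      }
      return 0;
    }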
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 283733fe84..9abcbba67a 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -1227,7 +1227,9 @@ void GpuMatrix::bilinearForward(const Matrix& in,
                                 const size_t inImgW,
                                 const size_t outImgH,
                                 const size_t outImgW,
-                                const size_t numChannels) {
+                                const size_t numChannels,
+                                const real ratioH,
+                                const real ratioW) {
   CHECK(dynamic_cast<const GpuMatrix*>(&in));
 
   const size_t outputW = getWidth();
@@ -1238,11 +1240,6 @@ void GpuMatrix::bilinearForward(const Matrix& in,
   real* outData = getData();
   const real* inData = in.getData();
 
-  real ratioH = (outImgH > 1) ?
-    static_cast<real>(inImgH - 1) / (outImgH - 1) : 0.f;
-  real ratioW = (outImgW > 1) ?
-    static_cast<real>(inImgW - 1) / (outImgW - 1) : 0.f;
-
   if (inImgH == outImgH && inImgW == outImgW) {
     this->copyFrom(in);
   } else {
@@ -1258,7 +1255,9 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
                                  const size_t outImgW,
                                  const size_t inImgH,
                                  const size_t inImgW,
-                                 const size_t numChannels) {
+                                 const size_t numChannels,
+                                 const real ratioH,
+                                 const real ratioW) {
   CHECK(dynamic_cast<const GpuMatrix*>(&out));
 
   const size_t inputW = getWidth();
@@ -1269,13 +1268,8 @@ void GpuMatrix::bilinearBackward(const Matrix& out,
   real* inGrad = getData();
   const real* outGrad = out.getData();
 
-  real ratioH = (outImgH > 1) ?
-    static_cast<real>(inImgH - 1) / (outImgH - 1) : 0.f;
-  real ratioW = (outImgW > 1) ?
-    static_cast<real>(inImgW - 1) / (outImgW - 1) : 0.f;
-
   if (outImgH == inImgH && outImgW == inImgW) {
-    this->addBias(const_cast<Matrix&>(out), 1.f);
+    this->add(const_cast<Matrix&>(out));
   } else {
     hl_bilinear_backward(
       inGrad, inImgH, inImgW, inputH, inputW, outGrad,
@@ -3908,7 +3902,9 @@ void CpuMatrix::bilinearForward(const Matrix& in,
                                 const size_t inImgW,
                                 const size_t outImgH,
                                 const size_t outImgW,
-                                const size_t numChannels) {
+                                const size_t numChannels,
+                                const real ratioH,
+                                const real ratioW) {
   CHECK(dynamic_cast<const CpuMatrix*>(&in));
 
   size_t outputW = getWidth();
@@ -3920,11 +3916,6 @@ void CpuMatrix::bilinearForward(const Matrix& in,
   real* outData = getData();
   const real* inData = in.getData();
 
-  const real ratioH = (outImgH > 1) ?
-    static_cast<real>(inImgH - 1) / (outImgH - 1) : 0.f;
-  const real ratioW = (outImgW > 1) ?
-    static_cast<real>(inImgW - 1) / (outImgW - 1) : 0.f;
-
   if (inImgH == outImgH && inImgW == outImgW) {
     this->copyFrom(in);
   } else {
@@ -3932,21 +3923,23 @@ void CpuMatrix::bilinearForward(const Matrix& in,
     for (size_t i = 0; i < outImgH; ++i) {  // loop for images
       size_t h = ratioH * i;
       size_t hid = (h < inImgH - 1) ? 1 : 0;
-      real hlambda = ratioH * i - h;
+      real h1lambda = ratioH * i - h;
+      real h2lambda = 1 - h1lambda;
 
       for (size_t j = 0; j < outImgW; ++j) {
         size_t w = ratioW * j;
         size_t wid = (w < inImgW - 1) ? 1 : 0;
-        real wlambda = ratioW * j - w;
+        real w1lambda = ratioW * j - w;
+        real w2lambda = 1 - w1lambda;
 
         // calculate four positions for bilinear interpolation
         const real* inPos = &inData[k * inputW + h * inImgW + w];
         real* outPos = &outData[k * outputW + i * outImgW + j];
         for (size_t c = 0; c < numChannels; ++c) {  // loop for channels
           // bilinear interpolation
-          outPos[0] = (1.f - hlambda) *
-            ((1.f - wlambda) * inPos[0] + wlambda * inPos[wid]) +
-            hlambda * ((1.f - wlambda) * inPos[hid * inImgW] +
-            wlambda * inPos[hid * inImgW + wid]);
+          outPos[0] =
+            h2lambda * (w2lambda * inPos[0] + w1lambda * inPos[wid]) +
+            h1lambda * (w2lambda * inPos[hid * inImgW] +
+            w1lambda * inPos[hid * inImgW + wid]);
           inPos += inImgH * inImgW;
           outPos += outImgH * outImgW;
         }
@@ -3961,7 +3954,9 @@ void CpuMatrix::bilinearBackward(const Matrix& out,
                                  const size_t outImgW,
                                  const size_t inImgH,
                                  const size_t inImgW,
-                                 const size_t numChannels) {
+                                 const size_t numChannels,
+                                 const real ratioH,
+                                 const real ratioW) {
   CHECK(dynamic_cast<const CpuMatrix*>(&out));
 
   size_t inputW = getWidth();
@@ -3973,32 +3968,28 @@ void CpuMatrix::bilinearBackward(const Matrix& out,
   real* inGrad = getData();
   const real* outGrad = out.getData();
 
-  const real ratioH = (outImgH > 1) ?
-    static_cast<real>(inImgH - 1) / (outImgH - 1) : 0.f;
-  const real ratioW = (outImgW > 1) ?
-    static_cast<real>(inImgW - 1) / (outImgW - 1) : 0.f;
-
   if (inImgH == outImgH && inImgW == outImgW) {
-    this->addBias(const_cast<Matrix&>(out), 1.f);
+    this->add(const_cast<Matrix&>(out));
   } else {
     for (size_t k = 0; k < batchSize; ++k) {  // loop for batches
       for (size_t i = 0; i < outImgH; ++i) {  // loop for images
         size_t h = ratioH * i;
         size_t hid = (h < inImgH - 1) ? 1 : 0;
-        real hlambda = ratioH * i - h;
-
+        real h1lambda = ratioH * i - h;
+        real h2lambda = 1 - h1lambda;
         for (size_t j = 0; j < outImgW; ++j) {
           size_t w = ratioW * j;
           size_t wid = (w < inImgW - 1) ? 1 : 0;
-          real wlambda = ratioW * j - w;
+          real w1lambda = ratioW * j - w;
+          real w2lambda = 1 - w1lambda;
 
           real* inPos = &inGrad[k * inputW + h * inImgW + w];
           const real* outPos = &outGrad[k * outputW + i * outImgW + j];
           for (size_t c = 0; c < numChannels; ++c) {  // loop for channels
-            inPos[0] += (1.f - hlambda) * (1.f - wlambda) * outPos[0];
-            inPos[wid] += (1.f - hlambda) * wlambda * outPos[0];
-            inPos[hid * inImgW] += hlambda * (1.f - wlambda) * outPos[0];
-            inPos[hid * inImgW + wid] += hlambda * wlambda * outPos[0];
+            inPos[0] += h2lambda * w2lambda * outPos[0];
+            inPos[wid] += h2lambda * w1lambda * outPos[0];
+            inPos[hid * inImgW] += h1lambda * w2lambda * outPos[0];
+            inPos[hid * inImgW + wid] += h1lambda * w1lambda * outPos[0];
             inPos += inImgH * inImgW;
             outPos += outImgH * outImgW;
           }
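For reference, the renamed weights in CpuMatrix::bilinearForward above can be
exercised in isolation. A minimal single-channel, single-image toy (upscaling
a 2x2 image to 4x4; the driver and sizes are made up, only the weight
arithmetic mirrors the patch):

    #include <cstdio>
    #include <vector>

    int main() {
      const int inImgH = 2, inImgW = 2, outImgH = 4, outImgW = 4;
      const std::vector<float> in = {0.f, 1.f,
                                     2.f, 3.f};
      std::vector<float> out(outImgH * outImgW);

      const float ratioH = (outImgH > 1) ? float(inImgH - 1) / (outImgH - 1) : 0.f;
      const float ratioW = (outImgW > 1) ? float(inImgW - 1) / (outImgW - 1) : 0.f;

      for (int i = 0; i < outImgH; ++i) {
        const int h = int(ratioH * i);             // top source row
        const int hid = (h < inImgH - 1) ? 1 : 0;  // row step, clamped at the edge
        const float h1lambda = ratioH * i - h;     // weight of the bottom row
        const float h2lambda = 1 - h1lambda;       // weight of the top row
        for (int j = 0; j < outImgW; ++j) {
          const int w = int(ratioW * j);
          const int wid = (w < inImgW - 1) ? 1 : 0;
          const float w1lambda = ratioW * j - w;
          const float w2lambda = 1 - w1lambda;
          // Blend the four neighbours, exactly as in bilinearForward.
          const float* inPos = &in[h * inImgW + w];
          out[i * outImgW + j] =
              h2lambda * (w2lambda * inPos[0] + w1lambda * inPos[wid]) +
              h1lambda * (w2lambda * inPos[hid * inImgW] +
                          w1lambda * inPos[hid * inImgW + wid]);
        }
      }

      for (int i = 0; i < outImgH; ++i) {
        for (int j = 0; j < outImgW; ++j) printf("%5.2f ", out[i * outImgW + j]);
        printf("\n");
      }
      return 0;
    }

The corner outputs reproduce the corner inputs exactly, which is the point of
the (in - 1) / (out - 1) corner-aligned ratio.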
diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index 25748a1569..07a2aebf55 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -997,7 +997,9 @@ public:
                                const size_t inImgW,
                                const size_t outImgH,
                                const size_t outImgW,
-                               const size_t numChannels) {
+                               const size_t numChannels,
+                               const real ratioH,
+                               const real ratioW) {
     LOG(FATAL) << "Not implemented";
   }
   virtual void bilinearBackward(const Matrix& out,
@@ -1005,7 +1007,9 @@ public:
                                 const size_t outImgW,
                                 const size_t inImgH,
                                 const size_t inImgW,
-                                const size_t numChannels) {
+                                const size_t numChannels,
+                                const real ratioH,
+                                const real ratioW) {
     LOG(FATAL) << "Not implemented";
   }
 };
@@ -1283,14 +1287,18 @@ public:
                        const size_t inImgW,
                        const size_t outImgH,
                        const size_t outImgW,
-                       const size_t numChannels);
+                       const size_t numChannels,
+                       const real ratioH,
+                       const real ratioW);
 
   void bilinearBackward(const Matrix& out,
                         const size_t outImgH,
                         const size_t outImgW,
                         const size_t inImgH,
                         const size_t inImgW,
-                        const size_t numChannels);
+                        const size_t numChannels,
+                        const real ratioH,
+                        const real ratioW);
 };
 
 class CpuMatrix : public Matrix {
@@ -1583,14 +1591,18 @@ public:
                        const size_t inImgW,
                        const size_t outImgH,
                        const size_t outImgW,
-                       const size_t numChannels);
+                       const size_t numChannels,
+                       const real ratioH,
+                       const real ratioW);
 
   void bilinearBackward(const Matrix& out,
                         const size_t outImgH,
                         const size_t outImgW,
                         const size_t inImgH,
                         const size_t inImgW,
-                        const size_t numChannels);
+                        const size_t numChannels,
+                        const real ratioH,
+                        const real ratioW);
 };
 
 class SharedCpuMatrix : public CpuMatrix {
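The backward pass declared above distributes each output gradient with the
same four weights the forward pass used, so gradient mass is conserved. A toy
check of that scatter for a single output position (standalone sketch with
illustrative offsets, not Paddle's API):

    #include <cstdio>
    #include <vector>

    int main() {
      const int inImgW = 2;
      std::vector<float> inGrad(4, 0.f);  // 2x2 input gradient, zeroed

      // One output gradient landing between the four input pixels.
      const float outGrad = 1.f;
      const int h = 0, w = 0, hid = 1, wid = 1;
      const float h1lambda = 0.25f, h2lambda = 1 - h1lambda;
      const float w1lambda = 0.75f, w2lambda = 1 - w1lambda;

      // Same four accumulations as the channel loop in bilinearBackward.
      float* inPos = &inGrad[h * inImgW + w];
      inPos[0] += h2lambda * w2lambda * outGrad;
      inPos[wid] += h2lambda * w1lambda * outGrad;
      inPos[hid * inImgW] += h1lambda * w2lambda * outGrad;
      inPos[hid * inImgW + wid] += h1lambda * w1lambda * outGrad;

      // The four weights sum to 1, so the pieces add back to outGrad.
      float sum = 0.f;
      for (float g : inGrad) sum += g;
      printf("%.4f %.4f\n%.4f %.4f\nsum = %.1f\n",
             inGrad[0], inGrad[1], inGrad[2], inGrad[3], sum);
      return 0;
    }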
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index ef22e2aa8d..017fddc799 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -94,7 +94,8 @@ void testBilinearFwdBwd(int numSamples, int imgSizeH, int imgSizeW,
                         int channels) {
   int inWidth = imgSizeH * imgSizeW * channels;
   int outWidth = 2 * imgSizeH * 2 * imgSizeW * channels;
-
+  real ratioH = 0.5;
+  real ratioW = 0.5;
   // forward
   MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
   MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
@@ -107,9 +108,9 @@ void testBilinearFwdBwd(int numSamples, int imgSizeH, int imgSizeW,
   inputGpu->copyFrom(*input);
 
   target->bilinearForward(*input, imgSizeH, imgSizeW,
-                          2 * imgSizeH, 2 * imgSizeW, channels);
+                          2 * imgSizeH, 2 * imgSizeW, channels, ratioH, ratioW);
   targetGpu->bilinearForward(*inputGpu, imgSizeH, imgSizeW,
-                             2 * imgSizeH, 2 * imgSizeW, channels);
+                             2 * imgSizeH, 2 * imgSizeW, channels, ratioH, ratioW);
 
   // check
   targetCheck->copyFrom(*targetGpu);
@@ -131,9 +132,9 @@ void testBilinearFwdBwd(int numSamples, int imgSizeH, int imgSizeW,
   targetGpuGrad->copyFrom(*targetGrad);
 
   inputGrad->bilinearBackward(*targetGrad, 2 * imgSizeH, 2 * imgSizeW,
-                              imgSizeH, imgSizeW, channels);
+                              imgSizeH, imgSizeW, channels, ratioH, ratioW);
   inputGpuGrad->bilinearBackward(*targetGpuGrad, 2 * imgSizeH, 2 * imgSizeW,
-                                 imgSizeH, imgSizeW, channels);
+                                 imgSizeH, imgSizeW, channels, ratioH, ratioW);
 
   // check
   targetCheckGrad->copyFrom(*inputGpuGrad);
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 038f4d32a5..ccfdb3ded3 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1272,19 +1272,17 @@ def bilinear_interp_layer(input,
 
     .. code-block:: python
 
-       bilinear = bilinear_interp_layer(input,
-                                        out_size_x,
-                                        out_size_y)
+       bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64)
 
-    :para input: A input layer.
+    :param input: An input layer.
     :type input: LayerOutput.
-    :para out_size_x: bilinear interpolation output width.
+    :param out_size_x: bilinear interpolation output width.
     :type out_size_x: int|None
-    :para out_size_y: bilinear interpolation output height.
+    :param out_size_y: bilinear interpolation output height.
     :type out_size_y: int|None
-    :para name: The layer's name, which cna not be specified.
+    :param name: The layer's name, which can be left unspecified.
     :type name: None|basestring
-    :para layer_attr: Extra Layer attribute.
+    :param layer_attr: Extra Layer attribute.
     :type layer_attr: ExtraLayerAttribute
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -1301,7 +1299,8 @@ def bilinear_interp_layer(input,
                               num_channels=num_channels)),
         type=LayerType.BILINEAR_INTERP_LAYER,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(name, LayerType.BILINEAR_INTERP_LAYER, parents=[input])
+    return LayerOutput(name, LayerType.BILINEAR_INTERP_LAYER, parents=[input],
+                       num_filters=num_channels)
 
 @wrap_name_default()
 @layer_support()
-- 
GitLab