From e2a5905eaec4bafa2d469c94f9da5c01f9aae328 Mon Sep 17 00:00:00 2001 From: sweetsky0901 Date: Wed, 22 Nov 2017 15:38:17 +0800 Subject: [PATCH] gpu test ok unpool2dmax --- paddle/operators/math/unpooling.cc | 2 - paddle/operators/math/unpooling.cu | 42 ++++++++++++------- paddle/operators/math/unpooling.h | 3 -- paddle/operators/unpool_op.cc | 3 -- paddle/operators/unpool_op.h | 9 +++- .../paddle/v2/fluid/tests/test_unpool_op.py | 4 +- 6 files changed, 38 insertions(+), 25 deletions(-) diff --git a/paddle/operators/math/unpooling.cc b/paddle/operators/math/unpooling.cc index 0becab721ec..1622dcca872 100644 --- a/paddle/operators/math/unpooling.cc +++ b/paddle/operators/math/unpooling.cc @@ -37,8 +37,6 @@ class Unpool2dMaxFunctor { const T* input_data = input.data(); const T * indices_data = indices.data(); T* output_data = output->mutable_data(context.GetPlace()); - memset(output_data, 0, \ - sizeof(T) * output_feasize * output_channels * batch_size); for (int b = 0; b < batch_size; ++b) { for (int c = 0; c < output_channels; ++c) { for (int i = 0; i < input_feasize; ++i) { diff --git a/paddle/operators/math/unpooling.cu b/paddle/operators/math/unpooling.cu index cd313770ab2..d26ceed6ad4 100644 --- a/paddle/operators/math/unpooling.cu +++ b/paddle/operators/math/unpooling.cu @@ -22,41 +22,56 @@ namespace math { template __global__ void KernelUnpool2dMax(const int nthreads, const T* input_data, - const int* indices_data, + const T* indices_data, const int input_height, const int input_width, + const int channels, T* output_data, const int output_height, const int output_width) { + int bsize = input_height * input_width * channels; + int csize = input_height * input_width; + int out_bsize = output_height * output_width * channels; + int out_csize = output_height * output_width; int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; for (int i = index; i < nthreads; i += offset) { - int out_offset = i / (input_height * input_width) \ - * output_height * output_width; + int bidx = i / bsize; + int boffset = i % bsize; + int cidx = boffset / csize; + int out_offset = bidx * out_bsize + cidx * out_csize; int out_index = indices_data[i]; PADDLE_ASSERT(out_index < (output_height * output_width)); + printf("-------%d------[%f]\n", out_offset + out_index, input_data[i]); output_data[out_offset + out_index] = input_data[i]; } } template __global__ void KernelUnpool2dMaxGrad(const int nthreads, const T* input_data, - const int* indices_data, + const T* indices_data, const int input_height, const int input_width, + const int channels, const T* output_data, const T* output_grad, const int output_height, const int output_width, T* input_grad) { + int bsize = input_height * input_width * channels; + int csize = input_height * input_width; + int out_bsize = output_height * output_width * channels; + int out_csize = output_height * output_width; int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; for (int i = index; i < nthreads; i += offset) { - int out_offset = i / (input_height * input_width) \ - * output_height * output_width; - int out_index = indices_data[i]; - PADDLE_ASSERT(out_index < (output_height * output_width)); - input_grad[i] = output_grad[out_offset + out_index]; + int bidx = i / bsize; + int boffset = i % bsize; + int cidx = boffset / csize; + int out_offset = bidx * out_bsize + cidx * out_csize; + int out_index = indices_data[i]; + PADDLE_ASSERT(out_index < (output_height * output_width)); + input_grad[i] = output_grad[out_offset + out_index]; } } /* @@ -78,8 +93,7 @@ class Unpool2dMaxFunctor { const T* input_data = input.data(); const T* indices_data = indices.data(); T* output_data = output->mutable_data(context.GetPlace()); - - int nthreads = output->numel(); + int nthreads = batch_size * output_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; dim3 threads(1024, 1); dim3 grid(blocks, 1); @@ -88,7 +102,7 @@ class Unpool2dMaxFunctor { T><<(context) .stream()>>>(nthreads, input_data, indices_data, - input_height, input_width, + input_height, input_width, output_channels, output_data, output_height, output_width); } }; @@ -115,7 +129,7 @@ class Unpool2dMaxGradFunctor { const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); T* input_grad_data = input_grad->mutable_data(context.GetPlace()); - int nthreads = output.numel(); + int nthreads = batch_size * output_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; dim3 threads(1024, 1); dim3 grid(blocks, 1); @@ -125,7 +139,7 @@ class Unpool2dMaxGradFunctor { reinterpret_cast(context) .stream()>>>( nthreads, input_data, indices_data, - input_height, input_width, + input_height, input_width, output_channels, output_data, output_grad_data, output_height, output_width, input_grad_data); diff --git a/paddle/operators/math/unpooling.h b/paddle/operators/math/unpooling.h index 93a77bf53e1..88e88ba117d 100644 --- a/paddle/operators/math/unpooling.h +++ b/paddle/operators/math/unpooling.h @@ -21,9 +21,6 @@ namespace paddle { namespace operators { namespace math { -#define FLT_MAX \ - __FLT_MAX__ - template class Unpool2dMaxFunctor { diff --git a/paddle/operators/unpool_op.cc b/paddle/operators/unpool_op.cc index 9036005a4d6..add8f157368 100644 --- a/paddle/operators/unpool_op.cc +++ b/paddle/operators/unpool_op.cc @@ -108,9 +108,6 @@ class UnpoolOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); - // PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null."); - // PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - // "Input(Out@GRAD) should not be null"); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Input(X@GRAD) should not be null."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); diff --git a/paddle/operators/unpool_op.h b/paddle/operators/unpool_op.h index 452a328eee9..e3a45ff9a71 100644 --- a/paddle/operators/unpool_op.h +++ b/paddle/operators/unpool_op.h @@ -29,11 +29,16 @@ class UnpoolKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); const Tensor* in_y = context.Input("Y"); - Tensor* out = context.Output("Out"); + auto * out = context.Output("Out"); std::string unpoolingtype = context.Attr("unpoolingtype"); std::vector ksize = context.Attr>("ksize"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); + T* output_data = out->mutable_data(context.GetPlace()); + if (output_data) { + math::SetConstant set_zero; + set_zero(context.device_context(), out, static_cast(0)); + } switch (ksize.size()) { case 2: { if (unpoolingtype == "max") { @@ -66,7 +71,7 @@ class UnpoolGradKernel : public framework::OpKernel { if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0.0)); - } + } switch (ksize.size()) { case 2: { if (unpoolingtype == "max") { diff --git a/python/paddle/v2/fluid/tests/test_unpool_op.py b/python/paddle/v2/fluid/tests/test_unpool_op.py index 566da6e26ee..7984743e6f6 100644 --- a/python/paddle/v2/fluid/tests/test_unpool_op.py +++ b/python/paddle/v2/fluid/tests/test_unpool_op.py @@ -54,6 +54,8 @@ class TestUnpoolOp(OpTest): self.outputs = {'Out': output.astype('float32')} def test_check_output(self): + print self.inputs['X'] + print self.inputs['Y'] print self.outputs['Out'] self.check_output() @@ -63,7 +65,7 @@ class TestUnpoolOp(OpTest): def init_test_case(self): self.Unpool2d_forward_naive = unpool2dmax_forward_naive self.unpoolingtype = "max" - self.shape = [10, 2, 5, 5] + self.shape = [6, 4, 5, 5] self.ksize = [3, 3] self.strides = [2, 2] self.paddings = [0, 0] -- GitLab