diff --git a/paddle/operators/math/unpooling.cc b/paddle/operators/math/unpooling.cc index 0becab721ec115d3a79a7704ba28f2a059cffd5a..1622dcca872fc1bd1be658c9726a9944964c6c29 100644 --- a/paddle/operators/math/unpooling.cc +++ b/paddle/operators/math/unpooling.cc @@ -37,8 +37,6 @@ class Unpool2dMaxFunctor { const T* input_data = input.data(); const T * indices_data = indices.data(); T* output_data = output->mutable_data(context.GetPlace()); - memset(output_data, 0, \ - sizeof(T) * output_feasize * output_channels * batch_size); for (int b = 0; b < batch_size; ++b) { for (int c = 0; c < output_channels; ++c) { for (int i = 0; i < input_feasize; ++i) { diff --git a/paddle/operators/math/unpooling.cu b/paddle/operators/math/unpooling.cu index cd313770ab2602a298871506c8740d055e245522..d26ceed6ad411656d317ca9d4d8f760c2c5b8314 100644 --- a/paddle/operators/math/unpooling.cu +++ b/paddle/operators/math/unpooling.cu @@ -22,41 +22,56 @@ namespace math { template __global__ void KernelUnpool2dMax(const int nthreads, const T* input_data, - const int* indices_data, + const T* indices_data, const int input_height, const int input_width, + const int channels, T* output_data, const int output_height, const int output_width) { + int bsize = input_height * input_width * channels; + int csize = input_height * input_width; + int out_bsize = output_height * output_width * channels; + int out_csize = output_height * output_width; int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; for (int i = index; i < nthreads; i += offset) { - int out_offset = i / (input_height * input_width) \ - * output_height * output_width; + int bidx = i / bsize; + int boffset = i % bsize; + int cidx = boffset / csize; + int out_offset = bidx * out_bsize + cidx * out_csize; int out_index = indices_data[i]; PADDLE_ASSERT(out_index < (output_height * output_width)); + printf("-------%d------[%f]\n", out_offset + out_index, input_data[i]); output_data[out_offset + out_index] = input_data[i]; } } template __global__ void KernelUnpool2dMaxGrad(const int nthreads, const T* input_data, - const int* indices_data, + const T* indices_data, const int input_height, const int input_width, + const int channels, const T* output_data, const T* output_grad, const int output_height, const int output_width, T* input_grad) { + int bsize = input_height * input_width * channels; + int csize = input_height * input_width; + int out_bsize = output_height * output_width * channels; + int out_csize = output_height * output_width; int index = blockIdx.x * blockDim.x + threadIdx.x; int offset = blockDim.x * gridDim.x; for (int i = index; i < nthreads; i += offset) { - int out_offset = i / (input_height * input_width) \ - * output_height * output_width; - int out_index = indices_data[i]; - PADDLE_ASSERT(out_index < (output_height * output_width)); - input_grad[i] = output_grad[out_offset + out_index]; + int bidx = i / bsize; + int boffset = i % bsize; + int cidx = boffset / csize; + int out_offset = bidx * out_bsize + cidx * out_csize; + int out_index = indices_data[i]; + PADDLE_ASSERT(out_index < (output_height * output_width)); + input_grad[i] = output_grad[out_offset + out_index]; } } /* @@ -78,8 +93,7 @@ class Unpool2dMaxFunctor { const T* input_data = input.data(); const T* indices_data = indices.data(); T* output_data = output->mutable_data(context.GetPlace()); - - int nthreads = output->numel(); + int nthreads = batch_size * output_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; dim3 threads(1024, 1); dim3 grid(blocks, 1); @@ -88,7 +102,7 @@ class Unpool2dMaxFunctor { T><<(context) .stream()>>>(nthreads, input_data, indices_data, - input_height, input_width, + input_height, input_width, output_channels, output_data, output_height, output_width); } }; @@ -115,7 +129,7 @@ class Unpool2dMaxGradFunctor { const T* output_data = output.data(); const T* output_grad_data = output_grad.data(); T* input_grad_data = input_grad->mutable_data(context.GetPlace()); - int nthreads = output.numel(); + int nthreads = batch_size * output_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; dim3 threads(1024, 1); dim3 grid(blocks, 1); @@ -125,7 +139,7 @@ class Unpool2dMaxGradFunctor { reinterpret_cast(context) .stream()>>>( nthreads, input_data, indices_data, - input_height, input_width, + input_height, input_width, output_channels, output_data, output_grad_data, output_height, output_width, input_grad_data); diff --git a/paddle/operators/math/unpooling.h b/paddle/operators/math/unpooling.h index 93a77bf53e1389f6a96fefab14d71e8af6453958..88e88ba117d22e466f6e480f30e8bdf04e925641 100644 --- a/paddle/operators/math/unpooling.h +++ b/paddle/operators/math/unpooling.h @@ -21,9 +21,6 @@ namespace paddle { namespace operators { namespace math { -#define FLT_MAX \ - __FLT_MAX__ - template class Unpool2dMaxFunctor { diff --git a/paddle/operators/unpool_op.cc b/paddle/operators/unpool_op.cc index 9036005a4d6b0c5f3235ad71169489dd20dafe24..add8f157368ad99c8d3ec73dd4e0bdf828a53506 100644 --- a/paddle/operators/unpool_op.cc +++ b/paddle/operators/unpool_op.cc @@ -108,9 +108,6 @@ class UnpoolOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); - // PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) must not be null."); - // PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - // "Input(Out@GRAD) should not be null"); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Input(X@GRAD) should not be null."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); diff --git a/paddle/operators/unpool_op.h b/paddle/operators/unpool_op.h index 452a328eee925a88a1413043581495f940adf2c0..e3a45ff9a7162da2178cc1e29d15d575deea7e71 100644 --- a/paddle/operators/unpool_op.h +++ b/paddle/operators/unpool_op.h @@ -29,11 +29,16 @@ class UnpoolKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); const Tensor* in_y = context.Input("Y"); - Tensor* out = context.Output("Out"); + auto * out = context.Output("Out"); std::string unpoolingtype = context.Attr("unpoolingtype"); std::vector ksize = context.Attr>("ksize"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); + T* output_data = out->mutable_data(context.GetPlace()); + if (output_data) { + math::SetConstant set_zero; + set_zero(context.device_context(), out, static_cast(0)); + } switch (ksize.size()) { case 2: { if (unpoolingtype == "max") { @@ -66,7 +71,7 @@ class UnpoolGradKernel : public framework::OpKernel { if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0.0)); - } + } switch (ksize.size()) { case 2: { if (unpoolingtype == "max") { diff --git a/python/paddle/v2/fluid/tests/test_unpool_op.py b/python/paddle/v2/fluid/tests/test_unpool_op.py index 566da6e26ee7252127117e2ee3364ca5ec8ecb5a..7984743e6f61c887d0f76ce8d24a2b5a92e992e0 100644 --- a/python/paddle/v2/fluid/tests/test_unpool_op.py +++ b/python/paddle/v2/fluid/tests/test_unpool_op.py @@ -54,6 +54,8 @@ class TestUnpoolOp(OpTest): self.outputs = {'Out': output.astype('float32')} def test_check_output(self): + print self.inputs['X'] + print self.inputs['Y'] print self.outputs['Out'] self.check_output() @@ -63,7 +65,7 @@ class TestUnpoolOp(OpTest): def init_test_case(self): self.Unpool2d_forward_naive = unpool2dmax_forward_naive self.unpoolingtype = "max" - self.shape = [10, 2, 5, 5] + self.shape = [6, 4, 5, 5] self.ksize = [3, 3] self.strides = [2, 2] self.paddings = [0, 0]