diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc index bc1cb3b4aa1c1bdd0a9be39a4e113301d65ce5b5..6527362bb969072b3a41cb8f74e433f030a31af0 100644 --- a/paddle/fluid/operators/tile_op.cc +++ b/paddle/fluid/operators/tile_op.cc @@ -167,6 +167,7 @@ class TileGradOp : public framework::OperatorWithKernel { framework::GradVarName("Out"), "TileGrad"); auto x_dims = ctx->GetInputDim("X"); + std::vector repeat_times = ctx->Attrs().Get>("repeat_times"); if (repeat_times.size() == 0) { diff --git a/paddle/fluid/operators/tile_op.h b/paddle/fluid/operators/tile_op.h index c6b0fdd720cf4be79dc403a53341b18366998a67..dffd3e586417705601841899e055a0dbb51d630f 100644 --- a/paddle/fluid/operators/tile_op.h +++ b/paddle/fluid/operators/tile_op.h @@ -186,9 +186,9 @@ template class TileGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); + auto* x = context.Input("X"); auto repeat_times = get_repeat_times(context); - auto x_dims = in0->dims(); + auto x_dims = x->dims(); auto vec_in_dims = framework::vectorize(x_dims); if (repeat_times.size() < vec_in_dims.size()) { int diff = vec_in_dims.size() - repeat_times.size(); @@ -220,11 +220,13 @@ class TileGradKernel : public framework::OpKernel { } // no need reduce, just copy if (just_copy) { - auto* in0 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); - out0->mutable_data(context.GetPlace()); - framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), - out0); + auto* dout = context.Input(framework::GradVarName("Out")); + auto* dx = context.Output(framework::GradVarName("X")); + dx->mutable_data(context.GetPlace()); + framework::TensorCopy(*dout, context.GetPlace(), context.device_context(), + dx); + // TensorCopy may change the dims of dx + dx->Resize(x_dims); } else { PADDLE_ENFORCE_GE(dims, 1, platform::errors::InvalidArgument( @@ -261,6 +263,7 @@ class TileGradKernel : public framework::OpKernel { for (size_t i = 0; i < reduce_size; ++i) { reduce_dims[i] = reduce_dims_vec[i]; } + auto out_grad = EigenVector::Flatten(*in0); x_grad.device( *context.template device_context().eigen_device()) =