diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 1d7a2eb5b38255531880fe3d2e5321024caf0c6b..de77d189c8591c730741da0152359e0c279d3ae8 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -114,6 +114,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, auto dst_ptr = dst->mutable_data(dst_place, src.type()); auto size = src.numel() * SizeOfType(src.type()); if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { + if (src_ptr == dst_ptr) { + VLOG(3) << "Skip copy the same data from " << src.place() << " to " + << dst_place; + return; + } memory::Copy(boost::get(dst_place), dst_ptr, boost::get(src_place), src_ptr, size); } @@ -132,6 +137,12 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, platform::is_gpu_place(dst_place)) { auto src_gpu_place = boost::get(src_place); auto dst_gpu_place = boost::get(dst_place); + if (src_ptr == dst_ptr && + src_gpu_place.GetDeviceId() == dst_gpu_place.GetDeviceId()) { + VLOG(3) << "Skip copy the same data from " << src.place() << " to " + << dst_place; + return; + } memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr); } #endif diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index b8fdc3f826662a9ca7ec3b3bf2a4da7308a757ab..500d86fec33830fc2cfb0412f1f2c7780d08eb02 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -195,7 +195,6 @@ class ReshapeGradOp : public framework::OperatorWithKernel { } }; -template class ReshapeKernel { public: void operator()(const framework::ExecutionContext &ctx) const { @@ -228,15 +227,12 @@ class ReshapeKernel { "sequence_reshape op."); } - if (in->data() != - reinterpret_cast(out->mutable_data(ctx.GetPlace(), in->type()))) { - framework::TensorCopySync(*in, ctx.GetPlace(), out); - } + out->mutable_data(ctx.GetPlace(), in->type()); + framework::TensorCopySync(*in, ctx.GetPlace(), out); out->Resize(out_dims); } }; -template class ReshapeGradKernel { public: void operator()(const framework::ExecutionContext &ctx) const { @@ -244,9 +240,8 @@ class ReshapeGradKernel { auto *d_x = ctx.Output(framework::GradVarName("X")); auto in_dims = d_x->dims(); - if (d_out->data() != d_x->mutable_data(ctx.GetPlace(), d_out->type())) { - framework::TensorCopySync(*d_out, ctx.GetPlace(), d_x); - } + d_x->mutable_data(ctx.GetPlace(), d_out->type()); + framework::TensorCopySync(*d_out, ctx.GetPlace(), d_x); d_x->Resize(in_dims); } }; @@ -341,46 +336,38 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp); -REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, - double, ops::ReshapeKernel, int, - ops::ReshapeKernel, int64_t, - ops::ReshapeKernel); -REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, - ops::ReshapeGradKernel, double, - ops::ReshapeGradKernel, int, - ops::ReshapeGradKernel, int64_t, - ops::ReshapeGradKernel); +REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double, + ops::ReshapeKernel, int, ops::ReshapeKernel, + int64_t, ops::ReshapeKernel); +REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel, + double, ops::ReshapeGradKernel, int, + ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel); REGISTER_OPERATOR(reshape2, ops::Reshape2Op, ops::Reshape2OpMaker, ops::Reshape2GradMaker); REGISTER_OPERATOR(reshape2_grad, ops::Reshape2GradOp); -REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, - double, ops::ReshapeKernel, int, - ops::ReshapeKernel, int64_t, - ops::ReshapeKernel); -REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, - ops::ReshapeGradKernel, double, - ops::ReshapeGradKernel, int, - ops::ReshapeGradKernel, int64_t, - ops::ReshapeGradKernel); +REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double, + ops::ReshapeKernel, int, ops::ReshapeKernel, + int64_t, ops::ReshapeKernel); +REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel, + double, ops::ReshapeGradKernel, int, + ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel); #ifdef PADDLE_WITH_CUDA -REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, - double, ops::ReshapeKernel, int, - ops::ReshapeKernel, int64_t, - ops::ReshapeKernel); -REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, - ops::ReshapeGradKernel, double, - ops::ReshapeGradKernel, int, - ops::ReshapeGradKernel, int64_t, - ops::ReshapeGradKernel); -REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, - double, ops::ReshapeKernel, int, - ops::ReshapeKernel, int64_t, - ops::ReshapeKernel); -REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2_grad, float, - ops::ReshapeGradKernel, double, - ops::ReshapeGradKernel, int, - ops::ReshapeGradKernel, int64_t, - ops::ReshapeGradKernel); +REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double, + ops::ReshapeKernel, int, ops::ReshapeKernel, + int64_t, ops::ReshapeKernel); +REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel, + double, ops::ReshapeGradKernel, int, + ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel); +REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double, + ops::ReshapeKernel, int, ops::ReshapeKernel, + int64_t, ops::ReshapeKernel); +REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel, + double, ops::ReshapeGradKernel, int, + ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel); #endif