diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index 812612580cf53f12d8c4c267fd0060ee6034d0aa..16f0a4c6ffa39bcd0ea1a407c6d9b06ebfecdf60 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -36,14 +36,15 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   auto dst_ptr = dst->mutable_data(dst_place, src.type());
 
+  if (src_ptr == dst_ptr && src_place == dst_place) {
+    VLOG(3) << "Skip copy the same data async from " << src_place << " to "
+            << dst_place;
+    return;
+  }
+
   auto size = src.numel() * SizeOfType(src.type());
 
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
-    if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
 
@@ -79,11 +80,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
-      if (src_ptr == dst_ptr) {
-        VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-                << dst_place;
-        return;
-      }
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                    stream);
     } else {
@@ -127,13 +123,15 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   auto src_place = src.place();
   auto src_ptr = src.data<void>();
   auto dst_ptr = dst->mutable_data(dst_place, src.type());
+
+  if (src_ptr == dst_ptr && src_place == dst_place) {
+    VLOG(3) << "Skip copy the same data from " << src_place << " to "
+            << dst_place;
+    return;
+  }
+
   auto size = src.numel() * SizeOfType(src.type());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
-    if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
@@ -153,11 +151,6 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   } else if (platform::is_gpu_place(src_place) &&
              platform::is_gpu_place(dst_place)) {
     platform::RecordEvent record_event("TensorCopy:GPU->GPU");
-    if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
     auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
     memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
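The guard that used to be duplicated in the CPU-to-CPU and same-place GPU branches is now checked once, up front, so every path through TensorCopy and TensorCopySync skips the degenerate self-copy. Note the extra `src_place == dst_place` condition: pointer equality alone does not prove a true self-copy, since the skip is only sound when both tensors also live in the same place. A standalone sketch of the rule (plain C++, not Paddle source; `Place` is a stand-in for `platform::Place`):

```cpp
#include <cassert>
#include <cstring>

// Stand-in for platform::Place: just a device id here.
struct Place {
  int device;
  bool operator==(const Place& o) const { return device == o.device; }
};

// Shape of the hoisted fast path: skip only when both the buffer and the
// placement match; otherwise fall through to a real copy.
void CopyBytes(const void* src, Place src_place, void* dst, Place dst_place,
               size_t n) {
  if (src == dst && src_place == dst_place) return;  // the new early return
  std::memcpy(dst, src, n);                          // stands in for memory::Copy
}

int main() {
  float buf[4] = {1, 2, 3, 4};
  CopyBytes(buf, {0}, buf, {0}, sizeof(buf));  // self-copy: skipped
  float dst[4];
  CopyBytes(buf, {0}, dst, {0}, sizeof(buf));  // distinct buffers: copied
  assert(dst[2] == 3);
  return 0;
}
```

diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc
index 94483bc7f4cd87316c20b3280520dfb633e2bf86..c7cd230a450a9d078024c2f9a29947fc459fd9c0 100644
--- a/paddle/fluid/operators/lod_reset_op.cc
+++ b/paddle/fluid/operators/lod_reset_op.cc
@@ -205,6 +205,11 @@ class LoDResetGradMaker : public framework::SingleGradOpMaker<T> {
   }
 };
 
+DECLARE_INPLACE_OP_INFERER(LodResetInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(LodResetGradInplaceInferer,
+                           {framework::GradVarName("Out"),
+                            framework::GradVarName("X")});
+
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference,
                                       "X");
 
@@ -215,9 +220,10 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
                   ops::LoDResetGradMaker<paddle::framework::OpDesc>,
                   ops::LoDResetGradMaker<paddle::imperative::OpBase>,
-                  ops::LoDResetOpVarTypeInference);
+                  ops::LoDResetOpVarTypeInference, ops::LodResetInplaceInferer);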
 REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp,
-                  ops::LoDResetGradNoNeedBufferVarInference);
+                  ops::LoDResetGradNoNeedBufferVarInference,
+                  ops::LodResetGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(
     lod_reset, ops::LoDResetKernel<paddle::platform::CPUDeviceContext, float>,
diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h
index 7677fa2251f0b2fefe84f5baa0eb2763fbae21c1..87e8c31a9d0ae567de31e7a120d48ec8cadf755c 100644
--- a/paddle/fluid/operators/lod_reset_op.h
+++ b/paddle/fluid/operators/lod_reset_op.h
@@ -31,7 +31,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
     auto* lod_t = ctx.Input<framework::LoDTensor>("Y");
     bool append = ctx.Attr<bool>("append");
 
-    out->ShareDataWith(*in);
+    framework::TensorCopy(*in, in->place(), out);
 
     std::vector<int> level0;
     if (lod_t) {
@@ -45,8 +45,8 @@ class LoDResetKernel : public framework::OpKernel<T> {
       return;  // early return, since lod already set
     } else {
       auto* lod = lod_t->data<int>();
+      framework::Tensor lod_cpu;
       if (platform::is_gpu_place(lod_t->place())) {
-        framework::Tensor lod_cpu;
         framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
         lod = lod_cpu.data<int>();
       }
@@ -90,7 +90,7 @@ class LoDResetGradKernel : public framework::OpKernel<T> {
     auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
     auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
 
-    d_x->ShareDataWith(*d_out);
+    framework::TensorCopy(*d_out, d_out->place(), d_x);
   }
 };
 }  // namespace operators
diff --git a/paddle/fluid/operators/pad_constant_like_op.h b/paddle/fluid/operators/pad_constant_like_op.h
index 01d66901afc49a487c344b039b65f547967e95ff..4718ae915a66372df522cd24c3b9ef936a7b63c7 100644
--- a/paddle/fluid/operators/pad_constant_like_op.h
+++ b/paddle/fluid/operators/pad_constant_like_op.h
@@ -34,8 +34,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> {
     auto* out = context.Output<framework::Tensor>("Out");
 
     if (in_x->dims() == in_y->dims()) {
-      // TensorCopy(in_y, context.GetPlace(), context, out);
-      out->ShareDataWith(*in_y);
+      framework::TensorCopy(*in_y, context.GetPlace(), out);
       return;
     }
 
@@ -70,8 +69,7 @@ class PadConstantLikeGradKernel : public framework::OpKernel<T> {
     }
 
     if (in_dout->dims() == in_y->dims()) {
-      // TensorCopy(in_dout, context.GetPlace(), context, d_y);
-      d_y->ShareDataWith(*in_dout);
+      framework::TensorCopy(*in_dout, context.GetPlace(), d_y);
       return;
     }
 
diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu
index fb49793b730f72d66dc846f233bd95ebdab37c52..4bcd27036a53017e437e4819d00d886172040adb 100644
--- a/paddle/fluid/operators/sample_logits_op.cu
+++ b/paddle/fluid/operators/sample_logits_op.cu
@@ -155,8 +155,15 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
           context.Input<Tensor>("CustomizedSamples");
       const Tensor* customized_probabilities =
           context.Input<Tensor>("CustomizedProbabilities");
-      samples->ShareDataWith(*customized_samples);
-      probabilities->ShareDataWith(*customized_probabilities);
+      PADDLE_ENFORCE_EQ(customized_samples, samples,
+                        platform::errors::InvalidArgument(
+                            "CustomizedSamples must be the same Tensor with "
+                            "Samples when use_customized_samples = True"));
+      PADDLE_ENFORCE_EQ(
+          customized_probabilities, probabilities,
+          platform::errors::InvalidArgument(
+              "CustomizedProbabilities must be the same Tensor with "
+              "Probabilities when use_customized_samples = True"));
     } else {
       samples->mutable_data<int64_t>(context.GetPlace());
       probabilities->mutable_data<T>(samples_dim, context.GetPlace());
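Two things happen in the lod_reset and pad_constant_like kernels above. First, ShareDataWith, which silently aliases the input's buffer into the output, is replaced by a genuine TensorCopy; combined with the new inplace inferers and the hoisted early return, the copy still costs nothing when the executor actually binds Out to X's buffer, but no longer aliases when it doesn't. Second, `lod_cpu` is hoisted out of the `is_gpu_place` block: in the old code `lod` was left pointing into a Tensor that died at the closing brace, so the reads that follow the block were a use-after-free on the GPU path. A standalone sketch of the corrected lifetime (plain C++, names hypothetical):

```cpp
#include <cstdio>
#include <vector>

// ReadLevels models the fix in LoDResetKernel: the host buffer that `lod`
// may point into is declared in the enclosing scope, so it outlives the
// branch and the read at the end stays valid on both paths.
std::vector<int> ReadLevels(const int* device_data, int n, bool on_gpu) {
  std::vector<int> host_copy;  // hoisted, like lod_cpu in the diff
  const int* lod = device_data;
  if (on_gpu) {
    // stands in for TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu)
    host_copy.assign(device_data, device_data + n);
    lod = host_copy.data();  // still valid after the if-block
  }
  return std::vector<int>(lod, lod + n);
}

int main() {
  int raw[] = {0, 2, 5};
  for (int v : ReadLevels(raw, 3, true)) std::printf("%d ", v);  // 0 2 5
  std::printf("\n");
  return 0;
}
```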
diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h
index 18ef6c9d3fe62a913413cf8c84e23b7c6accfc5c..5c0d3a677db679f1f70186ce5b57ca7c79263ab9 100644
--- a/paddle/fluid/operators/sample_logits_op.h
+++ b/paddle/fluid/operators/sample_logits_op.h
@@ -195,8 +195,15 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
           context.Input<Tensor>("CustomizedSamples");
       const Tensor* customized_probabilities =
           context.Input<Tensor>("CustomizedProbabilities");
-      samples->ShareDataWith(*customized_samples);
-      probabilities->ShareDataWith(*customized_probabilities);
+      PADDLE_ENFORCE_EQ(customized_samples, samples,
+                        platform::errors::InvalidArgument(
+                            "CustomizedSamples must be the same Tensor with "
+                            "Samples when use_customized_samples = True"));
+      PADDLE_ENFORCE_EQ(
+          customized_probabilities, probabilities,
+          platform::errors::InvalidArgument(
+              "CustomizedProbabilities must be the same Tensor with "
+              "Probabilities when use_customized_samples = True"));
     } else {
       samples->mutable_data<int64_t>(context.GetPlace());
       probabilities->mutable_data<T>(samples_dim, context.GetPlace());
diff --git a/paddle/fluid/operators/scatter_op.cc b/paddle/fluid/operators/scatter_op.cc
index d763f74d4ee6e66b8ec6de5d3e0c11679c664687..6ba5ba7bb277c21384c12b60f9f573e4afc31513 100644
--- a/paddle/fluid/operators/scatter_op.cc
+++ b/paddle/fluid/operators/scatter_op.cc
@@ -130,14 +130,21 @@ class ScatterGradMaker : public framework::SingleGradOpMaker<T> {
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ScatterGradNoNeedBufferVarsInference,
                                       "Updates");
 
+DECLARE_INPLACE_OP_INFERER(ScatterInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(ScatterGradInplaceInferer,
+                           {framework::GradVarName("Out"),
+                            framework::GradVarName("X")});
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker,
                   ops::ScatterGradMaker<paddle::framework::OpDesc>,
-                  ops::ScatterGradMaker<paddle::imperative::OpBase>);
+                  ops::ScatterGradMaker<paddle::imperative::OpBase>,
+                  ops::ScatterInplaceInferer);
 REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp,
-                  ops::ScatterGradNoNeedBufferVarsInference);
+                  ops::ScatterGradNoNeedBufferVarsInference,
+                  ops::ScatterGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>);
 REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>);
diff --git a/paddle/fluid/operators/scatter_op.cu b/paddle/fluid/operators/scatter_op.cu
index 6c4da760ce828e49b55c5d488958e1039fe62702..f3e0faa164c2f95bcb2402a7ba92847cfb67167c 100644
--- a/paddle/fluid/operators/scatter_op.cu
+++ b/paddle/fluid/operators/scatter_op.cu
@@ -32,7 +32,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
     auto *Out = ctx.Output<Tensor>("Out");
     bool overwrite = ctx.Attr<bool>("overwrite");
 
-    Out->ShareDataWith(*X);
+    framework::TensorCopy(*X, ctx.GetPlace(), Out);
     // use template class to support int32_t and int64_t
     const auto &index_type = Ids->type();
     bool index_type_match = index_type == framework::proto::VarType::INT32 ||
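ScatterInplaceInferer tells the framework that scatter's Out may reuse X's Variable, and the grad inferer pairs GradVarName("Out") with GradVarName("X") the same way. The kernels therefore keep an unconditional copy-then-update structure; when the inplace pass does bind a pair to one buffer, the copy hits the new TensorCopy early return and becomes free. A standalone model of the overwrite-mode computation the kernel performs (plain C++, not Paddle source):

```cpp
#include <cstdio>
#include <vector>

// Scatter forward in overwrite mode: Out = X, then Out[Ids[i]] = Updates[i].
std::vector<float> Scatter(const std::vector<float>& x,
                           const std::vector<int>& ids,
                           const std::vector<float>& updates) {
  std::vector<float> out = x;  // the TensorCopy step; a no-op under inplace
  for (size_t i = 0; i < ids.size(); ++i) out[ids[i]] = updates[i];
  return out;
}

int main() {
  std::vector<float> out = Scatter({1, 2, 3, 4}, {3, 1}, {9.5f, 7.5f});
  for (float v : out) std::printf("%g ", v);  // 1 7.5 3 9.5
  std::printf("\n");
  return 0;
}
```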
diff --git a/paddle/fluid/operators/scatter_op.h b/paddle/fluid/operators/scatter_op.h
index 97254f817d9856aca9ffe1a101551b902541d9cf..b4043b6ebbf8ba2ac77ecbbd3427db13bdf99397 100644
--- a/paddle/fluid/operators/scatter_op.h
+++ b/paddle/fluid/operators/scatter_op.h
@@ -36,7 +36,7 @@ class ScatterOpKernel : public framework::OpKernel<T> {
     double overwrite = ctx.Attr<bool>("overwrite");
 
     // In place output: Out = X, Out[Ids] = Updates
-    framework::TensorCopySync(*X, ctx.GetPlace(), Out);
+    framework::TensorCopy(*X, ctx.GetPlace(), Out);
     // Apply ScatterUpdate: Out[index] = Updates[:]
     const auto &index_type = Ids->type();
     bool index_type_match = index_type == framework::proto::VarType::INT32 ||
@@ -76,7 +76,7 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
     if (dX) {
       // In place gradient: dX = dO
-      framework::TensorCopySync(*dOut, ctx.GetPlace(), dX);
+      framework::TensorCopy(*dOut, ctx.GetPlace(), dX);
     }
     if (dUpdates) {
       dUpdates->mutable_data<T>(ctx.GetPlace());
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index 2b3833b12a65b61b794cc80973d4b32cfbc1d76d..e5eaa270afec60bd62047e5c607d5e5a1c67cbf2 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -1060,8 +1060,9 @@ def sampled_softmax_with_cross_entropy(logits,
             logits=fc, label=label, num_samples=25)
     """
     helper = LayerHelper('sample_logits', **locals())
-    samples = helper.create_variable_for_type_inference(dtype='int64')
-    probabilities = helper.create_variable_for_type_inference(
+    samples = customized_samples if use_customized_samples else helper.create_variable_for_type_inference(
+        dtype='int64')
+    probabilities = customized_probabilities if use_customized_samples else helper.create_variable_for_type_inference(
         dtype=logits.dtype)
     sampled_logits \
         = helper.create_variable_for_type_inference(dtype=logits.dtype)
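The same pattern covers the backward pass: switching TensorCopySync to TensorCopy keeps dX = dOut as a plain pass-through that the inplace binding can elide (per the kernel's own comment, dX = dO; dUpdates then gathers the scattered rows). The loss.py change closes the loop for sample_logits: when use_customized_samples is set, the layer now passes customized_samples and customized_probabilities through as the op's output variables instead of creating fresh ones, which is exactly the identity the new PADDLE_ENFORCE_EQ checks assert at kernel time. A standalone model of the backward computation (plain C++, not Paddle source):

```cpp
#include <cstdio>
#include <vector>

// Scatter backward, mirroring the kernel: dX is a pass-through copy of
// dOut (skipped entirely when the grad inplace pair shares one buffer),
// and dUpdates gathers dOut at the scattered indices.
void ScatterGrad(const std::vector<float>& d_out, const std::vector<int>& ids,
                 std::vector<float>* d_x, std::vector<float>* d_updates) {
  *d_x = d_out;  // the TensorCopy step
  d_updates->clear();
  for (int id : ids) d_updates->push_back(d_out[id]);  // the gather step
}

int main() {
  std::vector<float> d_x, d_updates;
  ScatterGrad({0.1f, 0.2f, 0.3f, 0.4f}, {3, 1}, &d_x, &d_updates);
  for (float v : d_updates) std::printf("%g ", v);  // 0.4 0.2
  std::printf("\n");
  return 0;
}
```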