未验证 提交 b97fc16d 编写于 作者: Zeng Jinle 提交者: GitHub

fix lod_reset bug, test=develop (#21392)

上级 89966525
...@@ -36,14 +36,15 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -36,14 +36,15 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto dst_ptr = dst->mutable_data(dst_place, src.type()); auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto size = src.numel() * SizeOfType(src.type()); if (src_ptr == dst_ptr && src_place == dst_place) {
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to " VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size); boost::get<platform::CPUPlace>(src_place), src_ptr, size);
} }
...@@ -79,11 +80,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -79,11 +80,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto stream = auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) { if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream); stream);
} else { } else {
...@@ -127,13 +123,15 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -127,13 +123,15 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
auto src_place = src.place(); auto src_place = src.place();
auto src_ptr = src.data<void>(); auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(dst_place, src.type()); auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (src_ptr == dst_ptr && src_place == dst_place) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data from " << src_place << " to " VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size); boost::get<platform::CPUPlace>(src_place), src_ptr, size);
} }
...@@ -153,11 +151,6 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -153,11 +151,6 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
} else if (platform::is_gpu_place(src_place) && } else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) { platform::is_gpu_place(dst_place)) {
platform::RecordEvent record_event("TensorCopy:GPU->GPU"); platform::RecordEvent record_event("TensorCopy:GPU->GPU");
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place); auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place); auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr); memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
......
...@@ -205,6 +205,11 @@ class LoDResetGradMaker : public framework::SingleGradOpMaker<T> { ...@@ -205,6 +205,11 @@ class LoDResetGradMaker : public framework::SingleGradOpMaker<T> {
} }
}; };
DECLARE_INPLACE_OP_INFERER(LodResetInplaceInferer, {"X", "Out"});
DECLARE_INPLACE_OP_INFERER(LodResetGradInplaceInferer,
{framework::GradVarName("Out"),
framework::GradVarName("X")});
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference, DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference,
"X"); "X");
...@@ -215,9 +220,10 @@ namespace ops = paddle::operators; ...@@ -215,9 +220,10 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
ops::LoDResetGradMaker<paddle::framework::OpDesc>, ops::LoDResetGradMaker<paddle::framework::OpDesc>,
ops::LoDResetGradMaker<paddle::imperative::OpBase>, ops::LoDResetGradMaker<paddle::imperative::OpBase>,
ops::LoDResetOpVarTypeInference); ops::LoDResetOpVarTypeInference, ops::LodResetInplaceInferer);
REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp, REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp,
ops::LoDResetGradNoNeedBufferVarInference); ops::LoDResetGradNoNeedBufferVarInference,
ops::LodResetGradInplaceInferer);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>, lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
......
...@@ -31,7 +31,7 @@ class LoDResetKernel : public framework::OpKernel<T> { ...@@ -31,7 +31,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
auto* lod_t = ctx.Input<framework::LoDTensor>("Y"); auto* lod_t = ctx.Input<framework::LoDTensor>("Y");
bool append = ctx.Attr<bool>("append"); bool append = ctx.Attr<bool>("append");
out->ShareDataWith(*in); framework::TensorCopy(*in, in->place(), out);
std::vector<int> level0; std::vector<int> level0;
if (lod_t) { if (lod_t) {
...@@ -45,8 +45,8 @@ class LoDResetKernel : public framework::OpKernel<T> { ...@@ -45,8 +45,8 @@ class LoDResetKernel : public framework::OpKernel<T> {
return; // early return, since lod already set return; // early return, since lod already set
} else { } else {
auto* lod = lod_t->data<int>(); auto* lod = lod_t->data<int>();
if (platform::is_gpu_place(lod_t->place())) {
framework::Tensor lod_cpu; framework::Tensor lod_cpu;
if (platform::is_gpu_place(lod_t->place())) {
framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu); framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
lod = lod_cpu.data<int>(); lod = lod_cpu.data<int>();
} }
...@@ -90,7 +90,7 @@ class LoDResetGradKernel : public framework::OpKernel<T> { ...@@ -90,7 +90,7 @@ class LoDResetGradKernel : public framework::OpKernel<T> {
auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out")); auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X")); auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->ShareDataWith(*d_out); framework::TensorCopy(*d_out, d_out->place(), d_x);
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -34,8 +34,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> { ...@@ -34,8 +34,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> {
auto* out = context.Output<framework::Tensor>("Out"); auto* out = context.Output<framework::Tensor>("Out");
if (in_x->dims() == in_y->dims()) { if (in_x->dims() == in_y->dims()) {
// TensorCopy(in_y, context.GetPlace(), context, out); framework::TensorCopy(*in_y, context.GetPlace(), out);
out->ShareDataWith(*in_y);
return; return;
} }
...@@ -70,8 +69,7 @@ class PadConstantLikeGradKernel : public framework::OpKernel<T> { ...@@ -70,8 +69,7 @@ class PadConstantLikeGradKernel : public framework::OpKernel<T> {
} }
if (in_dout->dims() == in_y->dims()) { if (in_dout->dims() == in_y->dims()) {
// TensorCopy(in_dout, context.GetPlace(), context, d_y); framework::TensorCopy(*in_dout, context.GetPlace(), d_y);
d_y->ShareDataWith(*in_dout);
return; return;
} }
......
...@@ -155,8 +155,15 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> { ...@@ -155,8 +155,15 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
context.Input<Tensor>("CustomizedSamples"); context.Input<Tensor>("CustomizedSamples");
const Tensor* customized_probabilities = const Tensor* customized_probabilities =
context.Input<Tensor>("CustomizedProbabilities"); context.Input<Tensor>("CustomizedProbabilities");
samples->ShareDataWith(*customized_samples); PADDLE_ENFORCE_EQ(customized_samples, samples,
probabilities->ShareDataWith(*customized_probabilities); platform::errors::InvalidArgument(
"CustomizedSamples must be the same Tensor with "
"Samples when use_customized_samples = True"));
PADDLE_ENFORCE_EQ(
customized_probabilities, probabilities,
platform::errors::InvalidArgument(
"CustomizedProbabilities must be the same Tensor with "
"Probabilities when use_customized_samples = True"));
} else { } else {
samples->mutable_data<int64_t>(context.GetPlace()); samples->mutable_data<int64_t>(context.GetPlace());
probabilities->mutable_data<T>(samples_dim, context.GetPlace()); probabilities->mutable_data<T>(samples_dim, context.GetPlace());
......
...@@ -195,8 +195,15 @@ class SampleLogitsKernel : public framework::OpKernel<T> { ...@@ -195,8 +195,15 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
context.Input<Tensor>("CustomizedSamples"); context.Input<Tensor>("CustomizedSamples");
const Tensor* customized_probabilities = const Tensor* customized_probabilities =
context.Input<Tensor>("CustomizedProbabilities"); context.Input<Tensor>("CustomizedProbabilities");
samples->ShareDataWith(*customized_samples); PADDLE_ENFORCE_EQ(customized_samples, samples,
probabilities->ShareDataWith(*customized_probabilities); platform::errors::InvalidArgument(
"CustomizedSamples must be the same Tensor with "
"Samples when use_customized_samples = True"));
PADDLE_ENFORCE_EQ(
customized_probabilities, probabilities,
platform::errors::InvalidArgument(
"CustomizedProbabilities must be the same Tensor with "
"Probabilities when use_customized_samples = True"));
} else { } else {
samples->mutable_data<int64_t>(context.GetPlace()); samples->mutable_data<int64_t>(context.GetPlace());
probabilities->mutable_data<T>(samples_dim, context.GetPlace()); probabilities->mutable_data<T>(samples_dim, context.GetPlace());
......
...@@ -130,14 +130,21 @@ class ScatterGradMaker : public framework::SingleGradOpMaker<T> { ...@@ -130,14 +130,21 @@ class ScatterGradMaker : public framework::SingleGradOpMaker<T> {
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ScatterGradNoNeedBufferVarsInference, DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ScatterGradNoNeedBufferVarsInference,
"Updates"); "Updates");
DECLARE_INPLACE_OP_INFERER(ScatterInplaceInferer, {"X", "Out"});
DECLARE_INPLACE_OP_INFERER(ScatterGradInplaceInferer,
{framework::GradVarName("Out"),
framework::GradVarName("X")});
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker, REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker,
ops::ScatterGradMaker<paddle::framework::OpDesc>, ops::ScatterGradMaker<paddle::framework::OpDesc>,
ops::ScatterGradMaker<paddle::imperative::OpBase>); ops::ScatterGradMaker<paddle::imperative::OpBase>,
ops::ScatterInplaceInferer);
REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp, REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp,
ops::ScatterGradNoNeedBufferVarsInference); ops::ScatterGradNoNeedBufferVarsInference,
ops::ScatterGradInplaceInferer);
REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>); REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>);
REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>); REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>);
...@@ -32,7 +32,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> { ...@@ -32,7 +32,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
auto *Out = ctx.Output<Tensor>("Out"); auto *Out = ctx.Output<Tensor>("Out");
bool overwrite = ctx.Attr<bool>("overwrite"); bool overwrite = ctx.Attr<bool>("overwrite");
Out->ShareDataWith(*X); framework::TensorCopy(*X, ctx.GetPlace(), Out);
// use template class to support int32_t and int64_t // use template class to support int32_t and int64_t
const auto &index_type = Ids->type(); const auto &index_type = Ids->type();
bool index_type_match = index_type == framework::proto::VarType::INT32 || bool index_type_match = index_type == framework::proto::VarType::INT32 ||
......
...@@ -36,7 +36,7 @@ class ScatterOpKernel : public framework::OpKernel<T> { ...@@ -36,7 +36,7 @@ class ScatterOpKernel : public framework::OpKernel<T> {
double overwrite = ctx.Attr<bool>("overwrite"); double overwrite = ctx.Attr<bool>("overwrite");
// In place output: Out = X, Out[Ids] = Updates // In place output: Out = X, Out[Ids] = Updates
framework::TensorCopySync(*X, ctx.GetPlace(), Out); framework::TensorCopy(*X, ctx.GetPlace(), Out);
// Apply ScatterUpdate: Out[index] = Updates[:] // Apply ScatterUpdate: Out[index] = Updates[:]
const auto &index_type = Ids->type(); const auto &index_type = Ids->type();
bool index_type_match = index_type == framework::proto::VarType::INT32 || bool index_type_match = index_type == framework::proto::VarType::INT32 ||
...@@ -76,7 +76,7 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> { ...@@ -76,7 +76,7 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
if (dX) { if (dX) {
// In place gradient: dX = dO // In place gradient: dX = dO
framework::TensorCopySync(*dOut, ctx.GetPlace(), dX); framework::TensorCopy(*dOut, ctx.GetPlace(), dX);
} }
if (dUpdates) { if (dUpdates) {
dUpdates->mutable_data<T>(ctx.GetPlace()); dUpdates->mutable_data<T>(ctx.GetPlace());
......
...@@ -1060,8 +1060,9 @@ def sampled_softmax_with_cross_entropy(logits, ...@@ -1060,8 +1060,9 @@ def sampled_softmax_with_cross_entropy(logits,
logits=fc, label=label, num_samples=25) logits=fc, label=label, num_samples=25)
""" """
helper = LayerHelper('sample_logits', **locals()) helper = LayerHelper('sample_logits', **locals())
samples = helper.create_variable_for_type_inference(dtype='int64') samples = customized_samples if use_customized_samples else helper.create_variable_for_type_inference(
probabilities = helper.create_variable_for_type_inference( dtype='int64')
probabilities = customized_probabilities if use_customized_samples else helper.create_variable_for_type_inference(
dtype=logits.dtype) dtype=logits.dtype)
sampled_logits \ sampled_logits \
= helper.create_variable_for_type_inference(dtype=logits.dtype) = helper.create_variable_for_type_inference(dtype=logits.dtype)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册