Unverified — Commit b97fc16d, authored by Zeng Jinle, committed by GitHub

fix lod_reset bug, test=develop (#21392)

Parent: 89966525
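In brief: the self-copy guard in TensorCopy/TensorCopySync is hoisted ahead of the place-specific branches (and tightened to also compare places), so an identical buffer on the same place skips the copy for every place combination; ShareDataWith aliasing in the lod_reset, pad_constant_like, sample_logits, and scatter kernels is replaced by explicit copies or identity checks; in-place inferers are registered for lod_reset and scatter; and a block-scoped CPU staging tensor in LoDResetKernel that left a dangling pointer is hoisted out of its if-block.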
paddle/fluid/framework/tensor_util.cc
@@ -36,14 +36,15 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   auto dst_ptr = dst->mutable_data(dst_place, src.type());
-  auto size = src.numel() * SizeOfType(src.type());
-  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
-    if (src_ptr == dst_ptr) {
+  if (src_ptr == dst_ptr && src_place == dst_place) {
     VLOG(3) << "Skip copy the same data async from " << src_place << " to "
             << dst_place;
     return;
   }
+  auto size = src.numel() * SizeOfType(src.type());
+  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
@@ -79,11 +80,6 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
     auto stream =
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
-      if (src_ptr == dst_ptr) {
-        VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-                << dst_place;
-        return;
-      }
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
                    stream);
     } else {
@@ -127,13 +123,15 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   auto src_place = src.place();
   auto src_ptr = src.data<void>();
   auto dst_ptr = dst->mutable_data(dst_place, src.type());
-  auto size = src.numel() * SizeOfType(src.type());
-  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
-    if (src_ptr == dst_ptr) {
+  if (src_ptr == dst_ptr && src_place == dst_place) {
     VLOG(3) << "Skip copy the same data from " << src_place << " to "
             << dst_place;
     return;
   }
+  auto size = src.numel() * SizeOfType(src.type());
+  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                  boost::get<platform::CPUPlace>(src_place), src_ptr, size);
   }
@@ -153,11 +151,6 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   } else if (platform::is_gpu_place(src_place) &&
              platform::is_gpu_place(dst_place)) {
     platform::RecordEvent record_event("TensorCopy:GPU->GPU");
-    if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
-      return;
-    }
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
     auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
     memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
......
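With the guard hoisted above every place-specific branch, any TensorCopy/TensorCopySync call whose source and destination share both the buffer and the place becomes a no-op, which is what makes the in-place registrations in the operator files below safe. A minimal standalone sketch of the pattern (illustrative names, not Paddle's API):

```cpp
#include <cstddef>
#include <cstring>
#include <iostream>

// Illustrative stand-in for Paddle's place/device concept.
struct Place { int device; };
bool operator==(Place a, Place b) { return a.device == b.device; }

// Sketch of the hoisted guard: if src and dst alias the same buffer on the
// same place, the copy is wasted work (and an overlapping memcpy would be
// undefined behavior), so return early before picking a copy path.
void CopyBytes(const void* src, Place src_place, void* dst, Place dst_place,
               std::size_t n) {
  if (src == dst && src_place == dst_place) {
    std::cout << "Skip copy the same data\n";
    return;
  }
  std::memcpy(dst, src, n);  // stand-in for device-aware memory::Copy
}

int main() {
  float buf[4] = {1, 2, 3, 4};
  CopyBytes(buf, {0}, buf, {0}, sizeof(buf));  // skipped: same buffer and place
  float out[4];
  CopyBytes(buf, {0}, out, {0}, sizeof(buf));  // performs the copy
  std::cout << out[3] << "\n";                 // prints 4
}
```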
paddle/fluid/operators/lod_reset_op.cc
@@ -205,6 +205,11 @@ class LoDResetGradMaker : public framework::SingleGradOpMaker<T> {
   }
 };
+DECLARE_INPLACE_OP_INFERER(LodResetInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(LodResetGradInplaceInferer,
+                           {framework::GradVarName("Out"),
+                            framework::GradVarName("X")});
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LoDResetGradNoNeedBufferVarInference,
                                       "X");
@@ -215,9 +220,10 @@ namespace ops = paddle::operators;
 REGISTER_OPERATOR(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker,
                   ops::LoDResetGradMaker<paddle::framework::OpDesc>,
                   ops::LoDResetGradMaker<paddle::imperative::OpBase>,
-                  ops::LoDResetOpVarTypeInference);
+                  ops::LoDResetOpVarTypeInference, ops::LodResetInplaceInferer);
 REGISTER_OPERATOR(lod_reset_grad, ops::LoDResetGradOp,
-                  ops::LoDResetGradNoNeedBufferVarInference);
+                  ops::LoDResetGradNoNeedBufferVarInference,
+                  ops::LodResetGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(
     lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
......
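DECLARE_INPLACE_OP_INFERER registers an input→output reuse pair, telling the executor it may hand the kernel the same storage for "X" and "Out". A hypothetical mini-kernel (names invented for illustration) showing the contract this creates, and why the explicit-copy-with-guard idiom works for both the in-place and out-of-place cases:

```cpp
#include <vector>

// lod_reset leaves the data untouched and only rewrites the LoD metadata,
// so with an {"X", "Out"} in-place pair the copy must degrade to a no-op
// whenever the framework aliased Out onto X's buffer.
void LodResetLike(const std::vector<float>& x, const std::vector<int>& lod,
                  std::vector<float>* out, std::vector<int>* out_lod) {
  if (out->data() != x.data()) {
    *out = x;      // real copy in the out-of-place case
  }                // in-place case: skipped, as in the guarded TensorCopy
  *out_lod = lod;  // only the LoD metadata actually changes
}
```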
paddle/fluid/operators/lod_reset_op.h
@@ -31,7 +31,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
     auto* lod_t = ctx.Input<framework::LoDTensor>("Y");
     bool append = ctx.Attr<bool>("append");
-    out->ShareDataWith(*in);
+    framework::TensorCopy(*in, in->place(), out);
     std::vector<int> level0;
     if (lod_t) {
@@ -45,8 +45,8 @@ class LoDResetKernel : public framework::OpKernel<T> {
         return;  // early return, since lod already set
       } else {
         auto* lod = lod_t->data<int>();
+        framework::Tensor lod_cpu;
         if (platform::is_gpu_place(lod_t->place())) {
-          framework::Tensor lod_cpu;
           framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
           lod = lod_cpu.data<int>();
         }
@@ -90,7 +90,7 @@ class LoDResetGradKernel : public framework::OpKernel<T> {
     auto* d_out = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
     auto* d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
-    d_x->ShareDataWith(*d_out);
+    framework::TensorCopy(*d_out, d_out->place(), d_x);
   }
 };
 }  // namespace operators
......
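The second lod_reset_op.h hunk fixes a lifetime bug separate from the aliasing changes: `lod_cpu` was declared inside the `if` block, so when the input LoD lived on the GPU, the `lod` pointer dangled as soon as the block ended. A minimal reproduction of the same bug shape and its fix (illustrative code, not the original kernel):

```cpp
#include <vector>

// Buggy shape: the staging buffer dies at the end of the if-block,
// leaving `lod` dangling whenever on_gpu is true.
const int* GetLodBuggy(bool on_gpu, const std::vector<int>& src) {
  const int* lod = src.data();
  if (on_gpu) {
    std::vector<int> lod_cpu(src);  // staging copy, block-scoped
    lod = lod_cpu.data();
  }  // lod_cpu destroyed here; lod now dangles
  return lod;
}

// Fixed shape, mirroring the hunk above: hoist the staging buffer so its
// lifetime covers every later use of `lod`.
const int* GetLodFixed(bool on_gpu, const std::vector<int>& src,
                       std::vector<int>* staging) {
  const int* lod = src.data();
  if (on_gpu) {
    *staging = src;
    lod = staging->data();
  }
  return lod;
}
```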
paddle/fluid/operators/pad_constant_like_op.h
@@ -34,8 +34,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> {
     auto* out = context.Output<framework::Tensor>("Out");
     if (in_x->dims() == in_y->dims()) {
-      // TensorCopy(in_y, context.GetPlace(), context, out);
-      out->ShareDataWith(*in_y);
+      framework::TensorCopy(*in_y, context.GetPlace(), out);
       return;
     }
@@ -70,8 +69,7 @@ class PadConstantLikeGradKernel : public framework::OpKernel<T> {
     }
     if (in_dout->dims() == in_y->dims()) {
-      // TensorCopy(in_dout, context.GetPlace(), context, d_y);
-      d_y->ShareDataWith(*in_dout);
+      framework::TensorCopy(*in_dout, context.GetPlace(), d_y);
       return;
     }
......
paddle/fluid/operators/sample_logits_op.cu
@@ -155,8 +155,15 @@ class SampleLogitsCUDAKernel : public framework::OpKernel<T> {
           context.Input<Tensor>("CustomizedSamples");
       const Tensor* customized_probabilities =
           context.Input<Tensor>("CustomizedProbabilities");
-      samples->ShareDataWith(*customized_samples);
-      probabilities->ShareDataWith(*customized_probabilities);
+      PADDLE_ENFORCE_EQ(customized_samples, samples,
+                        platform::errors::InvalidArgument(
+                            "CustomizedSamples must be the same Tensor with "
+                            "Samples when use_customized_samples = True"));
+      PADDLE_ENFORCE_EQ(
+          customized_probabilities, probabilities,
+          platform::errors::InvalidArgument(
+              "CustomizedProbabilities must be the same Tensor with "
+              "Probabilities when use_customized_samples = True"));
     } else {
       samples->mutable_data<int64_t>(context.GetPlace());
       probabilities->mutable_data<T>(samples_dim, context.GetPlace());
......
paddle/fluid/operators/sample_logits_op.h
@@ -195,8 +195,15 @@ class SampleLogitsKernel : public framework::OpKernel<T> {
           context.Input<Tensor>("CustomizedSamples");
       const Tensor* customized_probabilities =
           context.Input<Tensor>("CustomizedProbabilities");
-      samples->ShareDataWith(*customized_samples);
-      probabilities->ShareDataWith(*customized_probabilities);
+      PADDLE_ENFORCE_EQ(customized_samples, samples,
+                        platform::errors::InvalidArgument(
+                            "CustomizedSamples must be the same Tensor with "
+                            "Samples when use_customized_samples = True"));
+      PADDLE_ENFORCE_EQ(
+          customized_probabilities, probabilities,
+          platform::errors::InvalidArgument(
+              "CustomizedProbabilities must be the same Tensor with "
+              "Probabilities when use_customized_samples = True"));
     } else {
      samples->mutable_data<int64_t>(context.GetPlace());
       probabilities->mutable_data<T>(samples_dim, context.GetPlace());
......
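Both sample_logits kernels (CUDA above, CPU here) stop aliasing their outputs onto the customized inputs. Instead, the Python wrapper now wires customized_samples/customized_probabilities directly in as the op's Samples/Probabilities variables (see the nn.py hunk at the end of this diff), and the kernel merely verifies the identity. A hedged sketch of the new contract, with a toy Tensor type:

```cpp
#include <stdexcept>

struct Tensor {};  // toy stand-in

// When use_customized_samples is true, the caller must pass the same
// variable to both slots; the kernel checks pointer identity instead of
// silently rebinding output storage via ShareDataWith.
void CheckSameVariable(const Tensor* customized, const Tensor* output,
                       const char* what) {
  if (customized != output) {
    throw std::invalid_argument(what);  // stand-in for PADDLE_ENFORCE_EQ
  }
}
```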
paddle/fluid/operators/scatter_op.cc
@@ -130,14 +130,21 @@ class ScatterGradMaker : public framework::SingleGradOpMaker<T> {
 DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(ScatterGradNoNeedBufferVarsInference,
                                       "Updates");
+DECLARE_INPLACE_OP_INFERER(ScatterInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(ScatterGradInplaceInferer,
+                           {framework::GradVarName("Out"),
+                            framework::GradVarName("X")});
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker,
                   ops::ScatterGradMaker<paddle::framework::OpDesc>,
-                  ops::ScatterGradMaker<paddle::imperative::OpBase>);
+                  ops::ScatterGradMaker<paddle::imperative::OpBase>,
+                  ops::ScatterInplaceInferer);
 REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp,
-                  ops::ScatterGradNoNeedBufferVarsInference);
+                  ops::ScatterGradNoNeedBufferVarsInference,
+                  ops::ScatterGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>);
 REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>);
paddle/fluid/operators/scatter_op.cu
@@ -32,7 +32,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel<T> {
     auto *Out = ctx.Output<Tensor>("Out");
     bool overwrite = ctx.Attr<bool>("overwrite");
-    Out->ShareDataWith(*X);
+    framework::TensorCopy(*X, ctx.GetPlace(), Out);
     // use template class to support int32_t and int64_t
     const auto &index_type = Ids->type();
     bool index_type_match = index_type == framework::proto::VarType::INT32 ||
......
paddle/fluid/operators/scatter_op.h
@@ -36,7 +36,7 @@ class ScatterOpKernel : public framework::OpKernel<T> {
     double overwrite = ctx.Attr<bool>("overwrite");
     // In place output: Out = X, Out[Ids] = Updates
-    framework::TensorCopySync(*X, ctx.GetPlace(), Out);
+    framework::TensorCopy(*X, ctx.GetPlace(), Out);
     // Apply ScatterUpdate: Out[index] = Updates[:]
     const auto &index_type = Ids->type();
     bool index_type_match = index_type == framework::proto::VarType::INT32 ||
@@ -76,7 +76,7 @@ class ScatterGradientOpKernel : public framework::OpKernel<T> {
     if (dX) {
       // In place gradient: dX = dO
-      framework::TensorCopySync(*dOut, ctx.GetPlace(), dX);
+      framework::TensorCopy(*dOut, ctx.GetPlace(), dX);
     }
     if (dUpdates) {
       dUpdates->mutable_data<T>(ctx.GetPlace());
......
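The scatter kernels now initialize Out with an asynchronous TensorCopy instead of ShareDataWith/TensorCopySync, and the new in-place inferers let Out reuse X's buffer, in which case the copy is skipped by the guard added in tensor_util.cc. A minimal CPU sketch of the overwrite-scatter semantics involved (illustrative only, not Paddle's kernel):

```cpp
#include <cstddef>
#include <vector>

// Out starts as a copy of X, then the rows selected by ids are overwritten
// with the corresponding rows of updates: Out = X; Out[ids] = Updates.
void ScatterAssign(const std::vector<float>& x, const std::vector<int>& ids,
                   const std::vector<float>& updates, std::size_t row_width,
                   std::vector<float>* out) {
  if (out->data() != x.data()) {
    *out = x;  // skipped when the framework made Out share X's buffer
  }
  for (std::size_t i = 0; i < ids.size(); ++i) {
    for (std::size_t j = 0; j < row_width; ++j) {
      (*out)[static_cast<std::size_t>(ids[i]) * row_width + j] =
          updates[i * row_width + j];
    }
  }
}
```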
python/paddle/fluid/layers/nn.py
@@ -1060,8 +1060,9 @@ def sampled_softmax_with_cross_entropy(logits,
             logits=fc, label=label, num_samples=25)
     """
     helper = LayerHelper('sample_logits', **locals())
-    samples = helper.create_variable_for_type_inference(dtype='int64')
-    probabilities = helper.create_variable_for_type_inference(
+    samples = customized_samples if use_customized_samples else helper.create_variable_for_type_inference(
+        dtype='int64')
+    probabilities = customized_probabilities if use_customized_samples else helper.create_variable_for_type_inference(
         dtype=logits.dtype)
     sampled_logits \
         = helper.create_variable_for_type_inference(dtype=logits.dtype)
......