未验证 提交 b708ec0a 作者: fengjiayi 提交者: GitHub

Merge pull request #10412 from JiayiFeng/correct_TensorCopy_misuse

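Correct tensor copy misuse: replace `TensorCopy(src, place, dev_ctx, &dst)` with `TensorCopySync(src, place, &dst)` at call sites where the destination tensor is used immediately after the copy (e.g. reading `lod_cpu.data<int>()` on the host right after a GPU-to-CPU copy).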
......@@ -46,8 +46,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
auto* lod = lod_t->data<int>();
if (platform::is_gpu_place(ctx.GetPlace())) {
framework::Tensor lod_cpu;
framework::TensorCopy(*lod_t, platform::CPUPlace(),
ctx.device_context(), &lod_cpu);
framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
lod = lod_cpu.data<int>();
}
level0 = std::vector<int>(lod, lod + lod_t->numel());
......
......@@ -69,8 +69,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
}
std::vector<paddle::framework::Tensor> input;
......@@ -86,8 +86,8 @@ void testConcat() {
int* out_ptr;
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......@@ -142,8 +142,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
}
input.clear();
......@@ -157,8 +157,8 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......@@ -215,8 +215,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
}
input.clear();
......@@ -230,8 +230,8 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......@@ -290,8 +290,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
}
input.clear();
......@@ -305,8 +305,8 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......
......@@ -41,7 +41,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
if (paddle::platform::is_cpu_place(*place)) {
seq = cpu_seq;
} else {
TensorCopy(cpu_seq, *place, *context, &seq);
TensorCopySync(cpu_seq, *place, &seq);
seq.set_lod(lod);
}
......@@ -64,7 +64,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
if (paddle::platform::is_cpu_place(*place)) {
cpu_seq_back = seq_back;
} else {
TensorCopy(seq_back, paddle::platform::CPUPlace(), *context, &cpu_seq_back);
TensorCopySync(seq_back, paddle::platform::CPUPlace(), &cpu_seq_back);
cpu_seq_back.set_lod(lod);
}
......
......@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows;
// copy index to cpu
Tensor index_t_cpu;
TensorCopy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
TensorCopySync(*ids, platform::CPUPlace(), &index_t_cpu);
auto* index = index_t_cpu.data<int32_t>();
auto stream = ctx.cuda_device_context().stream();
platform::CUDAPlace place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
......@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows;
// copy index to cpu
Tensor index_t_cpu;
TensorCopy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
TensorCopySync(*ids, platform::CPUPlace(), &index_t_cpu);
auto* index = index_t_cpu.data<int32_t>();
auto stream = ctx.cuda_device_context().stream();
......
......@@ -66,13 +66,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
framework::TensorCopy(*offset, platform::CPUPlace(), ctx.device_context(),
&offset_cpu);
framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
framework::TensorCopy(*length, platform::CPUPlace(), ctx.device_context(),
&length_cpu);
framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
length_data = length_cpu.data<int64_t>();
}
......@@ -127,13 +125,11 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
framework::TensorCopy(*offset, platform::CPUPlace(), ctx.device_context(),
&offset_cpu);
framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
framework::TensorCopy(*length, platform::CPUPlace(), ctx.device_context(),
&length_cpu);
framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
length_data = length_cpu.data<int64_t>();
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册