未验证 提交 b708ec0a 作者: fengjiayi 提交者: GitHub

Merge pull request #10412 from JiayiFeng/correct_TensorCopy_misuse

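Correct TensorCopy misuse: replace TensorCopy calls that pass a device context with TensorCopySync (which takes none) in the LoD-reset, multiplex, and sequence-slice kernels and in the concat and sequence-padding tests.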
...@@ -46,8 +46,7 @@ class LoDResetKernel : public framework::OpKernel<T> { ...@@ -46,8 +46,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
auto* lod = lod_t->data<int>(); auto* lod = lod_t->data<int>();
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
framework::Tensor lod_cpu; framework::Tensor lod_cpu;
framework::TensorCopy(*lod_t, platform::CPUPlace(), framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu);
ctx.device_context(), &lod_cpu);
lod = lod_cpu.data<int>(); lod = lod_cpu.data<int>();
} }
level0 = std::vector<int>(lod, lod + lod_t->numel()); level0 = std::vector<int>(lod, lod + lod_t->numel());
......
...@@ -69,8 +69,8 @@ void testConcat() { ...@@ -69,8 +69,8 @@ void testConcat() {
} }
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
} }
std::vector<paddle::framework::Tensor> input; std::vector<paddle::framework::Tensor> input;
...@@ -86,7 +86,7 @@ void testConcat() { ...@@ -86,7 +86,7 @@ void testConcat() {
int* out_ptr; int* out_ptr;
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu); &out_cpu);
out_ptr = out_cpu.data<int>(); out_ptr = out_cpu.data<int>();
} else { } else {
...@@ -142,8 +142,8 @@ void testConcat() { ...@@ -142,8 +142,8 @@ void testConcat() {
} }
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
} }
input.clear(); input.clear();
...@@ -157,7 +157,7 @@ void testConcat() { ...@@ -157,7 +157,7 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu); &out_cpu);
out_ptr = out_cpu.data<int>(); out_ptr = out_cpu.data<int>();
} else { } else {
...@@ -215,8 +215,8 @@ void testConcat() { ...@@ -215,8 +215,8 @@ void testConcat() {
} }
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
} }
input.clear(); input.clear();
...@@ -230,7 +230,7 @@ void testConcat() { ...@@ -230,7 +230,7 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu); &out_cpu);
out_ptr = out_cpu.data<int>(); out_ptr = out_cpu.data<int>();
} else { } else {
...@@ -290,8 +290,8 @@ void testConcat() { ...@@ -290,8 +290,8 @@ void testConcat() {
} }
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); paddle::framework::TensorCopySync(input_a_cpu, Place(), &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b);
} }
input.clear(); input.clear();
...@@ -305,7 +305,7 @@ void testConcat() { ...@@ -305,7 +305,7 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) { if (paddle::platform::is_gpu_place(Place())) {
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, paddle::framework::TensorCopySync(out, paddle::platform::CPUPlace(),
&out_cpu); &out_cpu);
out_ptr = out_cpu.data<int>(); out_ptr = out_cpu.data<int>();
} else { } else {
......
...@@ -41,7 +41,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod, ...@@ -41,7 +41,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
seq = cpu_seq; seq = cpu_seq;
} else { } else {
TensorCopy(cpu_seq, *place, *context, &seq); TensorCopySync(cpu_seq, *place, &seq);
seq.set_lod(lod); seq.set_lod(lod);
} }
...@@ -64,7 +64,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod, ...@@ -64,7 +64,7 @@ void TestSequencePadding(const paddle::framework::LoD& lod,
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
cpu_seq_back = seq_back; cpu_seq_back = seq_back;
} else { } else {
TensorCopy(seq_back, paddle::platform::CPUPlace(), *context, &cpu_seq_back); TensorCopySync(seq_back, paddle::platform::CPUPlace(), &cpu_seq_back);
cpu_seq_back.set_lod(lod); cpu_seq_back.set_lod(lod);
} }
......
...@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> { ...@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows; auto cols = ins[0]->numel() / rows;
// copy index to cpu // copy index to cpu
Tensor index_t_cpu; Tensor index_t_cpu;
TensorCopy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); TensorCopySync(*ids, platform::CPUPlace(), &index_t_cpu);
auto* index = index_t_cpu.data<int32_t>(); auto* index = index_t_cpu.data<int32_t>();
auto stream = ctx.cuda_device_context().stream(); auto stream = ctx.cuda_device_context().stream();
platform::CUDAPlace place = boost::get<platform::CUDAPlace>(ctx.GetPlace()); platform::CUDAPlace place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
...@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> { ...@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows; auto cols = ins[0]->numel() / rows;
// copy index to cpu // copy index to cpu
Tensor index_t_cpu; Tensor index_t_cpu;
TensorCopy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); TensorCopySync(*ids, platform::CPUPlace(), &index_t_cpu);
auto* index = index_t_cpu.data<int32_t>(); auto* index = index_t_cpu.data<int32_t>();
auto stream = ctx.cuda_device_context().stream(); auto stream = ctx.cuda_device_context().stream();
......
...@@ -66,13 +66,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> { ...@@ -66,13 +66,11 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace()); offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
framework::TensorCopy(*offset, platform::CPUPlace(), ctx.device_context(), framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
&offset_cpu);
offset_data = offset_cpu.data<int64_t>(); offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace()); length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
framework::TensorCopy(*length, platform::CPUPlace(), ctx.device_context(), framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
&length_cpu);
length_data = length_cpu.data<int64_t>(); length_data = length_cpu.data<int64_t>();
} }
...@@ -127,13 +125,11 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> { ...@@ -127,13 +125,11 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
if (platform::is_gpu_place(ctx.GetPlace())) { if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace()); offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
framework::TensorCopy(*offset, platform::CPUPlace(), ctx.device_context(), framework::TensorCopySync(*offset, platform::CPUPlace(), &offset_cpu);
&offset_cpu);
offset_data = offset_cpu.data<int64_t>(); offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace()); length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
framework::TensorCopy(*length, platform::CPUPlace(), ctx.device_context(), framework::TensorCopySync(*length, platform::CPUPlace(), &length_cpu);
&length_cpu);
length_data = length_cpu.data<int64_t>(); length_data = length_cpu.data<int64_t>();
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册