Commit ce6dad3b authored by Yu Yang, committed by Yang Yang(Tony)

Rename CopyFrom to Copy for tensors (#7292)

* Rename Tensor::CopyFrom to Tensor::Copy

* Fix CI

* Fix compile
Parent 32b09b51
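
In short, every call site changes from framework::CopyFrom(...) to framework::Copy(...) with the argument list unchanged. A minimal before/after sketch (illustrative only; the tensor and context names are hypothetical, not taken from the diff):

    // Before this commit:
    //   framework::CopyFrom(src, dst_place, dev_ctx, &dst);
    // After this commit (same arguments, new name):
    framework::Copy(src, dst_place, dev_ctx, &dst);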
@@ -88,7 +88,7 @@ struct CastDataType {
       trans(*context, in_begin, in_end, out_begin,
             CastDataTypeFunctor<InType, OutType>());
     } else {
-      // TODO(dzhwinter): enhance CopyFrom CPU<->GPU with different data type?
+      // TODO(dzhwinter): enhance Copy CPU<->GPU with different data type?
       PADDLE_THROW("Unsupport CPU <-> GPU!");
     }
   }
...
@@ -37,7 +37,7 @@ Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
   Tensor* out = new Tensor();
   auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
   dev_ctx->Wait();
-  CopyFrom(in, dst_place, *dev_ctx, out);
+  Copy(in, dst_place, *dev_ctx, out);
   dev_ctx->Wait();
   return out;
 }
...
@@ -157,7 +157,7 @@ TEST(Operator, CPUtoGPU) {
   auto dev_ctx = pool.Get(cuda_place);
   paddle::framework::Tensor output_tensor;
-  CopyFrom(output2->Get<LoDTensor>(), paddle::platform::CPUPlace(), *dev_ctx,
-           &output_tensor);
+  Copy(output2->Get<LoDTensor>(), paddle::platform::CPUPlace(), *dev_ctx,
+       &output_tensor);
   dev_ctx->Wait();
...
@@ -232,7 +232,7 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
     auto dst_ptr = dst.mutable_data(dst_place, src.type());
     // TODO(tonyyang-svail):
-    // change the following to framework::CopyFrom
+    // change the following to framework::Copy
     auto src_place = src.place();
     auto src_ptr = src.data<void>();
     auto size = src.numel() * SizeOfType(src.type());
...
@@ -147,7 +147,7 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
   for (size_t ins = 0; ins < num_instances; ins++) {
     for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
       auto slice = tensor.Slice(elem, elem + 1);
-      CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(),
-               platform::CPUDeviceContext(), &slice);
+      Copy(source.Slice(ins, ins + 1), platform::CPUPlace(),
+           platform::CPUDeviceContext(), &slice);
     }
   }
...
@@ -69,7 +69,7 @@ struct AnyVisitor : public boost::static_visitor<bool> {
     tmp.mutable_data<bool>(cpu);
     auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
     gpuctx->Wait();
-    CopyFrom(out, cpu, *gpuctx, &tmp);
+    Copy(out, cpu, *gpuctx, &tmp);
     gpuctx->Wait();
     return GetResult(tmp, cpu);
   }
...
@@ -29,10 +29,10 @@ namespace framework {
  * @param[in] dst_place The dst place.
  * @param[in] ctx The device context contains device resources.
  *
- * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU.
+ * @note Copy supports CPU <-> GPU, GPU <-> GPU.
  */
-inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
-                     const platform::DeviceContext& ctx, Tensor* dst) {
+inline void Copy(const Tensor& src, const platform::Place& dst_place,
+                 const platform::DeviceContext& ctx, Tensor* dst) {
   src.check_memory_size();
@@ -88,9 +88,9 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
 }
 /**
- * @brief CopyFrom support CPU <-> CPU
+ * @brief Copy supports CPU <-> CPU
  */
-inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
-                     Tensor* dst) {
+inline void Copy(const Tensor& src, const platform::Place& dst_place,
+                 Tensor* dst) {
   src.check_memory_size();
   dst->Resize(src.dims());
@@ -316,7 +316,7 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor,
       DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
   is.read(static_cast<char*>(buf), cpu_tensor.memory_size());
   auto cpu_place = new platform::CPUPlace();
-  framework::CopyFrom(cpu_tensor, *cpu_place, dev_ctx, tensor);
+  framework::Copy(cpu_tensor, *cpu_place, dev_ctx, tensor);
   delete cpu_place;
 #else
   PADDLE_THROW("Unexpected branch");
...
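
For reference, a minimal usage sketch of the two renamed overloads declared above (an assumed example, not part of the commit; names, shapes, and places are illustrative):

    Tensor src, dst;
    src.mutable_data<float>(make_ddim({2, 2}), platform::CPUPlace());
    // CPU <-> CPU overload: no device context needed.
    Copy(src, platform::CPUPlace(), &dst);
    // Cross-device overload: pass the device context that owns the copy stream.
    platform::CUDADeviceContext gpu_ctx(platform::CUDAPlace(0));
    Copy(src, platform::CUDAPlace(0), gpu_ctx, &dst);
    gpu_ctx.Wait();  // GPU copies may be asynchronous; sync before reading dst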
@@ -19,7 +19,7 @@
 namespace paddle {
 namespace framework {
-TEST(CopyFrom, Tensor) {
+TEST(Copy, Tensor) {
   Tensor src_tensor;
   Tensor dst_tensor;
   platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));
@@ -32,7 +32,7 @@ TEST(CopyFrom, Tensor) {
   src_tensor.set_layout(DataLayout::kAnyLayout);
   auto cpu_place = new platform::CPUPlace();
-  CopyFrom(src_tensor, *cpu_place, &dst_tensor);
+  Copy(src_tensor, *cpu_place, &dst_tensor);
   const int* dst_ptr = dst_tensor.data<int>();
   ASSERT_NE(src_ptr, dst_ptr);
@@ -43,7 +43,7 @@ TEST(CopyFrom, Tensor) {
   EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
   Tensor slice_tensor = src_tensor.Slice(1, 2);
-  CopyFrom(slice_tensor, *cpu_place, &dst_tensor);
+  Copy(slice_tensor, *cpu_place, &dst_tensor);
   const int* slice_ptr = slice_tensor.data<int>();
   dst_ptr = dst_tensor.data<int>();
   ASSERT_NE(dst_ptr, slice_ptr);
@@ -67,11 +67,11 @@ TEST(CopyFrom, Tensor) {
     // CPU Tensor to GPU Tensor
     auto gpu_place = new platform::CUDAPlace(0);
     platform::CUDADeviceContext gpu_ctx(*gpu_place);
-    CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
+    Copy(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
     // GPU Tensor to CPU Tensor
     auto cpu_place = new platform::CPUPlace();
-    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
+    Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
     // Sync before Compare Tensors
     gpu_ctx.Wait();
@@ -84,10 +84,10 @@ TEST(CopyFrom, Tensor) {
     Tensor slice_tensor = src_tensor.Slice(1, 2);
     // CPU Slice Tensor to GPU Tensor
-    CopyFrom(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
+    Copy(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
     // GPU Tensor to CPU Tensor
-    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
+    Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
     // Sync before Compare Slice Tensors
     gpu_ctx.Wait();
@@ -155,7 +155,7 @@ TEST(CopyFromVector, Tensor) {
     CUDADeviceContext gpu_ctx(*gpu_place);
     CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
     // Copy from GPU to CPU tensor for comparison
-    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
+    Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
     // Sync before Compare Tensors
     gpu_ctx.Wait();
@@ -175,7 +175,7 @@ TEST(CopyFromVector, Tensor) {
     CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
     gpu_tensor.Resize(make_ddim({2, 2}));
     CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
-    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
+    Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
     // Sync before Compare Tensors
     gpu_ctx.Wait();
@@ -287,7 +287,7 @@ TEST(Tensor, SerializeAndDeserialize) {
     auto gpu_place = new platform::CUDAPlace();
     platform::CUDADeviceContext gpu_ctx(*gpu_place);
-    CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
+    Copy(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
     std::ostringstream oss;
     SerializeToStream(oss, gpu_tensor, gpu_ctx);
...
@@ -42,7 +42,7 @@ class ArrayOp : public framework::OperatorBase {
     if (platform::is_gpu_place(i_tensor.place())) {
       // FIXME: Avoid copy from GPU to CPU
       framework::Tensor t;
-      framework::CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx, &t);
+      framework::Copy(i_tensor, platform::CPUPlace(), dev_ctx, &t);
       dev_ctx.Wait();
       offset = static_cast<size_t>(*t.data<int64_t>());
     } else {
...
@@ -110,7 +110,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
-      framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place,
-                          dev_ctx, &slice);
+      framework::Copy(x[x_idx].Slice(start_offset, end_offset), place,
+                      dev_ctx, &slice);
       out_offset += len;
     }
...
@@ -45,7 +45,7 @@ class AssignFunctor {
     out_rows.set_height(rows.height());
     auto &t = rows.value();
     auto *m = out_rows.mutable_value();
-    framework::CopyFrom(t, t.place(), dev_ctx_, m);
+    framework::Copy(t, t.place(), dev_ctx_, m);
   }
   template <typename T>
@@ -57,7 +57,7 @@ class AssignFunctor {
   void copy_tensor(const framework::LoDTensor &lod_tensor,
                    framework::LoDTensor *out) const {
     auto &out_tensor = *out;
-    CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor);
+    Copy(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor);
     out_tensor.set_lod(lod_tensor.lod());
   }
...
@@ -98,15 +98,15 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
     T* conf_data = conf_tensor.data<T>();
     if (platform::is_gpu_place(context.GetPlace())) {
       loc_cpu.mutable_data<T>(loc_tensor.dims(), platform::CPUPlace());
-      framework::CopyFrom(loc_tensor, platform::CPUPlace(),
-                          context.device_context(), &loc_cpu);
+      framework::Copy(loc_tensor, platform::CPUPlace(),
+                      context.device_context(), &loc_cpu);
       loc_data = loc_cpu.data<T>();
       conf_cpu.mutable_data<T>(conf_tensor.dims(), platform::CPUPlace());
-      framework::CopyFrom(conf_tensor, platform::CPUPlace(),
-                          context.device_context(), &conf_cpu);
+      framework::Copy(conf_tensor, platform::CPUPlace(),
+                      context.device_context(), &conf_cpu);
       conf_data = conf_cpu.data<T>();
       priorbox_cpu.mutable_data<T>(in_priorbox->dims(), platform::CPUPlace());
-      framework::CopyFrom(*in_priorbox, platform::CPUPlace(),
-                          context.device_context(), &priorbox_cpu);
+      framework::Copy(*in_priorbox, platform::CPUPlace(),
+                      context.device_context(), &priorbox_cpu);
       priorbox_data = priorbox_cpu.data<T>();
     }
@@ -158,8 +158,8 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
                       batch_size, all_indices, all_decoded_bboxes,
                       out_data);
     if (platform::is_gpu_place(context.GetPlace())) {
-      framework::CopyFrom(out_cpu, platform::CUDAPlace(),
-                          context.device_context(), out);
+      framework::Copy(out_cpu, platform::CUDAPlace(), context.device_context(),
+                      out);
     }
   }
 };
...
@@ -126,8 +126,7 @@ class ExpandGradKernel : public framework::OpKernel<T> {
       auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
       auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
       out0->mutable_data<T>(context.GetPlace());
-      framework::CopyFrom(*in0, context.GetPlace(), context.device_context(),
-                          out0);
+      framework::Copy(*in0, context.GetPlace(), context.device_context(), out0);
     } else {
       switch (dims) {
         REP_EXPAND_GRAD_TEMPLATE(72)
...
@@ -52,7 +52,7 @@ class FeedOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(place);
-    framework::CopyFrom(feed_item, place, dev_ctx, out_item);
+    framework::Copy(feed_item, place, dev_ctx, out_item);
     out_item->set_lod(feed_item.lod());
   }
 };
...
@@ -55,7 +55,7 @@ class FetchOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(src_item.place());
-    CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item);
+    Copy(src_item, platform::CPUPlace(), dev_ctx, &dst_item);
     dev_ctx.Wait();
     dst_item.set_lod(src_item.lod());
...
@@ -72,7 +72,7 @@ class FillOp : public framework::OperatorBase {
       platform::DeviceContextPool &pool =
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
-      framework::CopyFrom(tensor, place, dev_ctx, &out);
+      framework::Copy(tensor, place, dev_ctx, &out);
     }
   }
 };
...
@@ -196,7 +196,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
       auto copyLoDTensor = [](const platform::DeviceContext& ctx,
                               const LoDTensor& src, LoDTensor* dst) {
         dst->mutable_data<T>(src.dims(), platform::CPUPlace());
-        framework::CopyFrom(src, platform::CPUPlace(), ctx, dst);
+        framework::Copy(src, platform::CPUPlace(), ctx, dst);
       };
       copyLoDTensor(ctx, emission_weights_src, emission_weights_dst);
@@ -204,7 +204,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
       transition_weights_dst->mutable_data<T>(transition_weights_src.dims(),
                                               platform::CPUPlace());
-      framework::CopyFrom(transition_weights_src, platform::CPUPlace(), ctx,
-                          transition_weights_dst);
+      framework::Copy(transition_weights_src, platform::CPUPlace(), ctx,
+                      transition_weights_dst);
     }
@@ -220,7 +220,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
       auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src,
                            Tensor* dst) {
         dst->mutable_data<T>(platform::CUDAPlace());
-        framework::CopyFrom(src, platform::CUDAPlace(), ctx, dst);
+        framework::Copy(src, platform::CUDAPlace(), ctx, dst);
       };
       copyTensor(ctx, emission_exps_src, emission_exps_dst);
       copyTensor(ctx, transition_exps_src, transition_exps_dst);
@@ -410,12 +410,12 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
     // Copy the inputs from GPU memory to CPU memory when this operators runs on
     // GPU device.
     label_dst->mutable_data<T>(label_src.dims(), platform::CPUPlace());
-    framework::CopyFrom(label_src, platform::CPUPlace(), ctx, label_dst);
+    framework::Copy(label_src, platform::CPUPlace(), ctx, label_dst);
     auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src,
                          Tensor* dst) {
       dst->mutable_data<T>(src.dims(), platform::CPUPlace());
-      framework::CopyFrom(src, platform::CPUPlace(), ctx, dst);
+      framework::Copy(src, platform::CPUPlace(), ctx, dst);
     };
     copyTensor(ctx, emission_exps_src, emission_exps_dst);
     copyTensor(ctx, transition_exps_src, transition_exps_dst);
@@ -434,7 +434,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
                          Tensor* dst) {
       if (src && dst) {
         dst->mutable_data<T>(platform::CUDAPlace());
-        framework::CopyFrom(*src, platform::CUDAPlace(), ctx, dst);
+        framework::Copy(*src, platform::CUDAPlace(), ctx, dst);
       }
     };
     copyTensor(ctx, emission_grad_src, emission_grad_dst);
...
@@ -53,7 +53,7 @@ class LoadOp : public framework::OperatorBase {
       out_var->Clear();
       tensor = out_var->GetMutable<framework::LoDTensor>();
       tensor->set_lod(cpu_tensor.lod());
-      CopyFrom(cpu_tensor, place, dev_ctx, tensor);
+      Copy(cpu_tensor, place, dev_ctx, tensor);
     }
   }
 };
...
@@ -33,7 +33,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
     auto* lod = lod_t->data<int>();
     if (platform::is_gpu_place(ctx.GetPlace())) {
       framework::Tensor lod_cpu;
-      framework::CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context(),
-                          &lod_cpu);
+      framework::Copy(*lod_t, platform::CPUPlace(), ctx.device_context(),
+                      &lod_cpu);
       lod = lod_cpu.data<int>();
     }
...
@@ -92,7 +92,7 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
-      framework::CopyFrom(x.Slice(static_cast<int>(each_range.begin),
-                                  static_cast<int>(each_range.end)),
-                          x.place(), dev_ctx, &slice);
+      framework::Copy(x.Slice(static_cast<int>(each_range.begin),
+                              static_cast<int>(each_range.end)),
+                      x.place(), dev_ctx, &slice);
       offset += len;
...
@@ -149,7 +149,7 @@ class ContextProjectFunctor {
             Tensor out_t_sub = out_t.Slice(k * context_length,
                                            k * context_length + padding_size);
             Tensor w_sub = padding_data.Slice(k, k + padding_size);
-            framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub);
+            framework::Copy(w_sub, context.GetPlace(), context, &out_t_sub);
           }
         }
         if (down_pad > 0) {  // add down pad
@@ -179,7 +179,7 @@ class ContextProjectFunctor {
                 (down_pad_begin_row + t) * context_length);
             Tensor w_sub = padding_data.Slice(
                 up_pad + padding_idx, up_pad + padding_idx + padding_size);
-            framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub);
+            framework::Copy(w_sub, context.GetPlace(), context, &out_t_sub);
           }
         }
         out_t.Resize({sequence_height, context_length * sequence_width});
...
@@ -63,7 +63,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     input = input_tmp;
   } else {
-    CopyFrom(input_tmp, *place, *context, &input);
+    Copy(input_tmp, *place, *context, &input);
   }
   output_cfo.mutable_data<float>(
       {1, filter_size, filter_size, output_height, output_width}, *place);
@@ -88,7 +88,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     out_cfo_ptr = output_cfo.data<float>();
   } else {
-    CopyFrom(output_cfo, paddle::platform::CPUPlace(), *context, &output_tmp);
+    Copy(output_cfo, paddle::platform::CPUPlace(), *context, &output_tmp);
     out_cfo_ptr = output_tmp.data<float>();
   }
   for (int i = 0; i < 6; ++i) {
@@ -99,7 +99,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     out_ocf_ptr = output_ocf.data<float>();
   } else {
-    CopyFrom(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp);
+    Copy(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp);
     out_ocf_ptr = output_tmp.data<float>();
   }
   for (int i = 0; i < 6; ++i) {
@@ -119,7 +119,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     input = input_tmp;
   } else {
-    CopyFrom(input_tmp, *place, *context, &input);
+    Copy(input_tmp, *place, *context, &input);
   }
   col2im(*context, output_cfo, dilation, stride, padding, &input);
@@ -128,7 +128,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     in_ptr = input.data<float>();
   } else {
-    CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp);
+    Copy(input, paddle::platform::CPUPlace(), *context, &input_tmp);
     in_ptr = input_tmp.data<float>();
   }
   for (int i = 0; i < 6; ++i) {
@@ -140,7 +140,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     input = input_tmp;
   } else {
-    CopyFrom(input_tmp, *place, *context, &input);
+    Copy(input_tmp, *place, *context, &input);
   }
   col2im_ocf(*context, output_ocf, dilation, stride, padding, &input);
@@ -148,7 +148,7 @@ void testIm2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     in_ptr = input.data<float>();
   } else {
-    CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp);
+    Copy(input, paddle::platform::CPUPlace(), *context, &input_tmp);
     in_ptr = input_tmp.data<float>();
   }
   for (int i = 0; i < 6; ++i) {
...
@@ -16,15 +16,15 @@ TEST(math_function, notrans_mul_trans) {
   auto* gpu_place = new paddle::platform::CUDAPlace(0);
   paddle::platform::CUDADeviceContext context(*gpu_place);
-  paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
-  paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu);
+  paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
+  paddle::framework::Copy(input1, *gpu_place, context, &input2_gpu);
   out_gpu.mutable_data<float>({2, 2}, *gpu_place);
   paddle::operators::math::matmul<paddle::platform::CUDADeviceContext, float>(
       context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0);
-  paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out);
+  paddle::framework::Copy(out_gpu, *cpu_place, context, &out);
   float* out_ptr = out.data<float>();
   context.Wait();
@@ -50,15 +50,15 @@ TEST(math_function, trans_mul_notrans) {
   auto* gpu_place = new paddle::platform::CUDAPlace(0);
   paddle::platform::CUDADeviceContext context(*gpu_place);
-  paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
-  paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu);
+  paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
+  paddle::framework::Copy(input1, *gpu_place, context, &input2_gpu);
   out_gpu.mutable_data<float>({3, 3}, *gpu_place);
   paddle::operators::math::matmul<paddle::platform::CUDADeviceContext, float>(
       context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0);
-  paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out);
+  paddle::framework::Copy(out_gpu, *cpu_place, context, &out);
   float* out_ptr = out.data<float>();
   context.Wait();
@@ -99,9 +99,9 @@ TEST(math_function, gemm_notrans_cublas) {
   auto* gpu_place = new paddle::platform::CUDAPlace(0);
   paddle::platform::CUDADeviceContext context(*gpu_place);
-  paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
-  paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu);
-  paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu);
+  paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
+  paddle::framework::Copy(input2, *gpu_place, context, &input2_gpu);
+  paddle::framework::Copy(input3, *gpu_place, context, &input3_gpu);
   float* a = input1_gpu.data<float>();
   float* b = input2_gpu.data<float>();
   float* c = input3_gpu.mutable_data<float>(*gpu_place);
@@ -109,7 +109,7 @@ TEST(math_function, gemm_notrans_cublas) {
   paddle::operators::math::gemm<paddle::platform::CUDADeviceContext, float>(
       context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4);
-  paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3);
+  paddle::framework::Copy(input3_gpu, *cpu_place, context, &input3);
   // numpy code:
   // a = np.arange(6).reshape(2, 3)
@@ -154,9 +154,9 @@ TEST(math_function, gemm_trans_cublas) {
   auto* gpu_place = new paddle::platform::CUDAPlace(0);
   paddle::platform::CUDADeviceContext context(*gpu_place);
-  paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
-  paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu);
-  paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu);
+  paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
+  paddle::framework::Copy(input2, *gpu_place, context, &input2_gpu);
+  paddle::framework::Copy(input3, *gpu_place, context, &input3_gpu);
   float* a = input1_gpu.data<float>();
   float* b = input2_gpu.data<float>();
   float* c = input3_gpu.mutable_data<float>(*gpu_place);
@@ -164,7 +164,7 @@ TEST(math_function, gemm_trans_cublas) {
   paddle::operators::math::gemm<paddle::platform::CUDADeviceContext, float>(
       context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4);
-  paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3);
+  paddle::framework::Copy(input3_gpu, *cpu_place, context, &input3);
   context.Wait();
   EXPECT_EQ(input3_ptr[0], 0);
@@ -205,14 +205,14 @@ void GemvTest(int m, int n, bool trans) {
   }
   paddle::platform::CUDADeviceContext context(*gpu_place);
-  paddle::framework::CopyFrom(mat_a, *gpu_place, context, &g_mat_a);
-  paddle::framework::CopyFrom(vec_b, *gpu_place, context, &g_vec_b);
+  paddle::framework::Copy(mat_a, *gpu_place, context, &g_mat_a);
+  paddle::framework::Copy(vec_b, *gpu_place, context, &g_vec_b);
   paddle::operators::math::gemv<paddle::platform::CUDADeviceContext, T>(
       context, trans, static_cast<int>(m), static_cast<int>(n), 1., g_data_a,
       g_data_b, 0., g_data_c);
-  paddle::framework::CopyFrom(g_vec_c, paddle::platform::CPUPlace(), context,
-                              &vec_c);
+  paddle::framework::Copy(g_vec_c, paddle::platform::CPUPlace(), context,
+                          &vec_c);
   if (!trans) {
...
@@ -67,7 +67,7 @@ TEST(selected_rows_functor, gpu_add) {
   EXPECT_EQ(out_rows[6], 9);
   Tensor out_cpu;
-  CopyFrom(*out_value, cpu_place, ctx, &out_cpu);
+  Copy(*out_value, cpu_place, ctx, &out_cpu);
   ctx.Wait();
   auto* out_cpu_data = out_cpu.data<float>();
@@ -94,7 +94,7 @@ TEST(selected_rows_functor, gpu_add) {
   add_tensor_functor(ctx, *output, *tensor1, tensor2.get());
   Tensor tensor2_cpu;
-  CopyFrom(*tensor2, cpu_place, ctx, &tensor2_cpu);
+  Copy(*tensor2, cpu_place, ctx, &tensor2_cpu);
   ctx.Wait();
   auto* tensor2_cpu_data = tensor2_cpu.data<float>();
@@ -167,7 +167,7 @@ TEST(selected_rows_functor, gpu_add_to) {
   EXPECT_EQ(out_rows[6], 9);
   Tensor out_cpu;
-  CopyFrom(*out_value, cpu_place, ctx, &out_cpu);
+  Copy(*out_value, cpu_place, ctx, &out_cpu);
   ctx.Wait();
   auto* out_cpu_data = out_cpu.data<float>();
@@ -191,7 +191,7 @@ TEST(selected_rows_functor, gpu_add_to) {
   add_to_tensor_functor(ctx, *output, tensor1.get());
   Tensor tensor1_cpu;
-  CopyFrom(*tensor1, cpu_place, ctx, &tensor1_cpu);
+  Copy(*tensor1, cpu_place, ctx, &tensor1_cpu);
   ctx.Wait();
   auto* tensor1_cpu_data = tensor1_cpu.data<float>();
...
@@ -71,7 +71,7 @@ void testVol2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     input = input_tmp;
   } else {
-    CopyFrom(input_tmp, *place, *context, &input);
+    Copy(input_tmp, *place, *context, &input);
   }
   output.mutable_data<float>({1, filter_size, filter_size, filter_size,
                               output_depth, output_height, output_width},
@@ -85,7 +85,7 @@ void testVol2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     out_cfo_ptr = output.data<float>();
   } else {
-    CopyFrom(output, paddle::platform::CPUPlace(), *context, &output_tmp);
+    Copy(output, paddle::platform::CPUPlace(), *context, &output_tmp);
     out_cfo_ptr = output_tmp.data<float>();
   }
@@ -99,7 +99,7 @@ void testVol2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     input = input_tmp;
   } else {
-    CopyFrom(input_tmp, *place, *context, &input);
+    Copy(input_tmp, *place, *context, &input);
   }
   paddle::operators::math::Col2VolFunctor<DeviceContext, float> col2vol;
@@ -109,7 +109,7 @@ void testVol2col() {
   if (paddle::platform::is_cpu_place(*place)) {
     in_ptr = input.data<float>();
   } else {
-    CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp);
+    Copy(input, paddle::platform::CPUPlace(), *context, &input_tmp);
     in_ptr = input_tmp.data<float>();
   }
...
@@ -49,7 +49,7 @@ class MergeLoDTensorOp : public framework::OperatorBase {
       cpu_mask->ShareDataWith(mask);
     } else if (platform::is_gpu_place(mask.place())) {
 #ifdef PADDLE_WITH_CUDA
-      framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
+      framework::Copy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
 #else
       PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option");
 #endif
@@ -104,8 +104,8 @@ class MergeLoDTensorOp : public framework::OperatorBase {
         continue;
       }
       auto slice = out->Slice(out_offset, out_offset + len);
-      framework::CopyFrom(input->Slice(start_offset, end_offset), place,
-                          dev_ctx, &slice);
+      framework::Copy(input->Slice(start_offset, end_offset), place, dev_ctx,
+                      &slice);
       out_offset += len;
       (*in_idx) += 1;
     }
...
@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
     auto cols = ins[0]->numel() / rows;
     // copy index to cpu
     Tensor index_t_cpu;
-    CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
+    Copy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
     auto* index = index_t_cpu.data<int32_t>();
     auto stream = ctx.cuda_device_context().stream();
     platform::CUDAPlace place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
     auto cols = ins[0]->numel() / rows;
     // copy index to cpu
     Tensor index_t_cpu;
-    CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
+    Copy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
     auto* index = index_t_cpu.data<int32_t>();
     auto stream = ctx.cuda_device_context().stream();
...
@@ -211,7 +211,7 @@ class ParallelDoGradOp : public OperatorBase {
         auto &tt = sub_scopes[place_idx]->FindVar(s)->Get<LoDTensor>();
         VLOG(3) << place_idx;
         VLOG(3) << tt;
-        framework::CopyFrom(tt, places[0], t_buf);
+        framework::Copy(tt, places[0], t_buf);
         auto sum_op = framework::OpRegistry::CreateOp(
             "sum", {{"X", {s, s_buf}}}, {{"Out", {s}}},
@@ -220,7 +220,7 @@ class ParallelDoGradOp : public OperatorBase {
       }
       VLOG(3) << t;
-      framework::CopyFrom(t, place, scope.FindVar(s)->GetMutable<LoDTensor>());
+      framework::Copy(t, place, scope.FindVar(s)->GetMutable<LoDTensor>());
     }
   }
 };
...
@@ -290,7 +290,7 @@ class RecurrentOp : public RecurrentBase {
           auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1);
           // Explicit copy output since the local RNN scope can be destroyed
           // early.
-          framework::CopyFrom(src_tensor, place, dev_ctx, &dst_out);
+          framework::Copy(src_tensor, place, dev_ctx, &dst_out);
         });
     scopes.Next();
@@ -376,7 +376,7 @@ class RecurrentGradOp : public RecurrentBase {
           auto *cur_grad_var = cur_scope.Var(cur_grad);
           auto cur_grad_tensor =
               cur_grad_var->GetMutable<framework::LoDTensor>();
-          framework::CopyFrom(ex_tensor, place, dev_ctx, cur_grad_tensor);
+          framework::Copy(ex_tensor, place, dev_ctx, cur_grad_tensor);
         }
       }
@@ -450,7 +450,7 @@ class RecurrentGradOp : public RecurrentBase {
           }
          auto dst = outside->Slice(seq_offset, seq_offset + 1);
-          framework::CopyFrom(inside, place, dev_ctx, &dst);
+          framework::Copy(inside, place, dev_ctx, &dst);
         });
     VLOG(5) << "Link outside gradient finished ";
@@ -463,7 +463,7 @@ class RecurrentGradOp : public RecurrentBase {
           framework::LoDTensor *outside) {
           outside->Resize(inside.dims());
           outside->mutable_data(place, inside.type());
-          framework::CopyFrom(inside, place, dev_ctx, outside);
+          framework::Copy(inside, place, dev_ctx, outside);
         });
     VLOG(5) << "Link initialize state gradient finished ";
   }
...
@@ -146,7 +146,7 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(place);
-    framework::CopyFrom(x_sliced, out_sliced.place(), dev_ctx, &out_sliced);
+    framework::Copy(x_sliced, out_sliced.place(), dev_ctx, &out_sliced);
     out_offset += len;
     return out_offset;
   }
...
@@ -28,7 +28,7 @@ class ReshapeKernel : public framework::OpKernel<T> {
     auto* in = ctx.Input<framework::Tensor>("X");
     auto out_dims = out->dims();
     out->mutable_data<T>(ctx.GetPlace());
-    framework::CopyFrom(*in, ctx.GetPlace(), ctx.device_context(), out);
+    framework::Copy(*in, ctx.GetPlace(), ctx.device_context(), out);
     out->Resize(out_dims);
   }
 };
@@ -42,7 +42,7 @@ class ReshapeGradKernel : public framework::OpKernel<T> {
     d_x->mutable_data<T>(ctx.GetPlace());
     auto in_dims = d_x->dims();
-    framework::CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
+    framework::Copy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
     d_x->Resize(in_dims);
   }
 };
...
@@ -66,12 +66,12 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
     if (platform::is_gpu_place(ctx.GetPlace())) {
       offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
-      framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(),
-                          &offset_cpu);
+      framework::Copy(*offset, platform::CPUPlace(), ctx.device_context(),
+                      &offset_cpu);
       offset_data = offset_cpu.data<int64_t>();
       length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
-      framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(),
-                          &length_cpu);
+      framework::Copy(*length, platform::CPUPlace(), ctx.device_context(),
+                      &length_cpu);
       length_data = length_cpu.data<int64_t>();
     }
@@ -127,12 +127,12 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
     if (platform::is_gpu_place(ctx.GetPlace())) {
       offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
-      framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(),
-                          &offset_cpu);
+      framework::Copy(*offset, platform::CPUPlace(), ctx.device_context(),
+                      &offset_cpu);
       offset_data = offset_cpu.data<int64_t>();
       length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
-      framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(),
-                          &length_cpu);
+      framework::Copy(*length, platform::CPUPlace(), ctx.device_context(),
+                      &length_cpu);
       length_data = length_cpu.data<int64_t>();
     }
...
@@ -115,7 +115,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
       auto &dout_tensor = dout_var->Get<framework::LoDTensor>();
       auto height = dout_tensor.dims()[0];
       auto slice = dx_tensor.Slice(0, static_cast<int>(height));
-      framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
+      framework::Copy(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
       if (dx_tensor.dims()[0] > height) {
         auto rest_tensor = dx_tensor.Slice(
             static_cast<int>(height), static_cast<int>(dx_tensor.dims()[0]));
...
@@ -53,7 +53,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
       cpu_mask->ShareDataWith(mask);
     } else if (platform::is_gpu_place(mask.place())) {
 #ifdef PADDLE_WITH_CUDA
-      framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
+      framework::Copy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
 #else
       PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option");
 #endif
@@ -111,7 +111,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
         // out[offset: offset+len] = x[each_range.begin: each_range.end]
         auto slice = out->Slice(static_cast<int>(offset),
                                 static_cast<int>(offset + len));
-        framework::CopyFrom(x.Slice(static_cast<int>(each_range.begin),
-                                    static_cast<int>(each_range.end)),
-                            x.place(), dev_ctx, &slice);
+        framework::Copy(x.Slice(static_cast<int>(each_range.begin),
+                                static_cast<int>(each_range.end)),
+                        x.place(), dev_ctx, &slice);
         offset += len;
...
@@ -107,7 +107,7 @@ class SumKernel : public framework::OpKernel<T> {
           out_array.resize(i + 1);
         }
         if (out_array[i].numel() == 0) {
-          framework::CopyFrom(in_array[i], in_array[i].place(),
-                              context.device_context(), &out_array[i]);
+          framework::Copy(in_array[i], in_array[i].place(),
+                          context.device_context(), &out_array[i]);
           out_array[i].set_lod(in_array[i].lod());
         } else {
...
@@ -44,7 +44,7 @@ class WriteToArrayOp : public ArrayOp {
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
-      CopyFrom(x_tensor, place, dev_ctx, out_tensor);
+      Copy(x_tensor, place, dev_ctx, out_tensor);
       out_tensor->set_lod(x_tensor.lod());
     } else {
       VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
@@ -135,7 +135,7 @@ class ReadFromArrayOp : public ArrayOp {
       platform::DeviceContextPool &pool =
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
-      framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor);
+      framework::Copy(x_array[offset], place, dev_ctx, out_tensor);
       out_tensor->set_lod(x_array[offset].lod());
     } else {
       VLOG(10) << "offset " << offset << " >= " << x_array.size();
...