Commit ce6dad3b authored by Yu Yang, committed by Yang Yang (Tony)

Rename CopyFrom to Copy for tensors (#7292)

* Rename Tensor::CopyFrom to Tensor::Copy

* Fix CI

* Fix compile
Parent 32b09b51
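
In short, every call site that previously read `framework::CopyFrom(src, dst_place, ctx, dst)` now reads `framework::Copy(src, dst_place, ctx, dst)` with an otherwise unchanged signature. A minimal sketch of the pattern, assuming the framework headers of this revision (the wrapper name `CopyToCpu` below is illustrative only, not part of this diff):

```cpp
// Hedged sketch of a call site after the rename; the include path and the
// helper name CopyToCpu are assumptions for illustration, not from this diff.
#include "paddle/framework/tensor_util.h"

void CopyToCpu(const paddle::framework::Tensor& src,
               paddle::platform::DeviceContext& dev_ctx,
               paddle::framework::Tensor* dst) {
  // Device-aware overload: supports CPU <-> GPU and GPU <-> GPU transfers.
  paddle::framework::Copy(src, paddle::platform::CPUPlace(), dev_ctx, dst);
  // Call sites in this diff typically synchronize before reading the result.
  dev_ctx.Wait();
}
```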
......@@ -88,7 +88,7 @@ struct CastDataType {
trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>());
} else {
// TODO(dzhwinter): enhance CopyFrom CPU<->GPU with different data type?
// TODO(dzhwinter): enhance Copy CPU<->GPU with different data type?
PADDLE_THROW("Unsupport CPU <-> GPU!");
}
}
......
......@@ -37,7 +37,7 @@ Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
Tensor* out = new Tensor();
auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
dev_ctx->Wait();
CopyFrom(in, dst_place, *dev_ctx, out);
Copy(in, dst_place, *dev_ctx, out);
dev_ctx->Wait();
return out;
}
......
......@@ -157,7 +157,7 @@ TEST(Operator, CPUtoGPU) {
auto dev_ctx = pool.Get(cuda_place);
paddle::framework::Tensor output_tensor;
CopyFrom(output2->Get<LoDTensor>(), paddle::platform::CPUPlace(), *dev_ctx,
Copy(output2->Get<LoDTensor>(), paddle::platform::CPUPlace(), *dev_ctx,
&output_tensor);
dev_ctx->Wait();
......
......@@ -232,7 +232,7 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
auto dst_ptr = dst.mutable_data(dst_place, src.type());
// TODO(tonyyang-svail):
// change the following to framework::CopyFrom
// change the following to framework::Copy
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto size = src.numel() * SizeOfType(src.type());
......
......@@ -147,7 +147,7 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
for (size_t ins = 0; ins < num_instances; ins++) {
for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) {
auto slice = tensor.Slice(elem, elem + 1);
CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(),
Copy(source.Slice(ins, ins + 1), platform::CPUPlace(),
platform::CPUDeviceContext(), &slice);
}
}
......
......@@ -69,7 +69,7 @@ struct AnyVisitor : public boost::static_visitor<bool> {
tmp.mutable_data<bool>(cpu);
auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
gpuctx->Wait();
CopyFrom(out, cpu, *gpuctx, &tmp);
Copy(out, cpu, *gpuctx, &tmp);
gpuctx->Wait();
return GetResult(tmp, cpu);
}
......
......@@ -29,10 +29,10 @@ namespace framework {
* @param[in] dst_place The dst place.
* @param[in] ctx The device context contains device resources.
*
* @note CopyFrom supports CPU <-> GPU, GPU <-> GPU.
* @note Copy supports CPU <-> GPU, GPU <-> GPU.
*/
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
inline void Copy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) {
src.check_memory_size();
......@@ -88,9 +88,9 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
}
/**
* @brief CopyFrom support CPU <-> CPU
* @brief Copy supports CPU <-> CPU
*/
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
inline void Copy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
src.check_memory_size();
dst->Resize(src.dims());
......@@ -316,7 +316,7 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor,
DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
is.read(static_cast<char*>(buf), cpu_tensor.memory_size());
auto cpu_place = new platform::CPUPlace();
framework::CopyFrom(cpu_tensor, *cpu_place, dev_ctx, tensor);
framework::Copy(cpu_tensor, *cpu_place, dev_ctx, tensor);
delete cpu_place;
#else
PADDLE_THROW("Unexpected branch");
......
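
The header above keeps two overloads under the new name: a device-context form for cross-device copies and a three-argument form for CPU-to-CPU copies. A hedged sketch of both, mirroring the tests that follow (assumes a CUDA-enabled build for the GPU path):

```cpp
// Sketch only: mirrors the renamed overloads exercised in the test file below.
paddle::framework::Tensor src_tensor;
paddle::framework::Tensor dst_tensor;
src_tensor.mutable_data<int>(paddle::framework::make_ddim({3, 3}),
                             paddle::platform::CPUPlace());

// CPU <-> CPU: no device context is required.
paddle::framework::Copy(src_tensor, paddle::platform::CPUPlace(), &dst_tensor);

// CPU <-> GPU / GPU <-> GPU: pass the device context that owns the stream.
paddle::platform::CUDAPlace gpu_place(0);
paddle::platform::CUDADeviceContext gpu_ctx(gpu_place);
paddle::framework::Tensor gpu_tensor;
paddle::framework::Copy(src_tensor, gpu_place, gpu_ctx, &gpu_tensor);
gpu_ctx.Wait();  // synchronize before reading the copied data, as the tests do
```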
......@@ -19,7 +19,7 @@
namespace paddle {
namespace framework {
TEST(CopyFrom, Tensor) {
TEST(Copy, Tensor) {
Tensor src_tensor;
Tensor dst_tensor;
platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));
......@@ -32,7 +32,7 @@ TEST(CopyFrom, Tensor) {
src_tensor.set_layout(DataLayout::kAnyLayout);
auto cpu_place = new platform::CPUPlace();
CopyFrom(src_tensor, *cpu_place, &dst_tensor);
Copy(src_tensor, *cpu_place, &dst_tensor);
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr);
......@@ -43,7 +43,7 @@ TEST(CopyFrom, Tensor) {
EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout());
Tensor slice_tensor = src_tensor.Slice(1, 2);
CopyFrom(slice_tensor, *cpu_place, &dst_tensor);
Copy(slice_tensor, *cpu_place, &dst_tensor);
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr);
......@@ -67,11 +67,11 @@ TEST(CopyFrom, Tensor) {
// CPU Tensor to GPU Tensor
auto gpu_place = new platform::CUDAPlace(0);
platform::CUDADeviceContext gpu_ctx(*gpu_place);
CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
Copy(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
// GPU Tensor to CPU Tensor
auto cpu_place = new platform::CPUPlace();
CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
// Sync before Compare Tensors
gpu_ctx.Wait();
......@@ -84,10 +84,10 @@ TEST(CopyFrom, Tensor) {
Tensor slice_tensor = src_tensor.Slice(1, 2);
// CPU Slice Tensor to GPU Tensor
CopyFrom(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
Copy(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
// GPU Tensor to CPU Tensor
CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
// Sync before Compare Slice Tensors
gpu_ctx.Wait();
......@@ -155,7 +155,7 @@ TEST(CopyFromVector, Tensor) {
CUDADeviceContext gpu_ctx(*gpu_place);
CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
// Copy from GPU to CPU tensor for comparison
CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
// Sync before Compare Tensors
gpu_ctx.Wait();
......@@ -175,7 +175,7 @@ TEST(CopyFromVector, Tensor) {
CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
gpu_tensor.Resize(make_ddim({2, 2}));
CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
Copy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);
// Sync before Compare Tensors
gpu_ctx.Wait();
......@@ -287,7 +287,7 @@ TEST(Tensor, SerializeAndDeserialize) {
auto gpu_place = new platform::CUDAPlace();
platform::CUDADeviceContext gpu_ctx(*gpu_place);
CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
Copy(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);
std::ostringstream oss;
SerializeToStream(oss, gpu_tensor, gpu_ctx);
......
......@@ -42,7 +42,7 @@ class ArrayOp : public framework::OperatorBase {
if (platform::is_gpu_place(i_tensor.place())) {
// FIXME: Avoid copy from GPU to CPU
framework::Tensor t;
framework::CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx, &t);
framework::Copy(i_tensor, platform::CPUPlace(), dev_ctx, &t);
dev_ctx.Wait();
offset = static_cast<size_t>(*t.data<int64_t>());
} else {
......
......@@ -110,7 +110,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place,
framework::Copy(x[x_idx].Slice(start_offset, end_offset), place,
dev_ctx, &slice);
out_offset += len;
}
......
......@@ -45,7 +45,7 @@ class AssignFunctor {
out_rows.set_height(rows.height());
auto &t = rows.value();
auto *m = out_rows.mutable_value();
framework::CopyFrom(t, t.place(), dev_ctx_, m);
framework::Copy(t, t.place(), dev_ctx_, m);
}
template <typename T>
......@@ -57,7 +57,7 @@ class AssignFunctor {
void copy_tensor(const framework::LoDTensor &lod_tensor,
framework::LoDTensor *out) const {
auto &out_tensor = *out;
CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor);
Copy(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor);
out_tensor.set_lod(lod_tensor.lod());
}
......
......@@ -98,15 +98,15 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
T* conf_data = conf_tensor.data<T>();
if (platform::is_gpu_place(context.GetPlace())) {
loc_cpu.mutable_data<T>(loc_tensor.dims(), platform::CPUPlace());
framework::CopyFrom(loc_tensor, platform::CPUPlace(),
framework::Copy(loc_tensor, platform::CPUPlace(),
context.device_context(), &loc_cpu);
loc_data = loc_cpu.data<T>();
conf_cpu.mutable_data<T>(conf_tensor.dims(), platform::CPUPlace());
framework::CopyFrom(conf_tensor, platform::CPUPlace(),
framework::Copy(conf_tensor, platform::CPUPlace(),
context.device_context(), &conf_cpu);
conf_data = conf_cpu.data<T>();
priorbox_cpu.mutable_data<T>(in_priorbox->dims(), platform::CPUPlace());
framework::CopyFrom(*in_priorbox, platform::CPUPlace(),
framework::Copy(*in_priorbox, platform::CPUPlace(),
context.device_context(), &priorbox_cpu);
priorbox_data = priorbox_cpu.data<T>();
}
......@@ -158,8 +158,8 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
batch_size, all_indices, all_decoded_bboxes,
out_data);
if (platform::is_gpu_place(context.GetPlace())) {
framework::CopyFrom(out_cpu, platform::CUDAPlace(),
context.device_context(), out);
framework::Copy(out_cpu, platform::CUDAPlace(), context.device_context(),
out);
}
}
};
......
......@@ -126,8 +126,7 @@ class ExpandGradKernel : public framework::OpKernel<T> {
auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
out0->mutable_data<T>(context.GetPlace());
framework::CopyFrom(*in0, context.GetPlace(), context.device_context(),
out0);
framework::Copy(*in0, context.GetPlace(), context.device_context(), out0);
} else {
switch (dims) {
REP_EXPAND_GRAD_TEMPLATE(72)
......
......@@ -52,7 +52,7 @@ class FeedOp : public framework::OperatorBase {
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
framework::CopyFrom(feed_item, place, dev_ctx, out_item);
framework::Copy(feed_item, place, dev_ctx, out_item);
out_item->set_lod(feed_item.lod());
}
};
......
......@@ -55,7 +55,7 @@ class FetchOp : public framework::OperatorBase {
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(src_item.place());
CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item);
Copy(src_item, platform::CPUPlace(), dev_ctx, &dst_item);
dev_ctx.Wait();
dst_item.set_lod(src_item.lod());
......
......@@ -72,7 +72,7 @@ class FillOp : public framework::OperatorBase {
platform::DeviceContextPool &pool =
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
framework::CopyFrom(tensor, place, dev_ctx, &out);
framework::Copy(tensor, place, dev_ctx, &out);
}
}
};
......
......@@ -196,7 +196,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto copyLoDTensor = [](const platform::DeviceContext& ctx,
const LoDTensor& src, LoDTensor* dst) {
dst->mutable_data<T>(src.dims(), platform::CPUPlace());
framework::CopyFrom(src, platform::CPUPlace(), ctx, dst);
framework::Copy(src, platform::CPUPlace(), ctx, dst);
};
copyLoDTensor(ctx, emission_weights_src, emission_weights_dst);
......@@ -204,7 +204,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
transition_weights_dst->mutable_data<T>(transition_weights_src.dims(),
platform::CPUPlace());
framework::CopyFrom(transition_weights_src, platform::CPUPlace(), ctx,
framework::Copy(transition_weights_src, platform::CPUPlace(), ctx,
transition_weights_dst);
}
......@@ -220,7 +220,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src,
Tensor* dst) {
dst->mutable_data<T>(platform::CUDAPlace());
framework::CopyFrom(src, platform::CUDAPlace(), ctx, dst);
framework::Copy(src, platform::CUDAPlace(), ctx, dst);
};
copyTensor(ctx, emission_exps_src, emission_exps_dst);
copyTensor(ctx, transition_exps_src, transition_exps_dst);
......@@ -410,12 +410,12 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
// Copy the inputs from GPU memory to CPU memory when this operators runs on
// GPU device.
label_dst->mutable_data<T>(label_src.dims(), platform::CPUPlace());
framework::CopyFrom(label_src, platform::CPUPlace(), ctx, label_dst);
framework::Copy(label_src, platform::CPUPlace(), ctx, label_dst);
auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src,
Tensor* dst) {
dst->mutable_data<T>(src.dims(), platform::CPUPlace());
framework::CopyFrom(src, platform::CPUPlace(), ctx, dst);
framework::Copy(src, platform::CPUPlace(), ctx, dst);
};
copyTensor(ctx, emission_exps_src, emission_exps_dst);
copyTensor(ctx, transition_exps_src, transition_exps_dst);
......@@ -434,7 +434,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
Tensor* dst) {
if (src && dst) {
dst->mutable_data<T>(platform::CUDAPlace());
framework::CopyFrom(*src, platform::CUDAPlace(), ctx, dst);
framework::Copy(*src, platform::CUDAPlace(), ctx, dst);
}
};
copyTensor(ctx, emission_grad_src, emission_grad_dst);
......
......@@ -53,7 +53,7 @@ class LoadOp : public framework::OperatorBase {
out_var->Clear();
tensor = out_var->GetMutable<framework::LoDTensor>();
tensor->set_lod(cpu_tensor.lod());
CopyFrom(cpu_tensor, place, dev_ctx, tensor);
Copy(cpu_tensor, place, dev_ctx, tensor);
}
}
};
......
......@@ -33,7 +33,7 @@ class LoDResetKernel : public framework::OpKernel<T> {
auto* lod = lod_t->data<int>();
if (platform::is_gpu_place(ctx.GetPlace())) {
framework::Tensor lod_cpu;
framework::CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context(),
framework::Copy(*lod_t, platform::CPUPlace(), ctx.device_context(),
&lod_cpu);
lod = lod_cpu.data<int>();
}
......
......@@ -92,7 +92,7 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
framework::CopyFrom(x.Slice(static_cast<int>(each_range.begin),
framework::Copy(x.Slice(static_cast<int>(each_range.begin),
static_cast<int>(each_range.end)),
x.place(), dev_ctx, &slice);
offset += len;
......
......@@ -149,7 +149,7 @@ class ContextProjectFunctor {
Tensor out_t_sub = out_t.Slice(k * context_length,
k * context_length + padding_size);
Tensor w_sub = padding_data.Slice(k, k + padding_size);
framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub);
framework::Copy(w_sub, context.GetPlace(), context, &out_t_sub);
}
}
if (down_pad > 0) { // add down pad
......@@ -179,7 +179,7 @@ class ContextProjectFunctor {
(down_pad_begin_row + t) * context_length);
Tensor w_sub = padding_data.Slice(
up_pad + padding_idx, up_pad + padding_idx + padding_size);
framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub);
framework::Copy(w_sub, context.GetPlace(), context, &out_t_sub);
}
}
out_t.Resize({sequence_height, context_length * sequence_width});
......
......@@ -63,7 +63,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp;
} else {
CopyFrom(input_tmp, *place, *context, &input);
Copy(input_tmp, *place, *context, &input);
}
output_cfo.mutable_data<float>(
{1, filter_size, filter_size, output_height, output_width}, *place);
......@@ -88,7 +88,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
out_cfo_ptr = output_cfo.data<float>();
} else {
CopyFrom(output_cfo, paddle::platform::CPUPlace(), *context, &output_tmp);
Copy(output_cfo, paddle::platform::CPUPlace(), *context, &output_tmp);
out_cfo_ptr = output_tmp.data<float>();
}
for (int i = 0; i < 6; ++i) {
......@@ -99,7 +99,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
out_ocf_ptr = output_ocf.data<float>();
} else {
CopyFrom(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp);
Copy(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp);
out_ocf_ptr = output_tmp.data<float>();
}
for (int i = 0; i < 6; ++i) {
......@@ -119,7 +119,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp;
} else {
CopyFrom(input_tmp, *place, *context, &input);
Copy(input_tmp, *place, *context, &input);
}
col2im(*context, output_cfo, dilation, stride, padding, &input);
......@@ -128,7 +128,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
in_ptr = input.data<float>();
} else {
CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp);
Copy(input, paddle::platform::CPUPlace(), *context, &input_tmp);
in_ptr = input_tmp.data<float>();
}
for (int i = 0; i < 6; ++i) {
......@@ -140,7 +140,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp;
} else {
CopyFrom(input_tmp, *place, *context, &input);
Copy(input_tmp, *place, *context, &input);
}
col2im_ocf(*context, output_ocf, dilation, stride, padding, &input);
......@@ -148,7 +148,7 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) {
in_ptr = input.data<float>();
} else {
CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp);
Copy(input, paddle::platform::CPUPlace(), *context, &input_tmp);
in_ptr = input_tmp.data<float>();
}
for (int i = 0; i < 6; ++i) {
......
......@@ -16,15 +16,15 @@ TEST(math_function, notrans_mul_trans) {
auto* gpu_place = new paddle::platform::CUDAPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place);
paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu);
paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
paddle::framework::Copy(input1, *gpu_place, context, &input2_gpu);
out_gpu.mutable_data<float>({2, 2}, *gpu_place);
paddle::operators::math::matmul<paddle::platform::CUDADeviceContext, float>(
context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0);
paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out);
paddle::framework::Copy(out_gpu, *cpu_place, context, &out);
float* out_ptr = out.data<float>();
context.Wait();
......@@ -50,15 +50,15 @@ TEST(math_function, trans_mul_notrans) {
auto* gpu_place = new paddle::platform::CUDAPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place);
paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu);
paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
paddle::framework::Copy(input1, *gpu_place, context, &input2_gpu);
out_gpu.mutable_data<float>({3, 3}, *gpu_place);
paddle::operators::math::matmul<paddle::platform::CUDADeviceContext, float>(
context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0);
paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out);
paddle::framework::Copy(out_gpu, *cpu_place, context, &out);
float* out_ptr = out.data<float>();
context.Wait();
......@@ -99,9 +99,9 @@ TEST(math_function, gemm_notrans_cublas) {
auto* gpu_place = new paddle::platform::CUDAPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place);
paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu);
paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu);
paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
paddle::framework::Copy(input2, *gpu_place, context, &input2_gpu);
paddle::framework::Copy(input3, *gpu_place, context, &input3_gpu);
float* a = input1_gpu.data<float>();
float* b = input2_gpu.data<float>();
float* c = input3_gpu.mutable_data<float>(*gpu_place);
......@@ -109,7 +109,7 @@ TEST(math_function, gemm_notrans_cublas) {
paddle::operators::math::gemm<paddle::platform::CUDADeviceContext, float>(
context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4);
paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3);
paddle::framework::Copy(input3_gpu, *cpu_place, context, &input3);
// numpy code:
// a = np.arange(6).reshape(2, 3)
......@@ -154,9 +154,9 @@ TEST(math_function, gemm_trans_cublas) {
auto* gpu_place = new paddle::platform::CUDAPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place);
paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu);
paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu);
paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu);
paddle::framework::Copy(input1, *gpu_place, context, &input1_gpu);
paddle::framework::Copy(input2, *gpu_place, context, &input2_gpu);
paddle::framework::Copy(input3, *gpu_place, context, &input3_gpu);
float* a = input1_gpu.data<float>();
float* b = input2_gpu.data<float>();
float* c = input3_gpu.mutable_data<float>(*gpu_place);
......@@ -164,7 +164,7 @@ TEST(math_function, gemm_trans_cublas) {
paddle::operators::math::gemm<paddle::platform::CUDADeviceContext, float>(
context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4);
paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3);
paddle::framework::Copy(input3_gpu, *cpu_place, context, &input3);
context.Wait();
EXPECT_EQ(input3_ptr[0], 0);
......@@ -205,14 +205,14 @@ void GemvTest(int m, int n, bool trans) {
}
paddle::platform::CUDADeviceContext context(*gpu_place);
paddle::framework::CopyFrom(mat_a, *gpu_place, context, &g_mat_a);
paddle::framework::CopyFrom(vec_b, *gpu_place, context, &g_vec_b);
paddle::framework::Copy(mat_a, *gpu_place, context, &g_mat_a);
paddle::framework::Copy(vec_b, *gpu_place, context, &g_vec_b);
paddle::operators::math::gemv<paddle::platform::CUDADeviceContext, T>(
context, trans, static_cast<int>(m), static_cast<int>(n), 1., g_data_a,
g_data_b, 0., g_data_c);
paddle::framework::CopyFrom(g_vec_c, paddle::platform::CPUPlace(), context,
paddle::framework::Copy(g_vec_c, paddle::platform::CPUPlace(), context,
&vec_c);
if (!trans) {
......
......@@ -67,7 +67,7 @@ TEST(selected_rows_functor, gpu_add) {
EXPECT_EQ(out_rows[6], 9);
Tensor out_cpu;
CopyFrom(*out_value, cpu_place, ctx, &out_cpu);
Copy(*out_value, cpu_place, ctx, &out_cpu);
ctx.Wait();
auto* out_cpu_data = out_cpu.data<float>();
......@@ -94,7 +94,7 @@ TEST(selected_rows_functor, gpu_add) {
add_tensor_functor(ctx, *output, *tensor1, tensor2.get());
Tensor tensor2_cpu;
CopyFrom(*tensor2, cpu_place, ctx, &tensor2_cpu);
Copy(*tensor2, cpu_place, ctx, &tensor2_cpu);
ctx.Wait();
auto* tensor2_cpu_data = tensor2_cpu.data<float>();
......@@ -167,7 +167,7 @@ TEST(selected_rows_functor, gpu_add_to) {
EXPECT_EQ(out_rows[6], 9);
Tensor out_cpu;
CopyFrom(*out_value, cpu_place, ctx, &out_cpu);
Copy(*out_value, cpu_place, ctx, &out_cpu);
ctx.Wait();
auto* out_cpu_data = out_cpu.data<float>();
......@@ -191,7 +191,7 @@ TEST(selected_rows_functor, gpu_add_to) {
add_to_tensor_functor(ctx, *output, tensor1.get());
Tensor tensor1_cpu;
CopyFrom(*tensor1, cpu_place, ctx, &tensor1_cpu);
Copy(*tensor1, cpu_place, ctx, &tensor1_cpu);
ctx.Wait();
auto* tensor1_cpu_data = tensor1_cpu.data<float>();
......
......@@ -71,7 +71,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp;
} else {
CopyFrom(input_tmp, *place, *context, &input);
Copy(input_tmp, *place, *context, &input);
}
output.mutable_data<float>({1, filter_size, filter_size, filter_size,
output_depth, output_height, output_width},
......@@ -85,7 +85,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) {
out_cfo_ptr = output.data<float>();
} else {
CopyFrom(output, paddle::platform::CPUPlace(), *context, &output_tmp);
Copy(output, paddle::platform::CPUPlace(), *context, &output_tmp);
out_cfo_ptr = output_tmp.data<float>();
}
......@@ -99,7 +99,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp;
} else {
CopyFrom(input_tmp, *place, *context, &input);
Copy(input_tmp, *place, *context, &input);
}
paddle::operators::math::Col2VolFunctor<DeviceContext, float> col2vol;
......@@ -109,7 +109,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) {
in_ptr = input.data<float>();
} else {
CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp);
Copy(input, paddle::platform::CPUPlace(), *context, &input_tmp);
in_ptr = input_tmp.data<float>();
}
......
......@@ -49,7 +49,7 @@ class MergeLoDTensorOp : public framework::OperatorBase {
cpu_mask->ShareDataWith(mask);
} else if (platform::is_gpu_place(mask.place())) {
#ifdef PADDLE_WITH_CUDA
framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
framework::Copy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
#else
PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option");
#endif
......@@ -104,8 +104,8 @@ class MergeLoDTensorOp : public framework::OperatorBase {
continue;
}
auto slice = out->Slice(out_offset, out_offset + len);
framework::CopyFrom(input->Slice(start_offset, end_offset), place,
dev_ctx, &slice);
framework::Copy(input->Slice(start_offset, end_offset), place, dev_ctx,
&slice);
out_offset += len;
(*in_idx) += 1;
}
......
......@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows;
// copy index to cpu
Tensor index_t_cpu;
CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
Copy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
auto* index = index_t_cpu.data<int32_t>();
auto stream = ctx.cuda_device_context().stream();
platform::CUDAPlace place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
......@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows;
// copy index to cpu
Tensor index_t_cpu;
CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
Copy(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu);
auto* index = index_t_cpu.data<int32_t>();
auto stream = ctx.cuda_device_context().stream();
......
......@@ -211,7 +211,7 @@ class ParallelDoGradOp : public OperatorBase {
auto &tt = sub_scopes[place_idx]->FindVar(s)->Get<LoDTensor>();
VLOG(3) << place_idx;
VLOG(3) << tt;
framework::CopyFrom(tt, places[0], t_buf);
framework::Copy(tt, places[0], t_buf);
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, s_buf}}}, {{"Out", {s}}},
......@@ -220,7 +220,7 @@ class ParallelDoGradOp : public OperatorBase {
}
VLOG(3) << t;
framework::CopyFrom(t, place, scope.FindVar(s)->GetMutable<LoDTensor>());
framework::Copy(t, place, scope.FindVar(s)->GetMutable<LoDTensor>());
}
}
};
......
......@@ -290,7 +290,7 @@ class RecurrentOp : public RecurrentBase {
auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1);
// Explicit copy output since the local RNN scope can be destroyed
// early.
framework::CopyFrom(src_tensor, place, dev_ctx, &dst_out);
framework::Copy(src_tensor, place, dev_ctx, &dst_out);
});
scopes.Next();
......@@ -376,7 +376,7 @@ class RecurrentGradOp : public RecurrentBase {
auto *cur_grad_var = cur_scope.Var(cur_grad);
auto cur_grad_tensor =
cur_grad_var->GetMutable<framework::LoDTensor>();
framework::CopyFrom(ex_tensor, place, dev_ctx, cur_grad_tensor);
framework::Copy(ex_tensor, place, dev_ctx, cur_grad_tensor);
}
}
......@@ -450,7 +450,7 @@ class RecurrentGradOp : public RecurrentBase {
}
auto dst = outside->Slice(seq_offset, seq_offset + 1);
framework::CopyFrom(inside, place, dev_ctx, &dst);
framework::Copy(inside, place, dev_ctx, &dst);
});
VLOG(5) << "Link outside gradient finished ";
......@@ -463,7 +463,7 @@ class RecurrentGradOp : public RecurrentBase {
framework::LoDTensor *outside) {
outside->Resize(inside.dims());
outside->mutable_data(place, inside.type());
framework::CopyFrom(inside, place, dev_ctx, outside);
framework::Copy(inside, place, dev_ctx, outside);
});
VLOG(5) << "Link initialize state gradient finished ";
}
......
......@@ -146,7 +146,7 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase {
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
framework::CopyFrom(x_sliced, out_sliced.place(), dev_ctx, &out_sliced);
framework::Copy(x_sliced, out_sliced.place(), dev_ctx, &out_sliced);
out_offset += len;
return out_offset;
}
......
......@@ -28,7 +28,7 @@ class ReshapeKernel : public framework::OpKernel<T> {
auto* in = ctx.Input<framework::Tensor>("X");
auto out_dims = out->dims();
out->mutable_data<T>(ctx.GetPlace());
framework::CopyFrom(*in, ctx.GetPlace(), ctx.device_context(), out);
framework::Copy(*in, ctx.GetPlace(), ctx.device_context(), out);
out->Resize(out_dims);
}
};
......@@ -42,7 +42,7 @@ class ReshapeGradKernel : public framework::OpKernel<T> {
d_x->mutable_data<T>(ctx.GetPlace());
auto in_dims = d_x->dims();
framework::CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
framework::Copy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x);
d_x->Resize(in_dims);
}
};
......
......@@ -66,12 +66,12 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(),
framework::Copy(*offset, platform::CPUPlace(), ctx.device_context(),
&offset_cpu);
offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(),
framework::Copy(*length, platform::CPUPlace(), ctx.device_context(),
&length_cpu);
length_data = length_cpu.data<int64_t>();
}
......@@ -127,12 +127,12 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
if (platform::is_gpu_place(ctx.GetPlace())) {
offset_cpu.mutable_data<T>(offset->dims(), platform::CPUPlace());
framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(),
framework::Copy(*offset, platform::CPUPlace(), ctx.device_context(),
&offset_cpu);
offset_data = offset_cpu.data<int64_t>();
length_cpu.mutable_data<T>(length->dims(), platform::CPUPlace());
framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(),
framework::Copy(*length, platform::CPUPlace(), ctx.device_context(),
&length_cpu);
length_data = length_cpu.data<int64_t>();
}
......
......@@ -115,7 +115,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
auto &dout_tensor = dout_var->Get<framework::LoDTensor>();
auto height = dout_tensor.dims()[0];
auto slice = dx_tensor.Slice(0, static_cast<int>(height));
framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
framework::Copy(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
if (dx_tensor.dims()[0] > height) {
auto rest_tensor = dx_tensor.Slice(
static_cast<int>(height), static_cast<int>(dx_tensor.dims()[0]));
......
......@@ -53,7 +53,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
cpu_mask->ShareDataWith(mask);
} else if (platform::is_gpu_place(mask.place())) {
#ifdef PADDLE_WITH_CUDA
framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
framework::Copy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get());
#else
PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option");
#endif
......@@ -111,7 +111,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
// out[offset: offset+len] = x[each_range.begin: each_range.end]
auto slice = out->Slice(static_cast<int>(offset),
static_cast<int>(offset + len));
framework::CopyFrom(x.Slice(static_cast<int>(each_range.begin),
framework::Copy(x.Slice(static_cast<int>(each_range.begin),
static_cast<int>(each_range.end)),
x.place(), dev_ctx, &slice);
offset += len;
......
......@@ -107,7 +107,7 @@ class SumKernel : public framework::OpKernel<T> {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::CopyFrom(in_array[i], in_array[i].place(),
framework::Copy(in_array[i], in_array[i].place(),
context.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
......
......@@ -44,7 +44,7 @@ class WriteToArrayOp : public ArrayOp {
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
CopyFrom(x_tensor, place, dev_ctx, out_tensor);
Copy(x_tensor, place, dev_ctx, out_tensor);
out_tensor->set_lod(x_tensor.lod());
} else {
VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
......@@ -135,7 +135,7 @@ class ReadFromArrayOp : public ArrayOp {
platform::DeviceContextPool &pool =
platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place);
framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor);
framework::Copy(x_array[offset], place, dev_ctx, out_tensor);
out_tensor->set_lod(x_array[offset].lod());
} else {
VLOG(10) << "offset " << offset << " >= " << x_array.size();
......