Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
ce6dad3b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ce6dad3b
编写于
1月 09, 2018
作者:
Y
Yu Yang
提交者:
Yang Yang(Tony)
1月 09, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Rename CopyFrom to Copy for tensors (#7292)
* Rename Tensor::CopyFrom to Tensor::Copy * Fix CI * Fix compile
上级
32b09b51
变更
36
隐藏空白更改
内联
并排
Showing
36 changed file
with
121 addition
and
122 deletion
+121
-122
paddle/framework/data_transform.h
paddle/framework/data_transform.h
+1
-1
paddle/framework/device_data_transform.cc
paddle/framework/device_data_transform.cc
+1
-1
paddle/framework/device_data_transform_test.cu
paddle/framework/device_data_transform_test.cu
+2
-2
paddle/framework/lod_tensor.cc
paddle/framework/lod_tensor.cc
+1
-1
paddle/framework/lod_tensor.h
paddle/framework/lod_tensor.h
+2
-2
paddle/framework/tensor_util.cc
paddle/framework/tensor_util.cc
+1
-1
paddle/framework/tensor_util.h
paddle/framework/tensor_util.h
+7
-7
paddle/framework/tensor_util_test.cc
paddle/framework/tensor_util_test.cc
+10
-10
paddle/operators/array_operator.h
paddle/operators/array_operator.h
+1
-1
paddle/operators/array_to_lod_tensor_op.cc
paddle/operators/array_to_lod_tensor_op.cc
+2
-2
paddle/operators/assign_op.cc
paddle/operators/assign_op.cc
+2
-2
paddle/operators/detection_output_op.h
paddle/operators/detection_output_op.h
+8
-8
paddle/operators/expand_op.h
paddle/operators/expand_op.h
+1
-2
paddle/operators/feed_op.cc
paddle/operators/feed_op.cc
+1
-1
paddle/operators/fetch_op.cc
paddle/operators/fetch_op.cc
+1
-1
paddle/operators/fill_op.cc
paddle/operators/fill_op.cc
+1
-1
paddle/operators/linear_chain_crf_op.h
paddle/operators/linear_chain_crf_op.h
+7
-7
paddle/operators/load_op.cc
paddle/operators/load_op.cc
+1
-1
paddle/operators/lod_reset_op.h
paddle/operators/lod_reset_op.h
+2
-2
paddle/operators/lod_tensor_to_array_op.cc
paddle/operators/lod_tensor_to_array_op.cc
+3
-3
paddle/operators/math/context_project.h
paddle/operators/math/context_project.h
+2
-2
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+7
-7
paddle/operators/math/math_function_test.cu
paddle/operators/math/math_function_test.cu
+18
-18
paddle/operators/math/selected_rows_functor_test.cu
paddle/operators/math/selected_rows_functor_test.cu
+4
-4
paddle/operators/math/vol2col_test.cc
paddle/operators/math/vol2col_test.cc
+4
-4
paddle/operators/merge_lod_tensor_op.cc
paddle/operators/merge_lod_tensor_op.cc
+3
-3
paddle/operators/multiplex_op.cu
paddle/operators/multiplex_op.cu
+2
-2
paddle/operators/parallel_do_op.cc
paddle/operators/parallel_do_op.cc
+2
-2
paddle/operators/recurrent_op.cc
paddle/operators/recurrent_op.cc
+4
-4
paddle/operators/reorder_lod_tensor_by_rank_op.cc
paddle/operators/reorder_lod_tensor_by_rank_op.cc
+1
-1
paddle/operators/reshape_op.h
paddle/operators/reshape_op.h
+2
-2
paddle/operators/sequence_slice_op.h
paddle/operators/sequence_slice_op.h
+8
-8
paddle/operators/shrink_rnn_memory_op.cc
paddle/operators/shrink_rnn_memory_op.cc
+1
-1
paddle/operators/split_lod_tensor_op.cc
paddle/operators/split_lod_tensor_op.cc
+4
-4
paddle/operators/sum_op.h
paddle/operators/sum_op.h
+2
-2
paddle/operators/tensor_array_read_write_op.cc
paddle/operators/tensor_array_read_write_op.cc
+2
-2
未找到文件。
paddle/framework/data_transform.h
浏览文件 @
ce6dad3b
...
...
@@ -88,7 +88,7 @@ struct CastDataType {
trans
(
*
context
,
in_begin
,
in_end
,
out_begin
,
CastDataTypeFunctor
<
InType
,
OutType
>
());
}
else
{
// TODO(dzhwinter): enhance Copy
From
CPU<->GPU with different data type?
// TODO(dzhwinter): enhance Copy CPU<->GPU with different data type?
PADDLE_THROW
(
"Unsupport CPU <-> GPU!"
);
}
}
...
...
paddle/framework/device_data_transform.cc
浏览文件 @
ce6dad3b
...
...
@@ -37,7 +37,7 @@ Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
Tensor
*
out
=
new
Tensor
();
auto
*
dev_ctx
=
GetDeviceContext
(
in
.
place
(),
dst_place
);
dev_ctx
->
Wait
();
Copy
From
(
in
,
dst_place
,
*
dev_ctx
,
out
);
Copy
(
in
,
dst_place
,
*
dev_ctx
,
out
);
dev_ctx
->
Wait
();
return
out
;
}
...
...
paddle/framework/device_data_transform_test.cu
浏览文件 @
ce6dad3b
...
...
@@ -157,8 +157,8 @@ TEST(Operator, CPUtoGPU) {
auto
dev_ctx
=
pool
.
Get
(
cuda_place
);
paddle
::
framework
::
Tensor
output_tensor
;
Copy
From
(
output2
->
Get
<
LoDTensor
>
(),
paddle
::
platform
::
CPUPlace
(),
*
dev_ctx
,
&
output_tensor
);
Copy
(
output2
->
Get
<
LoDTensor
>
(),
paddle
::
platform
::
CPUPlace
(),
*
dev_ctx
,
&
output_tensor
);
dev_ctx
->
Wait
();
float
*
output2_ptr
=
output_tensor
.
data
<
float
>
();
...
...
paddle/framework/lod_tensor.cc
浏览文件 @
ce6dad3b
...
...
@@ -232,7 +232,7 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
auto
dst_ptr
=
dst
.
mutable_data
(
dst_place
,
src
.
type
());
// TODO(tonyyang-svail):
// change the following to framework::Copy
From
// change the following to framework::Copy
auto
src_place
=
src
.
place
();
auto
src_ptr
=
src
.
data
<
void
>
();
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
...
...
paddle/framework/lod_tensor.h
浏览文件 @
ce6dad3b
...
...
@@ -147,8 +147,8 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level,
for
(
size_t
ins
=
0
;
ins
<
num_instances
;
ins
++
)
{
for
(
size_t
elem
=
lod_level
[
ins
];
elem
<
lod_level
[
ins
+
1
];
elem
++
)
{
auto
slice
=
tensor
.
Slice
(
elem
,
elem
+
1
);
Copy
From
(
source
.
Slice
(
ins
,
ins
+
1
),
platform
::
CPUPlace
(),
platform
::
CPUDeviceContext
(),
&
slice
);
Copy
(
source
.
Slice
(
ins
,
ins
+
1
),
platform
::
CPUPlace
(),
platform
::
CPUDeviceContext
(),
&
slice
);
}
}
return
tensor
;
...
...
paddle/framework/tensor_util.cc
浏览文件 @
ce6dad3b
...
...
@@ -69,7 +69,7 @@ struct AnyVisitor : public boost::static_visitor<bool> {
tmp
.
mutable_data
<
bool
>
(
cpu
);
auto
gpuctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
gpu
);
gpuctx
->
Wait
();
Copy
From
(
out
,
cpu
,
*
gpuctx
,
&
tmp
);
Copy
(
out
,
cpu
,
*
gpuctx
,
&
tmp
);
gpuctx
->
Wait
();
return
GetResult
(
tmp
,
cpu
);
}
...
...
paddle/framework/tensor_util.h
浏览文件 @
ce6dad3b
...
...
@@ -29,11 +29,11 @@ namespace framework {
* @param[in] dst_place The dst place.
* @param[in] ctx The device context contains device resources.
*
* @note Copy
From
supports CPU <-> GPU, GPU <-> GPU.
* @note Copy supports CPU <-> GPU, GPU <-> GPU.
*/
inline
void
Copy
From
(
const
Tensor
&
src
,
const
platform
::
Place
&
dst_place
,
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
dst
)
{
inline
void
Copy
(
const
Tensor
&
src
,
const
platform
::
Place
&
dst_place
,
const
platform
::
DeviceContext
&
ctx
,
Tensor
*
dst
)
{
src
.
check_memory_size
();
dst
->
Resize
(
src
.
dims
());
...
...
@@ -88,10 +88,10 @@ inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
}
/**
* @brief Copy
From support
CPU <-> CPU
* @brief Copy
supports
CPU <-> CPU
*/
inline
void
Copy
From
(
const
Tensor
&
src
,
const
platform
::
Place
&
dst_place
,
Tensor
*
dst
)
{
inline
void
Copy
(
const
Tensor
&
src
,
const
platform
::
Place
&
dst_place
,
Tensor
*
dst
)
{
src
.
check_memory_size
();
dst
->
Resize
(
src
.
dims
());
dst
->
set_layout
(
src
.
layout
());
...
...
@@ -316,7 +316,7 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor,
DeserializedDataFunctor
(
&
buf
,
&
cpu_tensor
,
ctx
.
GetPlace
()));
is
.
read
(
static_cast
<
char
*>
(
buf
),
cpu_tensor
.
memory_size
());
auto
cpu_place
=
new
platform
::
CPUPlace
();
framework
::
Copy
From
(
cpu_tensor
,
*
cpu_place
,
dev_ctx
,
tensor
);
framework
::
Copy
(
cpu_tensor
,
*
cpu_place
,
dev_ctx
,
tensor
);
delete
cpu_place
;
#else
PADDLE_THROW
(
"Unexpected branch"
);
...
...
paddle/framework/tensor_util_test.cc
浏览文件 @
ce6dad3b
...
...
@@ -19,7 +19,7 @@
namespace
paddle
{
namespace
framework
{
TEST
(
Copy
From
,
Tensor
)
{
TEST
(
Copy
,
Tensor
)
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
platform
::
CPUDeviceContext
cpu_ctx
((
platform
::
CPUPlace
()));
...
...
@@ -32,7 +32,7 @@ TEST(CopyFrom, Tensor) {
src_tensor
.
set_layout
(
DataLayout
::
kAnyLayout
);
auto
cpu_place
=
new
platform
::
CPUPlace
();
Copy
From
(
src_tensor
,
*
cpu_place
,
&
dst_tensor
);
Copy
(
src_tensor
,
*
cpu_place
,
&
dst_tensor
);
const
int
*
dst_ptr
=
dst_tensor
.
data
<
int
>
();
ASSERT_NE
(
src_ptr
,
dst_ptr
);
...
...
@@ -43,7 +43,7 @@ TEST(CopyFrom, Tensor) {
EXPECT_TRUE
(
dst_tensor
.
layout
()
==
src_tensor
.
layout
());
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
2
);
Copy
From
(
slice_tensor
,
*
cpu_place
,
&
dst_tensor
);
Copy
(
slice_tensor
,
*
cpu_place
,
&
dst_tensor
);
const
int
*
slice_ptr
=
slice_tensor
.
data
<
int
>
();
dst_ptr
=
dst_tensor
.
data
<
int
>
();
ASSERT_NE
(
dst_ptr
,
slice_ptr
);
...
...
@@ -67,11 +67,11 @@ TEST(CopyFrom, Tensor) {
// CPU Tensor to GPU Tensor
auto
gpu_place
=
new
platform
::
CUDAPlace
(
0
);
platform
::
CUDADeviceContext
gpu_ctx
(
*
gpu_place
);
Copy
From
(
src_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
Copy
(
src_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
// GPU Tensor to CPU Tensor
auto
cpu_place
=
new
platform
::
CPUPlace
();
Copy
From
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
Copy
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
// Sync before Compare Tensors
gpu_ctx
.
Wait
();
...
...
@@ -84,10 +84,10 @@ TEST(CopyFrom, Tensor) {
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
2
);
// CPU Slice Tensor to GPU Tensor
Copy
From
(
slice_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
Copy
(
slice_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
// GPU Tensor to CPU Tensor
Copy
From
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
Copy
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
// Sync before Compare Slice Tensors
gpu_ctx
.
Wait
();
...
...
@@ -155,7 +155,7 @@ TEST(CopyFromVector, Tensor) {
CUDADeviceContext
gpu_ctx
(
*
gpu_place
);
CopyFromVector
<
int
>
(
src_vec
,
gpu_ctx
,
&
gpu_tensor
);
// Copy from GPU to CPU tensor for comparison
Copy
From
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
Copy
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
// Sync before Compare Tensors
gpu_ctx
.
Wait
();
...
...
@@ -175,7 +175,7 @@ TEST(CopyFromVector, Tensor) {
CopyFromVector
<
int
>
(
src_vec
,
cpu_ctx
,
&
cpu_tensor
);
gpu_tensor
.
Resize
(
make_ddim
({
2
,
2
}));
CopyFromVector
<
int
>
(
src_vec
,
gpu_ctx
,
&
gpu_tensor
);
Copy
From
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
Copy
(
gpu_tensor
,
*
cpu_place
,
gpu_ctx
,
&
dst_tensor
);
// Sync before Compare Tensors
gpu_ctx
.
Wait
();
...
...
@@ -287,7 +287,7 @@ TEST(Tensor, SerializeAndDeserialize) {
auto
gpu_place
=
new
platform
::
CUDAPlace
();
platform
::
CUDADeviceContext
gpu_ctx
(
*
gpu_place
);
Copy
From
(
src_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
Copy
(
src_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
std
::
ostringstream
oss
;
SerializeToStream
(
oss
,
gpu_tensor
,
gpu_ctx
);
...
...
paddle/operators/array_operator.h
浏览文件 @
ce6dad3b
...
...
@@ -42,7 +42,7 @@ class ArrayOp : public framework::OperatorBase {
if
(
platform
::
is_gpu_place
(
i_tensor
.
place
()))
{
// FIXME: Avoid copy from GPU to CPU
framework
::
Tensor
t
;
framework
::
Copy
From
(
i_tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
t
);
framework
::
Copy
(
i_tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
t
);
dev_ctx
.
Wait
();
offset
=
static_cast
<
size_t
>
(
*
t
.
data
<
int64_t
>
());
}
else
{
...
...
paddle/operators/array_to_lod_tensor_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -110,8 +110,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
framework
::
Copy
From
(
x
[
x_idx
].
Slice
(
start_offset
,
end_offset
),
place
,
dev_ctx
,
&
slice
);
framework
::
Copy
(
x
[
x_idx
].
Slice
(
start_offset
,
end_offset
),
place
,
dev_ctx
,
&
slice
);
out_offset
+=
len
;
}
}
...
...
paddle/operators/assign_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -45,7 +45,7 @@ class AssignFunctor {
out_rows
.
set_height
(
rows
.
height
());
auto
&
t
=
rows
.
value
();
auto
*
m
=
out_rows
.
mutable_value
();
framework
::
Copy
From
(
t
,
t
.
place
(),
dev_ctx_
,
m
);
framework
::
Copy
(
t
,
t
.
place
(),
dev_ctx_
,
m
);
}
template
<
typename
T
>
...
...
@@ -57,7 +57,7 @@ class AssignFunctor {
void
copy_tensor
(
const
framework
::
LoDTensor
&
lod_tensor
,
framework
::
LoDTensor
*
out
)
const
{
auto
&
out_tensor
=
*
out
;
Copy
From
(
lod_tensor
,
lod_tensor
.
place
(),
dev_ctx_
,
&
out_tensor
);
Copy
(
lod_tensor
,
lod_tensor
.
place
(),
dev_ctx_
,
&
out_tensor
);
out_tensor
.
set_lod
(
lod_tensor
.
lod
());
}
...
...
paddle/operators/detection_output_op.h
浏览文件 @
ce6dad3b
...
...
@@ -98,16 +98,16 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
T
*
conf_data
=
conf_tensor
.
data
<
T
>
();
if
(
platform
::
is_gpu_place
(
context
.
GetPlace
()))
{
loc_cpu
.
mutable_data
<
T
>
(
loc_tensor
.
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
loc_tensor
,
platform
::
CPUPlace
(),
context
.
device_context
(),
&
loc_cpu
);
framework
::
Copy
(
loc_tensor
,
platform
::
CPUPlace
(),
context
.
device_context
(),
&
loc_cpu
);
loc_data
=
loc_cpu
.
data
<
T
>
();
conf_cpu
.
mutable_data
<
T
>
(
conf_tensor
.
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
conf_tensor
,
platform
::
CPUPlace
(),
context
.
device_context
(),
&
conf_cpu
);
framework
::
Copy
(
conf_tensor
,
platform
::
CPUPlace
(),
context
.
device_context
(),
&
conf_cpu
);
conf_data
=
conf_cpu
.
data
<
T
>
();
priorbox_cpu
.
mutable_data
<
T
>
(
in_priorbox
->
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
*
in_priorbox
,
platform
::
CPUPlace
(),
context
.
device_context
(),
&
priorbox_cpu
);
framework
::
Copy
(
*
in_priorbox
,
platform
::
CPUPlace
(),
context
.
device_context
(),
&
priorbox_cpu
);
priorbox_data
=
priorbox_cpu
.
data
<
T
>
();
}
// get decode bboxes
...
...
@@ -158,8 +158,8 @@ class DetectionOutputKernel : public framework::OpKernel<T> {
batch_size
,
all_indices
,
all_decoded_bboxes
,
out_data
);
if
(
platform
::
is_gpu_place
(
context
.
GetPlace
()))
{
framework
::
Copy
From
(
out_cpu
,
platform
::
CUDAPlace
(),
context
.
device_context
(),
out
);
framework
::
Copy
(
out_cpu
,
platform
::
CUDAPlace
(),
context
.
device_context
(),
out
);
}
}
};
...
...
paddle/operators/expand_op.h
浏览文件 @
ce6dad3b
...
...
@@ -126,8 +126,7 @@ class ExpandGradKernel : public framework::OpKernel<T> {
auto
*
in0
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
out0
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
out0
->
mutable_data
<
T
>
(
context
.
GetPlace
());
framework
::
CopyFrom
(
*
in0
,
context
.
GetPlace
(),
context
.
device_context
(),
out0
);
framework
::
Copy
(
*
in0
,
context
.
GetPlace
(),
context
.
device_context
(),
out0
);
}
else
{
switch
(
dims
)
{
REP_EXPAND_GRAD_TEMPLATE
(
72
)
...
...
paddle/operators/feed_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -52,7 +52,7 @@ class FeedOp : public framework::OperatorBase {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
framework
::
Copy
From
(
feed_item
,
place
,
dev_ctx
,
out_item
);
framework
::
Copy
(
feed_item
,
place
,
dev_ctx
,
out_item
);
out_item
->
set_lod
(
feed_item
.
lod
());
}
};
...
...
paddle/operators/fetch_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -55,7 +55,7 @@ class FetchOp : public framework::OperatorBase {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
src_item
.
place
());
Copy
From
(
src_item
,
platform
::
CPUPlace
(),
dev_ctx
,
&
dst_item
);
Copy
(
src_item
,
platform
::
CPUPlace
(),
dev_ctx
,
&
dst_item
);
dev_ctx
.
Wait
();
dst_item
.
set_lod
(
src_item
.
lod
());
...
...
paddle/operators/fill_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -72,7 +72,7 @@ class FillOp : public framework::OperatorBase {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
framework
::
Copy
From
(
tensor
,
place
,
dev_ctx
,
&
out
);
framework
::
Copy
(
tensor
,
place
,
dev_ctx
,
&
out
);
}
}
};
...
...
paddle/operators/linear_chain_crf_op.h
浏览文件 @
ce6dad3b
...
...
@@ -196,7 +196,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto
copyLoDTensor
=
[](
const
platform
::
DeviceContext
&
ctx
,
const
LoDTensor
&
src
,
LoDTensor
*
dst
)
{
dst
->
mutable_data
<
T
>
(
src
.
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
src
,
platform
::
CPUPlace
(),
ctx
,
dst
);
framework
::
Copy
(
src
,
platform
::
CPUPlace
(),
ctx
,
dst
);
};
copyLoDTensor
(
ctx
,
emission_weights_src
,
emission_weights_dst
);
...
...
@@ -204,8 +204,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
transition_weights_dst
->
mutable_data
<
T
>
(
transition_weights_src
.
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
transition_weights_src
,
platform
::
CPUPlace
(),
ctx
,
transition_weights_dst
);
framework
::
Copy
(
transition_weights_src
,
platform
::
CPUPlace
(),
ctx
,
transition_weights_dst
);
}
void
CopyOutputsToGpuMemory
(
const
platform
::
DeviceContext
&
ctx
,
...
...
@@ -220,7 +220,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
auto
copyTensor
=
[](
const
platform
::
DeviceContext
&
ctx
,
const
Tensor
&
src
,
Tensor
*
dst
)
{
dst
->
mutable_data
<
T
>
(
platform
::
CUDAPlace
());
framework
::
Copy
From
(
src
,
platform
::
CUDAPlace
(),
ctx
,
dst
);
framework
::
Copy
(
src
,
platform
::
CUDAPlace
(),
ctx
,
dst
);
};
copyTensor
(
ctx
,
emission_exps_src
,
emission_exps_dst
);
copyTensor
(
ctx
,
transition_exps_src
,
transition_exps_dst
);
...
...
@@ -410,12 +410,12 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
// Copy the inputs from GPU memory to CPU memory when this operators runs on
// GPU device.
label_dst
->
mutable_data
<
T
>
(
label_src
.
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
label_src
,
platform
::
CPUPlace
(),
ctx
,
label_dst
);
framework
::
Copy
(
label_src
,
platform
::
CPUPlace
(),
ctx
,
label_dst
);
auto
copyTensor
=
[](
const
platform
::
DeviceContext
&
ctx
,
const
Tensor
&
src
,
Tensor
*
dst
)
{
dst
->
mutable_data
<
T
>
(
src
.
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
src
,
platform
::
CPUPlace
(),
ctx
,
dst
);
framework
::
Copy
(
src
,
platform
::
CPUPlace
(),
ctx
,
dst
);
};
copyTensor
(
ctx
,
emission_exps_src
,
emission_exps_dst
);
copyTensor
(
ctx
,
transition_exps_src
,
transition_exps_dst
);
...
...
@@ -434,7 +434,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
Tensor
*
dst
)
{
if
(
src
&&
dst
)
{
dst
->
mutable_data
<
T
>
(
platform
::
CUDAPlace
());
framework
::
Copy
From
(
*
src
,
platform
::
CUDAPlace
(),
ctx
,
dst
);
framework
::
Copy
(
*
src
,
platform
::
CUDAPlace
(),
ctx
,
dst
);
}
};
copyTensor
(
ctx
,
emission_grad_src
,
emission_grad_dst
);
...
...
paddle/operators/load_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -53,7 +53,7 @@ class LoadOp : public framework::OperatorBase {
out_var
->
Clear
();
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
tensor
->
set_lod
(
cpu_tensor
.
lod
());
Copy
From
(
cpu_tensor
,
place
,
dev_ctx
,
tensor
);
Copy
(
cpu_tensor
,
place
,
dev_ctx
,
tensor
);
}
}
};
...
...
paddle/operators/lod_reset_op.h
浏览文件 @
ce6dad3b
...
...
@@ -33,8 +33,8 @@ class LoDResetKernel : public framework::OpKernel<T> {
auto
*
lod
=
lod_t
->
data
<
int
>
();
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
framework
::
Tensor
lod_cpu
;
framework
::
Copy
From
(
*
lod_t
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
lod_cpu
);
framework
::
Copy
(
*
lod_t
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
lod_cpu
);
lod
=
lod_cpu
.
data
<
int
>
();
}
level0
=
std
::
vector
<
int
>
(
lod
,
lod
+
lod_t
->
numel
());
...
...
paddle/operators/lod_tensor_to_array_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -92,9 +92,9 @@ class LoDTensorToArrayOp : public framework::OperatorBase {
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
framework
::
Copy
From
(
x
.
Slice
(
static_cast
<
int
>
(
each_range
.
begin
),
static_cast
<
int
>
(
each_range
.
end
)),
x
.
place
(),
dev_ctx
,
&
slice
);
framework
::
Copy
(
x
.
Slice
(
static_cast
<
int
>
(
each_range
.
begin
),
static_cast
<
int
>
(
each_range
.
end
)),
x
.
place
(),
dev_ctx
,
&
slice
);
offset
+=
len
;
}
}
...
...
paddle/operators/math/context_project.h
浏览文件 @
ce6dad3b
...
...
@@ -149,7 +149,7 @@ class ContextProjectFunctor {
Tensor
out_t_sub
=
out_t
.
Slice
(
k
*
context_length
,
k
*
context_length
+
padding_size
);
Tensor
w_sub
=
padding_data
.
Slice
(
k
,
k
+
padding_size
);
framework
::
Copy
From
(
w_sub
,
context
.
GetPlace
(),
context
,
&
out_t_sub
);
framework
::
Copy
(
w_sub
,
context
.
GetPlace
(),
context
,
&
out_t_sub
);
}
}
if
(
down_pad
>
0
)
{
// add down pad
...
...
@@ -179,7 +179,7 @@ class ContextProjectFunctor {
(
down_pad_begin_row
+
t
)
*
context_length
);
Tensor
w_sub
=
padding_data
.
Slice
(
up_pad
+
padding_idx
,
up_pad
+
padding_idx
+
padding_size
);
framework
::
Copy
From
(
w_sub
,
context
.
GetPlace
(),
context
,
&
out_t_sub
);
framework
::
Copy
(
w_sub
,
context
.
GetPlace
(),
context
,
&
out_t_sub
);
}
}
out_t
.
Resize
({
sequence_height
,
context_length
*
sequence_width
});
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
ce6dad3b
...
...
@@ -63,7 +63,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
Copy
From
(
input_tmp
,
*
place
,
*
context
,
&
input
);
Copy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
}
output_cfo
.
mutable_data
<
float
>
(
{
1
,
filter_size
,
filter_size
,
output_height
,
output_width
},
*
place
);
...
...
@@ -88,7 +88,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output_cfo
.
data
<
float
>
();
}
else
{
Copy
From
(
output_cfo
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
Copy
(
output_cfo
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
...
@@ -99,7 +99,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_ocf_ptr
=
output_ocf
.
data
<
float
>
();
}
else
{
Copy
From
(
output_ocf
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
Copy
(
output_ocf
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
out_ocf_ptr
=
output_tmp
.
data
<
float
>
();
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
...
@@ -119,7 +119,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
Copy
From
(
input_tmp
,
*
place
,
*
context
,
&
input
);
Copy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
}
col2im
(
*
context
,
output_cfo
,
dilation
,
stride
,
padding
,
&
input
);
...
...
@@ -128,7 +128,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
}
else
{
Copy
From
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
Copy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
in_ptr
=
input_tmp
.
data
<
float
>
();
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
...
@@ -140,7 +140,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
Copy
From
(
input_tmp
,
*
place
,
*
context
,
&
input
);
Copy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
}
col2im_ocf
(
*
context
,
output_ocf
,
dilation
,
stride
,
padding
,
&
input
);
...
...
@@ -148,7 +148,7 @@ void testIm2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
}
else
{
Copy
From
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
Copy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
in_ptr
=
input_tmp
.
data
<
float
>
();
}
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
...
...
paddle/operators/math/math_function_test.cu
浏览文件 @
ce6dad3b
...
...
@@ -16,15 +16,15 @@ TEST(math_function, notrans_mul_trans) {
auto
*
gpu_place
=
new
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDADeviceContext
context
(
*
gpu_place
);
paddle
::
framework
::
Copy
From
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
From
(
input1
,
*
gpu_place
,
context
,
&
input2_gpu
);
paddle
::
framework
::
Copy
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
(
input1
,
*
gpu_place
,
context
,
&
input2_gpu
);
out_gpu
.
mutable_data
<
float
>
({
2
,
2
},
*
gpu_place
);
paddle
::
operators
::
math
::
matmul
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
input1_gpu
,
false
,
input2_gpu
,
true
,
1
,
&
out_gpu
,
0
);
paddle
::
framework
::
Copy
From
(
out_gpu
,
*
cpu_place
,
context
,
&
out
);
paddle
::
framework
::
Copy
(
out_gpu
,
*
cpu_place
,
context
,
&
out
);
float
*
out_ptr
=
out
.
data
<
float
>
();
context
.
Wait
();
...
...
@@ -50,15 +50,15 @@ TEST(math_function, trans_mul_notrans) {
auto
*
gpu_place
=
new
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDADeviceContext
context
(
*
gpu_place
);
paddle
::
framework
::
Copy
From
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
From
(
input1
,
*
gpu_place
,
context
,
&
input2_gpu
);
paddle
::
framework
::
Copy
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
(
input1
,
*
gpu_place
,
context
,
&
input2_gpu
);
out_gpu
.
mutable_data
<
float
>
({
3
,
3
},
*
gpu_place
);
paddle
::
operators
::
math
::
matmul
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
input1_gpu
,
true
,
input2_gpu
,
false
,
1
,
&
out_gpu
,
0
);
paddle
::
framework
::
Copy
From
(
out_gpu
,
*
cpu_place
,
context
,
&
out
);
paddle
::
framework
::
Copy
(
out_gpu
,
*
cpu_place
,
context
,
&
out
);
float
*
out_ptr
=
out
.
data
<
float
>
();
context
.
Wait
();
...
...
@@ -99,9 +99,9 @@ TEST(math_function, gemm_notrans_cublas) {
auto
*
gpu_place
=
new
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDADeviceContext
context
(
*
gpu_place
);
paddle
::
framework
::
Copy
From
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
From
(
input2
,
*
gpu_place
,
context
,
&
input2_gpu
);
paddle
::
framework
::
Copy
From
(
input3
,
*
gpu_place
,
context
,
&
input3_gpu
);
paddle
::
framework
::
Copy
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
(
input2
,
*
gpu_place
,
context
,
&
input2_gpu
);
paddle
::
framework
::
Copy
(
input3
,
*
gpu_place
,
context
,
&
input3_gpu
);
float
*
a
=
input1_gpu
.
data
<
float
>
();
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
*
gpu_place
);
...
...
@@ -109,7 +109,7 @@ TEST(math_function, gemm_notrans_cublas) {
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
false
,
false
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
1
,
4
,
1
,
c
+
1
,
4
);
paddle
::
framework
::
Copy
From
(
input3_gpu
,
*
cpu_place
,
context
,
&
input3
);
paddle
::
framework
::
Copy
(
input3_gpu
,
*
cpu_place
,
context
,
&
input3
);
// numpy code:
// a = np.arange(6).reshape(2, 3)
...
...
@@ -154,9 +154,9 @@ TEST(math_function, gemm_trans_cublas) {
auto
*
gpu_place
=
new
paddle
::
platform
::
CUDAPlace
(
0
);
paddle
::
platform
::
CUDADeviceContext
context
(
*
gpu_place
);
paddle
::
framework
::
Copy
From
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
From
(
input2
,
*
gpu_place
,
context
,
&
input2_gpu
);
paddle
::
framework
::
Copy
From
(
input3
,
*
gpu_place
,
context
,
&
input3_gpu
);
paddle
::
framework
::
Copy
(
input1
,
*
gpu_place
,
context
,
&
input1_gpu
);
paddle
::
framework
::
Copy
(
input2
,
*
gpu_place
,
context
,
&
input2_gpu
);
paddle
::
framework
::
Copy
(
input3
,
*
gpu_place
,
context
,
&
input3_gpu
);
float
*
a
=
input1_gpu
.
data
<
float
>
();
float
*
b
=
input2_gpu
.
data
<
float
>
();
float
*
c
=
input3_gpu
.
mutable_data
<
float
>
(
*
gpu_place
);
...
...
@@ -164,7 +164,7 @@ TEST(math_function, gemm_trans_cublas) {
paddle
::
operators
::
math
::
gemm
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
(
context
,
false
,
true
,
m
,
n
,
k
,
1
,
a
,
3
,
b
+
3
,
3
,
1
,
c
+
1
,
4
);
paddle
::
framework
::
Copy
From
(
input3_gpu
,
*
cpu_place
,
context
,
&
input3
);
paddle
::
framework
::
Copy
(
input3_gpu
,
*
cpu_place
,
context
,
&
input3
);
context
.
Wait
();
EXPECT_EQ
(
input3_ptr
[
0
],
0
);
...
...
@@ -205,15 +205,15 @@ void GemvTest(int m, int n, bool trans) {
}
paddle
::
platform
::
CUDADeviceContext
context
(
*
gpu_place
);
paddle
::
framework
::
Copy
From
(
mat_a
,
*
gpu_place
,
context
,
&
g_mat_a
);
paddle
::
framework
::
Copy
From
(
vec_b
,
*
gpu_place
,
context
,
&
g_vec_b
);
paddle
::
framework
::
Copy
(
mat_a
,
*
gpu_place
,
context
,
&
g_mat_a
);
paddle
::
framework
::
Copy
(
vec_b
,
*
gpu_place
,
context
,
&
g_vec_b
);
paddle
::
operators
::
math
::
gemv
<
paddle
::
platform
::
CUDADeviceContext
,
T
>
(
context
,
trans
,
static_cast
<
int
>
(
m
),
static_cast
<
int
>
(
n
),
1.
,
g_data_a
,
g_data_b
,
0.
,
g_data_c
);
paddle
::
framework
::
Copy
From
(
g_vec_c
,
paddle
::
platform
::
CPUPlace
(),
context
,
&
vec_c
);
paddle
::
framework
::
Copy
(
g_vec_c
,
paddle
::
platform
::
CPUPlace
(),
context
,
&
vec_c
);
if
(
!
trans
)
{
for
(
int
i
=
0
;
i
<
m
;
++
i
)
{
...
...
paddle/operators/math/selected_rows_functor_test.cu
浏览文件 @
ce6dad3b
...
...
@@ -67,7 +67,7 @@ TEST(selected_rows_functor, gpu_add) {
EXPECT_EQ
(
out_rows
[
6
],
9
);
Tensor
out_cpu
;
Copy
From
(
*
out_value
,
cpu_place
,
ctx
,
&
out_cpu
);
Copy
(
*
out_value
,
cpu_place
,
ctx
,
&
out_cpu
);
ctx
.
Wait
();
auto
*
out_cpu_data
=
out_cpu
.
data
<
float
>
();
...
...
@@ -94,7 +94,7 @@ TEST(selected_rows_functor, gpu_add) {
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
Tensor
tensor2_cpu
;
Copy
From
(
*
tensor2
,
cpu_place
,
ctx
,
&
tensor2_cpu
);
Copy
(
*
tensor2
,
cpu_place
,
ctx
,
&
tensor2_cpu
);
ctx
.
Wait
();
auto
*
tensor2_cpu_data
=
tensor2_cpu
.
data
<
float
>
();
...
...
@@ -167,7 +167,7 @@ TEST(selected_rows_functor, gpu_add_to) {
EXPECT_EQ
(
out_rows
[
6
],
9
);
Tensor
out_cpu
;
Copy
From
(
*
out_value
,
cpu_place
,
ctx
,
&
out_cpu
);
Copy
(
*
out_value
,
cpu_place
,
ctx
,
&
out_cpu
);
ctx
.
Wait
();
auto
*
out_cpu_data
=
out_cpu
.
data
<
float
>
();
...
...
@@ -191,7 +191,7 @@ TEST(selected_rows_functor, gpu_add_to) {
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
Tensor
tensor1_cpu
;
Copy
From
(
*
tensor1
,
cpu_place
,
ctx
,
&
tensor1_cpu
);
Copy
(
*
tensor1
,
cpu_place
,
ctx
,
&
tensor1_cpu
);
ctx
.
Wait
();
auto
*
tensor1_cpu_data
=
tensor1_cpu
.
data
<
float
>
();
...
...
paddle/operators/math/vol2col_test.cc
浏览文件 @
ce6dad3b
...
...
@@ -71,7 +71,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
Copy
From
(
input_tmp
,
*
place
,
*
context
,
&
input
);
Copy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
}
output
.
mutable_data
<
float
>
({
1
,
filter_size
,
filter_size
,
filter_size
,
output_depth
,
output_height
,
output_width
},
...
...
@@ -85,7 +85,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output
.
data
<
float
>
();
}
else
{
Copy
From
(
output
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
Copy
(
output
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
output_tmp
);
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
...
...
@@ -99,7 +99,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
Copy
From
(
input_tmp
,
*
place
,
*
context
,
&
input
);
Copy
(
input_tmp
,
*
place
,
*
context
,
&
input
);
}
paddle
::
operators
::
math
::
Col2VolFunctor
<
DeviceContext
,
float
>
col2vol
;
...
...
@@ -109,7 +109,7 @@ void testVol2col() {
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_ptr
=
input
.
data
<
float
>
();
}
else
{
Copy
From
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
Copy
(
input
,
paddle
::
platform
::
CPUPlace
(),
*
context
,
&
input_tmp
);
in_ptr
=
input_tmp
.
data
<
float
>
();
}
...
...
paddle/operators/merge_lod_tensor_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -49,7 +49,7 @@ class MergeLoDTensorOp : public framework::OperatorBase {
cpu_mask
->
ShareDataWith
(
mask
);
}
else
if
(
platform
::
is_gpu_place
(
mask
.
place
()))
{
#ifdef PADDLE_WITH_CUDA
framework
::
Copy
From
(
mask
,
platform
::
CPUPlace
(),
dev_ctx
,
cpu_mask
.
get
());
framework
::
Copy
(
mask
,
platform
::
CPUPlace
(),
dev_ctx
,
cpu_mask
.
get
());
#else
PADDLE_THROW
(
"Not supported GPU, Please compile WITH_GPU option"
);
#endif
...
...
@@ -104,8 +104,8 @@ class MergeLoDTensorOp : public framework::OperatorBase {
continue
;
}
auto
slice
=
out
->
Slice
(
out_offset
,
out_offset
+
len
);
framework
::
Copy
From
(
input
->
Slice
(
start_offset
,
end_offset
),
place
,
dev_ctx
,
&
slice
);
framework
::
Copy
(
input
->
Slice
(
start_offset
,
end_offset
),
place
,
dev_ctx
,
&
slice
);
out_offset
+=
len
;
(
*
in_idx
)
+=
1
;
}
...
...
paddle/operators/multiplex_op.cu
浏览文件 @
ce6dad3b
...
...
@@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
// copy index to cpu
Tensor
index_t_cpu
;
Copy
From
(
*
ids
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
index_t_cpu
);
Copy
(
*
ids
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
index_t_cpu
);
auto
*
index
=
index_t_cpu
.
data
<
int32_t
>
();
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
platform
::
CUDAPlace
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
...
...
@@ -69,7 +69,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
auto
cols
=
ins
[
0
]
->
numel
()
/
rows
;
// copy index to cpu
Tensor
index_t_cpu
;
Copy
From
(
*
ids
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
index_t_cpu
);
Copy
(
*
ids
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
index_t_cpu
);
auto
*
index
=
index_t_cpu
.
data
<
int32_t
>
();
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
...
...
paddle/operators/parallel_do_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -211,7 +211,7 @@ class ParallelDoGradOp : public OperatorBase {
auto
&
tt
=
sub_scopes
[
place_idx
]
->
FindVar
(
s
)
->
Get
<
LoDTensor
>
();
VLOG
(
3
)
<<
place_idx
;
VLOG
(
3
)
<<
tt
;
framework
::
Copy
From
(
tt
,
places
[
0
],
t_buf
);
framework
::
Copy
(
tt
,
places
[
0
],
t_buf
);
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
s
,
s_buf
}}},
{{
"Out"
,
{
s
}}},
...
...
@@ -220,7 +220,7 @@ class ParallelDoGradOp : public OperatorBase {
}
VLOG
(
3
)
<<
t
;
framework
::
Copy
From
(
t
,
place
,
scope
.
FindVar
(
s
)
->
GetMutable
<
LoDTensor
>
());
framework
::
Copy
(
t
,
place
,
scope
.
FindVar
(
s
)
->
GetMutable
<
LoDTensor
>
());
}
}
};
...
...
paddle/operators/recurrent_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -290,7 +290,7 @@ class RecurrentOp : public RecurrentBase {
auto
dst_out
=
dst_tensor
->
Slice
(
seq_offset
,
seq_offset
+
1
);
// Explicit copy output since the local RNN scope can be destroyed
// early.
framework
::
Copy
From
(
src_tensor
,
place
,
dev_ctx
,
&
dst_out
);
framework
::
Copy
(
src_tensor
,
place
,
dev_ctx
,
&
dst_out
);
});
scopes
.
Next
();
...
...
@@ -376,7 +376,7 @@ class RecurrentGradOp : public RecurrentBase {
auto
*
cur_grad_var
=
cur_scope
.
Var
(
cur_grad
);
auto
cur_grad_tensor
=
cur_grad_var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Copy
From
(
ex_tensor
,
place
,
dev_ctx
,
cur_grad_tensor
);
framework
::
Copy
(
ex_tensor
,
place
,
dev_ctx
,
cur_grad_tensor
);
}
}
...
...
@@ -450,7 +450,7 @@ class RecurrentGradOp : public RecurrentBase {
}
auto
dst
=
outside
->
Slice
(
seq_offset
,
seq_offset
+
1
);
framework
::
Copy
From
(
inside
,
place
,
dev_ctx
,
&
dst
);
framework
::
Copy
(
inside
,
place
,
dev_ctx
,
&
dst
);
});
VLOG
(
5
)
<<
"Link outside gradient finished "
;
...
...
@@ -463,7 +463,7 @@ class RecurrentGradOp : public RecurrentBase {
framework
::
LoDTensor
*
outside
)
{
outside
->
Resize
(
inside
.
dims
());
outside
->
mutable_data
(
place
,
inside
.
type
());
framework
::
Copy
From
(
inside
,
place
,
dev_ctx
,
outside
);
framework
::
Copy
(
inside
,
place
,
dev_ctx
,
outside
);
});
VLOG
(
5
)
<<
"Link initialize state gradient finished "
;
}
...
...
paddle/operators/reorder_lod_tensor_by_rank_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -146,7 +146,7 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
framework
::
Copy
From
(
x_sliced
,
out_sliced
.
place
(),
dev_ctx
,
&
out_sliced
);
framework
::
Copy
(
x_sliced
,
out_sliced
.
place
(),
dev_ctx
,
&
out_sliced
);
out_offset
+=
len
;
return
out_offset
;
}
...
...
paddle/operators/reshape_op.h
浏览文件 @
ce6dad3b
...
...
@@ -28,7 +28,7 @@ class ReshapeKernel : public framework::OpKernel<T> {
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
out_dims
=
out
->
dims
();
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
framework
::
Copy
From
(
*
in
,
ctx
.
GetPlace
(),
ctx
.
device_context
(),
out
);
framework
::
Copy
(
*
in
,
ctx
.
GetPlace
(),
ctx
.
device_context
(),
out
);
out
->
Resize
(
out_dims
);
}
};
...
...
@@ -42,7 +42,7 @@ class ReshapeGradKernel : public framework::OpKernel<T> {
d_x
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
in_dims
=
d_x
->
dims
();
framework
::
Copy
From
(
*
d_out
,
ctx
.
GetPlace
(),
ctx
.
device_context
(),
d_x
);
framework
::
Copy
(
*
d_out
,
ctx
.
GetPlace
(),
ctx
.
device_context
(),
d_x
);
d_x
->
Resize
(
in_dims
);
}
};
...
...
paddle/operators/sequence_slice_op.h
浏览文件 @
ce6dad3b
...
...
@@ -66,13 +66,13 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
offset_cpu
.
mutable_data
<
T
>
(
offset
->
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
*
offset
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
offset_cpu
);
framework
::
Copy
(
*
offset
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
offset_cpu
);
offset_data
=
offset_cpu
.
data
<
int64_t
>
();
length_cpu
.
mutable_data
<
T
>
(
length
->
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
*
length
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
length_cpu
);
framework
::
Copy
(
*
length
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
length_cpu
);
length_data
=
length_cpu
.
data
<
int64_t
>
();
}
...
...
@@ -127,13 +127,13 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
offset_cpu
.
mutable_data
<
T
>
(
offset
->
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
*
offset
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
offset_cpu
);
framework
::
Copy
(
*
offset
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
offset_cpu
);
offset_data
=
offset_cpu
.
data
<
int64_t
>
();
length_cpu
.
mutable_data
<
T
>
(
length
->
dims
(),
platform
::
CPUPlace
());
framework
::
Copy
From
(
*
length
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
length_cpu
);
framework
::
Copy
(
*
length
,
platform
::
CPUPlace
(),
ctx
.
device_context
(),
&
length_cpu
);
length_data
=
length_cpu
.
data
<
int64_t
>
();
}
...
...
paddle/operators/shrink_rnn_memory_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -115,7 +115,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
auto
&
dout_tensor
=
dout_var
->
Get
<
framework
::
LoDTensor
>
();
auto
height
=
dout_tensor
.
dims
()[
0
];
auto
slice
=
dx_tensor
.
Slice
(
0
,
static_cast
<
int
>
(
height
));
framework
::
Copy
From
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
framework
::
Copy
(
dout_tensor
,
dout_tensor
.
place
(),
dev_ctx
,
&
slice
);
if
(
dx_tensor
.
dims
()[
0
]
>
height
)
{
auto
rest_tensor
=
dx_tensor
.
Slice
(
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
dx_tensor
.
dims
()[
0
]));
...
...
paddle/operators/split_lod_tensor_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -53,7 +53,7 @@ class SplitLoDTensorOp : public framework::OperatorBase {
cpu_mask
->
ShareDataWith
(
mask
);
}
else
if
(
platform
::
is_gpu_place
(
mask
.
place
()))
{
#ifdef PADDLE_WITH_CUDA
framework
::
Copy
From
(
mask
,
platform
::
CPUPlace
(),
dev_ctx
,
cpu_mask
.
get
());
framework
::
Copy
(
mask
,
platform
::
CPUPlace
(),
dev_ctx
,
cpu_mask
.
get
());
#else
PADDLE_THROW
(
"Not supported GPU, Please compile WITH_GPU option"
);
#endif
...
...
@@ -111,9 +111,9 @@ class SplitLoDTensorOp : public framework::OperatorBase {
// out[offset: offset+len] = x[each_range.begin: each_range.end]
auto
slice
=
out
->
Slice
(
static_cast
<
int
>
(
offset
),
static_cast
<
int
>
(
offset
+
len
));
framework
::
Copy
From
(
x
.
Slice
(
static_cast
<
int
>
(
each_range
.
begin
),
static_cast
<
int
>
(
each_range
.
end
)),
x
.
place
(),
dev_ctx
,
&
slice
);
framework
::
Copy
(
x
.
Slice
(
static_cast
<
int
>
(
each_range
.
begin
),
static_cast
<
int
>
(
each_range
.
end
)),
x
.
place
(),
dev_ctx
,
&
slice
);
offset
+=
len
;
}
}
...
...
paddle/operators/sum_op.h
浏览文件 @
ce6dad3b
...
...
@@ -107,8 +107,8 @@ class SumKernel : public framework::OpKernel<T> {
out_array
.
resize
(
i
+
1
);
}
if
(
out_array
[
i
].
numel
()
==
0
)
{
framework
::
Copy
From
(
in_array
[
i
],
in_array
[
i
].
place
(),
context
.
device_context
(),
&
out_array
[
i
]);
framework
::
Copy
(
in_array
[
i
],
in_array
[
i
].
place
(),
context
.
device_context
(),
&
out_array
[
i
]);
out_array
[
i
].
set_lod
(
in_array
[
i
].
lod
());
}
else
{
PADDLE_ENFORCE
(
out_array
[
i
].
lod
()
==
in_array
[
i
].
lod
());
...
...
paddle/operators/tensor_array_read_write_op.cc
浏览文件 @
ce6dad3b
...
...
@@ -44,7 +44,7 @@ class WriteToArrayOp : public ArrayOp {
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
Copy
From
(
x_tensor
,
place
,
dev_ctx
,
out_tensor
);
Copy
(
x_tensor
,
place
,
dev_ctx
,
out_tensor
);
out_tensor
->
set_lod
(
x_tensor
.
lod
());
}
else
{
VLOG
(
10
)
<<
"WARNING: The input tensor 'x_tensor' holds no memory, so "
...
...
@@ -135,7 +135,7 @@ class ReadFromArrayOp : public ArrayOp {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
framework
::
Copy
From
(
x_array
[
offset
],
place
,
dev_ctx
,
out_tensor
);
framework
::
Copy
(
x_array
[
offset
],
place
,
dev_ctx
,
out_tensor
);
out_tensor
->
set_lod
(
x_array
[
offset
].
lod
());
}
else
{
VLOG
(
10
)
<<
"offset "
<<
offset
<<
" >= "
<<
x_array
.
size
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录