Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
e3a64fca
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e3a64fca
编写于
10月 16, 2018
作者:
Q
Qiyang Min
提交者:
GitHub
10月 16, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13835 from velconia/fix_reshape_op
Fix Reshape op when input is the same with output
上级
46b0b790
aeec82ac
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
41 addition
and
4 deletion
+41
-4
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+20
-0
paddle/fluid/framework/tensor_util_test.cc
paddle/fluid/framework/tensor_util_test.cc
+14
-0
paddle/fluid/operators/reshape_op.cc
paddle/fluid/operators/reshape_op.cc
+3
-2
paddle/fluid/operators/sequence_concat_op.cc
paddle/fluid/operators/sequence_concat_op.cc
+4
-2
未找到文件。
paddle/fluid/framework/tensor_util.cc
浏览文件 @
e3a64fca
...
@@ -36,6 +36,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
...
@@ -36,6 +36,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
if
(
src_ptr
==
dst_ptr
)
{
VLOG
(
3
)
<<
"Skip copy the same data async from "
<<
src_place
<<
" to "
<<
dst_place
;
return
;
}
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
}
...
@@ -71,6 +76,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
...
@@ -71,6 +76,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
auto
stream
=
auto
stream
=
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
();
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
ctx
).
stream
();
if
(
platform
::
is_same_place
(
src_place
,
dst_place
))
{
if
(
platform
::
is_same_place
(
src_place
,
dst_place
))
{
if
(
src_ptr
==
dst_ptr
)
{
VLOG
(
3
)
<<
"Skip copy the same data async from "
<<
src_place
<<
" to "
<<
dst_place
;
return
;
}
memory
::
Copy
(
dst_gpu_place
,
dst_ptr
,
src_gpu_place
,
src_ptr
,
size
,
memory
::
Copy
(
dst_gpu_place
,
dst_ptr
,
src_gpu_place
,
src_ptr
,
size
,
stream
);
stream
);
}
else
{
}
else
{
...
@@ -114,6 +124,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
...
@@ -114,6 +124,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
());
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
());
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
if
(
src_ptr
==
dst_ptr
)
{
VLOG
(
3
)
<<
"Skip copy the same data from "
<<
src_place
<<
" to "
<<
dst_place
;
return
;
}
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
boost
::
get
<
platform
::
CPUPlace
>
(
src_place
),
src_ptr
,
size
);
}
}
...
@@ -130,6 +145,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
...
@@ -130,6 +145,11 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
memory
::
Copy
(
dst_gpu_place
,
dst_ptr
,
src_cpu_place
,
src_ptr
,
size
,
nullptr
);
memory
::
Copy
(
dst_gpu_place
,
dst_ptr
,
src_cpu_place
,
src_ptr
,
size
,
nullptr
);
}
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
}
else
if
(
platform
::
is_gpu_place
(
src_place
)
&&
platform
::
is_gpu_place
(
dst_place
))
{
platform
::
is_gpu_place
(
dst_place
))
{
if
(
src_ptr
==
dst_ptr
&&
platform
::
is_same_place
(
src_place
,
dst_place
))
{
VLOG
(
3
)
<<
"Skip copy the same data from "
<<
src_place
<<
" to "
<<
dst_place
;
return
;
}
auto
src_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
src_place
);
auto
src_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
src_place
);
auto
dst_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dst_place
);
auto
dst_gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
dst_place
);
memory
::
Copy
(
dst_gpu_place
,
dst_ptr
,
src_gpu_place
,
src_ptr
,
size
,
nullptr
);
memory
::
Copy
(
dst_gpu_place
,
dst_ptr
,
src_gpu_place
,
src_ptr
,
size
,
nullptr
);
...
...
paddle/fluid/framework/tensor_util_test.cc
浏览文件 @
e3a64fca
...
@@ -41,6 +41,11 @@ TEST(TensorCopy, Tensor) {
...
@@ -41,6 +41,11 @@ TEST(TensorCopy, Tensor) {
EXPECT_EQ
(
src_ptr
[
i
],
dst_ptr
[
i
]);
EXPECT_EQ
(
src_ptr
[
i
],
dst_ptr
[
i
]);
}
}
TensorCopy
(
dst_tensor
,
*
cpu_place
,
&
dst_tensor
);
for
(
size_t
i
=
0
;
i
<
9
;
++
i
)
{
EXPECT_EQ
(
src_ptr
[
i
],
dst_ptr
[
i
]);
}
EXPECT_TRUE
(
dst_tensor
.
layout
()
==
src_tensor
.
layout
());
EXPECT_TRUE
(
dst_tensor
.
layout
()
==
src_tensor
.
layout
());
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
2
);
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
2
);
...
@@ -82,6 +87,15 @@ TEST(TensorCopy, Tensor) {
...
@@ -82,6 +87,15 @@ TEST(TensorCopy, Tensor) {
EXPECT_EQ
(
src_ptr
[
i
],
dst_ptr
[
i
]);
EXPECT_EQ
(
src_ptr
[
i
],
dst_ptr
[
i
]);
}
}
// Copy the same tensor
TensorCopy
(
gpu_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
gpu_ctx
.
Wait
();
const
int
*
dst_ptr_tmp
=
dst_tensor
.
data
<
int
>
();
EXPECT_NE
(
src_ptr
,
dst_ptr_tmp
);
for
(
size_t
i
=
0
;
i
<
9
;
++
i
)
{
EXPECT_EQ
(
src_ptr
[
i
],
dst_ptr_tmp
[
i
]);
}
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
2
);
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
2
);
// CPU Slice Tensor to GPU Tensor
// CPU Slice Tensor to GPU Tensor
...
...
paddle/fluid/operators/reshape_op.cc
浏览文件 @
e3a64fca
...
@@ -259,7 +259,6 @@ class Reshape2Op : public ReshapeOp {
...
@@ -259,7 +259,6 @@ class Reshape2Op : public ReshapeOp {
:
ReshapeOp
(
type
,
inputs
,
outputs
,
attrs
)
{}
:
ReshapeOp
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
ReshapeOp
::
InferShape
(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output(XShape) of ReshapeOp should not be null."
);
"Output(XShape) of ReshapeOp should not be null."
);
const
auto
&
x_dims
=
ctx
->
GetInputDim
(
"X"
);
const
auto
&
x_dims
=
ctx
->
GetInputDim
(
"X"
);
...
@@ -270,6 +269,8 @@ class Reshape2Op : public ReshapeOp {
...
@@ -270,6 +269,8 @@ class Reshape2Op : public ReshapeOp {
}
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
ReshapeOp
::
InferShape
(
ctx
);
}
}
};
};
...
...
paddle/fluid/operators/sequence_concat_op.cc
浏览文件 @
e3a64fca
...
@@ -90,11 +90,13 @@ REGISTER_OPERATOR(sequence_concat, paddle::framework::OperatorWithKernel,
...
@@ -90,11 +90,13 @@ REGISTER_OPERATOR(sequence_concat, paddle::framework::OperatorWithKernel,
paddle
::
framework
::
DefaultGradOpDescMaker
<
false
>
);
paddle
::
framework
::
DefaultGradOpDescMaker
<
false
>
);
template
<
typename
T
>
template
<
typename
T
>
using
Kernel
=
op
::
SeqConcatKernel
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
;
using
Kernel
=
op
::
SeqConcatKernel
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
;
REGISTER_OP_CPU_KERNEL
(
sequence_concat
,
Kernel
<
float
>
,
Kernel
<
double
>
);
REGISTER_OP_CPU_KERNEL
(
sequence_concat
,
Kernel
<
float
>
,
Kernel
<
double
>
,
Kernel
<
int64_t
>
);
REGISTER_OPERATOR
(
sequence_concat_grad
,
paddle
::
framework
::
OperatorWithKernel
,
REGISTER_OPERATOR
(
sequence_concat_grad
,
paddle
::
framework
::
OperatorWithKernel
,
op
::
SeqConcatGradShapeInferer
);
op
::
SeqConcatGradShapeInferer
);
template
<
typename
T
>
template
<
typename
T
>
using
GradKernel
=
using
GradKernel
=
op
::
SeqConcatGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
;
op
::
SeqConcatGradKernel
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
;
REGISTER_OP_CPU_KERNEL
(
sequence_concat_grad
,
GradKernel
<
float
>
,
REGISTER_OP_CPU_KERNEL
(
sequence_concat_grad
,
GradKernel
<
float
>
,
GradKernel
<
double
>
);
GradKernel
<
double
>
,
GradKernel
<
int64_t
>
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录