Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
07915c95
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
07915c95
编写于
9月 19, 2017
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Renamed to strided_memcpy and prettify unittests
Add unittests for Crop and Concat
上级
3a4897ab
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
181 addition
and
19 deletion
+181
-19
paddle/operators/CMakeLists.txt
paddle/operators/CMakeLists.txt
+1
-1
paddle/operators/detail/strided_memcpy.h
paddle/operators/detail/strided_memcpy.h
+9
-9
paddle/operators/strided_memcpy.h
paddle/operators/strided_memcpy.h
+11
-9
paddle/operators/strided_memcpy_test.cc
paddle/operators/strided_memcpy_test.cc
+160
-0
未找到文件。
paddle/operators/CMakeLists.txt
浏览文件 @
07915c95
...
...
@@ -96,4 +96,4 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test
(
gather_test SRCS gather_test.cc DEPS tensor
)
cc_test
(
net_op_test SRCS net_op_test.cc DEPS net_op
)
cc_test
(
scatter_test SRCS scatter_test.cc DEPS tensor
)
cc_test
(
tensor_copy_test SRCS tensor_co
py_test.cc DEPS tensor paddle_memory
)
cc_test
(
strided_memcpy_test SRCS strided_memc
py_test.cc DEPS tensor paddle_memory
)
paddle/operators/detail/
tensor_co
py.h
→
paddle/operators/detail/
strided_memc
py.h
浏览文件 @
07915c95
...
...
@@ -22,10 +22,10 @@ namespace operators {
namespace
detail
{
template
<
typename
T
,
int
Rank
>
struct
TensorCo
pyFunctor
;
struct
StridedMemc
pyFunctor
;
template
<
typename
T
>
struct
TensorCo
pyFunctor
<
T
,
1
>
{
struct
StridedMemc
pyFunctor
<
T
,
1
>
{
void
operator
()(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
framework
::
Dim
<
1
>
src_stride
,
framework
::
Dim
<
1
>
dst_dim
,
framework
::
Dim
<
1
>
dst_stride
,
T
*
dst
)
const
{
...
...
@@ -48,12 +48,12 @@ struct TensorCopyFunctor<T, 1> {
};
template
<
typename
T
,
int
Rank
>
struct
TensorCo
pyFunctor
{
struct
StridedMemc
pyFunctor
{
void
operator
()(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
framework
::
Dim
<
Rank
>
src_stride
,
framework
::
Dim
<
Rank
>
dst_dim
,
framework
::
Dim
<
Rank
>
dst_stride
,
T
*
dst
)
const
{
for
(
int64_t
i
=
0
;
i
<
dst_dim
.
head
;
++
i
)
{
TensorCo
pyFunctor
<
T
,
Rank
-
1
>
func
;
StridedMemc
pyFunctor
<
T
,
Rank
-
1
>
func
;
func
(
dev_ctx
,
src
,
src_stride
.
tail
,
dst_dim
.
tail
,
dst_stride
.
tail
,
dst
);
src
+=
src_stride
.
head
;
dst
+=
dst_stride
.
head
;
...
...
@@ -62,10 +62,10 @@ struct TensorCopyFunctor {
};
template
<
typename
T
>
struct
Tensor
CopyDimVisitor
:
public
boost
::
static_visitor
<
void
>
{
Tensor
CopyDimVisitor
(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride
,
const
framework
::
DDim
&
dst_stride
,
T
*
dst
)
struct
Strided
CopyDimVisitor
:
public
boost
::
static_visitor
<
void
>
{
Strided
CopyDimVisitor
(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride
,
const
framework
::
DDim
&
dst_stride
,
T
*
dst
)
:
dev_ctx_
(
dev_ctx
),
src_
(
src
),
src_stride_
(
src_stride
),
...
...
@@ -77,7 +77,7 @@ struct TensorCopyDimVisitor : public boost::static_visitor<void> {
Dim
src_stride
=
boost
::
get
<
Dim
>
(
src_stride_
);
Dim
dst_stride
=
boost
::
get
<
Dim
>
(
dst_stride_
);
constexpr
int
dim
=
Dim
::
dimensions
;
TensorCo
pyFunctor
<
T
,
dim
>
functor
;
StridedMemc
pyFunctor
<
T
,
dim
>
functor
;
functor
(
dev_ctx_
,
src_
,
src_stride
,
dst_dim
,
dst_stride
,
dst_
);
}
...
...
paddle/operators/
tensor_co
py.h
→
paddle/operators/
strided_memc
py.h
浏览文件 @
07915c95
...
...
@@ -13,15 +13,17 @@
limitations under the License. */
#pragma once
#include "paddle/operators/detail/
tensor_co
py.h"
#include "paddle/operators/detail/
strided_memc
py.h"
namespace
paddle
{
namespace
operators
{
// Copy a tensor from src to dst.
// The src and dst should be both on dev_ctx.GetPlace()
// Strided memory copy from src to dst.
//
// the stride of an array (also referred to as increment, pitch or step size) is
// The src and dst should be both on dev_ctx.GetPlace(), otherwise, there will
// be a segment fault.
//
// The stride of an array (also referred to as increment, pitch or step size) is
// the number of locations in memory between beginnings of successive array
// elements
//
...
...
@@ -31,12 +33,12 @@ namespace operators {
// NOTE: When use GPU, the memcpy is async. To sync memcpy, please invoke
// `dev_ctx.Wait()`.
template
<
typename
T
>
inline
void
TensorCo
py
(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride
,
const
framework
::
DDim
&
dst_dim
,
const
framework
::
DDim
&
dst_stride
,
T
*
dst
)
{
inline
void
StridedMemc
py
(
const
platform
::
DeviceContext
&
dev_ctx
,
const
T
*
src
,
const
framework
::
DDim
&
src_stride
,
const
framework
::
DDim
&
dst_dim
,
const
framework
::
DDim
&
dst_stride
,
T
*
dst
)
{
using
namespace
detail
;
Tensor
CopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
Strided
CopyDimVisitor
<
T
>
func
(
dev_ctx
,
src
,
src_stride
,
dst_stride
,
dst
);
boost
::
apply_visitor
(
func
,
dst_dim
);
}
}
// namespace operators
...
...
paddle/operators/
tensor_co
py_test.cc
→
paddle/operators/
strided_memc
py_test.cc
浏览文件 @
07915c95
...
...
@@ -12,16 +12,21 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/
tensor_co
py.h"
#include "paddle/operators/
strided_memc
py.h"
#include "gtest/gtest.h"
#include "paddle/memory/memory.h"
namespace
paddle
{
namespace
operators
{
TEST
(
TensorCopy
,
CPU_COPY
)
{
TEST
(
StridedMemcpy
,
CPUCrop
)
{
// clang-format off
int
src
[]
=
{
0
,
1
,
2
,
0
,
0
,
0
,
3
,
4
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
2
,
0
,
0
,
0
,
3
,
4
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
};
// clang-format on
framework
::
DDim
src_stride
({
5
,
1
});
...
...
@@ -30,7 +35,7 @@ TEST(TensorCopy, CPU_COPY) {
framework
::
DDim
dst_stride
({
2
,
1
});
platform
::
CPUDeviceContext
ctx
;
TensorCo
py
<
int
>
(
ctx
,
src
+
1
,
src_stride
,
dst_dim
,
dst_stride
,
dst
);
StridedMemc
py
<
int
>
(
ctx
,
src
+
1
,
src_stride
,
dst_dim
,
dst_stride
,
dst
);
ASSERT_EQ
(
1
,
dst
[
0
]);
ASSERT_EQ
(
2
,
dst
[
1
]);
...
...
@@ -38,11 +43,44 @@ TEST(TensorCopy, CPU_COPY) {
ASSERT_EQ
(
4
,
dst
[
3
]);
}
TEST
(
StridedMemcpy
,
CPUConcat
)
{
// clang-format off
int
src
[]
=
{
1
,
2
,
3
,
4
};
// clang-format on
int
dst
[
8
];
framework
::
DDim
src_stride
({
2
,
1
});
framework
::
DDim
dst_dim
({
2
,
2
});
framework
::
DDim
dst_stride
({
4
,
1
});
platform
::
CPUDeviceContext
ctx
;
StridedMemcpy
<
int
>
(
ctx
,
src
,
src_stride
,
dst_dim
,
dst_stride
,
dst
);
StridedMemcpy
<
int
>
(
ctx
,
src
,
src_stride
,
dst_dim
,
dst_stride
,
dst
+
2
);
// clang-format off
int
expect_dst
[]
=
{
1
,
2
,
1
,
2
,
3
,
4
,
3
,
4
};
// clang-format on
for
(
size_t
i
=
0
;
i
<
sizeof
(
expect_dst
)
/
sizeof
(
int
);
++
i
)
{
ASSERT_EQ
(
expect_dst
[
i
],
dst
[
i
]);
}
}
#ifndef PADDLE_ONLY_CPU
TEST
(
TensorCopy
,
GPU_COPY
)
{
TEST
(
StridedMemcpy
,
GPUCrop
)
{
// clang-format off
int
src
[]
=
{
0
,
1
,
2
,
0
,
0
,
0
,
3
,
4
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
2
,
0
,
0
,
0
,
3
,
4
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
};
// clang-format on
platform
::
GPUPlace
gpu0
(
0
);
platform
::
CPUPlace
cpu
;
...
...
@@ -59,9 +97,11 @@ TEST(TensorCopy, GPU_COPY) {
framework
::
DDim
dst_stride
({
2
,
1
});
platform
::
CUDADeviceContext
ctx
(
gpu0
);
TensorCopy
<
int
>
(
ctx
,
gpu_src
+
1
,
src_stride
,
dst_dim
,
dst_stride
,
gpu_dst
);
StridedMemcpy
<
int
>
(
ctx
,
gpu_src
+
1
,
src_stride
,
dst_dim
,
dst_stride
,
gpu_dst
);
memory
::
Copy
(
cpu
,
dst
,
gpu0
,
gpu_dst
,
sizeof
(
dst
));
memory
::
Copy
(
cpu
,
dst
,
gpu0
,
gpu_dst
,
sizeof
(
dst
),
ctx
.
stream
());
ctx
.
Wait
();
ASSERT_EQ
(
1
,
dst
[
0
]);
ASSERT_EQ
(
2
,
dst
[
1
]);
...
...
@@ -72,6 +112,49 @@ TEST(TensorCopy, GPU_COPY) {
memory
::
Free
(
gpu0
,
gpu_src
);
}
TEST
(
StridedMemcpy
,
GPUConcat
)
{
// clang-format off
int
src
[]
=
{
1
,
2
,
3
,
4
};
// clang-format on
platform
::
GPUPlace
gpu0
(
0
);
platform
::
CPUPlace
cpu
;
int
*
gpu_src
=
reinterpret_cast
<
int
*>
(
memory
::
Alloc
(
gpu0
,
sizeof
(
src
)));
memory
::
Copy
(
gpu0
,
gpu_src
,
cpu
,
src
,
sizeof
(
src
));
int
dst
[
8
];
int
*
gpu_dst
=
reinterpret_cast
<
int
*>
(
memory
::
Alloc
(
gpu0
,
sizeof
(
dst
)));
framework
::
DDim
src_stride
({
2
,
1
});
framework
::
DDim
dst_dim
({
2
,
2
});
framework
::
DDim
dst_stride
({
4
,
1
});
platform
::
CUDADeviceContext
ctx
(
gpu0
);
StridedMemcpy
<
int
>
(
ctx
,
gpu_src
,
src_stride
,
dst_dim
,
dst_stride
,
gpu_dst
);
StridedMemcpy
<
int
>
(
ctx
,
gpu_src
,
src_stride
,
dst_dim
,
dst_stride
,
gpu_dst
+
2
);
memory
::
Copy
(
cpu
,
dst
,
gpu0
,
gpu_dst
,
sizeof
(
dst
),
ctx
.
stream
());
ctx
.
Wait
();
// clang-format off
int
expect_dst
[]
=
{
1
,
2
,
1
,
2
,
3
,
4
,
3
,
4
};
// clang-format on
for
(
size_t
i
=
0
;
i
<
sizeof
(
expect_dst
)
/
sizeof
(
int
);
++
i
)
{
ASSERT_EQ
(
expect_dst
[
i
],
dst
[
i
]);
}
memory
::
Free
(
gpu0
,
gpu_dst
);
memory
::
Free
(
gpu0
,
gpu_src
);
}
#endif
}
// namespace operators
}
// namespace paddle
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录