Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
24cb1ce0
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
24cb1ce0
编写于
9月 18, 2017
作者:
Y
Yu Yang
提交者:
GitHub
9月 18, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4177 from reyoung/feature/transform_ctx
Feature/transform ctx
上级
c4e783e5
eeaccc83
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
29 addition
and
17 deletion
+29
-17
paddle/platform/CMakeLists.txt
paddle/platform/CMakeLists.txt
+1
-1
paddle/platform/transform.h
paddle/platform/transform.h
+19
-12
paddle/platform/transform_test.cu
paddle/platform/transform_test.cu
+9
-4
未找到文件。
paddle/platform/CMakeLists.txt
浏览文件 @
24cb1ce0
...
...
@@ -24,4 +24,4 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
nv_test
(
device_context_test SRCS device_context_test.cc DEPS device_context gpu_info
)
nv_test
(
cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda
)
nv_test
(
transform_test SRCS transform_test.cu DEPS paddle_memory place
)
nv_test
(
transform_test SRCS transform_test.cu DEPS paddle_memory place
device_context
)
paddle/platform/transform.h
浏览文件 @
24cb1ce0
...
...
@@ -14,6 +14,7 @@
#pragma once
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/hostdevice.h"
#include "paddle/platform/place.h"
...
...
@@ -21,6 +22,7 @@
#include <algorithm>
#include <type_traits>
#ifdef __NVCC__
#include <thrust/execution_policy.h>
#include <thrust/transform.h>
#include "paddle/platform/details/device_ptr_cast.h"
#endif
...
...
@@ -28,34 +30,39 @@
namespace
paddle
{
namespace
platform
{
// Transform on host or device. It provides the same API in std library.
template
<
typename
Place
,
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
Transform
(
Place
place
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
Transform
(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
if
(
is_cpu_place
(
place
))
{
std
::
transform
(
first
,
last
,
result
,
op
);
}
else
{
#ifdef __NVCC__
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
using
namespace
details
;
thrust
::
transform
(
DevPtrCast
(
first
),
DevPtrCast
(
last
),
DevPtrCast
(
resul
t
),
op
);
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
DevPtrCast
(
firs
t
),
DevPtrCast
(
last
),
DevPtrCast
(
result
),
op
);
#else
PADDLE_THROW
(
"Do not invoke `Transform<GPUPlace>` in .cc file"
);
#endif
}
}
template
<
typename
Place
,
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
Transform
(
Place
place
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
Transform
(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
if
(
is_cpu_place
(
place
))
{
std
::
transform
(
first1
,
last1
,
first2
,
result
,
op
);
}
else
{
#ifdef __NVCC__
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
using
namespace
details
;
thrust
::
transform
(
DevPtrCast
(
first1
),
DevPtrCast
(
last1
),
DevPtrCast
(
first2
),
DevPtrCast
(
result
),
op
);
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
DevPtrCast
(
first1
),
DevPtrCast
(
last1
),
DevPtrCast
(
first2
),
DevPtrCast
(
result
),
op
);
#else
PADDLE_THROW
(
"Do not invoke `Transform<GPUPlace>` in .cc file"
);
#endif
...
...
paddle/platform/transform_test.cu
浏览文件 @
24cb1ce0
...
...
@@ -36,8 +36,9 @@ class Multiply {
TEST
(
Transform
,
CPUUnary
)
{
using
namespace
paddle
::
platform
;
CPUDeviceContext
ctx
;
float
buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
Transform
(
CPUPlace
()
,
buf
,
buf
+
4
,
buf
,
Scale
<
float
>
(
10
));
Transform
(
ctx
,
buf
,
buf
+
4
,
buf
,
Scale
<
float
>
(
10
));
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_NEAR
(
buf
[
i
],
static_cast
<
float
>
(
i
+
1
),
1e-5
);
}
...
...
@@ -47,10 +48,12 @@ TEST(Transform, GPUUnary) {
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
memory
;
GPUPlace
gpu0
(
0
);
CUDADeviceContext
ctx
(
gpu0
);
float
cpu_buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
float
*
gpu_buf
=
static_cast
<
float
*>
(
Alloc
(
gpu0
,
sizeof
(
float
)
*
4
));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
cpu_buf
,
sizeof
(
cpu_buf
));
Transform
(
gpu0
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
Scale
<
float
>
(
10
));
Transform
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
Scale
<
float
>
(
10
));
ctx
.
Wait
();
Copy
(
CPUPlace
(),
cpu_buf
,
gpu0
,
gpu_buf
,
sizeof
(
cpu_buf
));
Free
(
gpu0
,
gpu_buf
);
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
...
...
@@ -62,7 +65,7 @@ TEST(Transform, CPUBinary) {
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
memory
;
int
buf
[
4
]
=
{
1
,
2
,
3
,
4
};
Transform
(
CPU
Place
(),
buf
,
buf
+
4
,
buf
,
buf
,
Multiply
<
int
>
());
Transform
(
CPU
DeviceContext
(),
buf
,
buf
+
4
,
buf
,
buf
,
Multiply
<
int
>
());
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_EQ
((
i
+
1
)
*
(
i
+
1
),
buf
[
i
]);
}
...
...
@@ -73,9 +76,11 @@ TEST(Transform, GPUBinary) {
using
namespace
paddle
::
memory
;
int
buf
[
4
]
=
{
1
,
2
,
3
,
4
};
GPUPlace
gpu0
(
0
);
CUDADeviceContext
ctx
(
gpu0
);
int
*
gpu_buf
=
static_cast
<
int
*>
(
Alloc
(
gpu0
,
sizeof
(
buf
)));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
buf
,
sizeof
(
buf
));
Transform
(
gpu0
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
gpu_buf
,
Multiply
<
int
>
());
Transform
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
gpu_buf
,
Multiply
<
int
>
());
ctx
.
Wait
();
Copy
(
CPUPlace
(),
buf
,
gpu0
,
gpu_buf
,
sizeof
(
buf
));
Free
(
gpu0
,
gpu_buf
);
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录