Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
d865b047
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d865b047
编写于
9月 20, 2017
作者:
Q
qingqing01
提交者:
GitHub
9月 20, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4201 from qingqing01/fix_prelu
Refine platform::Transform function and fix prelu_op testing.
上级
a9202e89
2aa4d326
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
74 addition
and
40 deletion
+74
-40
paddle/operators/prelu_op.h
paddle/operators/prelu_op.h
+6
-4
paddle/platform/transform.h
paddle/platform/transform.h
+55
-29
paddle/platform/transform_test.cu
paddle/platform/transform_test.cu
+11
-5
python/paddle/v2/framework/tests/test_prelu_op.py
python/paddle/v2/framework/tests/test_prelu_op.py
+2
-2
未找到文件。
paddle/operators/prelu_op.h
浏览文件 @
d865b047
...
@@ -54,7 +54,8 @@ class PReluKernel : public framework::OpKernel {
...
@@ -54,7 +54,8 @@ class PReluKernel : public framework::OpKernel {
int
numel
=
x
->
numel
();
int
numel
=
x
->
numel
();
Transform
(
context
.
device_context
(),
x_ptr
,
x_ptr
+
numel
,
o_ptr
,
Transform
<
Place
>
trans
;
trans
(
context
.
device_context
(),
x_ptr
,
x_ptr
+
numel
,
o_ptr
,
PReluFunctor
<
T
>
(
alpha_ptr
));
PReluFunctor
<
T
>
(
alpha_ptr
));
}
}
};
};
...
@@ -91,8 +92,9 @@ class PReluGradKernel : public framework::OpKernel {
...
@@ -91,8 +92,9 @@ class PReluGradKernel : public framework::OpKernel {
const
T
*
out_ptr
=
out
->
data
<
T
>
();
const
T
*
out_ptr
=
out
->
data
<
T
>
();
int
numel
=
dx
->
numel
();
int
numel
=
dx
->
numel
();
Transform
(
context
.
device_context
(),
out_ptr
,
out_ptr
+
numel
,
dout_ptr
,
Transform
<
Place
>
trans
;
dx_ptr
,
PReluGradFunctor
<
T
>
(
alpha_ptr
));
trans
(
context
.
device_context
(),
out_ptr
,
out_ptr
+
numel
,
dout_ptr
,
dx_ptr
,
PReluGradFunctor
<
T
>
(
alpha_ptr
));
// TODO (Zhuoyuan): add dalpha upgrade when GPU kernels ready
// TODO (Zhuoyuan): add dalpha upgrade when GPU kernels ready
}
}
...
...
paddle/platform/transform.h
浏览文件 @
d865b047
...
@@ -29,45 +29,71 @@
...
@@ -29,45 +29,71 @@
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
// Transform on host or device. It provides the same API in std library.
// Transform on host or device. It provides the same API in std library.
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
template
<
typename
Place
>
void
Transform
(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
struct
Transform
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
);
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
);
};
template
<
>
struct
Transform
<
platform
::
CPUPlace
>
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
OutputIter
result
,
UnaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
auto
place
=
context
.
GetPlace
();
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE
(
is_cpu_place
(
place
),
"It must use CPU place."
);
std
::
transform
(
first
,
last
,
result
,
op
);
std
::
transform
(
first
,
last
,
result
,
op
);
}
else
{
#ifdef __NVCC__
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
using
namespace
details
;
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
DevPtrCast
(
first
),
DevPtrCast
(
last
),
DevPtrCast
(
result
),
op
);
#else
PADDLE_THROW
(
"Do not invoke `Transform<GPUPlace>` in .cc file"
);
#endif
}
}
}
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
typename
BinaryOperation
>
void
Transform
(
const
DeviceContext
&
context
,
InputIter1
first1
,
void
operator
()
(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
BinaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
auto
place
=
context
.
GetPlace
();
if
(
is_cpu_place
(
place
))
{
PADDLE_ENFORCE
(
is_cpu_place
(
place
),
"It must use CPU place."
);
std
::
transform
(
first1
,
last1
,
first2
,
result
,
op
);
std
::
transform
(
first1
,
last1
,
first2
,
result
,
op
);
}
else
{
}
};
#ifdef __NVCC__
#ifdef __NVCC__
template
<
>
struct
Transform
<
platform
::
GPUPlace
>
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
is_gpu_place
(
place
),
"It must use GPU place."
);
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
using
namespace
details
;
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
DevPtrCast
(
first1
),
details
::
DevPtrCast
(
first
),
details
::
DevPtrCast
(
last
),
DevPtrCast
(
last1
),
DevPtrCast
(
first2
),
DevPtrCast
(
result
),
details
::
DevPtrCast
(
result
),
op
);
}
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
operator
()(
const
DeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
auto
place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
is_gpu_place
(
place
),
"It must use GPU place."
);
auto
&
ctx
=
reinterpret_cast
<
const
CUDADeviceContext
&>
(
context
);
thrust
::
transform
(
thrust
::
cuda
::
par
.
on
(
ctx
.
stream
()),
details
::
DevPtrCast
(
first1
),
details
::
DevPtrCast
(
last1
),
details
::
DevPtrCast
(
first2
),
details
::
DevPtrCast
(
result
),
op
);
op
);
#else
PADDLE_THROW
(
"Do not invoke `Transform<GPUPlace>` in .cc file"
);
#endif
}
}
};
};
#endif
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/platform/transform_test.cu
浏览文件 @
d865b047
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/memory/memcpy.h"
#include "paddle/memory/memcpy.h"
#include "paddle/memory/memory.h"
#include "paddle/memory/memory.h"
#include "paddle/platform/hostdevice.h"
#include "paddle/platform/transform.h"
#include "paddle/platform/transform.h"
template
<
typename
T
>
template
<
typename
T
>
...
@@ -38,7 +39,8 @@ TEST(Transform, CPUUnary) {
...
@@ -38,7 +39,8 @@ TEST(Transform, CPUUnary) {
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
platform
;
CPUDeviceContext
ctx
;
CPUDeviceContext
ctx
;
float
buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
float
buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
Transform
(
ctx
,
buf
,
buf
+
4
,
buf
,
Scale
<
float
>
(
10
));
Transform
<
paddle
::
platform
::
CPUPlace
>
trans
;
trans
(
ctx
,
buf
,
buf
+
4
,
buf
,
Scale
<
float
>
(
10
));
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_NEAR
(
buf
[
i
],
static_cast
<
float
>
(
i
+
1
),
1e-5
);
ASSERT_NEAR
(
buf
[
i
],
static_cast
<
float
>
(
i
+
1
),
1e-5
);
}
}
...
@@ -52,7 +54,8 @@ TEST(Transform, GPUUnary) {
...
@@ -52,7 +54,8 @@ TEST(Transform, GPUUnary) {
float
cpu_buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
float
cpu_buf
[
4
]
=
{
0.1
,
0.2
,
0.3
,
0.4
};
float
*
gpu_buf
=
static_cast
<
float
*>
(
Alloc
(
gpu0
,
sizeof
(
float
)
*
4
));
float
*
gpu_buf
=
static_cast
<
float
*>
(
Alloc
(
gpu0
,
sizeof
(
float
)
*
4
));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
cpu_buf
,
sizeof
(
cpu_buf
));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
cpu_buf
,
sizeof
(
cpu_buf
));
Transform
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
Scale
<
float
>
(
10
));
Transform
<
paddle
::
platform
::
GPUPlace
>
trans
;
trans
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
Scale
<
float
>
(
10
));
ctx
.
Wait
();
ctx
.
Wait
();
Copy
(
CPUPlace
(),
cpu_buf
,
gpu0
,
gpu_buf
,
sizeof
(
cpu_buf
));
Copy
(
CPUPlace
(),
cpu_buf
,
gpu0
,
gpu_buf
,
sizeof
(
cpu_buf
));
Free
(
gpu0
,
gpu_buf
);
Free
(
gpu0
,
gpu_buf
);
...
@@ -65,7 +68,9 @@ TEST(Transform, CPUBinary) {
...
@@ -65,7 +68,9 @@ TEST(Transform, CPUBinary) {
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
platform
;
using
namespace
paddle
::
memory
;
using
namespace
paddle
::
memory
;
int
buf
[
4
]
=
{
1
,
2
,
3
,
4
};
int
buf
[
4
]
=
{
1
,
2
,
3
,
4
};
Transform
(
CPUDeviceContext
(),
buf
,
buf
+
4
,
buf
,
buf
,
Multiply
<
int
>
());
Transform
<
paddle
::
platform
::
CPUPlace
>
trans
;
CPUDeviceContext
ctx
;
trans
(
ctx
,
buf
,
buf
+
4
,
buf
,
buf
,
Multiply
<
int
>
());
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
for
(
int
i
=
0
;
i
<
4
;
++
i
)
{
ASSERT_EQ
((
i
+
1
)
*
(
i
+
1
),
buf
[
i
]);
ASSERT_EQ
((
i
+
1
)
*
(
i
+
1
),
buf
[
i
]);
}
}
...
@@ -79,7 +84,8 @@ TEST(Transform, GPUBinary) {
...
@@ -79,7 +84,8 @@ TEST(Transform, GPUBinary) {
CUDADeviceContext
ctx
(
gpu0
);
CUDADeviceContext
ctx
(
gpu0
);
int
*
gpu_buf
=
static_cast
<
int
*>
(
Alloc
(
gpu0
,
sizeof
(
buf
)));
int
*
gpu_buf
=
static_cast
<
int
*>
(
Alloc
(
gpu0
,
sizeof
(
buf
)));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
buf
,
sizeof
(
buf
));
Copy
(
gpu0
,
gpu_buf
,
CPUPlace
(),
buf
,
sizeof
(
buf
));
Transform
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
gpu_buf
,
Multiply
<
int
>
());
Transform
<
paddle
::
platform
::
GPUPlace
>
trans
;
trans
(
ctx
,
gpu_buf
,
gpu_buf
+
4
,
gpu_buf
,
gpu_buf
,
Multiply
<
int
>
());
ctx
.
Wait
();
ctx
.
Wait
();
Copy
(
CPUPlace
(),
buf
,
gpu0
,
gpu_buf
,
sizeof
(
buf
));
Copy
(
CPUPlace
(),
buf
,
gpu0
,
gpu_buf
,
sizeof
(
buf
));
Free
(
gpu0
,
gpu_buf
);
Free
(
gpu0
,
gpu_buf
);
...
...
python/paddle/v2/framework/tests/test_prelu_op.py
浏览文件 @
d865b047
...
@@ -17,10 +17,10 @@ class PReluTest(OpTest):
...
@@ -17,10 +17,10 @@ class PReluTest(OpTest):
assert
out_np
is
not
self
.
inputs
[
'X'
]
assert
out_np
is
not
self
.
inputs
[
'X'
]
self
.
outputs
=
{
'Out'
:
out_np
}
self
.
outputs
=
{
'Out'
:
out_np
}
def
not_
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
def
not_
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
self
.
check_grad
([
'X'
],
'Out'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录