Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4e62af80
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4e62af80
编写于
9月 08, 2021
作者:
C
cc
提交者:
GitHub
9月 08, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add FP16 PRelu (#35532)
上级
afd1b372
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
66 addition
and
10 deletion
+66
-10
paddle/fluid/operators/math/prelu.cu
paddle/fluid/operators/math/prelu.cu
+9
-3
paddle/fluid/operators/prelu_op.cu
paddle/fluid/operators/prelu_op.cu
+13
-6
python/paddle/fluid/tests/unittests/test_prelu_op.py
python/paddle/fluid/tests/unittests/test_prelu_op.py
+44
-1
未找到文件。
paddle/fluid/operators/math/prelu.cu
浏览文件 @
4e62af80
...
...
@@ -33,7 +33,8 @@ __global__ void PReluChannelWiseKernel(const T *input, const T *alpha,
size_t
channel_index
=
temp
%
channel_num
;
T
scale
=
alpha
[
channel_index
];
T
x
=
input
[
index
];
output
[
index
]
=
(
x
>
0
)
?
x
:
scale
*
x
;
T
zero
=
static_cast
<
T
>
(
0
);
output
[
index
]
=
(
x
>
zero
)
?
x
:
scale
*
x
;
}
}
...
...
@@ -45,7 +46,8 @@ __global__ void PReluElementWiseKernel(const T *input, const T *alpha,
size_t
element_index
=
index
%
spatial_size
;
T
scale
=
alpha
[
element_index
];
T
x
=
input
[
index
];
output
[
index
]
=
(
x
>
0
)
?
x
:
scale
*
x
;
T
zero
=
static_cast
<
T
>
(
0
);
output
[
index
]
=
(
x
>
zero
)
?
x
:
scale
*
x
;
}
}
...
...
@@ -55,7 +57,8 @@ __global__ void PReluScalarKernel(const T *input, const T *alpha, T *output,
T
scale
=
alpha
[
0
];
CUDA_KERNEL_LOOP
(
index
,
numel
)
{
T
x
=
input
[
index
];
output
[
index
]
=
(
x
>
0
)
?
x
:
scale
*
x
;
T
zero
=
static_cast
<
T
>
(
0
);
output
[
index
]
=
(
x
>
zero
)
?
x
:
scale
*
x
;
}
}
...
...
@@ -88,12 +91,15 @@ void PreluScalarDirectCUDAFunctor<T>::operator()(gpuStream_t stream,
}
template
class
PreluChannelWiseDirectCUDAFunctor
<
float
>;
template
class
PreluChannelWiseDirectCUDAFunctor
<
paddle
::
platform
::
float16
>;
template
class
PreluChannelWiseDirectCUDAFunctor
<
double
>;
template
class
PreluElementWiseDirectCUDAFunctor
<
float
>;
template
class
PreluElementWiseDirectCUDAFunctor
<
paddle
::
platform
::
float16
>;
template
class
PreluElementWiseDirectCUDAFunctor
<
double
>;
template
class
PreluScalarDirectCUDAFunctor
<
float
>;
template
class
PreluScalarDirectCUDAFunctor
<
paddle
::
platform
::
float16
>;
template
class
PreluScalarDirectCUDAFunctor
<
double
>;
}
// namespace math
...
...
paddle/fluid/operators/prelu_op.cu
浏览文件 @
4e62af80
...
...
@@ -87,8 +87,9 @@ __global__ void PReluOpGradKernel(const T* x_ptr, const T* alpha_ptr,
}
T
x
=
x_ptr
[
index
];
T
dy
=
dy_ptr
[
index
];
if
(
dx_ptr
!=
nullptr
)
dx_ptr
[
index
]
=
(
x
>
0
)
?
dy
:
scale
*
dy
;
if
(
dalpha_ptr
!=
nullptr
)
dalpha_ptr
[
index
]
=
(
x
>
0
)
?
0
:
x
*
dy
;
T
zero
=
static_cast
<
T
>
(
0
);
if
(
dx_ptr
!=
nullptr
)
dx_ptr
[
index
]
=
(
x
>
zero
)
?
dy
:
scale
*
dy
;
if
(
dalpha_ptr
!=
nullptr
)
dalpha_ptr
[
index
]
=
(
x
>
zero
)
?
zero
:
x
*
dy
;
}
}
...
...
@@ -112,9 +113,11 @@ class PreluOpGradFunctor {
}
};
template
<
typename
T
>
struct
IdentityFunctor
{
HOSTDEVICE
inline
T
operator
()(
const
T
&
x
)
const
{
return
x
;
}
template
<
typename
T
>
HOSTDEVICE
inline
T
operator
()(
const
T
&
x
)
const
{
return
x
;
}
};
template
<
typename
DeviceContext
,
typename
T
>
...
...
@@ -174,9 +177,9 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
reduce_dims
.
push_back
(
i
);
}
TensorReduce
<
T
,
T
,
cub
::
Sum
,
IdentityFunctor
<
T
>
>
(
TensorReduce
<
T
,
T
,
cub
::
Sum
,
IdentityFunctor
>
(
dalpha_tmp
,
dalpha
,
reduce_dims
,
static_cast
<
T
>
(
0
),
cub
::
Sum
(),
IdentityFunctor
<
T
>
(),
stream
);
IdentityFunctor
(),
stream
);
}
};
...
...
@@ -184,10 +187,14 @@ class CUDAPReluGradKernel : public framework::OpKernel<T> {
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
prelu
,
ops
::
CUDAPReluKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
CUDAPReluKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
CUDAPReluKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
REGISTER_OP_CUDA_KERNEL
(
prelu_grad
,
ops
::
CUDAPReluGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
CUDAPReluGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
plat
::
float16
>
,
ops
::
CUDAPReluGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
python/paddle/fluid/tests/unittests/test_prelu_op.py
浏览文件 @
4e62af80
...
...
@@ -153,11 +153,12 @@ class TestNNPReluAPI(unittest.TestCase):
class
PReluTest
(
OpTest
):
def
setUp
(
self
):
self
.
init_dtype
()
self
.
init_input_shape
()
self
.
init_attr
()
self
.
op_type
=
"prelu"
x_np
=
np
.
random
.
uniform
(
-
1
,
1
,
self
.
x_shape
)
x_np
=
np
.
random
.
uniform
(
-
1
,
1
,
self
.
x_shape
)
.
astype
(
self
.
dtype
)
# Since zero point in prelu is not differentiable, avoid randomize
# zero.
x_np
[
np
.
abs
(
x_np
)
<
0.005
]
=
0.02
...
...
@@ -168,6 +169,7 @@ class PReluTest(OpTest):
alpha_np
=
np
.
random
.
uniform
(
-
1
,
-
0.5
,
[
1
,
self
.
x_shape
[
1
],
1
,
1
])
else
:
alpha_np
=
np
.
random
.
uniform
(
-
1
,
-
0.5
,
[
1
]
+
self
.
x_shape
[
1
:])
alpha_np
=
alpha_np
.
astype
(
self
.
dtype
)
self
.
inputs
=
{
'X'
:
x_np
,
'Alpha'
:
alpha_np
}
...
...
@@ -184,6 +186,9 @@ class PReluTest(OpTest):
assert
out_np
is
not
self
.
inputs
[
'X'
]
self
.
outputs
=
{
'Out'
:
out_np
}
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float64
def
init_input_shape
(
self
):
self
.
x_shape
=
[
2
,
100
,
3
,
4
]
...
...
@@ -270,6 +275,44 @@ class TestModeElementRank6(PReluTest):
self
.
attrs
=
{
'mode'
:
"element"
}
def
create_test_fp16_class
(
parent
,
check_grad
=
True
,
atol
=
1e-3
,
max_relative_error
=
0.05
):
@
unittest
.
skipIf
(
not
core
.
is_compiled_with_cuda
(),
"core is not compiled with CUDA"
)
class
TestPReluFp16Case
(
parent
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
def
test_check_output
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
):
self
.
check_output_with_place
(
place
,
atol
=
atol
)
def
test_check_grad
(
self
):
place
=
core
.
CUDAPlace
(
0
)
if
core
.
is_float16_supported
(
place
)
and
check_grad
:
self
.
check_grad_with_place
(
place
,
[
'X'
,
'Alpha'
],
'Out'
,
max_relative_error
=
max_relative_error
)
cls_name
=
"{0}_{1}"
.
format
(
parent
.
__name__
,
"Fp16Op"
)
TestPReluFp16Case
.
__name__
=
cls_name
globals
()[
cls_name
]
=
TestPReluFp16Case
create_test_fp16_class
(
TestModeElt
)
create_test_fp16_class
(
TestModeAllRank3
)
create_test_fp16_class
(
TestModeAllRank6
)
create_test_fp16_class
(
TestModeChannelRank3
)
create_test_fp16_class
(
TestModeChannelRank6
)
create_test_fp16_class
(
TestModeElementRank3
)
create_test_fp16_class
(
TestModeElementRank6
)
def
prelu_t
(
x
,
mode
,
param_attr
=
None
,
name
=
None
):
helper
=
fluid
.
layer_helper
.
LayerHelper
(
'prelu'
,
**
locals
())
alpha_shape
=
[
1
,
x
.
shape
[
1
],
1
,
1
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录