Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
bd40dd9a
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bd40dd9a
编写于
10月 25, 2021
作者:
Z
zhangbo9674
提交者:
GitHub
10月 25, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Cherry Pick]Add fp16 kernel for clip_op (#36577) (#36672)
Add fp16 kernel for clip_op.
上级
304fb2b5
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
30 additions
and
10 deletions
+30
-10
paddle/fluid/operators/clip_op.cu
paddle/fluid/operators/clip_op.cu
+6
-2
paddle/fluid/operators/clip_op.h
paddle/fluid/operators/clip_op.h
+3
-3
python/paddle/fluid/tests/unittests/test_clip_op.py
python/paddle/fluid/tests/unittests/test_clip_op.py
+21
-5
未找到文件。
paddle/fluid/operators/clip_op.cu
浏览文件 @
bd40dd9a
...
@@ -19,10 +19,14 @@ REGISTER_OP_CUDA_KERNEL(
...
@@ -19,10 +19,14 @@ REGISTER_OP_CUDA_KERNEL(
clip
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
clip
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
ClipKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
);
REGISTER_OP_CUDA_KERNEL
(
REGISTER_OP_CUDA_KERNEL
(
clip_grad
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
clip_grad
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
);
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
ClipGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
);
paddle/fluid/operators/clip_op.h
浏览文件 @
bd40dd9a
...
@@ -54,7 +54,7 @@ class ClipGradFunctor {
...
@@ -54,7 +54,7 @@ class ClipGradFunctor {
public:
public:
explicit
ClipGradFunctor
(
const
T
min
,
const
T
max
)
:
min_
(
min
),
max_
(
max
)
{}
explicit
ClipGradFunctor
(
const
T
min
,
const
T
max
)
:
min_
(
min
),
max_
(
max
)
{}
HOSTDEVICE
T
operator
()(
const
T
&
x
,
const
T
&
y
)
const
{
HOSTDEVICE
T
operator
()(
const
T
&
x
,
const
T
&
y
)
const
{
return
(
y
>
min_
&&
y
<
max_
)
?
x
:
0
;
return
(
y
>
min_
&&
y
<
max_
)
?
x
:
static_cast
<
T
>
(
0
)
;
}
}
private:
private:
...
@@ -79,7 +79,7 @@ class ClipKernel : public framework::OpKernel<T> {
...
@@ -79,7 +79,7 @@ class ClipKernel : public framework::OpKernel<T> {
}
}
max
=
static_cast
<
T
>
(
max
);
max
=
static_cast
<
T
>
(
max
);
auto
min
=
context
.
Attr
<
float
>
(
"min"
);
auto
min
=
static_cast
<
T
>
(
context
.
Attr
<
float
>
(
"min"
)
);
Tensor
min_cpu
;
Tensor
min_cpu
;
if
(
context
.
HasInput
(
"Min"
))
{
if
(
context
.
HasInput
(
"Min"
))
{
auto
*
min_t
=
context
.
Input
<
Tensor
>
(
"Min"
);
auto
*
min_t
=
context
.
Input
<
Tensor
>
(
"Min"
);
...
@@ -156,7 +156,7 @@ class ClipGradKernel : public framework::OpKernel<T> {
...
@@ -156,7 +156,7 @@ class ClipGradKernel : public framework::OpKernel<T> {
}
}
max
=
static_cast
<
T
>
(
max
);
max
=
static_cast
<
T
>
(
max
);
auto
min
=
context
.
Attr
<
float
>
(
"min"
);
auto
min
=
static_cast
<
T
>
(
context
.
Attr
<
float
>
(
"min"
)
);
Tensor
min_cpu
;
Tensor
min_cpu
;
if
(
context
.
HasInput
(
"Min"
))
{
if
(
context
.
HasInput
(
"Min"
))
{
auto
*
min_t
=
context
.
Input
<
Tensor
>
(
"Min"
);
auto
*
min_t
=
context
.
Input
<
Tensor
>
(
"Min"
);
...
...
python/paddle/fluid/tests/unittests/test_clip_op.py
浏览文件 @
bd40dd9a
...
@@ -43,7 +43,7 @@ class TestClipOp(OpTest):
...
@@ -43,7 +43,7 @@ class TestClipOp(OpTest):
else
:
else
:
max_v
=
self
.
attrs
[
'max'
]
max_v
=
self
.
attrs
[
'max'
]
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)
input
=
np
.
random
.
random
(
self
.
shape
).
astype
(
self
.
dtype
)
input
[
np
.
abs
(
input
-
min_v
)
<
self
.
max_relative_error
]
=
0.5
input
[
np
.
abs
(
input
-
min_v
)
<
self
.
max_relative_error
]
=
0.5
input
[
np
.
abs
(
input
-
max_v
)
<
self
.
max_relative_error
]
=
0.5
input
[
np
.
abs
(
input
-
max_v
)
<
self
.
max_relative_error
]
=
0.5
self
.
inputs
[
'X'
]
=
input
self
.
inputs
[
'X'
]
=
input
...
@@ -60,15 +60,17 @@ class TestClipOp(OpTest):
...
@@ -60,15 +60,17 @@ class TestClipOp(OpTest):
paddle
.
disable_static
()
paddle
.
disable_static
()
def
initTestCase
(
self
):
def
initTestCase
(
self
):
self
.
dtype
=
np
.
float32
self
.
shape
=
(
4
,
10
,
10
)
self
.
shape
=
(
4
,
10
,
10
)
self
.
max
=
0.8
self
.
max
=
0.8
self
.
min
=
0.3
self
.
min
=
0.3
self
.
inputs
[
'Max'
]
=
np
.
array
([
0.8
]).
astype
(
'float32'
)
self
.
inputs
[
'Max'
]
=
np
.
array
([
0.8
]).
astype
(
self
.
dtype
)
self
.
inputs
[
'Min'
]
=
np
.
array
([
0.1
]).
astype
(
'float32'
)
self
.
inputs
[
'Min'
]
=
np
.
array
([
0.1
]).
astype
(
self
.
dtype
)
class
TestCase1
(
TestClipOp
):
class
TestCase1
(
TestClipOp
):
def
initTestCase
(
self
):
def
initTestCase
(
self
):
self
.
dtype
=
np
.
float32
self
.
shape
=
(
8
,
16
,
8
)
self
.
shape
=
(
8
,
16
,
8
)
self
.
max
=
0.7
self
.
max
=
0.7
self
.
min
=
0.0
self
.
min
=
0.0
...
@@ -76,6 +78,7 @@ class TestCase1(TestClipOp):
...
@@ -76,6 +78,7 @@ class TestCase1(TestClipOp):
class
TestCase2
(
TestClipOp
):
class
TestCase2
(
TestClipOp
):
def
initTestCase
(
self
):
def
initTestCase
(
self
):
self
.
dtype
=
np
.
float32
self
.
shape
=
(
8
,
16
)
self
.
shape
=
(
8
,
16
)
self
.
max
=
1.0
self
.
max
=
1.0
self
.
min
=
0.0
self
.
min
=
0.0
...
@@ -83,6 +86,7 @@ class TestCase2(TestClipOp):
...
@@ -83,6 +86,7 @@ class TestCase2(TestClipOp):
class
TestCase3
(
TestClipOp
):
class
TestCase3
(
TestClipOp
):
def
initTestCase
(
self
):
def
initTestCase
(
self
):
self
.
dtype
=
np
.
float32
self
.
shape
=
(
4
,
8
,
16
)
self
.
shape
=
(
4
,
8
,
16
)
self
.
max
=
0.7
self
.
max
=
0.7
self
.
min
=
0.2
self
.
min
=
0.2
...
@@ -90,20 +94,32 @@ class TestCase3(TestClipOp):
...
@@ -90,20 +94,32 @@ class TestCase3(TestClipOp):
class
TestCase4
(
TestClipOp
):
class
TestCase4
(
TestClipOp
):
def
initTestCase
(
self
):
def
initTestCase
(
self
):
self
.
dtype
=
np
.
float32
self
.
shape
=
(
4
,
8
,
8
)
self
.
shape
=
(
4
,
8
,
8
)
self
.
max
=
0.7
self
.
max
=
0.7
self
.
min
=
0.2
self
.
min
=
0.2
self
.
inputs
[
'Max'
]
=
np
.
array
([
0.8
]).
astype
(
'float32'
)
self
.
inputs
[
'Max'
]
=
np
.
array
([
0.8
]).
astype
(
self
.
dtype
)
self
.
inputs
[
'Min'
]
=
np
.
array
([
0.3
]).
astype
(
'float32'
)
self
.
inputs
[
'Min'
]
=
np
.
array
([
0.3
]).
astype
(
self
.
dtype
)
class
TestCase5
(
TestClipOp
):
class
TestCase5
(
TestClipOp
):
def
initTestCase
(
self
):
def
initTestCase
(
self
):
self
.
dtype
=
np
.
float32
self
.
shape
=
(
4
,
8
,
16
)
self
.
shape
=
(
4
,
8
,
16
)
self
.
max
=
0.5
self
.
max
=
0.5
self
.
min
=
0.5
self
.
min
=
0.5
class TestCase6(TestClipOp):
    """Clip-op test case that configures float16 data with array-valued bounds."""

    def initTestCase(self):
        # Must be an assignment. The previous `self.dtype == np.float16` was a
        # comparison whose boolean result was discarded, so this case never
        # actually selected float16.
        self.dtype = np.float16
        self.shape = (4, 8, 8)
        # Scalar bounds for the case.
        self.max = 0.7
        self.min = 0.2
        # Bounds are additionally supplied as one-element arrays, cast to the
        # dtype chosen above so they match the input data type.
        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
class
TestClipOpError
(
unittest
.
TestCase
):
class
TestClipOpError
(
unittest
.
TestCase
):
def
test_errors
(
self
):
def
test_errors
(
self
):
paddle
.
enable_static
()
paddle
.
enable_static
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录