Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
ed478a3e
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ed478a3e
编写于
10月 21, 2021
作者:
Z
zhulei
提交者:
GitHub
10月 21, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] Add p_norm_grad (#36497)
上级
7eab0fa6
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
136 additions
and
1 deletion
+136
-1
paddle/fluid/operators/p_norm_op_npu.cc
paddle/fluid/operators/p_norm_op_npu.cc
+120
-0
python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
...on/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
+16
-1
未找到文件。
paddle/fluid/operators/p_norm_op_npu.cc
浏览文件 @
ed478a3e
...
@@ -81,6 +81,122 @@ class PnormNPUKernel : public framework::OpKernel<T> {
...
@@ -81,6 +81,122 @@ class PnormNPUKernel : public framework::OpKernel<T> {
}
}
};
};
/**
 * NPU kernel that computes the gradient of p_norm with respect to "X".
 *
 * Reads the tensors "X", "Out" and the gradient of "Out" from the execution
 * context and writes the gradient of "X". The attributes "porder", "keepdim"
 * and "axis" are consulted.
 *
 * Three cases are distinguished by porder:
 *   - porder == 0:       the output gradient is filled with zeros.
 *   - porder == +/-inf:  sign(X) * dOut, then passed through SelectV2 with the
 *                        mask (|X| == Out) and a zero tensor.
 *   - anything else:     a ratio of Power(|X|) and Power(Out), multiplied by
 *                        sign(X) and by dOut.
 */
template <typename DeviceContext, typename T>
class PnormGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using Tensor = framework::Tensor;

    auto* in_x = ctx.Input<Tensor>("X");
    auto* norm = ctx.Input<Tensor>("Out");
    auto* norm_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));

    auto place = ctx.GetPlace();
    x_grad->mutable_data<T>(place);

    auto x_dims = in_x->dims();
    float porder = ctx.Attr<float>("porder");
    bool keepdim = ctx.Attr<bool>("keepdim");
    int axis = ctx.Attr<int>("axis");
    // A negative axis counts from the last dimension.
    if (axis < 0) {
      axis = x_dims.size() + axis;
    }

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // Views that share storage with Out / dOut but carry a shape in which the
    // reduced axis is kept as size 1 (or Out's own shape when keepdim is set).
    Tensor norm_view(norm->type());
    Tensor norm_grad_view(norm_grad->type());
    norm_view.ShareDataWith(*norm);
    norm_grad_view.ShareDataWith(*norm_grad);

    auto reduced_dims = x_dims;
    if (keepdim) {
      reduced_dims = norm->dims();
    } else {
      reduced_dims[axis] = 1;
    }
    norm_view.Resize(reduced_dims);
    norm_grad_view.Resize(reduced_dims);

    // Case 1: porder == 0 -> gradient is all zeros.
    if (porder == 0) {
      FillNpuTensorWithConstant(x_grad, static_cast<T>(0));
      x_grad->Resize(x_dims);
      return;
    }

    // Case 2: infinite order (either sign).
    if (porder == INFINITY || porder == -INFINITY) {
      Tensor abs_x;
      abs_x.mutable_data<T>(x_dims, place);
      NpuOpRunner("Abs", {*in_x}, {abs_x}, {}).Run(stream);

      // Mask of the positions where |x| equals the norm value.
      Tensor hit_mask;
      hit_mask.mutable_data<bool>(x_dims, place);
      NpuOpRunner("Equal", {abs_x, norm_view}, {hit_mask}, {}).Run(stream);

      Tensor zero;
      zero.mutable_data<T>({1}, place);
      FillNpuTensorWithConstant(&zero, static_cast<T>(0));

      Tensor sign_x;
      sign_x.mutable_data<T>(x_dims, place);
      NpuOpRunner("Sign", {*in_x}, {sign_x}, {}).Run(stream);

      // x_grad = sign(x) * dOut, then SelectV2(mask, x_grad, zero) in place.
      NpuOpRunner("Mul", {sign_x, norm_grad_view}, {*x_grad}, {}).Run(stream);
      NpuOpRunner("SelectV2", {hit_mask, *x_grad, zero}, {*x_grad}, {})
          .Run(stream);
      return;
    }

    // Case 3: finite, non-zero order.
    Tensor abs_x;
    abs_x.mutable_data<T>(x_dims, place);
    NpuOpRunner("Abs", {*in_x}, {abs_x}, {}).Run(stream);

    Tensor sign_x;
    sign_x.mutable_data<T>(x_dims, place);
    NpuOpRunner("Sign", {*in_x}, {sign_x}, {}).Run(stream);

    Tensor norm_pow;
    norm_pow.mutable_data<T>(reduced_dims, place);

    // The exponent handed to Power is porder - 1 when porder >= 1 and
    // 1 - porder otherwise; in the latter case the operands of the division
    // are swapped.
    const bool order_ge_one = porder >= 1;
    const float exponent = order_ge_one ? (porder - 1) : (1 - porder);

    // |x| is raised to the exponent in place; Out is raised into norm_pow.
    NpuOpRunner("Power", {abs_x}, {abs_x},
                {{"power", exponent}, {"scale", 1.0f}, {"shift", 0.0f}})
        .Run(stream);
    NpuOpRunner("Power", {norm_view}, {norm_pow},
                {{"power", exponent}, {"scale", 1.0f}, {"shift", 0.0f}})
        .Run(stream);

    if (order_ge_one) {
      NpuOpRunner("DivNoNan", {abs_x, norm_pow}, {*x_grad}, {}).Run(stream);
    } else {
      NpuOpRunner("DivNoNan", {norm_pow, abs_x}, {*x_grad}, {}).Run(stream);
    }

    // Apply the sign of x, then scale by the incoming gradient (both in place).
    NpuOpRunner("Mul", {*x_grad, sign_x}, {*x_grad}, {}).Run(stream);
    NpuOpRunner("Mul", {*x_grad, norm_grad_view}, {*x_grad}, {}).Run(stream);
  }
};
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
...
@@ -90,3 +206,7 @@ namespace plat = paddle::platform;
...
@@ -90,3 +206,7 @@ namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL
(
REGISTER_OP_NPU_KERNEL
(
p_norm
,
ops
::
PnormNPUKernel
<
plat
::
NPUDeviceContext
,
float
>
,
p_norm
,
ops
::
PnormNPUKernel
<
plat
::
NPUDeviceContext
,
float
>
,
ops
::
PnormNPUKernel
<
plat
::
NPUDeviceContext
,
plat
::
float16
>
);
ops
::
PnormNPUKernel
<
plat
::
NPUDeviceContext
,
plat
::
float16
>
);
// Registers the NPU kernels of the p_norm_grad op: PnormGradNPUKernel
// instantiated for float and for plat::float16.
REGISTER_OP_NPU_KERNEL
(
p_norm_grad
,
ops
::
PnormGradNPUKernel
<
plat
::
NPUDeviceContext
,
float
>
,
ops
::
PnormGradNPUKernel
<
plat
::
NPUDeviceContext
,
plat
::
float16
>
);
python/paddle/fluid/tests/unittests/npu/test_p_norm_op_npu.py
浏览文件 @
ed478a3e
...
@@ -27,7 +27,6 @@ paddle.enable_static()
...
@@ -27,7 +27,6 @@ paddle.enable_static()
class
TestPnormOp
(
OpTest
):
class
TestPnormOp
(
OpTest
):
def
set_npu
(
self
):
def
set_npu
(
self
):
self
.
__class__
.
use_npu
=
True
self
.
__class__
.
use_npu
=
True
self
.
__class__
.
no_need_check_grad
=
True
def
setUp
(
self
):
def
setUp
(
self
):
self
.
set_npu
()
self
.
set_npu
()
...
@@ -51,6 +50,12 @@ class TestPnormOp(OpTest):
...
@@ -51,6 +50,12 @@ class TestPnormOp(OpTest):
else
:
else
:
self
.
check_output_with_place
(
paddle
.
NPUPlace
(
0
))
self
.
check_output_with_place
(
paddle
.
NPUPlace
(
0
))
def
test_check_grad
(
self
):
if
self
.
dtype
==
"float16"
:
return
self
.
check_grad_with_place
(
paddle
.
NPUPlace
(
0
),
[
'X'
],
'Out'
,
user_defined_grads
=
self
.
gradient
)
def
init_test_case
(
self
):
def
init_test_case
(
self
):
self
.
shape
=
[
2
,
3
,
4
,
5
]
self
.
shape
=
[
2
,
3
,
4
,
5
]
self
.
axis
=
1
self
.
axis
=
1
...
@@ -131,6 +136,16 @@ class TestPnormOp5(TestPnormOp3):
...
@@ -131,6 +136,16 @@ class TestPnormOp5(TestPnormOp3):
self
.
init_dtype
()
self
.
init_dtype
()
class TestPnormOp6(TestPnormOp3):
    """Variant of TestPnormOp3 configured with porder = 0.5 and keepdim = False."""

    def init_test_case(self):
        # Input layout: shape [2, 3, 4, 5], axis 1.
        self.shape, self.axis = [2, 3, 4, 5], 1
        # Norm parameters for this case.
        self.porder = 0.5
        self.keepdim = False
        self.epsilon = 1e-12
        self.init_dtype()
class
TestPnormOpfp16
(
TestPnormOp
):
class
TestPnormOpfp16
(
TestPnormOp
):
def
init_dtype
(
self
):
def
init_dtype
(
self
):
self
.
dtype
=
"float16"
self
.
dtype
=
"float16"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录