Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
413d6e1b
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
413d6e1b
编写于
8月 31, 2022
作者:
Q
Qi Li
提交者:
GitHub
8月 31, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] fix FillD not work on cann512, test=develop (#45586)
上级
f41b8566
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
28 addition
and
31 deletion
+28
-31
paddle/fluid/operators/fill_constant_op_npu.cc
paddle/fluid/operators/fill_constant_op_npu.cc
+0
-10
paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc
paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc
+28
-21
未找到文件。
paddle/fluid/operators/fill_constant_op_npu.cc
浏览文件 @
413d6e1b
...
...
@@ -65,21 +65,11 @@ class FillConstantNPUKernel : public framework::OpKernel<T> {
tensor_value
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
FillNpuTensorWithConstant
<
T
>
(
&
tensor_value
,
value
);
NpuOpRunner
runner
;
#if (CANN_VERSION_CODE >= 503003 && CANN_VERSION_CODE < 504000)
runner
.
SetType
(
"FillD"
)
.
AddInput
(
tensor_value
)
.
AddOutput
(
*
out_var
)
.
AddAttrs
(
{{
"dims"
,
phi
::
vectorize
(
shape
)
}})
.
Run
(
stream
);
#else
runner
.
SetType
(
"Fill"
)
.
AddInput
(
phi
::
vectorize
(
shape
))
.
AddInput
(
tensor_value
)
.
AddOutput
(
*
out_var
)
.
Run
(
stream
);
#endif
}
else
{
const
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>();
...
...
paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc
浏览文件 @
413d6e1b
...
...
@@ -30,22 +30,25 @@ class NPUReduceMeanOpKernel : public framework::OpKernel<T> {
auto
dims
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
bool
keep_dim
=
ctx
.
Attr
<
bool
>
(
"keep_dim"
);
auto
input_dims
_vec
=
phi
::
vectorize
(
input
->
dims
()
);
auto
input_dims
=
input
->
dims
(
);
if
(
reduce_all
)
{
dims
.
clear
();
for
(
size_t
i
=
0
;
i
<
input_dims_vec
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
input_dims
.
size
();
i
++
)
{
dims
.
push_back
(
static_cast
<
int
>
(
i
));
}
}
const
auto
&
runner
=
NpuOpRunner
(
"ReduceMeanD"
,
{
*
input
},
{
*
output
},
{{
"axes"
,
dims
},
{
"keep_dims"
,
keep_dim
}});
auto
stream
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
runner
.
Run
(
stream
);
NpuOpRunner
runner
;
runner
.
SetType
(
"ReduceMean"
)
.
AddInput
(
*
input
)
.
AddInput
(
std
::
move
(
dims
))
.
AddOutput
(
*
output
)
.
AddAttrs
({{
"keep_dims"
,
keep_dim
}})
.
Run
(
stream
);
}
};
...
...
@@ -60,41 +63,45 @@ class NPUReduceMeanGradOpKernel : public framework::OpKernel<T> {
bool
reduce_all
=
ctx
.
Attr
<
bool
>
(
"reduce_all"
);
auto
reduce_dims
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dim"
);
auto
input_dims
_vec
=
phi
::
vectorize
(
input
->
dims
()
);
auto
input_dims
=
input
->
dims
(
);
int
reduce_numel
=
1
;
if
(
reduce_all
)
{
reduce_dims
.
clear
();
for
(
size_t
d
=
0
;
d
<
input_dims_vec
.
size
();
++
d
)
{
for
(
int
d
=
0
;
d
<
input_dims
.
size
();
++
d
)
{
reduce_dims
.
push_back
(
static_cast
<
int
>
(
d
));
}
}
for
(
auto
&
d
:
reduce_dims
)
{
if
(
d
<
0
)
{
d
=
d
+
input_dims
_vec
.
size
();
d
=
d
+
input_dims
.
size
();
}
reduce_numel
*=
input_dims
_vec
[
d
];
reduce_numel
*=
input_dims
[
d
];
}
const
auto
&
runner
=
NpuOpRunner
(
"FillV2D"
,
{},
{
*
input_grad
},
{{
"value"
,
1.0
f
/
static_cast
<
float
>
(
reduce_numel
)},
{
"dims"
,
input_dims_vec
}});
Tensor
tensor_value
(
input_grad
->
dtype
());
tensor_value
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
FillNpuTensorWithConstant
<
T
>
(
&
tensor_value
,
static_cast
<
T
>
(
1.0
f
/
static_cast
<
T
>
(
reduce_numel
)));
auto
stream
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
runner
.
Run
(
stream
);
NpuOpRunner
runner
;
runner
.
SetType
(
"Fill"
)
.
AddInput
(
phi
::
vectorize
(
input_dims
))
.
AddInput
(
tensor_value
)
.
AddOutput
(
*
input_grad
)
.
Run
(
stream
);
Tensor
transformed_input_grad
,
transformed_out_grad
;
Tensor
tmp_output_grad
;
auto
tmp_output_dims
_vec
=
input_dims_vec
;
auto
tmp_output_dims
=
input_dims
;
for
(
auto
d
:
reduce_dims
)
{
tmp_output_dims
_vec
[
d
]
=
1
;
tmp_output_dims
[
d
]
=
1
;
}
tmp_output_grad
.
ShareDataWith
(
*
output_grad
);
tmp_output_grad
.
Resize
(
phi
::
make_ddim
(
tmp_output_dims_vec
)
);
tmp_output_grad
.
Resize
(
tmp_output_dims
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>();
NpuElementWiseOpBroadcast
<
T
>
(
dev_ctx
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录