Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
cbf22d65
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
cbf22d65
编写于
7月 07, 2021
作者:
L
Leo Chen
提交者:
GitHub
7月 07, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] NpuOpRunner supports host tensor as input (#33992)
* NpuOpRunner supports host tensor as input * fix compile issue
上级
20da7703
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
85 addition
and
19 deletion
+85
-19
paddle/fluid/operators/lookup_table_v2_op_npu.cc
paddle/fluid/operators/lookup_table_v2_op_npu.cc
+7
-7
paddle/fluid/operators/npu_op_runner.cc
paddle/fluid/operators/npu_op_runner.cc
+58
-6
paddle/fluid/operators/npu_op_runner.h
paddle/fluid/operators/npu_op_runner.h
+20
-6
未找到文件。
paddle/fluid/operators/lookup_table_v2_op_npu.cc
浏览文件 @
cbf22d65
...
...
@@ -39,14 +39,14 @@ class LookupTableV2NPUKernel : public framework::OpKernel<T> {
table_var
->
IsType
<
framework
::
LoDTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"npu only accept LoDTensor"
));
output_t
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
framework
::
NPUAttributeMap
attr_input
=
{{
"validate_indices"
,
false
}};
const
auto
&
runner
=
NpuOpRunner
(
"Gather"
,
{
*
table_t
,
*
ids_t
},
{
*
output_t
},
attr_input
);
auto
stream
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
runner
.
Run
(
stream
);
NpuOpRunner
runner
;
runner
.
SetType
(
"GatherV2"
)
.
AddInput
(
*
table_t
)
.
AddInput
(
*
ids_t
)
.
AddInput
(
std
::
vector
<
int32_t
>
{
0
})
.
AddOutput
(
*
output_t
);
runner
.
Run
();
}
};
...
...
paddle/fluid/operators/npu_op_runner.cc
浏览文件 @
cbf22d65
...
...
@@ -74,15 +74,15 @@ aclrtStream GetCurrentNPUStream(int device_id) {
return
dev_ctx
->
stream
();
}
NpuOpRunner
::
NpuOpRunner
(
std
::
string
op_type
)
:
op_type_
(
op_type
)
{
attr_
=
aclopCreateAttr
();
}
NpuOpRunner
::
NpuOpRunner
(
)
{}
NpuOpRunner
::
NpuOpRunner
(
const
std
::
string
&
op_type
)
:
op_type_
(
op_type
)
{
}
NpuOpRunner
::
NpuOpRunner
(
std
::
string
op_type
,
const
std
::
vector
<
Tensor
>
&
inputs
,
NpuOpRunner
::
NpuOpRunner
(
const
std
::
string
&
op_type
,
const
std
::
vector
<
Tensor
>
&
inputs
,
const
std
::
vector
<
Tensor
>
&
outputs
,
const
NPUAttributeMap
&
attrs
)
:
op_type_
(
op_type
)
{
attr_
=
aclopCreateAttr
();
AddInputs
(
inputs
);
AddOutputs
(
outputs
);
AddAttrs
(
attrs
);
...
...
@@ -108,8 +108,16 @@ NpuOpRunner::~NpuOpRunner() {
const
std
::
string
&
NpuOpRunner
::
Type
()
{
return
op_type_
;
}
NpuOpRunner
&
NpuOpRunner
::
SetType
(
const
std
::
string
&
name
)
{
op_type_
=
name
;
return
*
this
;
}
NpuOpRunner
&
NpuOpRunner
::
AddAttr
(
const
std
::
string
&
name
,
const
NPUAttribute
&
attr
)
{
if
(
!
attr_
)
{
attr_
=
aclopCreateAttr
();
}
if
(
attr
.
type
()
==
typeid
(
bool
))
{
PADDLE_ENFORCE_NPU_SUCCESS
(
aclopSetAttrBool
(
attr_
,
name
.
c_str
(),
BOOST_GET_CONST
(
bool
,
attr
)));
...
...
@@ -191,6 +199,46 @@ NpuOpRunner &NpuOpRunner::AddInput(const Tensor &tensor) {
return
*
this
;
}
NpuOpRunner
&
NpuOpRunner
::
AddInput
(
const
Tensor
&
tensor
,
aclMemType
mem_type
)
{
// create aclTensorDesc
input_descs_
.
emplace_back
(
CreateTensorDesc
(
tensor
,
mem_type
));
// create aclDataBuffer
input_buffers_
.
emplace_back
(
CreateDataBuffer
(
tensor
));
return
*
this
;
}
NpuOpRunner
&
NpuOpRunner
::
AddInput
(
std
::
vector
<
int32_t
>
&&
dims
)
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CPUDeviceContext
*>
(
pool
.
Get
(
platform
::
CPUPlace
()));
Tensor
host_tensor
;
TensorFromVector
(
dims
,
*
dev_ctx
,
&
host_tensor
);
host_tensors_
.
emplace_back
(
host_tensor
);
// create aclTensorDesc
input_descs_
.
emplace_back
(
CreateTensorDesc
(
host_tensor
,
ACL_MEMTYPE_HOST
));
// create aclDataBuffer
input_buffers_
.
emplace_back
(
CreateDataBuffer
(
host_tensor
));
return
*
this
;
}
NpuOpRunner
&
NpuOpRunner
::
AddInput
(
std
::
vector
<
int64_t
>
&&
dims
)
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CPUDeviceContext
*>
(
pool
.
Get
(
platform
::
CPUPlace
()));
Tensor
host_tensor
;
TensorFromVector
(
dims
,
*
dev_ctx
,
&
host_tensor
);
host_tensors_
.
emplace_back
(
host_tensor
);
// create aclTensorDesc
input_descs_
.
emplace_back
(
CreateTensorDesc
(
host_tensor
,
ACL_MEMTYPE_HOST
));
// create aclDataBuffer
input_buffers_
.
emplace_back
(
CreateDataBuffer
(
host_tensor
));
return
*
this
;
}
NpuOpRunner
&
NpuOpRunner
::
AddOutput
(
const
Tensor
&
tensor
)
{
// create aclTensorDesc
output_descs_
.
emplace_back
(
CreateTensorDesc
(
tensor
));
...
...
@@ -272,7 +320,8 @@ std::vector<aclDataBuffer *> &NpuOpRunner::GetOutputBuffers() {
return
output_buffers_
;
}
aclTensorDesc
*
NpuOpRunner
::
CreateTensorDesc
(
Tensor
tensor
)
{
aclTensorDesc
*
NpuOpRunner
::
CreateTensorDesc
(
Tensor
tensor
,
aclMemType
mem_type
)
{
auto
dtype
=
ConvertToNpuDtype
(
tensor
.
type
());
auto
format
=
ConvertToNpuFormat
(
tensor
.
layout
());
auto
dims
=
framework
::
vectorize
(
tensor
.
dims
());
...
...
@@ -287,6 +336,9 @@ aclTensorDesc *NpuOpRunner::CreateTensorDesc(Tensor tensor) {
PADDLE_ENFORCE_NPU_SUCCESS
(
aclSetTensorStorageFormat
(
desc
,
format
));
PADDLE_ENFORCE_NPU_SUCCESS
(
aclSetTensorStorageShape
(
desc
,
dims
.
size
(),
dims
.
data
()));
if
(
mem_type
==
ACL_MEMTYPE_HOST
)
{
PADDLE_ENFORCE_NPU_SUCCESS
(
aclSetTensorPlaceMent
(
desc
,
mem_type
));
}
return
desc
;
}
...
...
paddle/fluid/operators/npu_op_runner.h
浏览文件 @
cbf22d65
...
...
@@ -35,11 +35,12 @@ using DeviceContextPool = platform::DeviceContextPool;
class
NpuOpRunner
{
public:
explicit
NpuOpRunner
(
std
::
string
op_type
);
explicit
NpuOpRunner
(
std
::
string
op_type
,
const
std
::
vector
<
Tensor
>
&
inputs
=
{},
const
std
::
vector
<
Tensor
>
&
outputs
=
{},
const
NPUAttributeMap
&
attrs
=
{});
NpuOpRunner
();
explicit
NpuOpRunner
(
const
std
::
string
&
op_type
);
NpuOpRunner
(
const
std
::
string
&
op_type
,
const
std
::
vector
<
Tensor
>
&
inputs
=
{},
const
std
::
vector
<
Tensor
>
&
outputs
=
{},
const
NPUAttributeMap
&
attrs
=
{});
// NOTE(zhiqiu): why forbid copy and operator= ?
// Since we will free the tensor_descs and data_buffers in the ~NpuOpRunner,
...
...
@@ -53,12 +54,23 @@ class NpuOpRunner {
const
std
::
string
&
Type
();
NpuOpRunner
&
SetType
(
const
std
::
string
&
name
);
NpuOpRunner
&
AddAttr
(
const
std
::
string
&
name
,
const
NPUAttribute
&
attr
);
NpuOpRunner
&
AddAttrs
(
const
NPUAttributeMap
&
attrs
);
NpuOpRunner
&
AddInput
(
const
Tensor
&
tensor
);
// NOTE(zhiqiu): CANN-5.0.2 support input tensors on host.
// Specifically, the tensor of shape, tensor of dims, etc, which are are small
// vector/list.
NpuOpRunner
&
AddInput
(
const
Tensor
&
tensor
,
aclMemType
mem_type
);
NpuOpRunner
&
AddInput
(
std
::
vector
<
int32_t
>
&&
dims
);
NpuOpRunner
&
AddInput
(
std
::
vector
<
int64_t
>
&&
dims
);
NpuOpRunner
&
AddOutput
(
const
Tensor
&
tensor
);
NpuOpRunner
&
AddInputs
(
const
std
::
vector
<
Tensor
>
&
tensors
);
...
...
@@ -82,7 +94,8 @@ class NpuOpRunner {
void
Run
(
aclrtStream
stream
=
nullptr
)
const
;
private:
aclTensorDesc
*
CreateTensorDesc
(
Tensor
tensor
);
aclTensorDesc
*
CreateTensorDesc
(
Tensor
tensor
,
aclMemType
mem_type
=
ACL_MEMTYPE_DEVICE
);
aclDataBuffer
*
CreateDataBuffer
(
Tensor
tensor
);
private:
...
...
@@ -91,6 +104,7 @@ class NpuOpRunner {
std
::
vector
<
aclDataBuffer
*>
output_buffers_
;
std
::
vector
<
aclTensorDesc
*>
input_descs_
;
std
::
vector
<
aclTensorDesc
*>
output_descs_
;
std
::
vector
<
Tensor
>
host_tensors_
;
aclopAttr
*
attr_
{
nullptr
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录