Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
4ce272ed
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4ce272ed
编写于
8月 23, 2021
作者:
P
pangyoki
提交者:
GitHub
8月 23, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add beam_search_decode npu op (#34967)
上级
7d86737c
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
133 addition
and
8 deletion
+133
-8
paddle/fluid/operators/beam_search_decode_op.cc
paddle/fluid/operators/beam_search_decode_op.cc
+24
-8
python/paddle/fluid/tests/unittests/npu/test_beam_search_decode_op_npu.py
...uid/tests/unittests/npu/test_beam_search_decode_op_npu.py
+109
-0
未找到文件。
paddle/fluid/operators/beam_search_decode_op.cc
浏览文件 @
4ce272ed
...
...
@@ -45,9 +45,15 @@ struct BeamSearchDecodeFunctor {
id_tensor_
(
id_tensor
),
score_tensor_
(
score_tensor
)
{
tensor_on_gpu_
=
false
;
tensor_on_npu_
=
false
;
// First make a copy of GPU data on CPU
if
(
platform
::
is_gpu_place
(
step_ids_origin_
[
0
].
place
())
||
platform
::
is_npu_place
(
step_ids_origin_
[
0
].
place
()))
{
if
(
platform
::
is_gpu_place
(
step_ids_origin_
[
0
].
place
()))
{
tensor_on_gpu_
=
true
;
}
else
{
tensor_on_npu_
=
true
;
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
step_ids_origin_
[
0
].
place
());
...
...
@@ -55,7 +61,9 @@ struct BeamSearchDecodeFunctor {
for
(
auto
&
step_id
:
step_ids_origin_
)
{
framework
::
LoDTensor
out
;
if
(
step_id
.
numel
()
>
0
)
{
if
(
tensor_on_gpu_
)
{
dev_ctx
->
Wait
();
}
framework
::
TensorCopy
(
step_id
,
platform
::
CPUPlace
(),
*
dev_ctx
,
&
out
);
dev_ctx
->
Wait
();
}
...
...
@@ -64,8 +72,13 @@ struct BeamSearchDecodeFunctor {
step_ids_
.
push_back
(
out
);
}
}
if
(
platform
::
is_gpu_place
(
step_scores_origin_
[
0
].
place
())
||
platform
::
is_npu_place
(
step_scores_origin_
[
0
].
place
()))
{
if
(
platform
::
is_gpu_place
(
step_scores_origin_
[
0
].
place
()))
{
tensor_on_gpu_
=
true
;
}
else
{
tensor_on_npu_
=
true
;
}
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
step_scores_origin_
[
0
].
place
());
...
...
@@ -73,7 +86,9 @@ struct BeamSearchDecodeFunctor {
for
(
auto
&
step_score
:
step_scores_origin_
)
{
framework
::
LoDTensor
out
;
if
(
step_score
.
numel
()
>
0
)
{
if
(
tensor_on_gpu_
)
{
dev_ctx
->
Wait
();
}
framework
::
TensorCopy
(
step_score
,
platform
::
CPUPlace
(),
*
dev_ctx
,
&
out
);
dev_ctx
->
Wait
();
...
...
@@ -89,6 +104,7 @@ struct BeamSearchDecodeFunctor {
void
apply
()
const
;
bool
tensor_on_gpu_
;
bool
tensor_on_npu_
;
size_t
beam_size_
;
int
end_id_
;
// TODO(Superjomn) Here might result serious performance issue in the
...
...
@@ -105,8 +121,8 @@ struct BeamSearchDecodeFunctor {
template
<
typename
T
>
void
BeamSearchDecodeFunctor
::
apply
()
const
{
BeamSearchDecoder
<
T
>
beam_search_decoder
(
beam_size_
,
end_id_
);
// Check if the tensor is on GPU. If so, use the CPU copy instead
if
(
tensor_on_gpu_
)
{
// Check if the tensor is on GPU
or NPU
. If so, use the CPU copy instead
if
(
tensor_on_gpu_
||
tensor_on_npu_
)
{
beam_search_decoder
.
Backtrace
(
step_ids_
,
step_scores_
,
id_tensor_
,
score_tensor_
);
}
else
{
...
...
python/paddle/fluid/tests/unittests/npu/test_beam_search_decode_op_npu.py
0 → 100644
浏览文件 @
4ce272ed
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle
import
paddle.fluid.core
as
core
from
paddle.fluid.op
import
Operator
import
paddle.fluid
as
fluid
from
paddle.fluid.framework
import
Program
,
program_guard
class TestBeamSearchDecodeNPUOp(unittest.TestCase):
    """Runs the beam_search_decode operator on an NPU place and checks its output."""

    def setUp(self):
        # Every test gets a fresh scope and targets NPU device 0.
        self.place = paddle.NPUPlace(0)
        self.scope = core.Scope()

    def append_lod_tensor(self, tensor_array, lod, data):
        """Wrap `data` in a LoDTensor carrying `lod` and append it to `tensor_array`."""
        step_tensor = core.LoDTensor()
        step_tensor.set_lod(lod)
        step_tensor.set(data, self.place)
        tensor_array.append(step_tensor)

    def test_get_set(self):
        """Feed five decoding steps to the operator and verify LoD and data."""
        ids = self.scope.var("ids").get_lod_tensor_array()
        scores = self.scope.var("scores").get_lod_tensor_array()

        # Sample data: 5 steps, 2 source sentences, beam_size = 2, end_id = 1.
        # The first entry is the start_id step. Each entry is (lod, values).
        steps = (
            ([[0, 1, 2], [0, 1, 2]], [0, 0]),
            ([[0, 1, 2], [0, 2, 4]], [2, 3, 4, 5]),
            ([[0, 2, 4], [0, 2, 2, 4, 4]], [3, 1, 5, 4]),
            ([[0, 2, 4], [0, 1, 2, 3, 4]], [1, 1, 3, 5]),
            ([[0, 2, 4], [0, 0, 0, 2, 2]], [5, 1]),
        )
        # The same values are stored in both arrays, as int64 ids and as
        # float32 scores; ids are appended before scores for every step.
        targets = ((ids, "int64"), (scores, "float32"))
        for step_lod, step_values in steps:
            for target_array, target_dtype in targets:
                self.append_lod_tensor(
                    target_array,
                    step_lod,
                    np.array(step_values, dtype=target_dtype))

        sentence_ids = self.scope.var("sentence_ids").get_tensor()
        sentence_scores = self.scope.var("sentence_scores").get_tensor()

        op_kwargs = dict(
            # inputs
            Ids="ids",
            Scores="scores",
            # outputs
            SentenceIds="sentence_ids",
            SentenceScores="sentence_scores",
            beam_size=2,
            end_id=1)
        decode_op = Operator("beam_search_decode", **op_kwargs)
        decode_op.run(self.scope, self.place)

        # Both outputs must share the same two-level LoD.
        expected_lod = [[0, 2, 4], [0, 4, 7, 12, 17]]
        for result in (sentence_ids, sentence_scores):
            self.assertEqual(result.lod(), expected_lod)

        # Scores were fed the same numbers as ids, so the decoded values match.
        expected_data = np.array(
            [0, 2, 3, 1, 0, 2, 1, 0, 4, 5, 3, 5, 0, 4, 5, 3, 1], "int64")
        for result in (sentence_ids, sentence_scores):
            decoded = np.array(result)
            self.assertTrue(np.array_equal(decoded, expected_data))
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录