未验证 提交 3760be06 编写于 作者: P pangyoki 提交者: GitHub

[NPU] add beam_search npu op (#34860)

* add beam_search npu op

* fix CMakeList and add unittest

* fix bug of beam search npu op

* fix unittest

* let input ids become int64

* set output ids to int64_t

* delete check_dygraph

* fix beam_width=1
上级 9f588cc2
......@@ -51,11 +51,11 @@ class BeamSearchOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_NOT_NULL(
selected_ids,
platform::errors::NotFound(
"Output(selected_scores) of BeamSearchOp is not found."));
"Output(selected_ids) of BeamSearchOp is not found."));
PADDLE_ENFORCE_NOT_NULL(
selected_scores,
platform::errors::NotFound(
"Output(parent_idx) of BeamSearchOp is not found."));
"Output(selected_scores) of BeamSearchOp is not found."));
math::BeamSearchFunctor<DeviceContext, T> alg;
alg(context.template device_context<DeviceContext>(), pre_ids, pre_scores,
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/beam_search_op.h"
#include "paddle/fluid/framework/op_registry.h"
// Short alias for the operator namespace used by the instantiations below.
namespace ops = paddle::operators;
// Hooks BeamSearchOpKernel up as the NPU kernel of the `beam_search` op,
// instantiated on NPUDeviceContext for float, double, int and int64_t.
REGISTER_OP_NPU_KERNEL(
beam_search,
ops::BeamSearchOpKernel<paddle::platform::NPUDeviceContext, float>,
ops::BeamSearchOpKernel<paddle::platform::NPUDeviceContext, double>,
ops::BeamSearchOpKernel<paddle::platform::NPUDeviceContext, int>,
ops::BeamSearchOpKernel<paddle::platform::NPUDeviceContext, int64_t>);
......@@ -39,6 +39,10 @@ function(math_library TARGET)
endif()
endfunction()
# The beam_search_npu library (built from beam_search_npu.cc, depending on
# npu_op_runner) is only declared when WITH_ASCEND_CL is set.
if (WITH_ASCEND_CL)
cc_library(beam_search_npu SRCS beam_search_npu.cc DEPS npu_op_runner)
endif()
# please add new math_library in alphabetical order
math_library(concat_and_split)
math_library(context_project DEPS im2col math_function)
......@@ -68,7 +72,11 @@ math_library(sequence_padding)
math_library(sequence_pooling DEPS math_function jit_kernel_helper)
math_library(sequence_scale)
math_library(softmax DEPS math_function jit_kernel_helper)
math_library(beam_search DEPS math_function)
# beam_search always depends on math_function; when WITH_ASCEND_CL is set it
# additionally depends on beam_search_npu.
if (WITH_ASCEND_CL)
math_library(beam_search DEPS math_function beam_search_npu)
else()
math_library(beam_search DEPS math_function)
endif()
math_library(fc DEPS blas)
math_library(matrix_bit_code)
......
此差异已折叠。
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import paddle
import sys
sys.path.append("..")
from op_test import OpTest
import unittest
import numpy as np
import paddle.fluid as fluid
paddle.enable_static()
class TestBeamSearchNPUOp(OpTest):
    """Baseline `beam_search` case on NPU: beam_size=2, three candidates per row.

    Subclasses override `init_data` to supply other inputs and expected
    outputs; `setUp` packs whatever `init_data` produced into the
    `inputs` / `attrs` / `outputs` dictionaries.
    """

    def setUp(self):
        """Select the NPU place and assemble the op description."""
        self.set_npu()
        self.place = paddle.NPUPlace(0)
        self.op_type = "beam_search"
        self.init_data()

        # Every input carries the same (length-based) LoD.
        in_lod = self.lod
        self.inputs = dict(
            pre_ids=(self.pre_ids, in_lod),
            pre_scores=(self.pre_score, in_lod),
            ids=(self.ids, in_lod),
            scores=(self.score, in_lod), )

        self.attrs = dict(
            level=0,
            beam_size=self.beam_size,
            end_id=0,
            is_accumulated=self.is_accumulated, )

        # The two selected_* outputs share one LoD; parent_idx has none.
        result_lod = self.out_lod
        self.outputs = dict(
            selected_ids=(self.selected_ids, result_lod),
            selected_scores=(self.selected_scores, result_lod),
            parent_idx=self.parent_idx, )

    def set_npu(self):
        """Flag the test class as an NPU test."""
        type(self).use_npu = True

    def init_data(self):
        """Populate inputs and expected outputs for the baseline case."""
        self.beam_size = 2
        self.is_accumulated = True

        # LoD in length form, plus the offset form of the expected output LoD
        # (offset_lod is not read by setUp in this class).
        self.lod = [[2, 2], [1, 1, 1, 1]]
        self.out_lod = [[2, 2], [1, 1, 1, 1]]
        self.offset_lod = [[0, 2, 4], [0, 1, 2, 3, 4]]

        self.pre_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        self.pre_score = np.array(
            [[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)

        candidate_ids = [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]]
        candidate_scores = [
            [0.5, 0.3, 0.2],
            [0.6, 0.3, 0.1],
            [0.9, 0.5, 0.1],
            [0.7, 0.5, 0.1],
        ]
        self.ids = np.array(candidate_ids, dtype=np.int64)
        self.score = np.array(candidate_scores, dtype=np.float32)

        # Expected results, stored as column vectors.
        self.selected_ids = np.array([4, 2, 3, 8]).reshape(-1, 1)
        self.selected_scores = np.array([0.5, 0.6, 0.9, 0.7]).reshape(-1, 1)
        self.parent_idx = np.array([0, 1, 2, 3])

    def test_check_output(self):
        # Compare the op's outputs on the NPU place with the expected ones.
        self.check_output_with_place(self.place, atol=1e-3)
class TestBeamSearchNPUOp2(TestBeamSearchNPUOp):
    """Two candidates per row; the expected output LoD is uneven ([2, 0, 1, 1])."""

    def init_data(self):
        """Populate inputs and expected outputs for this variant."""
        self.beam_size = 2
        self.is_accumulated = True

        self.lod = [[2, 2], [1, 1, 1, 1]]
        self.out_lod = [[2, 2], [2, 0, 1, 1]]
        self.offset_lod = [[0, 2, 4], [0, 2, 2, 3, 4]]

        self.pre_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        self.pre_score = np.array(
            [[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)

        candidate_ids = [[4, 2], [7, 3], [3, 5], [8, 1]]
        candidate_scores = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.1, 0.7],
        ]
        self.ids = np.array(candidate_ids, dtype=np.int64)
        self.score = np.array(candidate_scores, dtype=np.float32)

        # Expected results, stored as column vectors.
        self.selected_ids = np.array([4, 2, 3, 1]).reshape(-1, 1)
        self.selected_scores = np.array([0.6, 0.9, 0.9, 0.7]).reshape(-1, 1)
        self.parent_idx = np.array([0, 0, 2, 3])
class TestBeamSearchNPUOp3(TestBeamSearchNPUOp):
    """Variant in which some `pre_ids` entries equal the end id (end_id = 0)."""

    def init_data(self):
        """Populate inputs and expected outputs for this variant."""
        self.beam_size = 2
        self.is_accumulated = True

        self.lod = [[2, 2], [1, 1, 1, 1]]
        self.out_lod = [[2, 2], [1, 1, 0, 2]]
        self.offset_lod = [[0, 2, 4], [0, 1, 2, 2, 4]]

        # Rows 1 and 2 already hold the end id (0).
        self.pre_ids = np.array([[1], [0], [0], [4]], dtype=np.int64)
        self.pre_score = np.array(
            [[0.1], [1.2], [0.5], [0.4]], dtype=np.float32)

        candidate_ids = [[4, 2], [7, 3], [3, 5], [8, 1]]
        candidate_scores = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.6, 0.7],
        ]
        self.ids = np.array(candidate_ids, dtype=np.int64)
        self.score = np.array(candidate_scores, dtype=np.float32)

        # Expected results, stored as column vectors.
        self.selected_ids = np.array([2, 0, 8, 1]).reshape(-1, 1)
        self.selected_scores = np.array([0.9, 1.2, 0.6, 0.7]).reshape(-1, 1)
        self.parent_idx = np.array([0, 1, 3, 3])
class TestBeamSearchNPUOp4(TestBeamSearchNPUOp):
    """Variant that runs the op with is_accumulated = False."""

    def init_data(self):
        """Populate inputs and expected outputs for this variant."""
        self.beam_size = 2
        self.is_accumulated = False

        self.lod = [[2, 2], [1, 1, 1, 1]]
        self.out_lod = [[2, 2], [0, 2, 1, 1]]
        self.offset_lod = [[0, 2, 4], [0, 0, 2, 3, 4]]

        self.pre_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        self.pre_score = np.array(
            [[0.1], [2.2], [0.3], [0.4]], dtype=np.float32)

        candidate_ids = [[4, 2], [7, 3], [3, 5], [8, 1]]
        candidate_scores = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.1, 0.7],
        ]
        self.ids = np.array(candidate_ids, dtype=np.int64)
        self.score = np.array(candidate_scores, dtype=np.float32)

        # Expected results, stored as column vectors.
        self.selected_ids = np.array([7, 3, 3, 1]).reshape(-1, 1)
        expected_scores = [1.50685, 0.996027, 0.194639, 0.043325]
        self.selected_scores = np.array(expected_scores).reshape(-1, 1)
        self.parent_idx = np.array([1, 1, 2, 3])
class TestBeamSearchNPUOp5(TestBeamSearchNPUOp):
    """Variant that runs the op with beam_size = 1."""

    def init_data(self):
        """Populate inputs and expected outputs for this variant."""
        self.beam_size = 1
        self.is_accumulated = True

        self.lod = [[1, 1, 1, 1], [1, 1, 1, 1]]
        self.out_lod = [[1, 1, 1, 1], [1, 1, 1, 1]]
        self.offset_lod = [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]

        self.pre_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        self.pre_score = np.array(
            [[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)

        candidate_ids = [[4, 2], [7, 3], [3, 5], [8, 1]]
        candidate_scores = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.1, 0.7],
        ]
        self.ids = np.array(candidate_ids, dtype=np.int64)
        self.score = np.array(candidate_scores, dtype=np.float32)

        # Expected results, stored as column vectors.
        self.selected_ids = np.array([2, 7, 3, 1]).reshape(-1, 1)
        self.selected_scores = np.array([0.9, 0.5, 0.9, 0.7]).reshape(-1, 1)
        self.parent_idx = np.array([0, 1, 2, 3])
# Run the test cases only when this file is executed as a script.
if "__main__" == __name__:
    unittest.main()
......@@ -38,6 +38,7 @@ class BeamSearchOpTester(unittest.TestCase):
self._create_pre_scores()
self._create_scores()
self._create_pre_ids()
self.set_outputs()
self.scope.var('selected_ids').get_tensor()
self.scope.var('selected_scores').get_tensor()
self.scope.var('parent_idx').get_tensor()
......@@ -53,22 +54,19 @@ class BeamSearchOpTester(unittest.TestCase):
selected_scores='selected_scores',
parent_idx='parent_idx',
level=0,
beam_size=2,
end_id=0, )
beam_size=self.beam_size,
end_id=0,
is_accumulated=self.is_accumulated)
op.run(self.scope, core.CPUPlace())
selected_ids = self.scope.find_var("selected_ids").get_tensor()
selected_scores = self.scope.find_var("selected_scores").get_tensor()
parent_idx = self.scope.find_var("parent_idx").get_tensor()
self.assertTrue(np.allclose(np.array(selected_ids), self.output_ids))
self.assertTrue(
np.allclose(
np.array(selected_ids), np.array([4, 2, 3, 8])[:, np.newaxis]))
np.allclose(np.array(selected_scores), self.output_scores))
self.assertEqual(selected_ids.lod(), self.output_lod)
self.assertTrue(
np.allclose(
np.array(selected_scores),
np.array([0.5, 0.6, 0.9, 0.7])[:, np.newaxis]))
self.assertEqual(selected_ids.lod(), [[0, 2, 4], [0, 1, 2, 3, 4]])
self.assertTrue(
np.allclose(np.array(parent_idx), np.array([0, 1, 2, 3])))
np.allclose(np.array(parent_idx), self.output_parent_idx))
def _create_pre_ids(self):
np_data = np.array([[1, 2, 3, 4]], dtype='int64')
......@@ -97,6 +95,194 @@ class BeamSearchOpTester(unittest.TestCase):
tensor = create_tensor(self.scope, "scores", np_data)
tensor.set_lod(self.lod)
def set_outputs(self):
    """Record the op attributes and the expected outputs for the default case."""
    self.beam_size = 2
    self.is_accumulated = True
    # Expected ids and scores are stored as column vectors.
    self.output_ids = np.array([4, 2, 3, 8]).reshape(-1, 1)
    self.output_scores = np.array([0.5, 0.6, 0.9, 0.7]).reshape(-1, 1)
    # Expected LoD of `selected_ids`.
    self.output_lod = [[0, 2, 4], [0, 1, 2, 3, 4]]
    self.output_parent_idx = np.array([0, 1, 2, 3])
class BeamSearchOpTester2(BeamSearchOpTester):
    """Two candidates per row; the expected output LoD is uneven."""

    def _create_pre_ids(self):
        """Create the `pre_ids` tensor in the scope."""
        prev_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        create_tensor(self.scope, 'pre_ids', prev_ids)

    def _create_pre_scores(self):
        """Create the `pre_scores` tensor in the scope."""
        prev_scores = np.array([[0.1, 0.2, 0.3, 0.4]], dtype=np.float32)
        create_tensor(self.scope, 'pre_scores', prev_scores)

    def _create_ids(self):
        """Create the `ids` tensor and remember the LoD shared with `scores`."""
        self.lod = [[0, 2, 4], [0, 1, 2, 3, 4]]
        candidate_ids = np.array(
            [[4, 2], [7, 3], [3, 5], [8, 1]], dtype=np.int64)
        create_tensor(self.scope, "ids", candidate_ids).set_lod(self.lod)

    def _create_scores(self):
        """Create the `scores` tensor with the LoD set by `_create_ids`."""
        rows = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.1, 0.7],
        ]
        candidate_scores = np.array(rows, dtype=np.float32)
        create_tensor(self.scope, "scores", candidate_scores).set_lod(self.lod)

    def set_outputs(self):
        """Record the op attributes and the expected outputs."""
        self.beam_size = 2
        self.is_accumulated = True
        self.output_ids = np.array([2, 4, 3, 1]).reshape(-1, 1)
        self.output_scores = np.array([0.9, 0.6, 0.9, 0.7]).reshape(-1, 1)
        self.output_lod = [[0, 2, 4], [0, 2, 2, 3, 4]]
        self.output_parent_idx = np.array([0, 0, 2, 3])
class BeamSearchOpTester3(BeamSearchOpTester):
    """Case where some `pre_ids` entries equal the end id (pre_id = end_id)."""

    def _create_pre_ids(self):
        """Create the `pre_ids` tensor; rows 1 and 2 hold the end id (0)."""
        prev_ids = np.array([[1], [0], [0], [4]], dtype=np.int64)
        create_tensor(self.scope, 'pre_ids', prev_ids)

    def _create_pre_scores(self):
        """Create the `pre_scores` tensor in the scope."""
        prev_scores = np.array(
            [[0.1], [1.2], [0.5], [0.4]], dtype=np.float32)
        create_tensor(self.scope, 'pre_scores', prev_scores)

    def _create_ids(self):
        """Create the `ids` tensor and remember the LoD shared with `scores`."""
        self.lod = [[0, 2, 4], [0, 1, 2, 3, 4]]
        candidate_ids = np.array(
            [[4, 2], [7, 3], [3, 5], [8, 1]], dtype=np.int64)
        create_tensor(self.scope, "ids", candidate_ids).set_lod(self.lod)

    def _create_scores(self):
        """Create the `scores` tensor with the LoD set by `_create_ids`."""
        rows = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.6, 0.7],
        ]
        candidate_scores = np.array(rows, dtype=np.float32)
        create_tensor(self.scope, "scores", candidate_scores).set_lod(self.lod)

    def set_outputs(self):
        """Record the op attributes and the expected outputs."""
        self.beam_size = 2
        self.is_accumulated = True
        self.output_ids = np.array([2, 0, 1, 8]).reshape(-1, 1)
        self.output_scores = np.array([0.9, 1.2, 0.7, 0.6]).reshape(-1, 1)
        self.output_lod = [[0, 2, 4], [0, 1, 2, 2, 4]]
        self.output_parent_idx = np.array([0, 1, 3, 3])
class BeamSearchOpTester4(BeamSearchOpTester):
    """Pruning case: every beam of the first source already holds the end id."""

    def _create_pre_ids(self):
        """Create the `pre_ids` tensor; only the last row is not the end id."""
        prev_ids = np.array([[0], [0], [0], [4]], dtype=np.int64)
        create_tensor(self.scope, 'pre_ids', prev_ids)

    def _create_pre_scores(self):
        """Create the `pre_scores` tensor in the scope."""
        prev_scores = np.array(
            [[0.1], [1.2], [0.5], [0.4]], dtype=np.float32)
        create_tensor(self.scope, 'pre_scores', prev_scores)

    def _create_ids(self):
        """Create the `ids` tensor and remember the LoD shared with `scores`."""
        self.lod = [[0, 2, 4], [0, 1, 2, 3, 4]]
        candidate_ids = np.array(
            [[4, 2], [7, 3], [3, 5], [8, 1]], dtype=np.int64)
        create_tensor(self.scope, "ids", candidate_ids).set_lod(self.lod)

    def _create_scores(self):
        """Create the `scores` tensor with the LoD set by `_create_ids`."""
        rows = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.6, 0.7],
        ]
        candidate_scores = np.array(rows, dtype=np.float32)
        create_tensor(self.scope, "scores", candidate_scores).set_lod(self.lod)

    def set_outputs(self):
        """Record the op attributes and the expected (two-row) outputs."""
        self.beam_size = 2
        self.is_accumulated = True
        self.output_ids = np.array([1, 8]).reshape(-1, 1)
        self.output_scores = np.array([0.7, 0.6]).reshape(-1, 1)
        self.output_lod = [[0, 2, 4], [0, 0, 0, 0, 2]]
        self.output_parent_idx = np.array([3, 3])
class BeamSearchOpTester5(BeamSearchOpTester):
    """Case that runs the op with is_accumulated = False."""

    def _create_pre_ids(self):
        """Create the `pre_ids` tensor in the scope."""
        prev_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        create_tensor(self.scope, 'pre_ids', prev_ids)

    def _create_pre_scores(self):
        """Create the `pre_scores` tensor in the scope."""
        prev_scores = np.array([[0.1, 2.2, 0.3, 0.4]], dtype=np.float32)
        create_tensor(self.scope, 'pre_scores', prev_scores)

    def _create_ids(self):
        """Create the `ids` tensor and remember the LoD shared with `scores`."""
        self.lod = [[0, 2, 4], [0, 1, 2, 3, 4]]
        candidate_ids = np.array(
            [[4, 2], [7, 3], [3, 5], [8, 1]], dtype=np.int64)
        create_tensor(self.scope, "ids", candidate_ids).set_lod(self.lod)

    def _create_scores(self):
        """Create the `scores` tensor with the LoD set by `_create_ids`."""
        rows = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.1, 0.7],
        ]
        candidate_scores = np.array(rows, dtype=np.float32)
        create_tensor(self.scope, "scores", candidate_scores).set_lod(self.lod)

    def set_outputs(self):
        """Record the op attributes and the expected outputs."""
        self.beam_size = 2
        self.is_accumulated = False
        self.output_ids = np.array([7, 3, 3, 1]).reshape(-1, 1)
        expected_scores = [1.50685, 0.996027, 0.194639, 0.043325]
        self.output_scores = np.array(expected_scores).reshape(-1, 1)
        self.output_lod = [[0, 2, 4], [0, 0, 2, 3, 4]]
        self.output_parent_idx = np.array([1, 1, 2, 3])
class BeamSearchOpTester6(BeamSearchOpTester):
    """Case that runs the op with beam_size = 1."""

    def _create_pre_ids(self):
        """Create the `pre_ids` tensor in the scope."""
        prev_ids = np.array([[1], [2], [3], [4]], dtype=np.int64)
        create_tensor(self.scope, 'pre_ids', prev_ids)

    def _create_pre_scores(self):
        """Create the `pre_scores` tensor in the scope."""
        prev_scores = np.array([[0.1, 0.2, 0.3, 0.4]], dtype=np.float32)
        create_tensor(self.scope, 'pre_scores', prev_scores)

    def _create_ids(self):
        """Create the `ids` tensor and remember the LoD shared with `scores`."""
        self.lod = [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]
        candidate_ids = np.array(
            [[4, 2], [7, 3], [3, 5], [8, 1]], dtype=np.int64)
        create_tensor(self.scope, "ids", candidate_ids).set_lod(self.lod)

    def _create_scores(self):
        """Create the `scores` tensor with the LoD set by `_create_ids`."""
        rows = [
            [0.6, 0.9],
            [0.5, 0.3],
            [0.9, 0.5],
            [0.1, 0.7],
        ]
        candidate_scores = np.array(rows, dtype=np.float32)
        create_tensor(self.scope, "scores", candidate_scores).set_lod(self.lod)

    def set_outputs(self):
        """Record the op attributes and the expected outputs."""
        self.beam_size = 1
        self.is_accumulated = True
        self.output_ids = np.array([2, 7, 3, 1]).reshape(-1, 1)
        self.output_scores = np.array([0.9, 0.5, 0.9, 0.7]).reshape(-1, 1)
        self.output_lod = [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]
        self.output_parent_idx = np.array([0, 1, 2, 3])
class TestBeamSearchOpError(unittest.TestCase):
def test_errors(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册