未验证 提交 3bb99c4f 编写于 作者: Q Qingsheng Li 提交者: GitHub

Added auto transform to beam_search_decode_op (#10286)

* Added auto transform to beam_search_decode_op

* Added some comment

* Added unittest for beam_search_decode_op on GPU
上级 ddf61672
...@@ -23,16 +23,54 @@ struct BeamSearchDecodeFunctor { ...@@ -23,16 +23,54 @@ struct BeamSearchDecodeFunctor {
BeamSearchDecodeFunctor(const LoDTensorArray& step_ids, BeamSearchDecodeFunctor(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, const LoDTensorArray& step_scores,
LoDTensor* id_tensor, LoDTensor* score_tensor) LoDTensor* id_tensor, LoDTensor* score_tensor)
: step_ids_(step_ids), : step_ids_origin_(step_ids),
step_scores_(step_scores), step_scores_origin_(step_scores),
id_tensor_(id_tensor), id_tensor_(id_tensor),
score_tensor_(score_tensor) {} score_tensor_(score_tensor) {
tensor_on_gpu_ = false;
// First make a copy of GPU data on CPU
if (platform::is_gpu_place(step_ids_origin_[0].place())) {
tensor_on_gpu_ = true;
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(step_ids_origin_[0].place());
// Copy all tensors in the input tensor array
for (auto& step_id : step_ids_origin_) {
framework::LoDTensor out;
dev_ctx->Wait();
framework::TensorCopy(step_id, platform::CPUPlace(), *dev_ctx, &out);
dev_ctx->Wait();
out.set_lod(step_id.lod());
step_ids_.push_back(out);
}
}
if (platform::is_gpu_place(step_scores_origin_[0].place())) {
tensor_on_gpu_ = true;
platform::DeviceContextPool& pool =
platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(step_scores_origin_[0].place());
// Copy all tensors in the input tensor array
for (auto& step_score : step_scores_origin_) {
framework::LoDTensor out;
dev_ctx->Wait();
framework::TensorCopy(step_score, platform::CPUPlace(), *dev_ctx, &out);
dev_ctx->Wait();
out.set_lod(step_score.lod());
step_scores_.push_back(out);
}
}
}
template <typename T> template <typename T>
void operator()() const; void operator()() const;
const LoDTensorArray& step_ids_; bool tensor_on_gpu_;
const LoDTensorArray& step_scores_; const LoDTensorArray& step_ids_origin_;
const LoDTensorArray& step_scores_origin_;
LoDTensorArray step_ids_ = LoDTensorArray();
LoDTensorArray step_scores_ = LoDTensorArray();
LoDTensor* id_tensor_; LoDTensor* id_tensor_;
LoDTensor* score_tensor_; LoDTensor* score_tensor_;
}; };
...@@ -40,8 +78,14 @@ struct BeamSearchDecodeFunctor { ...@@ -40,8 +78,14 @@ struct BeamSearchDecodeFunctor {
template <typename T> template <typename T>
void BeamSearchDecodeFunctor::operator()() const { void BeamSearchDecodeFunctor::operator()() const {
BeamSearchDecoder<T> beam_search_decoder; BeamSearchDecoder<T> beam_search_decoder;
// Check if the tensor is on GPU. If so, use the CPU copy instead
if (tensor_on_gpu_) {
beam_search_decoder.PackAllSteps(step_ids_, step_scores_, id_tensor_, beam_search_decoder.PackAllSteps(step_ids_, step_scores_, id_tensor_,
score_tensor_); score_tensor_);
} else {
beam_search_decoder.PackAllSteps(step_ids_origin_, step_scores_origin_,
id_tensor_, score_tensor_);
}
} }
template <> template <>
......
...@@ -22,12 +22,12 @@ from paddle.fluid.op import Operator ...@@ -22,12 +22,12 @@ from paddle.fluid.op import Operator
class TestBeamSearchDecodeOp(unittest.TestCase): class TestBeamSearchDecodeOp(unittest.TestCase):
def setUp(self):
    """Create the scope and the place used by every test in this case."""
    # CPU by default; subclasses override `place` to target another device.
    self.place = core.CPUPlace()
    self.scope = core.Scope()
def append_lod_tensor(self, tensor_array, lod, data):
    """Build a LoDTensor from `lod` and `data` and append it to `tensor_array`.

    The tensor data is placed on `self.place`.
    """
    step = core.LoDTensor()
    step.set_lod(lod)
    step.set(data, self.place)
    tensor_array.append(step)
def test_get_set(self): def test_get_set(self):
...@@ -71,7 +71,7 @@ class TestBeamSearchDecodeOp(unittest.TestCase): ...@@ -71,7 +71,7 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
SentenceIds="sentence_ids", SentenceIds="sentence_ids",
SentenceScores="sentence_scores") SentenceScores="sentence_scores")
beam_search_decode_op.run(self.scope, self.cpu_place) beam_search_decode_op.run(self.scope, self.place)
expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]] expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]]
self.assertEqual(sentence_ids.lod(), expected_lod) self.assertEqual(sentence_ids.lod(), expected_lod)
...@@ -84,5 +84,11 @@ class TestBeamSearchDecodeOp(unittest.TestCase): ...@@ -84,5 +84,11 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
np.array_equal(np.array(sentence_scores), expected_data)) np.array_equal(np.array(sentence_scores), expected_data))
# Constructing CUDAPlace on a CPU-only build would make every inherited test
# fail, so skip the whole case unless the core was compiled with CUDA.
@unittest.skipIf(not core.is_compiled_with_cuda(),
                 "core is not compiled with CUDA")
class TestBeamSearchDecodeOpGPU(TestBeamSearchDecodeOp):
    """Re-runs the inherited beam_search_decode tests on CUDA device 0."""

    def setUp(self):
        self.scope = core.Scope()
        self.place = core.CUDAPlace(0)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册