// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include "lite/backends/cuda/cuda_utils.h" #include "lite/core/context.h" #include "lite/core/tensor.h" namespace paddle { namespace lite { namespace cuda { namespace math { template class CopyMatrixRowsFunctor { public: void operator()(const lite::Tensor& src, lite::Tensor* dst, const std::vector& index_lod, bool is_src_index, const cudaStream_t& stream); private: lite::Tensor index_tensor_; }; template class LoDTensor2BatchFunctor { struct SeqInfo { SeqInfo(size_t start, size_t length, size_t seq_idx) : start_(start), length_(length), seq_idx_(seq_idx) {} size_t start_; size_t length_; size_t seq_idx_; }; public: void operator()(const lite::Tensor& lod_tensor, lite::Tensor* batch_tensor, bool is_reverse, const cudaStream_t& stream) const { auto lods = lod_tensor.lod(); CHECK_EQ(lods.size(), 1UL) << "Only support one level sequence now."; const auto& lod = lods[0]; std::vector seq_info; for (int seq_id = 0; seq_id < static_cast(lod.size()) - 1; ++seq_id) { size_t length = lod[seq_id + 1] - lod[seq_id]; seq_info.emplace_back(lod[seq_id], length, seq_id); } std::sort(seq_info.begin(), seq_info.end(), [](SeqInfo a, SeqInfo b) { return a.length_ > b.length_; }); LoD batch_lods; batch_lods.emplace_back(std::vector{0}); batch_lods.emplace_back(std::vector{0}); batch_lods.emplace_back(std::vector{0}); size_t max_seqlen = seq_info[0].length_; batch_lods[0].resize(max_seqlen + 1); batch_lods[1].resize(static_cast(lod_tensor.dims()[0])); batch_lods[2].resize(seq_info.size()); auto* batch_starts = batch_lods[0].data(); auto* seq2batch_idx = batch_lods[1].data(); batch_starts[0] = 0; for (size_t n = 0; n < max_seqlen; ++n) { size_t batch_id = batch_starts[n]; for (size_t i = 0; i < seq_info.size(); ++i) { size_t seq_len = seq_info[i].length_; size_t start = seq_info[i].start_; if (n < seq_len) { seq2batch_idx[batch_id] = is_reverse ? start + seq_len - 1 - n : start + n; ++batch_id; } else { break; } } batch_starts[n + 1] = batch_id; } auto* seq_order = batch_lods[2].data(); for (size_t i = 0; i < seq_info.size(); ++i) { seq_order[i] = seq_info[i].seq_idx_; } batch_tensor->set_lod(batch_lods); lite::cuda::math::CopyMatrixRowsFunctor to_batch; to_batch(lod_tensor, batch_tensor, batch_lods[1], true, stream); CUDA_POST_KERNEL_CHECK; } }; template class Batch2LoDTensorFunctor { public: void operator()(const lite::Tensor& batch_tensor, lite::Tensor* lod_tensor, const cudaStream_t& stream) { auto in_lod = batch_tensor.lod(); CHECK_GT(in_lod.size(), 2UL) << "The LoD of LoDTensor should include at " "least 2-level sequence infomation."; CHECK_EQ(in_lod[1].size(), static_cast(lod_tensor->dims()[0])) << "The LoD information should be consistent with the dims."; lite::cuda::math::CopyMatrixRowsFunctor to_seq; to_seq(batch_tensor, lod_tensor, in_lod[1], false, stream); CUDA_POST_KERNEL_CHECK; } }; } // namespace math } // namespace cuda } // namespace lite } // namespace paddle