/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" namespace paddle { namespace framework { class LoDRankTable; class Tensor; class OpDesc; class Scope; } // namespace framework namespace imperative { class OpBase; } // namespace imperative } // namespace paddle namespace paddle { namespace operators { class ReorderLoDTensorByRankTableOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", "(LoDTensor), the input lod tensor to be reordered according to " "Input(RankTable)."); AddInput("RankTable", "(LoDRankTable), the rank table according to which Input(X) is " "reordered."); AddOutput("Out", "LoDTensor, the reordered lod tensor."); AddComment(R"DOC(ReorderLoDTensorByRankTable operator. Input(X) is a batch of sequences. Input(RankTable) stores new orders of the input sequence batch. The reorder_lod_tensor_by_rank operator reorders the Input(X) according to the information provided by Input(RankTable). For example: If the indices stored in the Input(RankTable) are [3, 0, 2, 1], the Input(X) will be reordered that the fourth sequence in Input(X) will become the first one, and then followed by the original first, third, and the second one. This is: X = [Seq0, Seq1, Seq2, Seq3]. The indices in RankTable are [3, 0, 2, 1]. Out = [Seq3, Seq0, Seq2, Seq1] with a new LoD information. If the LoD information of Input(X) is empty, this means Input(X) is not sequence data. This is also identical to a batch of sequences where each sequence has a fixed length 1. In this case, the reorder_lod_tensor_by_rank operator reorders each slice of Input(X) along the first axis according to Input(RankTable). This is: X = [Slice0, Slice1, Slice2, Slice3] and its LoD information is empty. The indices in RankTable are [3, 0, 2, 1]. Out = [Slice3, Slice0, Slice2, Slice1] with no LoD information is appended. **NOTE**: This operator sorts Input(X) according to a given LoDRankTable which does not need to be calculated according to Input(X). It can be calculated according to another different sequence, and then this operator sorts Input(X) according to the given LoDRankTable. )DOC"); } }; class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { public: ReorderLoDTensorByRankTableBase(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { auto &x = GET_DATA_SAFELY(scope.FindVar(Input("X")), "Input", "X", "ReorderLoDTensorByRankTable") .Get(); auto &rank_table = GET_DATA_SAFELY(scope.FindVar(Input("RankTable")), "Input", "RankTable", "ReorderLoDTensorByRankTable") .Get(); auto &out = *(GET_DATA_SAFELY(scope.FindVar(Output("Out")), "Output", "Out", "ReorderLoDTensorByRankTable") .GetMutable()); out.Resize(x.dims()); out.mutable_data(x.place(), x.type()); this->process(place, x, rank_table, &out); } protected: virtual void process(const platform::Place &place, const framework::LoDTensor &x, const framework::LoDRankTable &rank_table, framework::LoDTensor *out) const = 0; struct AbsoluteRankTableItem { size_t offset; // the absolute/accumulated offset. size_t length; // the length framework::LoD lod; }; std::vector GetAbsoluteOffsetAndLengthByLoDRankTable( const framework::LoDTensor &x) const { std::vector absolute_table; if (x.lod().empty()) { // For Tensor without lod, such as the output of sequence_pool_op size_t size = x.dims()[0]; absolute_table.reserve(size); for (size_t i = 0; i < size; ++i) { absolute_table.emplace_back(); absolute_table.back().length = 1; absolute_table.back().offset = i; } } else { size_t level = 0; size_t size = x.lod()[level].size(); for (size_t i = 0; i < size - 1; ++i) { auto lod_offset = framework::GetSubLoDAndAbsoluteOffset(x.lod(), i, i + 1, level); auto &offset = lod_offset.second; absolute_table.emplace_back(); absolute_table.back().length = offset.second - offset.first; absolute_table.back().offset = offset.first; absolute_table.back().lod = lod_offset.first; } } return absolute_table; } size_t CopyTensorAndLod(const platform::Place &place, const AbsoluteRankTableItem &item, const framework::LoDTensor &x, framework::LoDTensor *out, size_t out_offset) const { auto &out_lod = *out->mutable_lod(); auto len = item.length; auto x_offset = item.offset; if (out_lod.empty()) { for (size_t i = 0; i < item.lod.size(); ++i) { out_lod.push_back(std::vector({0})); } } for (size_t i = 0; i < out_lod.size(); ++i) { auto &out_v = out_lod[i]; auto &new_lod_v = item.lod[i]; for (auto &detail : new_lod_v) { out_v.push_back(out_v.back() + detail); } } auto x_sliced = x.Slice(x_offset, x_offset + len); auto out_sliced = out->Slice(out_offset, out_offset + len); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); framework::TensorCopy(x_sliced, out_sliced.place(), dev_ctx, &out_sliced); out_offset += len; return out_offset; } }; class ReorderLoDTensorByRankTableOp : public ReorderLoDTensorByRankTableBase { public: ReorderLoDTensorByRankTableOp(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : ReorderLoDTensorByRankTableBase(type, inputs, outputs, attrs) {} protected: void process(const platform::Place &place, const framework::LoDTensor &x, const framework::LoDRankTable &rank_table, framework::LoDTensor *out) const override { auto absolute_table = GetAbsoluteOffsetAndLengthByLoDRankTable(x); size_t out_offset = 0; out->mutable_lod()->clear(); for (auto &item : rank_table.items()) { PADDLE_ENFORCE_LT(item.index, absolute_table.size(), platform::errors::OutOfRange( "The value of rank_table is out of range.")); out_offset = CopyTensorAndLod(place, absolute_table[item.index], x, out, out_offset); } } }; class IdentityInferShape : public framework::InferShapeBase { public: void operator()(framework::InferShapeContext *context) const override { context->SetOutputDim("Out", context->GetInputDim("X")); // X'lod and Out'lod is different on runtime, so there is no need to call // ShareLoD for runtime. While the setting of Out's lod is done in detail // kernel implementation. if (!context->IsRuntime()) { context->ShareLoD("X", /*->*/ "Out"); } } }; template class ReorderLodTensorByRankGradOpMaker : public framework::SingleGradOpMaker { public: using framework::SingleGradOpMaker::SingleGradOpMaker; protected: void Apply(GradOpPtr grad_op) const override { grad_op->SetType("reorder_lod_tensor_by_rank_grad"); grad_op->SetInput("X", this->OutputGrad("Out")); grad_op->SetOutput("Out", this->InputGrad("X")); grad_op->SetInput("RankTable", this->Input("RankTable")); } }; class ReorderLoDTensorByRankGradOp : public ReorderLoDTensorByRankTableBase { public: ReorderLoDTensorByRankGradOp(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : ReorderLoDTensorByRankTableBase(type, inputs, outputs, attrs) {} protected: void process(const platform::Place &place, const framework::LoDTensor &x, const framework::LoDRankTable &rank_table, framework::LoDTensor *out) const override { auto absolute_table = GetAbsoluteOffsetAndLengthByLoDRankTable(x); // offsets = enumerate([item.index for item in rank_table.items()]) std::vector> offsets; offsets.reserve(rank_table.items().size()); for (size_t i = 0; i < rank_table.items().size(); ++i) { offsets.push_back({i, rank_table.items()[i].index}); } // offsets.sort(key=lambda x: x[1]) std::sort( offsets.begin(), offsets.end(), [](const std::pair &a, const std::pair &b) { return a.second < b.second; }); // Copy TensorAndLod size_t out_offset = 0; for (auto &offset : offsets) { out_offset = this->CopyTensorAndLod(place, absolute_table[offset.first], x, out, out_offset); } } }; } // namespace operators } // namespace paddle namespace ops = paddle::operators; REGISTER_OPERATOR( reorder_lod_tensor_by_rank, ops::ReorderLoDTensorByRankTableOp, ops::ReorderLodTensorByRankGradOpMaker, ops::ReorderLodTensorByRankGradOpMaker, ops::ReorderLoDTensorByRankTableOpProtoMaker, ops::IdentityInferShape); REGISTER_OPERATOR(reorder_lod_tensor_by_rank_grad, ops::ReorderLoDTensorByRankGradOp, ops::IdentityInferShape);