/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/beam_search_decode_op.h"
#include <string>
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace operators {

struct BeamSearchDecodeFunctor {
  BeamSearchDecodeFunctor(const LoDTensorArray& step_ids,
                          const LoDTensorArray& step_scores,
                          LoDTensor* id_tensor, LoDTensor* score_tensor)
26 27
      : step_ids_origin_(step_ids),
        step_scores_origin_(step_scores),
28
        id_tensor_(id_tensor),
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
        score_tensor_(score_tensor) {
    tensor_on_gpu_ = false;
    // First make a copy of GPU data on CPU
    if (platform::is_gpu_place(step_ids_origin_[0].place())) {
      tensor_on_gpu_ = true;
      platform::DeviceContextPool& pool =
          platform::DeviceContextPool::Instance();
      auto* dev_ctx = pool.Get(step_ids_origin_[0].place());
      // Copy all tensors in the input tensor array
      for (auto& step_id : step_ids_origin_) {
        framework::LoDTensor out;
        dev_ctx->Wait();
        framework::TensorCopy(step_id, platform::CPUPlace(), *dev_ctx, &out);
        dev_ctx->Wait();

        out.set_lod(step_id.lod());
        step_ids_.push_back(out);
      }
    }
    if (platform::is_gpu_place(step_scores_origin_[0].place())) {
      tensor_on_gpu_ = true;
      platform::DeviceContextPool& pool =
          platform::DeviceContextPool::Instance();
      auto* dev_ctx = pool.Get(step_scores_origin_[0].place());
      // Copy all tensors in the input tensor array
      for (auto& step_score : step_scores_origin_) {
        framework::LoDTensor out;
        dev_ctx->Wait();
        framework::TensorCopy(step_score, platform::CPUPlace(), *dev_ctx, &out);
        dev_ctx->Wait();

        out.set_lod(step_score.lod());
        step_scores_.push_back(out);
      }
    }
  }
65 66 67 68

  template <typename T>
  void operator()() const;

69 70 71 72 73
  bool tensor_on_gpu_;
  const LoDTensorArray& step_ids_origin_;
  const LoDTensorArray& step_scores_origin_;
  LoDTensorArray step_ids_ = LoDTensorArray();
  LoDTensorArray step_scores_ = LoDTensorArray();
74 75 76 77 78 79 80
  LoDTensor* id_tensor_;
  LoDTensor* score_tensor_;
};

// Packs all decoding steps into id_tensor_ / score_tensor_ using
// BeamSearchDecoder<T>. When the inputs were found on the GPU, the CPU-side
// copies made by the constructor are used; otherwise the original arrays are
// passed through unchanged.
template <typename T>
void BeamSearchDecodeFunctor::operator()() const {
  // Both operands are lvalues of the same type, so no array is copied here.
  const LoDTensorArray& ids_to_pack =
      tensor_on_gpu_ ? step_ids_ : step_ids_origin_;
  const LoDTensorArray& scores_to_pack =
      tensor_on_gpu_ ? step_scores_ : step_scores_origin_;

  BeamSearchDecoder<T> decoder;
  decoder.PackAllSteps(ids_to_pack, scores_to_pack, id_tensor_, score_tensor_);
}

// Explicit specialization for bool: instead of decoding, it raises an error
// through PADDLE_THROW, because the op does not support boolean data.
template <>
void BeamSearchDecodeFunctor::operator()<bool>() const {
  PADDLE_THROW("beam search decode op does not support bool!");
}

class BeamSearchDecodeOp : public framework::OperatorBase {
 public:
  BeamSearchDecodeOp(const std::string& type,
                     const framework::VariableNameMap& inputs,
                     const framework::VariableNameMap& outputs,
                     const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
103 104 105 106

 private:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& dev_place) const override {
Y
Yu Yang 已提交
107 108
    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
    auto& dev_ctx = *pool.Get(dev_place);
D
dzhwinter 已提交
109

Q
Qiao Longfei 已提交
110
    framework::ExecutionContext ctx(*this, scope, dev_ctx);
111

Q
Qiao Longfei 已提交
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
    const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
    const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
    const size_t step_num = ids->size();
    PADDLE_ENFORCE_GT(step_num, 0UL,
                      "beam search steps should be larger than 0");
    const size_t source_num = ids->at(0).lod().at(0).size() - 1;
    PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0");

    for (size_t i = 0; i < step_num; ++i) {
      PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL,
                        "Level of LodTensor should be 2");
    }

    // prepare output
    LoDTensor* sentenceIds = ctx.Output<LoDTensor>("SentenceIds");
    LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");

129 130 131
    framework::VisitDataType(
        framework::ToDataType(scores->at(0).type()),
        BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores));
Q
Qiao Longfei 已提交
132 133 134 135 136
  }
};

// Declares the inputs, outputs and documentation string of the
// beam_search_decode operator.
class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  BeamSearchDecodeOpProtoMaker(OpProto* proto, OpAttrChecker* op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    // "Ids" carries word ids; its description used to repeat the text of
    // "Scores" by mistake.
    AddInput("Ids",
             "(LodTensorArray)"
             "id of the candidate words in each step");
    AddInput("Scores",
             "(LodTensorArray)"
             "score of the candidate words in each step");
    AddOutput("SentenceIds",
              "(LodTensor)"
              "All possible result sentences of word ids");
    AddOutput("SentenceScores",
              "(LodTensor)"
              "All possible result sentences of word scores");
    AddComment(R"DOC(
Pack the result of Beam search op into SentenceIds and SentenceScores.
)DOC");
  }
};

// Shape-inference hook for beam_search_decode. It only verifies that both
// inputs and both outputs are present; no output shape is set here.
class BeamSearchDecodeInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext* context) const override {
    // Inputs, checked in declaration order.
    const bool ids_present = context->HasInput("Ids");
    PADDLE_ENFORCE(ids_present, "BeamSearchDecodeOp must has input Ids");

    const bool scores_present = context->HasInput("Scores");
    PADDLE_ENFORCE(scores_present, "BeamSearchDecodeOp must has input Scores");

    // Outputs.
    const bool sentence_ids_present = context->HasOutput("SentenceIds");
    PADDLE_ENFORCE(sentence_ids_present,
                   "BeamSearchDecodeOp must has output SentenceIds");

    const bool sentence_scores_present = context->HasOutput("SentenceScores");
    PADDLE_ENFORCE(sentence_scores_present,
                   "BeamSearchDecodeOp must has output SentenceScores");
  }
};

// Var-type inference hook: marks every variable bound to the "SentenceIds"
// and "SentenceScores" outputs as LOD_TENSOR.
class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc& op_desc,
                  framework::BlockDesc* block) const override {
    // Output slots, visited in the same order as before.
    const char* const output_slots[] = {"SentenceIds", "SentenceScores"};
    for (const char* slot : output_slots) {
      for (auto& var_name : op_desc.Output(slot)) {
        block->Var(var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
      }
    }
  }
};

}  // namespace operators
}  // namespace paddle

// Registers the beam_search_decode operator together with its proto maker,
// shape-inference and var-type-inference classes. EmptyGradOpMaker is passed
// as the grad-op maker -- presumably meaning the op has no gradient op;
// confirm against the framework's op registry.
REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp,
                  paddle::operators::BeamSearchDecodeOpProtoMaker,
                  paddle::operators::BeamSearchDecodeInferShape,
                  paddle::operators::BeamSearchDecodeInferVarType,
                  paddle::framework::EmptyGradOpMaker);