// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/jit/engine/pe_engine.h"

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/phi/core/enforce.h"

namespace paddle {
namespace jit {

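// Builds an ExecutionStrategy for the given place. Every supported device
// type runs with a single execution thread (see the note in the CUDA branch).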
static ExecutionStrategy GetExecutionStrategy(const platform::Place &place) {
  ExecutionStrategy execution_strategy;

  auto device_type = platform::Place2DeviceType(place);
  switch (device_type) {
    case platform::DeviceType::CPU: {
      execution_strategy.num_threads_ = 1;
      break;
    }
    case platform::DeviceType::CUDA: {
      // NOTE: According to experiments, one thread is faster for
      // most model training.
      execution_strategy.num_threads_ = 1;
      break;
    }
    case platform::DeviceType::XPU: {
      execution_strategy.num_threads_ = 1;
      break;
    }
    case platform::DeviceType::IPU: {
      execution_strategy.num_threads_ = 1;
      break;
    }
    default:
      PADDLE_THROW(platform::errors::Unavailable("Unsupported Device type %d.",
                                                 device_type));
  }
  execution_strategy.use_device_ = device_type;

  return execution_strategy;
}

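// The constructor removes the feed/fetch ops left in the ProgramDesc, checks
// that the program is not empty, shares the parameters into the local scope,
// and then builds the graph and the inner ParallelExecutor.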
PEEngine::PEEngine(const std::shared_ptr<FunctionInfo> &info,
                   const VariableMap &params_dict,
                   const phi::Place &place)
    : info_(info), place_(place) {
  info_->RemoveDescFeedFetch();
  PADDLE_ENFORCE_GT(
      static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()),
      0,
      platform::errors::PreconditionNotMet(
          "There is no operator in ProgramDesc."));
  utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
  VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
  CreateGraphAndPE();
}

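// Converts block 0 of the ProgramDesc into a Graph and wraps it in a
// ParallelExecutor configured with the build and execution strategies below.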
void PEEngine::CreateGraphAndPE() {
  framework::details::BuildStrategy build_strategy;
  build_strategy.enable_inference_pass_ = true;  // use PE for inference
  auto execution_strategy = GetExecutionStrategy(place_);

  auto &program_desc = info_->ProgramDesc();
  const framework::BlockDesc &global_block = program_desc.Block(0);
  int64_t start_op_index = 0;
  int64_t end_op_index = static_cast<int64_t>(global_block.OpSize());

  graph_ = std::make_shared<Graph>(program_desc, start_op_index, end_op_index);
  inner_pe_ = std::make_shared<ParallelExecutor>(
      place_, &scope_, execution_strategy, build_strategy, graph_.get());
  inner_pe_->PrepareVariables(&scope_);
  inner_pe_->SkipMemoryReuse(/*scope_idx=*/0, info_->InputArgNames());
}

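// Tensor overload: converts the inputs to DenseTensors, runs the engine, and
// converts the results back to Tensors.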
std::vector<Tensor> PEEngine::operator()(const std::vector<Tensor> &inputs) {
  auto dense_tensors = utils::ToDenseTensors(inputs);
  return utils::ToTensors(this->operator()(dense_tensors));
}

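// DenseTensor overload: shares the inputs into the local scope, runs the
// ParallelExecutor without fetch ops, and collects the named outputs.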
std::vector<DenseTensor> PEEngine::operator()(
    const std::vector<DenseTensor> &inputs) {
  utils::ShareIntoScope(info_->InputArgNames(), inputs, &scope_);

  // Update the op_handle scope map in the graphs held by the inner
  // ParallelExecutor so they point at the engine's local scope.
  std::unordered_map<framework::Scope *, framework::Scope *> scope_map = {
      {inner_pe_->GetLocalScopes().front(), &scope_}};
  inner_pe_->ResetOpHandleScopeMapOfGraphs(scope_map);
  // Temporary variables need to be recreated in the new scope.
  inner_pe_->PrepareVariables(&scope_);

  inner_pe_->RunWithoutFetch(info_->OutputArgNames());

  std::vector<DenseTensor> outputs;
  utils::FetchOuts(info_->OutputArgNames(), scope_, &outputs);
  scope_.DropKids();
  return outputs;
}
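
// A minimal usage sketch (hypothetical caller; `info`, `params_dict`, and
// `place` are assumed to be prepared elsewhere):
//
//   PEEngine engine(info, params_dict, place);
//   std::vector<Tensor> outputs = engine(inputs);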

const std::shared_ptr<FunctionInfo> &PEEngine::Info() const { return info_; }

}  // namespace jit
}  // namespace paddle