// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <string>
#include <vector>

#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/execution_strategy.h"
#include "paddle/fluid/framework/executor_cache.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/phi/core/enforce.h"

#include "paddle/fluid/jit/base_function.h"
#include "paddle/fluid/jit/function_schema.h"
#include "paddle/fluid/jit/function_utils.h"

namespace paddle {
namespace jit {

using ExecutionStrategy = framework::details::ExecutionStrategy;
using ParallelExecutor = framework::ParallelExecutor;
using Graph = framework::ir::Graph;

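// PEFunction executes the ProgramDesc held by its FunctionInfo with a
// ParallelExecutor. Parameters from params_dict are shared into the member
// scope at construction, and the graph and executor are built once in
// CreateGraphAndPE and reused across calls.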
class PEFunction : public BaseFunction {
 public:
  PEFunction(const std::shared_ptr<FunctionInfo> &info,
             const Name2VariableMap &params_dict,
             const phi::Place &place)
      : info_(info), place_(place) {
    info_->RemoveDescFeedFetch();
    PADDLE_ENFORCE_GT(
        static_cast<int64_t>(info_->ProgramDesc().Block(0).OpSize()),
        0,
        platform::errors::PreconditionNotMet(
            "There is no operator in ProgramDesc."));
    utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_);
    VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_);
    CreateGraphAndPE();
  }

  ~PEFunction() noexcept {}

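  // Picks an ExecutionStrategy for the given place, setting the executor
  // thread count per device type.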
  static ExecutionStrategy GetExecutionStrategy(const platform::Place &place) {
    ExecutionStrategy execution_strategy;

    auto device_type = platform::Place2DeviceType(place);
    switch (device_type) {
      case platform::DeviceType::CPU: {
        execution_strategy.num_threads_ = 2;
        break;
      }
      case platform::DeviceType::CUDA: {
        // NOTE: According to experiments, one thread is faster for
        // most model training.
        execution_strategy.num_threads_ = 1;
        break;
      }
      case platform::DeviceType::XPU: {
        execution_strategy.num_threads_ = 1;
        break;
      }
      case platform::DeviceType::IPU: {
        execution_strategy.num_threads_ = 1;
        break;
      }
      default:
        PADDLE_THROW(platform::errors::Unavailable(
            "Unsupported Device type %d.", device_type));
    }
    execution_strategy.use_device_ = device_type;

    return execution_strategy;
  }

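  // Builds an ir::Graph covering all ops of the ProgramDesc's global block
  // and constructs the inner ParallelExecutor bound to scope_.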
  void CreateGraphAndPE() {
    framework::details::BuildStrategy build_strategy;
    auto execution_strategy = GetExecutionStrategy(place_);

    auto &program_desc = info_->ProgramDesc();
    const framework::BlockDesc &global_block = program_desc.Block(0);
    int64_t start_op_index = 0;
    int64_t end_op_index = static_cast<int64_t>(global_block.OpSize());

    graph_ =
        std::make_shared<Graph>(program_desc, start_op_index, end_op_index);
    inner_pe_ = std::make_shared<ParallelExecutor>(
        place_, &scope_, execution_strategy, build_strategy, graph_.get());
    inner_pe_->PrepareVariables(&scope_);
    inner_pe_->SkipMemoryReuse(/*scope_idx=*/0, info_->InputArgNames());
  }

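  // Runs the function on Tensor inputs by converting them to DenseTensors
  // and delegating to the DenseTensor overload below.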
  std::vector<Tensor> operator()(const std::vector<Tensor> &inputs) {
    auto dense_tensors = utils::ToDenseTensors(inputs);
    return utils::ToTensors(this->operator()(dense_tensors));
  }

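  // Shares the inputs into scope_, runs the inner ParallelExecutor, and
  // fetches the named outputs from the scope.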
  std::vector<DenseTensor> operator()(const std::vector<DenseTensor> &inputs) {
    utils::ShareIntoScope(info_->InputArgNames(), inputs, &scope_);

    // Update the op_handle scope map in the graph held by inner_pe_.
    std::unordered_map<framework::Scope *, framework::Scope *> scope_map = {
        {inner_pe_->GetLocalScopes().front(), &scope_}};
    inner_pe_->ResetOpHandleScopeMapOfGraphs(scope_map);
    // Temporary variables must be recreated in the new scope.
    inner_pe_->PrepareVariables(&scope_);

    inner_pe_->RunWithoutFetch(info_->OutputArgNames());

    std::vector<DenseTensor> outputs;
    utils::FetchOuts(info_->OutputArgNames(), scope_, &outputs);
    scope_.DropKids();
    return outputs;
  }

  const std::shared_ptr<FunctionInfo> &Info() const { return info_; }

 private:
  std::shared_ptr<FunctionInfo> info_;
  framework::Scope scope_;
  phi::Place place_;
  std::shared_ptr<ParallelExecutor> inner_pe_;
  std::shared_ptr<Graph> graph_;
};

}  // namespace jit
}  // namespace paddle