// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <list>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/model_parser/cpp_desc.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/profiler.h"
#endif
#ifdef LITE_WITH_NVTX
#include "lite/backends/cuda/nvtx_wrapper.h"
#endif

namespace paddle {
namespace lite {

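// Op attribute key used to tag an op with the kernel type chosen for it
// (inferred from the constant's name and usage; the attribute is consumed
// elsewhere in the framework).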
static const char kKernelTypeAttr[] = "__@kernel_type_attr@__";

// A Program represents a model program in Paddle. A program contains:
// - a main block, which is a list of OpLite ops
// - a scope, which holds all the weights
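//
// A minimal usage sketch (the Place value below is illustrative; use the
// targets your build actually enables):
//
//   auto scope = std::make_shared<Scope>();
//   auto desc = std::make_shared<cpp::ProgramDesc>();
//   Program program(desc, scope, {Place{TARGET(kHost), PRECISION(kFloat)}});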
struct Program {
 public:
  explicit Program(const std::shared_ptr<Scope>& root_scope) {
    scope_ = root_scope;
  }
  Program(const std::shared_ptr<cpp::ProgramDesc>& program_desc,
          const std::shared_ptr<Scope>& root_scope,
          const std::vector<Place>& valid_places,
          const std::vector<std::string>& vars_to_clone = {})
      : scope_(root_scope),
        valid_places_(valid_places),
        program_desc_(program_desc) {
    CHECK(scope_) << "scope should be initialized first";
    VLOG(4) << "prepare work";
    PrepareWorkspace(program_desc_, vars_to_clone);
    VLOG(4) << "build desc";
    Build(program_desc_);
    VLOG(4) << "build desc finished";
  }

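  // Create a new Program that shares this one's program desc, root scope and
  // valid places; the ops are rebuilt from the desc.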
  std::unique_ptr<Program> Clone() const {
    return std::unique_ptr<Program>(
        new Program(program_desc_, scope_, valid_places_));
  }

  const std::list<std::string>& weights() const { return weights_; }
  const std::list<std::string>& vars() const { return vars_; }
  std::list<std::string>* mutable_weights() { return &weights_; }
  std::list<std::string>* mutable_vars() { return &vars_; }

  const std::list<std::shared_ptr<OpLite>>& ops(
      int block_idx = kRootBlockIdx) const {
    return ops_[block_idx];
  }
  std::list<std::shared_ptr<OpLite>>* mutable_ops(
      int block_idx = kRootBlockIdx) {
    return &ops_[block_idx];
  }

  size_t block_size() { return ops_.size(); }

  Scope* exec_scope() { return exec_scope_; }
  Scope* scope() { return scope_.get(); }

  cpp::ProgramDesc* program_desc() { return program_desc_.get(); }

  const std::map<std::string, const Type*>& var_type_map() const {
    return var_type_map_;
  }

 private:
  // Build from a program and scope.
  void Build(const std::shared_ptr<cpp::ProgramDesc>& program_desc);
  // Create temporary variables.
  void PrepareWorkspace(const std::shared_ptr<cpp::ProgramDesc>& program_desc,
                        const std::vector<std::string>& vars_to_clone = {});

 private:
  std::map<std::string, const Type*> var_type_map_;
  std::list<std::string> vars_;
  std::list<std::string> weights_;
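  // The ops of each block, indexed by block index.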
  std::vector<std::list<std::shared_ptr<OpLite>>> ops_;
  // The root scope, which holds all the weights.
  std::shared_ptr<Scope> scope_;
  std::vector<Place> valid_places_;
  // Runtime scope, in which the kernels execute.
  Scope* exec_scope_{};
  std::shared_ptr<cpp::ProgramDesc> program_desc_;
};

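// An Instruction pairs an op with the kernel that has been selected to
// execute it; a RuntimeProgram runs a sequence of such instructions.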
struct Instruction {
  Instruction(const std::shared_ptr<OpLite>& op,
              std::unique_ptr<KernelBase>&& kernel)
      : op_(op), kernel_(std::move(kernel)) {
    std::string op_type = op->Type();
    if (op_type == "feed" || op_type == "fetch") {
      is_feed_fetch_op_ = true;
    }
  }

  // Run the instruction.
  void Run();

  friend STL::ostream& operator<<(STL::ostream& os, const Instruction& other);

  const OpLite* op() const { return op_.get(); }
  const KernelBase* kernel() const { return kernel_.get(); }
  KernelBase* mutable_kernel() { return kernel_.get(); }

  bool is_feed_fetch_op() const { return is_feed_fetch_op_; }

#ifdef LITE_WITH_CUDA
  bool need_sync() const {
    if (kernel_->target() == TargetType::kCUDA) {
      return kernel_->mutable_context()->As<CUDAContext>().need_sync();
    } else {
      // The io_copy kernel has already synchronized, so CPU kernels need no
      // sync here.
      return false;
    }
  }
  void Sync() const { kernel_->mutable_context()->As<CUDAContext>().Sync(); }
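  // Rebind the CUDA execution/IO streams used by this kernel; a null pointer
  // leaves the corresponding stream unchanged.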
  void UpdateCudaStream(cudaStream_t* exec, cudaStream_t* io) {
    if (kernel_->target() == TargetType::kCUDA) {
      if (exec) {
        kernel_->mutable_context()->As<CUDAContext>().SetExecStream(*exec);
      }
      if (io) {
        kernel_->mutable_context()->As<CUDAContext>().SetIoStream(*io);
      }
    }
  }
#endif

#ifdef LITE_WITH_PROFILE
  void set_profiler(profile::Profiler* profiler) {
    profiler_ = profiler;
    if (op_->Type() != "feed" && op_->Type() != "fetch") {
      profile::OpCharacter ch;
      ch.op_lite = static_cast<void*>(const_cast<paddle::lite::OpLite*>(op()));
      ch.target = kernel()->target();
      ch.op_type = op_->Type();
      ch.kernel_name = kernel()->name();
      ch.kernel_attr = kernel()->name().substr(ch.op_type.size() + 1,
                                               kernel()->name().size());
      // append `ch.kernel_func_name` in StopTiming
      profile_id_ = profiler->NewTimer(ch);
      kernel_->SetProfiler(profiler_, profile_id_);
    }
  }

  void SetProfileRuntimeOpInfo(paddle::lite::profile::OpCharacter* ch) {
    auto* op_lite = static_cast<paddle::lite::OpLite*>(ch->op_lite);
    op_lite->GetOpRuntimeInfo(ch);
  }
#endif

 private:
  std::shared_ptr<OpLite> op_;
  std::unique_ptr<KernelBase> kernel_;
  bool is_feed_fetch_op_{false};
  bool first_epoch_{true};
  bool has_run_{false};

#ifdef LITE_WITH_PROFILE
  profile::Profiler* profiler_{nullptr};
  int profile_id_{-1};
  bool first_epoch_for_profiler_{true};
#endif  // LITE_WITH_PROFILE
};

/*
 * A RuntimeProgram holds the kernel-bound instructions of a program and
 * executes them at runtime.
 */
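//
// A minimal usage sketch (assuming a program desc and an execution scope
// have already been prepared):
//
//   RuntimeProgram rt(program_desc, exec_scope);
//   rt.Run();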
class LITE_API RuntimeProgram {
 public:
  explicit RuntimeProgram(std::vector<std::vector<Instruction>>&& insts)
      : instructions_(std::move(insts)) {
    Init();
  }
  explicit RuntimeProgram(
      const std::shared_ptr<const cpp::ProgramDesc>& program_desc,
      Scope* exec_scope,
      int block_idx = kRootBlockIdx);
  ~RuntimeProgram() {
#ifdef LITE_WITH_PROFILE
    LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kCreate);
    LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch);
#endif  // LITE_WITH_PROFILE
  }

  void Init() {
    if (instructions_.empty()) {
      LOG(FATAL) << "no instructions";
    }
#ifdef LITE_WITH_PROFILE
    set_profiler();
#endif
#ifdef LITE_WITH_NVTX
    const NVTXAnnotator& annotator = NVTXAnnotator::Global();
    for (auto& inst : instructions_[kRootBlockIdx]) {
      NVTXRangeAnnotation annotation = annotator.AnnotateBlock();
      register_layer_names_.push_back(annotator.RegisterString(
          const_cast<paddle::lite::OpLite*>(inst.op())->Type().c_str()));
    }
    register_layer_names_.push_back(annotator.RegisterString("one_loop"));
#endif
  }

  void Run();

  void set_exec_scope(Scope* x) { exec_scope_ = x; }
  Scope* exec_scope() { return exec_scope_; }

  const std::vector<Instruction>& instructions(
      int block_idx = kRootBlockIdx) const {
    return instructions_[block_idx];
  }

  std::vector<Instruction>* mutable_instructions(
      int block_idx = kRootBlockIdx) {
    return &instructions_[block_idx];
  }

  size_t block_size() { return instructions_.size(); }

  // Update the ops and vars of all blocks in the given program_desc
  // according to the instructions.
  void SaveToProgram(std::shared_ptr<cpp::ProgramDesc> program_desc);

#ifdef LITE_WITH_CUDA
  // UpdateCudaStream will update the exec stream and io stream of all kernels
  // in the program.
  void UpdateCudaStream(cudaStream_t* exec, cudaStream_t* io);
#endif

 private:
  RuntimeProgram(const RuntimeProgram&) = delete;
  std::vector<std::vector<Instruction>> instructions_;
  Scope* exec_scope_{};

#ifdef LITE_WITH_PROFILE
  profile::Profiler profiler_;
  void set_profiler() {
    for (auto& inst : instructions_[kRootBlockIdx]) {
      inst.set_profiler(&profiler_);
    }
  }
#endif
#ifdef LITE_WITH_NVTX
  std::vector<nvtxStringHandle_t> register_layer_names_;
#endif
};

}  // namespace lite
}  // namespace paddle