// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include #include "lite/core/kernel.h" #include "lite/core/op_lite.h" #include "lite/core/op_registry.h" #include "lite/model_parser/cpp_desc.h" #ifdef LITE_WITH_PROFILE #include "lite/core/profile/profiler.h" #endif #ifdef LITE_WITH_NVTX #include "lite/backends/cuda/nvtx_wrapper.h" #endif namespace paddle { namespace lite { static const char kKernelTypeAttr[] = "__@kernel_type_attr@__"; // A program is used to represent a code program, in Paddle, a code program // contains: // - main block, which is a list of OpLite // - scope: which contains all the weights struct Program { public: explicit Program(const std::shared_ptr& root_scope) { scope_ = root_scope; } Program(const std::shared_ptr& program_desc, const std::shared_ptr& root_scope, const std::vector& valid_places, const std::vector& vars_to_clone = {}) : scope_(root_scope), valid_places_(valid_places), program_desc_(program_desc) { CHECK(scope_) << "scope should be init first"; VLOG(4) << "prepare work"; PrepareWorkspace(program_desc_, vars_to_clone); VLOG(4) << "build desc"; Build(program_desc_); VLOG(4) << "build desc finished"; } std::unique_ptr Clone() const { return std::unique_ptr( new Program(program_desc_, scope_, valid_places_)); } const std::list& weights() const { return weights_; } const std::list& vars() const { return vars_; } std::list* mutable_weights() { return &weights_; } std::list* mutable_vars() { return &vars_; } const std::list>& ops( int block_idx = kRootBlockIdx) const { return ops_[block_idx]; } std::list>* mutable_ops( int block_idx = kRootBlockIdx) { return &ops_[block_idx]; } size_t block_size() { return ops_.size(); } Scope* exec_scope() { return exec_scope_; } Scope* scope() { return scope_.get(); } cpp::ProgramDesc* program_desc() { return program_desc_.get(); } const std::map& var_type_map() const { return var_type_map_; } private: // Build from a program and scope. void Build(const std::shared_ptr& program_desc); // Create temporary variables. void PrepareWorkspace(const std::shared_ptr& program_desc, const std::vector& vars_to_clone = {}); private: std::map var_type_map_; std::list vars_; std::list weights_; std::vector>> ops_; // the scope to run the kernels, NOTE this is the execution scope. std::shared_ptr scope_; std::vector valid_places_; // Runtime scope. Scope* exec_scope_{}; std::shared_ptr program_desc_; }; struct Instruction { Instruction(const std::shared_ptr& op, std::unique_ptr&& kernel) : op_(op), kernel_(std::move(kernel)) { std::string op_type = op->Type(); if (op_type == "feed" || op_type == "fetch") { is_feed_fetch_op_ = true; } } // Run the instruction. void Run(); friend STL::ostream& operator<<(STL::ostream& os, const Instruction& other); const OpLite* op() const { return op_.get(); } const KernelBase* kernel() const { return kernel_.get(); } KernelBase* mutable_kernel() { return kernel_.get(); } bool is_feed_fetch_op() const { return is_feed_fetch_op_; } #ifdef LITE_WITH_CUDA bool need_sync() const { if (kernel_->target() == TargetType::kCUDA) { return kernel_->mutable_context()->As().need_sync(); } else { // the io_copy kernel has synced, so cpu kernels don't need sync.. return false; } } void Sync() const { kernel_->mutable_context()->As().Sync(); } void UpdateCudaContext(cudaStream_t exec, cudaStream_t io) { if (kernel_->target() == TargetType::kCUDA) { kernel_->mutable_context()->As().SetExecStream(exec); kernel_->mutable_context()->As().SetIoStream(io); } } #endif #ifdef LITE_WITH_PROFILE void set_profiler(profile::Profiler* profiler) { profiler_ = profiler; if (op_->Type() != "feed" && op_->Type() != "fetch") { profile::OpCharacter ch; ch.op_lite = static_cast(const_cast(op())); ch.target = kernel()->target(); ch.op_type = op_->Type(); ch.kernel_name = kernel()->name(); ch.kernel_attr = kernel()->name().substr(ch.op_type.size() + 1, kernel()->name().size()); // append `ch.kernel_func_name` in StopTiming profile_id_ = profiler->NewTimer(ch); kernel_->SetProfiler(profiler_, profile_id_); } } void SetProfileRuntimeOpInfo(paddle::lite::profile::OpCharacter* ch) { auto* op_lite = static_cast(ch->op_lite); op_lite->GetOpRuntimeInfo(ch); } #endif private: std::shared_ptr op_; std::unique_ptr kernel_; bool is_feed_fetch_op_{false}; bool first_epoch_{true}; bool has_run_{false}; #ifdef LITE_WITH_PROFILE profile::Profiler* profiler_; int profile_id_{-1}; bool first_epoch_for_profiler_{true}; #endif // LITE_WITH_PROFILE }; /* * A program contains kernels for runtime. */ class LITE_API RuntimeProgram { public: explicit RuntimeProgram(std::vector>&& insts) : instructions_(std::move(insts)) { Init(); } explicit RuntimeProgram( const std::shared_ptr& program_desc, Scope* exec_scope, int block_idx = kRootBlockIdx); ~RuntimeProgram() { #ifdef LITE_WITH_PROFILE LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kCreate); LOG(INFO) << "\n" << profiler_.Summary(profile::Type::kDispatch); #endif // LITE_WITH_PROFILE } void Init() { if (instructions_.empty()) { LOG(FATAL) << "no instructions"; } #ifdef LITE_WITH_PROFILE set_profiler(); #endif #ifdef LITE_WITH_NVTX const NVTXAnnotator& annotator = NVTXAnnotator::Global(); for (auto& inst : instructions_[kRootBlockIdx]) { NVTXRangeAnnotation annotation = annotator.AnnotateBlock(); register_layer_names_.push_back(annotator.RegisterString( const_cast(inst.op())->Type().c_str())); } register_layer_names_.push_back(annotator.RegisterString("one_loop")); #endif } void Run(); void set_exec_scope(Scope* x) { exec_scope_ = x; } Scope* exec_scope() { return exec_scope_; } const std::vector& instructions( int block_idx = kRootBlockIdx) const { return instructions_[block_idx]; } std::vector* mutable_instructions( int block_idx = kRootBlockIdx) { return &instructions_[block_idx]; } size_t block_size() { return instructions_.size(); } // Update the ops and vars of all of blocks to the given program_desc // according to the instructions void SaveToProgram(std::shared_ptr program_desc); #ifdef LITE_WITH_CUDA // UpdateCudaContext will update the exec stream and io stream of all kernels // in the program. void UpdateCudaContext(cudaStream_t exec, cudaStream_t io); #endif private: RuntimeProgram(const RuntimeProgram&) = delete; std::vector> instructions_; Scope* exec_scope_{}; #ifdef LITE_WITH_PROFILE profile::Profiler profiler_; void set_profiler() { for (auto& inst : instructions_[kRootBlockIdx]) { inst.set_profiler(&profiler_); } } #endif #ifdef LITE_WITH_NVTX std::vector register_layer_names_; #endif }; } // namespace lite } // namespace paddle