// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/device_event_base.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/phi/core/utils/rw_lock.h"

#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
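
// Illustrative sketch (hypothetical class, not part of this header): the two
// macros above are meant to be expanded inside member functions of a class
// that owns an `RWLock vars_lock_` member (see phi/core/utils/rw_lock.h and
// its AutoRDLock/AutoWRLock guards), e.g.
//
//   class VarHolder {
//    public:
//     size_t Size() const {
//       SCOPE_VARS_READER_LOCK  // shared lock for read-only access
//       return vars_.size();
//     }
//     void Add(Variable* var) {
//       SCOPE_VARS_WRITER_LOCK  // exclusive lock while mutating
//       vars_.push_back(var);
//     }
//
//    private:
//     mutable RWLock vars_lock_;
//     std::vector<Variable*> vars_;
//   };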

namespace paddle {
namespace framework {

using OpKernelComputeFunc = std::function<void(const ExecutionContext&)>;

constexpr int kEmptyVarIndex = 0;

// stream types
constexpr const char* kCustomStream = "CustomStream";
constexpr const char* kDefaultStream = "DefaultStream";
constexpr const char* kD2HStream = "D2HStream";
constexpr const char* kH2DStream = "H2DStream";

enum class Priority { kLowest, kNormal };

class InterpretercoreInferShapeContext : public InferShapeContext {
 public:
  InterpretercoreInferShapeContext(const OperatorBase& op,
                                   const RuntimeContext& ctx);

  bool HasInput(const std::string& name) const override;

  bool HasOutput(const std::string& name) const override;

  bool HasAttr(const std::string& name) const override;

  bool HasInputs(const std::string& name) const override;

  bool HasOutputs(const std::string& name,
                  bool allow_null = false) const override;

  AttrReader Attrs() const override;

  std::vector<std::string> Inputs(const std::string& name) const override;

  std::vector<std::string> Outputs(const std::string& name) const override;

  std::string GetInputNameByIdx(size_t idx) const override;

  std::string GetOutputNameByIdx(size_t idx) const override;

  void ShareDim(const std::string& in, const std::string& out, size_t i = 0,
                size_t j = 0) override;

  void ShareAllLoD(const std::string& in,
                   const std::string& out) const override;

  void ShareLoD(const std::string& in, const std::string& out, size_t i = 0,
                size_t j = 0) const override;

  int32_t GetLoDLevel(const std::string& in, size_t i = 0) const override;

  void SetLoDLevel(const std::string& out, int32_t lod_level,
                   size_t j = 0) const override;

  bool IsRuntime() const override;

  bool IsRunMKLDNNKernel() const override;

  // TODO(paddle-dev): Can this be template?
  paddle::small_vector<InferShapeVarPtr, phi::kInputSmallVectorSize>
  GetInputVarPtrs(const std::string& name) const override;

  paddle::small_vector<InferShapeVarPtr, phi::kOutputSmallVectorSize>
  GetOutputVarPtrs(const std::string& name) const override;

  DDim GetInputDim(const std::string& name) const override;

  std::vector<DDim> GetInputsDim(const std::string& name) const override;

  proto::VarType::Type GetInputVarType(const std::string& name) const override;

  std::vector<proto::VarType::Type> GetInputsVarType(
      const std::string& name) const override;

  std::vector<proto::VarType::Type> GetOutputsVarType(
      const std::string& name) const override;

  void SetOutputDim(const std::string& name, const DDim& dim) override;

  void SetOutputsDim(const std::string& name,
                     const std::vector<DDim>& dims) override;

  const phi::ArgumentMappingFn* GetPhiArgumentMappingFn() const override;

  const phi::KernelSignature* GetPhiDefaultKernelSignature() const override;

  void SetSkipLoD(bool skip);

 protected:
  DDim GetDim(Variable* var) const;

  std::vector<DDim> GetDims(const std::vector<Variable*>& vars) const;

  std::vector<DDim> GetRepeatedDims(const std::string& name) const override;

  void SetDim(Variable* var, const DDim& dim);

  void SetDims(const std::vector<Variable*>& vars,
               const std::vector<DDim>& dims);

  void SetRepeatedDims(const std::string& name,
                       const std::vector<DDim>& dims) override;

  std::vector<proto::VarType::Type> GetVarTypes(
      const std::vector<Variable*>& vars) const;

  proto::VarType::Type GetVarType(Variable* var) const;

 private:
  const std::vector<Variable*>& InputVars(const std::string& name) const;

  const std::vector<Variable*>& OutputVars(const std::string& name) const;

  const OperatorBase& op_;
  const RuntimeContext& ctx_;
  bool can_skip_lod_;
};

struct OpKernelFunc {
  OpKernelComputeFunc compute_func_;
};

struct VariableMetaInfo {
  int var_ref_count_{0};
  framework::VarDesc* var_desc_{nullptr};
  bool sikp_inplace_{false};

  VariableMetaInfo() {}
  VariableMetaInfo(int var_ref_count, framework::VarDesc* var_desc)
      : var_ref_count_(var_ref_count), var_desc_(var_desc) {}
};

class VariableScope {
 public:
  explicit VariableScope(Scope* scope);

  Scope* GetMutableScope() const;

  Scope* GetMutableLocalScope() const;

  void SetScope(Scope* scope);

  void SetLocalScope(Scope* local_scope);

  ~VariableScope();

  // Get variable id by name, return -1 if not found
  int GetIdByName(const std::string& name) const;

  // Get variable name by id, return "" if not found
  std::string GetNameById(int id) const;

  bool HasVar(const std::string& name) const;

  int VarId(const std::string& name) const;

  size_t VarSize() const;

  void AddVar(const std::string& name, VarDesc* var_desc);

  Variable* VarRef(int id) const;

  void SetVarDesc(const std::string& name, framework::VarDesc* var_desc);

  paddle::framework::VarDesc* VarDesc(const std::string& name) const;

  paddle::framework::VarDesc* VarDesc(int id) const;

  void CheckExist(int id) const;

  void CheckExist(const std::string& name) const;

  std::vector<VariableMetaInfo>& MutableVecMetaInfo() { return vec_meta_info_; }

  const std::vector<VariableMetaInfo>& VecMetaInfo() const {
    return vec_meta_info_;
  }

  const std::vector<std::pair<std::string, int>>& DataTransferAddedVars()
      const {
    return data_transfer_added_vars_;
  }

  std::vector<std::pair<std::string, int>>& MutableDataTransferAddedVars() {
    return data_transfer_added_vars_;
  }

  std::vector<Variable*>& MutableVarList() { return var_list_; }

  void SetVarSikpInplace(const std::string& name, bool skip);

  bool GetVarSikpInplace(int id) const;

 private:
  // not owned; better to remove it, since all vars should be
  // accessed via Scope instead of VariableScope
  std::vector<Variable*> var_list_;

  std::map<std::string, int> name2id_;
  std::vector<VariableMetaInfo> vec_meta_info_;

  Scope* scope_{nullptr};
  // TODO(zhiqiu): find a better way to support local scope.
  Scope* local_scope_{nullptr};
  // mutable RWLock vars_lock_;

  // var_name -> var_type
  std::vector<std::pair<std::string, int>> data_transfer_added_vars_;
};
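
// Illustrative sketch (hypothetical usage, not code from this file): the
// interpreter addresses variables by integer id, and VariableScope owns the
// name <-> id mapping, e.g.
//
//   VariableScope var_scope(scope);                // scope: Scope*
//   var_scope.AddVar("x", x_var_desc);             // x_var_desc: VarDesc*
//   int id = var_scope.GetIdByName("x");           // valid id once added
//   int missing = var_scope.GetIdByName("y");      // -1: no such variable
//   std::string name = var_scope.GetNameById(id);  // "x"
//   Variable* var = var_scope.VarRef(id);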

class NextInstructionList {
 public:
  void AddDirectRun(size_t id) { direct_run_.push_back(id); }

  void ADDEventRun(size_t id) { event_wait_run_.push_back(id); }

  void AddSyncRun(size_t id) { synchronize_run_.push_back(id); }

  const std::vector<size_t>& DirectRunIds() const { return direct_run_; }

  const std::vector<size_t>& EventRunIds() const { return event_wait_run_; }

  const std::vector<size_t>& SyncRunIds() const { return synchronize_run_; }

 private:
  std::vector<size_t> direct_run_;
  std::vector<size_t> event_wait_run_;
  std::vector<size_t> synchronize_run_;
};

struct EventInter {
  explicit EventInter(size_t var_id,
                      std::shared_ptr<platform::DeviceEvent> event,
                      platform::DeviceType waiter_type)
      : var_id_(var_id), event_(event), waiter_type_(waiter_type) {}
  size_t var_id_;
  std::shared_ptr<platform::DeviceEvent> event_;
  platform::DeviceType waiter_type_;
};

enum class OpFuncType {
  kQueueSync = 0,   // CPU kernel, blocks the host
  kQueueAsync = 1,  // GPU/XPU kernel, or d2h, h2d, send, recv, broadcast
};

class RuntimeInferShapeContext;

struct OpFuncNode {
  // TODO(zhiqiu): Better make it unique_ptr
  std::shared_ptr<OperatorBase> operator_base_;
  std::string execution_stream_{kDefaultStream};

  std::map<std::string, std::vector<int>> input_index;
  std::map<std::string, std::vector<int>> output_index;
  std::unordered_set<int> no_data_transform_index;

  std::map<int, int> inplace_back_map;

  OpKernelComputeFunc kernel_func_;
  platform::DeviceContext* dev_ctx_;  // not owned

  // fit for phi kernel
  phi::Kernel* phi_kernel_{nullptr};  // not owned

  OpFuncType type_;
};

class Instruction {
 public:
  Instruction(size_t id, OpFuncNode&& op_func_node,
              const platform::DeviceContext& dev_ctx, const Priority priority);

  size_t Id() const;

  const std::map<std::string, std::vector<int>>& Inputs() const;

  const std::map<std::string, std::vector<int>>& Outputs() const;

  const std::unordered_set<int>& NoDataTransformVars() const;

  OpKernelComputeFunc KernelFunc() const;

  phi::Kernel* PhiKernel() const;

  OpFuncType KernelType() const;

  const std::map<int, int>& InplaceBackMap() const;

  OperatorBase* OpBase() const;

  NextInstructionList& NextInstructions();

  const NextInstructionList& NextInstructions() const;

  void AddGCCheckVar(size_t id);

  const std::vector<size_t>& GCCheckVars() const;

  void ResetContext(const VariableValueMap& in_vars,
                    const VariableValueMap& out_vars);

  void ResetContextWithScope(const VariableValueMap& in_vars,
                             const VariableValueMap& out_vars,
                             const framework::Scope& scope);

  std::shared_ptr<RuntimeContext> InnerRuntimeContext() const;

  std::shared_ptr<InterpretercoreInferShapeContext> InnerInferShapeContext()
      const;

  std::shared_ptr<ExecutionContext> InnerExecutionContext() const;

  const platform::DeviceContext& DeviceContext() const;

  const std::vector<std::pair<Variable*, Variable*>>& InplaceInfo() const;

  void AddInplace(Variable* in, Variable* out);

  void ClearInplace();

  const std::vector<EventInter>& InputEvents() const;

  const std::vector<EventInter>& OutputEvents() const;

  void AddInputEvent(size_t var_id,
                     std::shared_ptr<platform::DeviceEvent> event,
                     platform::DeviceType waiter_type);

  void AddOutputEvent(size_t var_id,
                      std::shared_ptr<platform::DeviceEvent> event,
                      platform::DeviceType waiter_type);

  Priority GetPriority() const { return priority_; }

 private:
  size_t id_;
  OpFuncNode op_func_node_;
  const platform::DeviceContext& dev_ctx_;  // not owned
  const Priority priority_;

  std::shared_ptr<RuntimeContext> runtime_ctx_;
  std::shared_ptr<InterpretercoreInferShapeContext> infershape_ctx_;
  std::shared_ptr<ExecutionContext> execution_ctx_;

  std::vector<size_t> gc_check_vars_;

  NextInstructionList next_instruction_;

  std::vector<EventInter> intput_events_;
  std::vector<EventInter> output_events_;

  std::vector<std::pair<Variable*, Variable*>> vec_inplace_in_to_out_;
};
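
// Illustrative sketch (hypothetical scheduler code, not part of this header):
// after an Instruction finishes, its NextInstructionList tells the scheduler
// how each successor instruction may be launched, e.g.
//
//   void ScheduleNext(const Instruction& instr) {
//     const auto& next = instr.NextInstructions();
//     for (size_t id : next.DirectRunIds()) {
//       RunAsync(id);          // can run directly, no extra wait required
//     }
//     for (size_t id : next.EventRunIds()) {
//       WaitInputEvents(id);   // wait on recorded device events first
//       RunAsync(id);
//     }
//     for (size_t id : next.SyncRunIds()) {
//       RunSync(id);           // requires host-side synchronization
//     }
//   }
//
// RunAsync, RunSync and WaitInputEvents are placeholders for the
// interpreter's actual launch routines.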

namespace interpreter {

static constexpr char kMemcpyH2D[] = "memcpy_h2d";
static constexpr char kMemcpyD2H[] = "memcpy_d2h";
static constexpr char kFetchVarName[] = "fetch";

// static_ref_ is the number of "last live ops" of a variable, calculated
// statically after the Instructions are built. dynamic_ref_ is its runtime
// counterpart, decreased by one dynamically after the execution of each op in
// that last-live-ops list. var_ is the related variable.
// dynamic_ref_ is initialized to static_ref_ and is decreased during
// interpretercore's execution; after the run finishes, all dynamic_ref_ are
// reset to static_ref_ again (i.e., dynamic_ref_ = static_ref_). See
// ResetAtomicGuard for details.
class VarRefInfo {
 public:
  explicit VarRefInfo(size_t ref, Variable* var)
      : static_ref_(ref), dynamic_ref_(ref), var_(var) {}

  size_t DynamicRef() { return dynamic_ref_; }

  Variable* Var() { return var_; }

  void ResetDynamicRef() {
    if (static_ref_ != 1) {
      dynamic_ref_ = static_ref_;
    }
  }

  void ResetVariable(Variable* new_var) { var_ = new_var; }

  bool CheckAndDecrease() {
    return static_ref_ == 1 || (dynamic_ref_.fetch_sub(1) == 1);
  }

 private:
  const size_t static_ref_;
  std::atomic<size_t> dynamic_ref_;
  Variable* var_;
};

// static_dep_ is the number of dependencies (ops that must run before it) of
// each op, calculated statically. dynamic_dep_ is its runtime counterpart,
// decreased by one dynamically after the execution of each dependency op.
// dynamic_dep_ is initialized to static_dep_ and is decreased during
// interpretercore's execution; after the run finishes, all dynamic_dep_ are
// reset to static_dep_ again (i.e., dynamic_dep_ = static_dep_). See
// ResetAtomicGuard for details.
class OpDepInfo {
 public:
  explicit OpDepInfo(size_t dep) : static_dep_(dep), dynamic_dep_(dep) {}

  size_t DynamicDep() { return dynamic_dep_; }

  void ResetDynamicDep() {
    if (static_dep_ != 1) {
      dynamic_dep_ = static_dep_;
    }
  }

  bool CheckAndDecrease() {
    return static_dep_ == 1 || (dynamic_dep_.fetch_sub(1) == 1);
  }

 private:
  const size_t static_dep_;
  std::atomic<size_t> dynamic_dep_;
};

class ResetAtomicGuard {
 public:
  ResetAtomicGuard(std::vector<std::shared_ptr<OpDepInfo>>* deps,
                   std::vector<std::shared_ptr<VarRefInfo>>* refs)
      : deps_(deps), refs_(refs) {}

  ~ResetAtomicGuard() {
    VLOG(10) << "Reset DynamicDep";
    for (auto&& dep : *deps_) {
      dep->ResetDynamicDep();
    }

    VLOG(10) << "Reset DynamicRef";
    for (auto&& ref : *refs_) {
      ref->ResetDynamicRef();
    }
  }

 private:
  std::vector<std::shared_ptr<OpDepInfo>>* deps_;
  std::vector<std::shared_ptr<VarRefInfo>>* refs_;
};
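
// Illustrative sketch (hypothetical usage, not part of this header): a run of
// the interpreter typically wires the three helpers above together roughly as
// follows, assuming
//   std::vector<std::shared_ptr<OpDepInfo>> op_deps;    // one per op
//   std::vector<std::shared_ptr<VarRefInfo>> var_refs;  // one per variable
//
//   ResetAtomicGuard guard(&op_deps, &var_refs);  // restores counters on exit
//   // ... after op `i` finishes executing:
//   for (size_t var_id : last_live_vars_of(i)) {        // placeholder helper
//     if (var_refs[var_id]->CheckAndDecrease()) {
//       // last use of the variable: it may be garbage-collected now
//     }
//   }
//   for (size_t next_id : downstream_ops_of(i)) {       // placeholder helper
//     if (op_deps[next_id]->CheckAndDecrease()) {
//       // all dependencies of `next_id` have run: it is ready to schedule
//     }
//   }

}  // namespace interpreter
}  // namespace framework
}  // namespace paddle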