diff --git a/src/core/impl/utils/infile_persistent_cache.cpp b/src/core/impl/utils/infile_persistent_cache.cpp
index 5171ff7898b8a6533905e18aa81c397423ed1f7a..bd5df4806c6d0c112d52175c43555dab66ea575b 100644
--- a/src/core/impl/utils/infile_persistent_cache.cpp
+++ b/src/core/impl/utils/infile_persistent_cache.cpp
@@ -13,7 +13,7 @@
 #if defined(_WIN32)
 #include <io.h>
-#define F_OK 0
+#define F_OK 0
 #define access(a, b) _access(a, b)
 #elif __linux__ || __unix__ || __APPLE__
 #include <unistd.h>
@@ -32,8 +32,9 @@ public:

     template <typename T>
     void read(T& val) {
-        static_assert(std::is_trivially_copyable<T>::value,
-                      "only support trivially copyable type");
+        static_assert(
+                std::is_trivially_copyable<T>::value,
+                "only support trivially copyable type");
         mgb_assert(m_offset + sizeof(T) <= m_size);
         memcpy(&val, m_ptr, sizeof(T));
         m_offset += sizeof(T);
@@ -42,8 +43,9 @@ public:

     template <typename T>
     void read(T* buf, size_t size) {
-        static_assert(std::is_trivially_copyable<T>::value && sizeof(T) == 1,
-                      "only support read bytes");
+        static_assert(
+                std::is_trivially_copyable<T>::value && sizeof(T) == 1,
+                "only support read bytes");
         mgb_assert(m_offset + size <= m_size);
         memcpy(buf, m_ptr, size);
         m_offset += size;
@@ -67,20 +69,21 @@ public:

     template <typename T>
     void read(T& val) {
-        static_assert(std::is_trivially_copyable<T>::value,
-                      "only support trivially copyable type");
+        static_assert(
+                std::is_trivially_copyable<T>::value,
+                "only support trivially copyable type");
         auto ret = fread(&val, sizeof(T), 1, m_fp);
         mgb_assert(ret == 1);
     }

     template <typename T>
     void read(T* buf, size_t size) {
-        static_assert(std::is_trivially_copyable<T>::value && sizeof(T) == 1,
-                      "only support read bytes");
+        static_assert(
+                std::is_trivially_copyable<T>::value && sizeof(T) == 1,
+                "only support read bytes");
         auto ret = fread(buf, size, 1, m_fp);
         mgb_assert(ret == 1);
     }
-
 };

 //////////////////////// InFilePersistentCache::OutputFile ///////////////
@@ -114,8 +117,8 @@ public:

 //////////////////////// InFilePersistentCache::BlobStorage ///////////////
 template <typename Input>
-InFilePersistentCache::BlobStorage&
-InFilePersistentCache::BlobStorage::init_from_input(Input& inp) {
+InFilePersistentCache::BlobStorage& InFilePersistentCache::BlobStorage::init_from_input(
+        Input& inp) {
     uint32_t data_size;
     inp.read(data_size);
     size = data_size;
@@ -125,15 +128,14 @@ InFilePersistentCache::BlobStorage::init_from_input(Input& inp) {
     return *this;
 }

-void InFilePersistentCache::BlobStorage::write_to_file(
-        OutputFile& out_file) const {
+void InFilePersistentCache::BlobStorage::write_to_file(OutputFile& out_file) const {
     uint32_t u_size = size;
     out_file.write(u_size);
     out_file.write(data_refhold.get(), u_size);
 }

-InFilePersistentCache::BlobStorage&
-InFilePersistentCache::BlobStorage::init_data_ref(const Blob& b) {
+InFilePersistentCache::BlobStorage& InFilePersistentCache::BlobStorage::init_data_ref(
+        const Blob& b) {
     data_refhold = std::make_unique<uint8_t[]>(b.size + 1);
     memcpy(data_refhold.get(), b.ptr, b.size);
     data_refhold.get()[b.size] = 0;  // for C-string safety
@@ -227,8 +229,8 @@ Maybe<PersistentCache::Blob> InFilePersistentCache::get(
     return iter1->second;
 }

-void InFilePersistentCache::put(const std::string& category, const Blob& key,
-                                const Blob& value) {
+void InFilePersistentCache::put(
+        const std::string& category, const Blob& key, const Blob& value) {
     BlobStorage key_storage;
     key_storage.init_data_ref(key).init_hash();

diff --git a/src/core/include/megbrain/utils/infile_persistent_cache.h b/src/core/include/megbrain/utils/infile_persistent_cache.h
index 28b28bef02bb5de0ac83ef7cc07c91dc8179ff87..58f069d4d24dc49eecdb3936da76ae3aa6f2af87 100644
--- a/src/core/include/megbrain/utils/infile_persistent_cache.h
+++ b/src/core/include/megbrain/utils/infile_persistent_cache.h
@@ -49,13 +49,15 @@ class InFilePersistentCache final : public PersistentCache {
             size_t operator()(const BlobStorage& b) const { return b.hash; }
         };
     };
-    std::unordered_map<std::string,
-                       std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>>
+    std::unordered_map<
+            std::string,
+            std::unordered_map<BlobStorage, BlobStorage, BlobStorage::Hash>>
             m_cache;
     MGB_MUTEX m_mtx;

     template <typename Input>
     void read_cache(Input& inp);
+
 public:
     InFilePersistentCache() = default;
     InFilePersistentCache(const char* path);
@@ -68,8 +70,7 @@ public:
     void dump_cache(const char* path);

     Maybe<Blob> get(const std::string& category, const Blob& key) override;
-    void put(const std::string& category, const Blob& key,
-             const Blob& value) override;
+    void put(const std::string& category, const Blob& key, const Blob& value) override;
     bool support_dump_cache() override { return true; }
 };
 }  // namespace mgb
diff --git a/src/core/include/megbrain/utils/persistent_cache.h b/src/core/include/megbrain/utils/persistent_cache.h
index 8c881aea7a2bd1093d5102d8124b5a746bd846dd..01dc093beffac40aafd636d3cb72cad55ab05ecb 100644
--- a/src/core/include/megbrain/utils/persistent_cache.h
+++ b/src/core/include/megbrain/utils/persistent_cache.h
@@ -40,7 +40,7 @@ public:
             const std::string& category, const Blob& key, const Blob& value) = 0;

     virtual bool support_dump_cache() { return false; }
-
+
     //! set an implementation; return the original implementation
     static std::shared_ptr<PersistentCache> set_impl(
             std::shared_ptr<PersistentCache> impl);
diff --git a/src/gopt/impl/global_layout_transform/opr_safe_dump.cpp b/src/gopt/impl/global_layout_transform/opr_safe_dump.cpp
index 28c187918068b72a0819b8b6903b794bee00adef..5cf07b3c45c475ffae952162cb51c116cb528a2d 100644
--- a/src/gopt/impl/global_layout_transform/opr_safe_dump.cpp
+++ b/src/gopt/impl/global_layout_transform/opr_safe_dump.cpp
@@ -18,6 +18,7 @@
 #include "megbrain/opr/nn_int.h"
 #include "megbrain/opr/tensor_manip.h"
+#include "megbrain/utils/hash_ct.h"

 #include "midout.h"
 MIDOUT_DECL(megbrain_opr_safe_dump)
 #define MIDOUT_B(...)
MIDOUT_BEGIN(megbrain_opr_safe_dump, __VA_ARGS__) { @@ -38,24 +39,34 @@ template <> void write_param(std::string& /* data */, const DType& /* dtype */) {} template -struct OprDumpImpl { - static std::string dump(const cg::OperatorNodeBase* opr_) { - MIDOUT_B(Opr) - auto&& opr = opr_->cast_final_safe(); - std::string data; - write_param(data, opr.param()); - return data; - MIDOUT_E - } -}; +struct OprDumpImpl; -#define INST(_Opr) \ +#define cb(_Opr) \ + template <> \ + struct OprDumpImpl<_Opr> { \ + static std::string dump(const cg::OperatorNodeBase* opr_) { \ + MIDOUT_B(_Opr) \ + auto&& opr = opr_->cast_final_safe<_Opr>(); \ + std::string data; \ + auto opr_hash = MGB_HASH_STR(#_Opr); \ + write_param(data, opr_hash); \ + write_param(data, opr.param()); \ + return data; \ + MIDOUT_E \ + } \ + }; +FOREACH_SUPPORTED_OPR_WITHOUT_EXECUTION_POLICY(cb) +#undef cb + +#define cb(_Opr) \ template <> \ struct OprDumpImpl<_Opr> { \ static std::string dump(const cg::OperatorNodeBase* opr_) { \ MIDOUT_B(_Opr) \ auto&& opr = opr_->cast_final_safe<_Opr>(); \ std::string data; \ + auto opr_hash = MGB_HASH_STR(#_Opr); \ + write_param(data, opr_hash); \ write_param(data, opr.param()); \ using ExecutionPolicy = megdnn::param::ExecutionPolicy; \ ExecutionPolicy policy{ \ @@ -66,11 +77,8 @@ struct OprDumpImpl { MIDOUT_E \ } \ }; -INST(Convolution); -INST(ConvBiasForward); -INST(ConvolutionBackwardData); -INST(PoolingForward); -#undef INST +FOREACH_SUPPORTED_OPR_WITH_EXECUTION_POLICY(cb) +#undef cb } // namespace namespace mgb { @@ -83,8 +91,9 @@ std::string opr_safe_dump(const cg::OperatorNodeBase* opr) { return OprDumpImpl<_Opr>::dump(opr); \ } else FOREACH_SUPPORTED_OPR(cb) { - mgb_throw(InternalError, "unsupported operator(got:%s)", - opr->dyn_typeinfo()->name); + mgb_throw( + InternalError, "unsupported operator(got:%s)", + opr->dyn_typeinfo()->name); } #undef cb } diff --git a/src/gopt/impl/global_layout_transform/opr_safe_dump.h b/src/gopt/impl/global_layout_transform/opr_safe_dump.h index d25be51d23bd8bf889165eea79eff9d06be8b930..5a178de8b99f271c2d4b98920478d798a47aa8f6 100644 --- a/src/gopt/impl/global_layout_transform/opr_safe_dump.h +++ b/src/gopt/impl/global_layout_transform/opr_safe_dump.h @@ -16,10 +16,16 @@ namespace mgb { namespace gopt { namespace intl { -#define FOREACH_SUPPORTED_OPR(cb) \ - cb(Convolution) cb(ConvBiasForward) cb(ConvolutionBackwardData) \ - cb(PoolingForward) cb(WarpPerspective) cb(Resize) cb(Elemwise) \ - cb(ElemwiseMultiType) cb(Concat) cb(PowC) cb(TypeCvt) +#define FOREACH_SUPPORTED_OPR_WITHOUT_EXECUTION_POLICY(cb) \ + cb(WarpPerspective) cb(Resize) cb(Elemwise) cb(ElemwiseMultiType) cb(Concat) \ + cb(PowC) cb(TypeCvt) + +#define FOREACH_SUPPORTED_OPR_WITH_EXECUTION_POLICY(cb) \ + cb(Convolution) cb(ConvBiasForward) cb(ConvolutionBackwardData) cb(PoolingForward) + +#define FOREACH_SUPPORTED_OPR(cb) \ + FOREACH_SUPPORTED_OPR_WITHOUT_EXECUTION_POLICY(cb) \ + FOREACH_SUPPORTED_OPR_WITH_EXECUTION_POLICY(cb) std::string opr_safe_dump(const cg::OperatorNodeBase* opr); diff --git a/src/gopt/impl/global_layout_transform/profiler_cache.cpp b/src/gopt/impl/global_layout_transform/profiler_cache.cpp index cfa703400a73ceadbcdad9d9139beb4b6e066066..b606c99db510340ad1f25c567becd6f96be5303b 100644 --- a/src/gopt/impl/global_layout_transform/profiler_cache.cpp +++ b/src/gopt/impl/global_layout_transform/profiler_cache.cpp @@ -11,8 +11,8 @@ */ #include "./opr_safe_dump.h" -#include "megbrain/gopt/profiler.h" #include "megbrain/comp_node_env.h" +#include "megbrain/gopt/profiler.h" using 
namespace mgb; using namespace gopt; @@ -21,9 +21,6 @@ using ReformatKey = ReformatManager::ReformatKey; // =================== ProfilerCache ====================== void ProfilerCache::Key::build_blob_from_opr() { auto&& opr = m_key_impl.opr_key.opr; - // process opr type - auto type = opr->dyn_typeinfo()->name; - size_t type_size = strlen(type); // process opr param auto data = intl::opr_safe_dump(opr); @@ -32,11 +29,7 @@ void ProfilerCache::Key::build_blob_from_opr() { size_t nr_inputs = opr->input().size(); size_t nr_outputs = opr->usable_output().size(); size_t nr_layouts = nr_inputs + nr_outputs; - m_blob_storage.reserve(sizeof(TensorLayout) * 3 * nr_layouts + type_size + - param_size); - - // serialize opr type - m_blob_storage.append(type, type_size); + m_blob_storage.reserve(sizeof(TensorLayout) * 3 * nr_layouts + param_size); // serialize param const char* data_ptr = reinterpret_cast(data.data()); @@ -70,12 +63,12 @@ void ProfilerCache::Key::build_blob_from_opr() { } // serialize opr_format - m_blob_storage.append(std::to_string( - static_cast(m_key_impl.opr_key.opr_format))); + m_blob_storage.append( + std::to_string(static_cast(m_key_impl.opr_key.opr_format))); // serialize extra_attribute - m_blob_storage.append(std::to_string( - static_cast(m_key_impl.opr_key.extra_attribute))); + m_blob_storage.append( + std::to_string(static_cast(m_key_impl.opr_key.extra_attribute))); } void ProfilerCache::Key::build_category(CompNode cn) { @@ -85,8 +78,8 @@ void ProfilerCache::Key::build_category(CompNode cn) { #if MGB_CUDA case CompNode::DeviceType::CUDA: { auto&& prop = env.cuda_env().device_prop; - m_category += ssprintf("plat=cuda;dev=%s;cap=%d.%d", prop.name, - prop.major, prop.minor); + m_category += ssprintf( + "plat=cuda;dev=%s;cap=%d.%d", prop.name, prop.major, prop.minor); break; } #endif @@ -94,9 +87,10 @@ void ProfilerCache::Key::build_category(CompNode cn) { m_category += "plat=cpu"; break; default: - mgb_throw(MegBrainError, - "unsupported comp node for global layout transform " - "profiler cache category"); + mgb_throw( + MegBrainError, + "unsupported comp node for global layout transform " + "profiler cache category"); } } @@ -151,9 +145,10 @@ ProfilerCache& ProfilerCache::set_impl(std::unique_ptr impl) { } void ProfilerCache::dump_cache(const char* path) { - mgb_assert(m_impl->support_dump_cache(), - "current impl of ProfilerCache does not support dump cache to " - "file."); + mgb_assert( + m_impl->support_dump_cache(), + "current impl of ProfilerCache does not support dump cache to " + "file."); auto cache = static_cast(m_impl.get()); cache->dump_cache(path); } @@ -165,8 +160,9 @@ Maybe ProfilerCache::get(const Key& key) { // data type of cost is float auto buf = static_cast(raw_buf->ptr); auto size = raw_buf->size; - mgb_assert(buf && size == sizeof(float), - "ProfileCache invalid value: ptr=%p, size=%zu", buf, size); + mgb_assert( + buf && size == sizeof(float), + "ProfileCache invalid value: ptr=%p, size=%zu", buf, size); auto read_f32 = [&]() { auto ret = *reinterpret_cast(buf); return ret; diff --git a/src/gopt/impl/global_layout_transform/profiler_impl.cpp b/src/gopt/impl/global_layout_transform/profiler_impl.cpp index 99a58507084bfc3b514b4a594b302aa539fbeac6..0ea951e9027967679b6ab4c77caf118a4b5272e9 100644 --- a/src/gopt/impl/global_layout_transform/profiler_impl.cpp +++ b/src/gopt/impl/global_layout_transform/profiler_impl.cpp @@ -154,33 +154,30 @@ void MarkInputContiguous::init_output_static_infer_desc() { } // namespace /* ================== ProfilerImpl 
=================*/ -ProfilerImpl::ProfilerImpl(int runs, float opr_threshold, - float var_node_threshold) +ProfilerImpl::ProfilerImpl(int runs, float opr_threshold, float var_node_threshold) : m_opr_threshold{opr_threshold}, m_var_node_threshold{var_node_threshold}, m_runs{runs} { - m_opr_filter = [this](const OperatorNodeBase* opr, - OperatorNodeBase* new_opr) { + m_opr_filter = [this](const OperatorNodeBase* opr, OperatorNodeBase* new_opr) { /// \note: for the considerations of performance, we skip nchw(naive) /// kernels for conv bias on CUDA platform. to remove this later if (auto conv = try_cast_as_op(new_opr)) { if (conv->output(0)->comp_node().device_type() == CompNode::DeviceType::CUDA && - conv->input(0)->dtype().category() == - DTypeCategory::QUANTIZED && + conv->input(0)->dtype().category() == DTypeCategory::QUANTIZED && conv->param().format == OprFormat::NCHW) { return false; } } - float comp1 = m_opr_footprint.get_computation( - const_cast(opr)); + float comp1 = + m_opr_footprint.get_computation(const_cast(opr)); float comp2 = m_opr_footprint.get_computation(new_opr); if (comp2 > m_opr_threshold * comp1) return false; return true; }; - m_var_node_filter = [this](const VarNode* var, TensorShape from, - TensorShape to, ReformatKey key) { + m_var_node_filter = [this](const VarNode* var, TensorShape from, TensorShape to, + ReformatKey key) { /// \note: due to the alignment requirement of low-bit tensor, we skip /// some layout transform for low-bit tensors. The skipped layout /// transforms do not have corresponding dnn kernel and cannot be @@ -202,8 +199,7 @@ ProfilerImpl::ProfilerImpl(int runs, float opr_threshold, TensorLayout orig_ly = {var->shape(), var->dtype()}, from_ly = {from, var->dtype()}, to_ly = {to, var->dtype()}; float orig_memory = orig_ly.span().dist_byte() * 2.f; - float reformat_memory = - from_ly.span().dist_byte() + to_ly.span().dist_byte(); + float reformat_memory = from_ly.span().dist_byte() + to_ly.span().dist_byte(); if (reformat_memory > orig_memory * m_var_node_threshold) return false; return true; @@ -537,23 +533,20 @@ std::unique_ptr ProfilerBase::make_profiler() { return std::make_unique(); } -std::unique_ptr ProfilerBase::make_cached_profiler( - const char* path) { +std::unique_ptr ProfilerBase::make_cached_profiler(const char* path) { return std::make_unique(path); } /* ================== CachedProfiler =================*/ -CachedProfiler::CachedProfiler(const char* path, int runs, float opr_threshold, - float var_node_threshold) +CachedProfiler::CachedProfiler( + const char* path, int runs, float opr_threshold, float var_node_threshold) : ProfilerImpl(runs, opr_threshold, var_node_threshold), m_path{path} { if (m_path != nullptr) { // file cache - ProfilerCache::inst().set_impl( - std::make_unique(m_path)); + ProfilerCache::inst().set_impl(std::make_unique(m_path)); } } -CachedProfiler::ProfilingResult CachedProfiler::profile( - const Problem& problem) const { +CachedProfiler::ProfilingResult CachedProfiler::profile(const Problem& problem) const { auto ret = ProfilerImpl::profile(problem); if (m_path != nullptr) ProfilerCache::inst().dump_cache(m_path); @@ -563,35 +556,33 @@ CachedProfiler::ProfilingResult CachedProfiler::profile( float CachedProfiler::profile_operator( const OperatorNodeBase* opr, TensorFormats base_format, TensorFormats tensor_format, ReformatAttribute extra_attribute) const { - ProfilerCache::Key key{opr, tensor_formats_to_opr_format(tensor_format), - extra_attribute}; + ProfilerCache::Key key{ + opr, 
tensor_formats_to_opr_format(tensor_format), extra_attribute}; auto ret = ProfilerCache::inst().get(key); if (ret.valid()) return ret.val(); - auto rst = ProfilerImpl::profile_operator(opr, base_format, tensor_format, - extra_attribute); + auto rst = ProfilerImpl::profile_operator( + opr, base_format, tensor_format, extra_attribute); ProfilerCache::inst().put(key, rst); return rst; } float CachedProfiler::profile_operator( - const OperatorNodeBase* opr, - const OprTensorFormatsConfiguration& base_config, + const OperatorNodeBase* opr, const OprTensorFormatsConfiguration& base_config, const OprTensorFormatsConfiguration& config, ReformatAttribute extra_attribute) const { ProfilerCache::Key key{opr, config.opr_format, extra_attribute}; auto ret = ProfilerCache::inst().get(key); if (ret.valid()) return ret.val(); - auto rst = ProfilerImpl::profile_operator(opr, base_config, config, - extra_attribute); + auto rst = + ProfilerImpl::profile_operator(opr, base_config, config, extra_attribute); ProfilerCache::inst().put(key, rst); return rst; } -float CachedProfiler::profile_var_node(const VarNode* var, - TensorFormats base_format, - const ReformatKey& key) const { +float CachedProfiler::profile_var_node( + const VarNode* var, TensorFormats base_format, const ReformatKey& key) const { ProfilerCache::Key pf_key{var, key}; auto ret = ProfilerCache::inst().get(pf_key); if (ret.valid()) diff --git a/src/gopt/include/megbrain/gopt/profiler.h b/src/gopt/include/megbrain/gopt/profiler.h index 299312c340f7c10dcd0903b830a1af4dce97f717..27a6663f31728a7bb9b7bfcf95ef0bfbf6d94a5c 100644 --- a/src/gopt/include/megbrain/gopt/profiler.h +++ b/src/gopt/include/megbrain/gopt/profiler.h @@ -78,7 +78,7 @@ public: const VarNode*, TensorShape, TensorShape, ReformatManager::ReformatKey)>; ProfilerBase() = default; - + virtual ~ProfilerBase() = default; virtual ProfilingResult profile(const Problem& problem) const = 0; @@ -102,13 +102,12 @@ protected: VarNodeFilter m_var_node_filter; }; - /*! \brief A default profiler impl */ class ProfilerImpl : public ProfilerBase { public: - ProfilerImpl(int runs = 10, float opr_threshold = 2.f, - float var_node_threshold = 2.f); + ProfilerImpl( + int runs = 10, float opr_threshold = 2.f, float var_node_threshold = 2.f); ~ProfilerImpl() = default; ProfilingResult profile(const Problem& problem) const override; @@ -128,22 +127,22 @@ protected: OperatorNodeRecord profile_operator( const OperatorNodeBase* opr, TensorFormats base_format, const SmallVector& available_tensor_formats, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const; + ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const; /*! - * \brief prfile opr format agnostic operators (like elemwise, elemwise multi type, typecvt etc.) + * \brief prfile opr format agnostic operators (like elemwise, elemwise multi type, + * typecvt etc.) * * \param opr pointer to the operator to be profiled * \param base_format the original tensor format of the operator node. 
* \param tensor_format the tensor format to be profiled - * \param extra_attribute identify whether to use image object for OpenCL or automatically padding nhwc layout - * \return elapsed time of operator in the given tensor format configuration + * \param extra_attribute identify whether to use image object for OpenCL or + * automatically padding nhwc layout \return elapsed time of operator in the given + * tensor format configuration */ virtual float profile_operator( const OperatorNodeBase* opr, TensorFormats base_format, TensorFormats tensor_format, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const; + ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const; /*! * \brief profile opr format aware operators (like conv, deconv, conv_bias, * etc.) @@ -157,28 +156,29 @@ protected: const OperatorNodeBase* opr, const OprTensorFormatsConfiguration& base_config, const SmallVector& available_configs, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const; + ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const; /*! - * \brief prfile opr format aware operators (like conv, deconv, conv_bias, resize, warp etc.) + * \brief prfile opr format aware operators (like conv, deconv, conv_bias, resize, + * warp etc.) * * \param opr pointer to the operator to be profiled - * \param base_config the original opr format configuration of the operator node, + * \param base_config the original opr format configuration of the operator node, * \param config the opr format configuration to be profiled - * \param extra_attribute identify whether to use image object for OpenCL or automatically padding nhwc layout - * \return elapsed time of operator in the given opr format configuration + * \param extra_attribute identify whether to use image object for OpenCL or + * automatically padding nhwc layout \return elapsed time of operator in the given + * opr format configuration */ - virtual float profile_operator(const OperatorNodeBase* opr, - const OprTensorFormatsConfiguration& base_config, - const OprTensorFormatsConfiguration& config, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const; + virtual float profile_operator( + const OperatorNodeBase* opr, + const OprTensorFormatsConfiguration& base_config, + const OprTensorFormatsConfiguration& config, + ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const; /*! * \brief profile layout transform of the var node * * \param var pointer to the var node to be profiled * \param base_format the original tensor formats in which the var node is - * stored + * stored * \param available_tensor_formats the available tensor formats * \param extra_attribute the extra attributes (options) of the problem * \return the var node record @@ -186,27 +186,26 @@ protected: VarNodeRecord profile_var_node( const VarNode* var, TensorFormats base_format, const SmallVector& available_tensor_formats, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const; + ReformatAttribute extra_attribute = ReformatAttribute::DEFAULT) const; /*! 
* \brief profile layout transform of the var node * * \param var pointer to the var node to be profiled * \param base_format the original tensor formats in which the var node is * stored - * \param key type of ReformatKey, identify the information/attributes of the layout transoform - * \return elapsed time of the layout transform + * \param key type of ReformatKey, identify the information/attributes of the layout + * transoform \return elapsed time of the layout transform */ - virtual float profile_var_node(const VarNode* var, - TensorFormats base_format, - const ReformatKey& key) const; + virtual float profile_var_node( + const VarNode* var, TensorFormats base_format, + const ReformatKey& key) const; OprFootprint m_opr_footprint; - float m_opr_threshold; /// a threshold, when the computation of the newly - /// created operator that is built in some opr - /// format configuration is as greater as - /// m_opr_threshold times of the original operator, - /// the opr format configuration will be skipped - /// (i.e. the cost is infinite) + float m_opr_threshold; /// a threshold, when the computation of the newly + /// created operator that is built in some opr + /// format configuration is as greater as + /// m_opr_threshold times of the original operator, + /// the opr format configuration will be skipped + /// (i.e. the cost is infinite) float m_var_node_threshold; /// a threshold, when the memory footprint of /// the layout transform of the var node is as /// larger as m_var_node_threshold as the var @@ -298,23 +297,26 @@ private: class CachedProfiler final : public ProfilerImpl { public: - CachedProfiler(const char* path = nullptr, int runs = 10, - float opr_threshold = 2.f, float var_node_threshold = 2.f); + CachedProfiler( + const char* path = nullptr, int runs = 10, float opr_threshold = 2.f, + float var_node_threshold = 2.f); ProfilingResult profile(const Problem& problem) const override; private: - float profile_operator(const OperatorNodeBase* opr, - TensorFormats base_format, - TensorFormats tensor_format, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const override; - float profile_operator(const OperatorNodeBase* opr, - const OprTensorFormatsConfiguration& base_config, - const OprTensorFormatsConfiguration& config, - ReformatAttribute extra_attribute = - ReformatAttribute::DEFAULT) const override; - float profile_var_node(const VarNode* var, TensorFormats base_format, - const ReformatKey& key) const override; + float profile_operator( + const OperatorNodeBase* opr, TensorFormats base_format, + TensorFormats tensor_format, + ReformatAttribute extra_attribute = + ReformatAttribute::DEFAULT) const override; + float profile_operator( + const OperatorNodeBase* opr, + const OprTensorFormatsConfiguration& base_config, + const OprTensorFormatsConfiguration& config, + ReformatAttribute extra_attribute = + ReformatAttribute::DEFAULT) const override; + float profile_var_node( + const VarNode* var, TensorFormats base_format, + const ReformatKey& key) const override; const char* m_path; }; diff --git a/src/gopt/test/cache_data.h b/src/gopt/test/cache_data.h index f049589e93605153451273ca3e70c0c71be857e3..696bdc5fe26dbef266a657d5ef8ecf19307b11a9 100644 Binary files a/src/gopt/test/cache_data.h and b/src/gopt/test/cache_data.h differ diff --git a/src/gopt/test/embed_cache.py b/src/gopt/test/embed_cache.py index db50a777a3c683cbb299c113e34fe71e61b24d23..0f3c7cd23240c94ab782cf0c46217d633e838621 100644 --- a/src/gopt/test/embed_cache.py +++ b/src/gopt/test/embed_cache.py @@ 
-7,19 +7,21 @@
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-# To keep the profiling results used by the global layout transform from being affected by the CI environment, the hard-coded profiling results are stored in a cache;
-# each test run reads the cached profiling results from memory and then performs the global layout transform based on them.
-# This script converts the dumped cache files into a cache header file, from which the tests read their data.
-# If you add tests related to the global layout transform in src/gopt/test/layout_transform_pass.cpp, you should consider using this script
-# to process the profiling data.
+# To keep the profiling results used by the global layout transform from being affected by the CI environment, the hard-coded profiling data are stored in a cache;
+# each test run reads the profiling results from the in-memory cache and then performs the global layout transform based on them, so that every run
+# produces consistent results.
+# ProfilerCache supports dumping the profiling data cached in memory to a file.
+# This script packs the dumped cache files into a cache header file, from which the tests read their data to build an in-memory ProfilerCache.
+# If you add new tests related to the global layout transform in src/gopt/test/layout_transform_pass.cpp, you should consider using this script
+# to update the profiling data in the cache header file.
 # 1. First, change `#define MGB_WITH_CACHED_TEST 1` in src/gopt/test/layout_transform_pass.cpp to
 #    `#define MGB_WITH_CACHED_TEST 0`
 # 2. Build megbrain_test and run all the tests related to the global layout transform:
 #    ./megbrain_test --gtest_filter="*LayoutTransform*"
 # 3. Use this script to pack all the cache files together:
 #    python3 embed_cache.py -o cache_data.h $(ls /path/to/cache/*.cache)
-# 4. Revert the define from step 1 so that the profiling pass uses the cached data. Then you can rebuild megbrain_test
-#    and verify that the tests are correct.
+# 4. Change the define statement from step 1 back to its original form, so that the profiling pass will use the cached data.
+# 5. Finally, rebuild megbrain_test and make sure that the test results are correct.
 import os.path
 import logging
 import hashlib
diff --git a/src/gopt/test/layout_transform_pass.cpp b/src/gopt/test/layout_transform_pass.cpp
index 8cba8b16dcb543ff18cb7b753d3c15ca0348e035..3e6db8e8a42bc4feaaf15d2dff37735c48359996 100644
--- a/src/gopt/test/layout_transform_pass.cpp
+++ b/src/gopt/test/layout_transform_pass.cpp
@@ -78,8 +78,9 @@ OprFormat tensor_formats_to_opr_format(TensorFormats tensor_format) {
         case TensorFormats::CHWNc4:
             return OprFormat::CHWN4;
         default:
-            mgb_throw(MegBrainError, "tensor format(%u) is not supported",
-                      static_cast<uint32_t>(tensor_format));
+            mgb_throw(
+                    MegBrainError, "tensor format(%u) is not supported",
+                    static_cast<uint32_t>(tensor_format));
     }
 }

@@ -92,28 +93,28 @@ public:
     }
     ~ProfilerMock() {
         // reset in memory cache
-        ProfilerCache::inst().set_impl(
-                std::make_unique<InMemoryPersistentCache>());
+        ProfilerCache::inst().set_impl(std::make_unique<InMemoryPersistentCache>());
     }

 private:
-    float profile_operator(const OperatorNodeBase* opr,
-                           TensorFormats base_format,
-                           TensorFormats tensor_format,
-                           ReformatAttribute extra_attribute =
-                                   ReformatAttribute::DEFAULT) const override {
-        ProfilerCache::Key key{opr, tensor_formats_to_opr_format(tensor_format),
-                               extra_attribute};
+    float profile_operator(
+            const OperatorNodeBase* opr, TensorFormats base_format,
+            TensorFormats tensor_format,
+            ReformatAttribute extra_attribute =
+                    ReformatAttribute::DEFAULT) const override {
+        ProfilerCache::Key key{
+                opr, tensor_formats_to_opr_format(tensor_format), extra_attribute};
         auto ret = ProfilerCache::inst().get(key);
         if (ret.valid())
             return ret.val();
         mgb_assert(false);
     }
-    float profile_operator(const OperatorNodeBase* opr,
-                           const OprTensorFormatsConfiguration& base_config,
-                           const OprTensorFormatsConfiguration& config,
-                           ReformatAttribute extra_attribute =
-                                   ReformatAttribute::DEFAULT) const override {
+    float profile_operator(
+            const OperatorNodeBase* opr,
+            const OprTensorFormatsConfiguration& base_config,
+            const OprTensorFormatsConfiguration& config,
+            ReformatAttribute extra_attribute =
+                    ReformatAttribute::DEFAULT) const override {
         ProfilerCache::Key key{opr, config.opr_format, extra_attribute};
         std::string tmp;
         tmp.reserve(key.blob().size);
@@ -122,8 +123,9 @@ private:
             return
ret.val(); mgb_assert(false); } - float profile_var_node(const VarNode* var, TensorFormats base_format, - const ReformatKey& key) const override { + float profile_var_node( + const VarNode* var, TensorFormats base_format, + const ReformatKey& key) const override { ProfilerCache::Key pf_key{var, key}; auto ret = ProfilerCache::inst().get(pf_key); if (ret.valid()) @@ -174,18 +176,17 @@ TEST(TestLayoutTransform, Resnet18_QS8) { OprFormat::NCHW, TensorFormats::NCHW, Target::UNSPEC, ReformatAttribute::AUTO_PADDING_NHWC}; auto ctx = std::make_unique( - std::move(opr_list), std::move(available_tensor_formats), - attribute); - ctx->add_opr_config(opr::ConvBiasForward::typeinfo(), - {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::CHWN4, - OprFormat::NHWC}) - .add_opr_config(opr::PoolingForward::typeinfo(), - {OprFormat::NCHW4, OprFormat::NCHW32, - OprFormat::NHWC, OprFormat::CHWN4}); + std::move(opr_list), std::move(available_tensor_formats), attribute); + ctx->add_opr_config( + opr::ConvBiasForward::typeinfo(), + {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::CHWN4, OprFormat::NHWC}) + .add_opr_config( + opr::PoolingForward::typeinfo(), + {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::NHWC, + OprFormat::CHWN4}); #if MGB_WITH_CACHED_TEST auto profiler = std::make_unique( - static_cast( - TestLayoutTransform_Resnet18_QS8.data()), + static_cast(TestLayoutTransform_Resnet18_QS8.data()), TestLayoutTransform_Resnet18_QS8.size()); #else auto profiler = ProfilerBase::make_cached_profiler( @@ -278,8 +279,7 @@ TEST(TestLayoutTransform, Resnet18_QS4) { OprFormat::NHWC, OprFormat::CHWN4}); #if MGB_WITH_CACHED_TEST auto profiler = std::make_unique( - static_cast( - TestLayoutTransform_Resnet18_QS4.data()), + static_cast(TestLayoutTransform_Resnet18_QS4.data()), TestLayoutTransform_Resnet18_QS4.size()); #else auto profiler = ProfilerBase::make_cached_profiler( @@ -401,8 +401,7 @@ TEST(TestLayoutTransform, Detection_QS8) { OprFormat::NHWC, OprFormat::CHWN4}); #if MGB_WITH_CACHED_TEST auto profiler = std::make_unique( - static_cast( - TestLayoutTransform_Detection_QS8.data()), + static_cast(TestLayoutTransform_Detection_QS8.data()), TestLayoutTransform_Detection_QS8.size()); #else auto profiler = ProfilerBase::make_cached_profiler( @@ -479,8 +478,7 @@ TEST(TestLayoutTransform, Detection_QS4) { OprFormat::NHWC, OprFormat::CHWN4}); #if MGB_WITH_CACHED_TEST auto profiler = std::make_unique( - static_cast( - TestLayoutTransform_Detection_QS4.data()), + static_cast(TestLayoutTransform_Detection_QS4.data()), TestLayoutTransform_Detection_QS4.size()); #else auto profiler = ProfilerBase::make_cached_profiler( @@ -553,17 +551,16 @@ TEST(TestLayoutTransform, Wide) { OprFormat::NCHW, TensorFormats::NCHW, Target::UNSPEC, ReformatAttribute::DEFAULT}; auto ctx = std::make_unique( - std::move(opr_list), std::move(available_tensor_formats), - attribute); - ctx->add_opr_config(opr::ConvBiasForward::typeinfo(), - {OprFormat::NCHW, OprFormat::NHWC}); + std::move(opr_list), std::move(available_tensor_formats), attribute); + ctx->add_opr_config( + opr::ConvBiasForward::typeinfo(), {OprFormat::NCHW, OprFormat::NHWC}); #if MGB_WITH_CACHED_TEST auto profiler = std::make_unique( static_cast(TestLayoutTransform_Wide.data()), TestLayoutTransform_Wide.size()); #else - auto profiler = ProfilerBase::make_cached_profiler( - "TestLayoutTransform.Wide.cache"); + auto profiler = + ProfilerBase::make_cached_profiler("TestLayoutTransform.Wide.cache"); #endif std::unique_ptr solver{ new DynamicProgrammingSolver(std::move(profiler))}; @@ -674,8 
+671,7 @@ TEST(TestLayoutTransform, DetectionHead) { {OprFormat::NHWC, OprFormat::NCHW4, OprFormat::NCHW64}); #if MGB_WITH_CACHED_TEST auto profiler = std::make_unique( - static_cast( - TestLayoutTransform_DetectionHead.data()), + static_cast(TestLayoutTransform_DetectionHead.data()), TestLayoutTransform_DetectionHead.size()); #else auto profiler = ProfilerBase::make_cached_profiler(