diff --git a/CMakeLists.txt b/CMakeLists.txt index f773219646884b09bf1202ad83c0c7590364cb03..9c299df2d78c1bef07c8358f12c125a8fcebaef6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -213,6 +213,15 @@ if(MGE_WITH_TEST) endif() option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON) +option(MGE_BUILD_XXX "Build _xxx.so instead of mgb.so " OFF) +if(MGE_BUILD_XXX) + set(CMAKE_CXX_STANDARD 17) +endif() + +option(MGE_BUILD_SDK "Build load_and_run" ON) +if(MGE_BUILD_XXX) + set(MGE_BUILD_SDK OFF) +endif() if(NOT MGE_WITH_CUDA) message("-- Disable distributed support, as CUDA is not enabled.") @@ -522,7 +531,7 @@ endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}") - +set(MGB_ENABLE_IMPERATIVE ${MGE_BUILD_XXX}) # Write out megbrain_build_config.h # It defines macros needed by both megbrain and dnn configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h) @@ -566,14 +575,23 @@ if(MGE_WITH_DISTRIBUTED) endif() add_subdirectory(src) -add_subdirectory(sdk/load-and-run) + +if(MGE_BUILD_SDK) + add_subdirectory(sdk/load-and-run) +endif() if(MGE_WITH_PYTHON_MODULE) - add_subdirectory(python_module) + if(MGE_BUILD_XXX) + add_subdirectory(imperative) + else() + add_subdirectory(python_module) + endif() endif() if(MGE_WITH_TEST AND MGE_ENABLE_RTTI) - add_subdirectory(test) + if(NOT MGE_BUILD_XXX) + add_subdirectory(test) + endif() endif() if(TARGET mgb) @@ -597,6 +615,21 @@ if(TARGET mgb) DEPENDS mgb VERBATIM ) +elseif(TARGET _xxx) + add_custom_target( + develop + COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/$ + ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/$ + COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/ops/_internal/generated_ops.py + ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/ops/_internal/generated_ops.py + COMMAND ${CMAKE_COMMAND} -E create_symlink 
${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/ops/_internal/param_defs.py + ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/ops/_internal/param_defs.py + DEPENDS _xxx + VERBATIM + ) endif() IF(APPLE) diff --git a/dnn/CMakeLists.txt b/dnn/CMakeLists.txt index 61c282979fc694cb50eebecbfaec15b5351fef2d..1dc3729832172612f58959b99dd44436d0e4331c 100644 --- a/dnn/CMakeLists.txt +++ b/dnn/CMakeLists.txt @@ -59,7 +59,9 @@ install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) if(MGE_WITH_TEST) - add_subdirectory(test) + if(NOT MGE_BUILD_XXX) + add_subdirectory(test) + endif() endif() add_subdirectory(src) diff --git a/dnn/scripts/gen_param_defs.py b/dnn/scripts/gen_param_defs.py index ca388f530704473d997d07b55afba6dcffea8d02..a48f3df70a11685f0a746489e2e3a6460e40eae7 100755 --- a/dnn/scripts/gen_param_defs.py +++ b/dnn/scripts/gen_param_defs.py @@ -298,6 +298,9 @@ class PyWriter(IndentWriterBase): _enum_member2num = None + def __init__(self, for_imperative=False): + self._imperative = for_imperative + def __call__(self, fout, defs): super().__call__(fout) self._enum_member2num = [] @@ -339,19 +342,35 @@ class PyWriter(IndentWriterBase): ' return super()._missing_(value)\n' '\n' ) - self._write( - 'def _as_dtype_num(dtype):\n' - ' import megengine._internal.mgb as m\n' - ' return m._get_dtype_num(dtype)\n' - '\n' - ) - self._write( - ''' -def _as_serialized_dtype(dtype): - import megengine._internal.mgb as m - return m._get_serialized_dtype(dtype) -''' - ) + if not self._imperative: + self._write( + 'def _as_dtype_num(dtype):\n' + ' import megengine._internal.mgb as m\n' + ' return m._get_dtype_num(dtype)\n' + '\n' + ) + + self._write( + 'def _as_serialized_dtype(dtype):\n' + ' import megengine._internal.mgb as m\n' + ' return m._get_serialized_dtype(dtype)\n' + '\n' + ) + else: + self._write( + 'def _as_dtype_num(dtype):\n' + ' import xxx._xxx.utils as m\n' + ' return m._get_dtype_num(dtype)\n' + '\n' + ) + + self._write( + 'def 
_as_serialized_dtype(dtype):\n' + ' import xxx._xxx.utils as m\n' + ' return m._get_serialized_dtype(dtype)\n' + '\n' + ) + self._process(defs) self._write( ''' @@ -777,8 +796,12 @@ def main(): 'cpp file') parser.add_argument('input') parser.add_argument('output') + parser.add_argument('--imperative', action='store_true', + help='generate files for imperative ') args = parser.parse_args() + for_imperative = args.imperative + with open(args.input) as fin: inputs = fin.read() exec(inputs, {'pdef': ParamDef, 'Doc': member_defs.Doc}) @@ -787,7 +810,7 @@ def main(): input_hash = input_hash.hexdigest() if args.type == 'py': - writer = PyWriter() + writer = PyWriter(for_imperative=for_imperative) else: assert args.type == 'c++' if args.enumv: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 20ddcbbcb338119e58236882920493ee308dfe93..785c6cd500664529e93839a600c1f6e9a54f15b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -151,27 +151,31 @@ if(ANDROID) target_link_libraries(megbrain PUBLIC log) endif() -# Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF -add_library(megengine) -target_link_libraries(megengine PUBLIC megbrain megdnn) -if (UNIX AND NOT APPLE) - # TODO: Use target_link_options after upgrading to CMake 3.13 - target_link_options(megengine PRIVATE -Wl,--no-undefined -Wl,--version-script=${PROJECT_SOURCE_DIR}/python_module/src/version.ld) -endif() -set_target_properties(megengine PROPERTIES CXX_VISIBILITY_PRESET default) -set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) -if (MGE_WITH_DISTRIBUTED) +if(NOT MGE_BUILD_XXX) + # Build as SHARED or STATIC depending on BUILD_SHARED_LIBS=ON/OFF + add_library(megengine) + target_link_libraries(megengine PUBLIC megbrain megdnn) + if (UNIX AND NOT APPLE) + # TODO: Use target_link_options after upgrading to CMake 3.13 + # FIXME: Please use the right directory for mgb or imperative + target_link_options(megengine PRIVATE -Wl,--no-undefined 
-Wl,--version-script=${PROJECT_SOURCE_DIR}/python_module/src/version.ld) + endif() + set_target_properties(megengine PROPERTIES CXX_VISIBILITY_PRESET default) + set_target_properties(megengine PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready # for this. install(TARGETS megengine LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -else() - install(TARGETS megengine megbrain +endif() + +if (NOT MGE_WITH_DISTRIBUTED) + install(TARGETS megbrain EXPORT ${MGE_EXPORT_TARGETS} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() + foreach(_PATH ${MGB_INC}) install(DIRECTORY ${_PATH}/megbrain DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") endforeach() diff --git a/src/core/impl/graph/cg_impl.cpp b/src/core/impl/graph/cg_impl.cpp index a3b15c3c10abaec0b9b7f3160e7d02c8f68d4f0f..c1492b4ab20e04e03cb6b85b534c5a41ba68c755 100644 --- a/src/core/impl/graph/cg_impl.cpp +++ b/src/core/impl/graph/cg_impl.cpp @@ -271,6 +271,23 @@ OperatorNodeBase* ComputingGraphImpl::insert_opr( std::unique_ptr opr_uniqp) { auto opr = opr_uniqp.get(); + if (options().imperative_proxy_graph) { + if (!opr->inserted_in_graph()) { + m_opr_refkeeper.emplace_back(std::move(opr_uniqp)); + opr->set_inserted_in_graph(); + opr->init_output_comp_node(); + opr->init_output_dtype(); + opr->init_output_format(); + // register static infer + { + auto&& mgr = static_infer_manager_impl(); + auto old = mgr.set_register_allowed_opr(opr); + opr->init_output_static_infer_desc(); + mgr.set_register_allowed_opr(old); + } + } + return opr; + } if (opr->inserted_in_graph()) { // FIXME: it's just a trick used for re-evaluation in eager evaluation // mode. 
Since comp_graph has already taken an ownership of the opr, diff --git a/src/core/impl/graph/grad_manager.cpp b/src/core/impl/graph/grad_manager.cpp index bacbab9c057dc3564f3a504b147a22ddabed1c80..448897a4567c5b57fd6dfd7d8c747a5149ac84b2 100644 --- a/src/core/impl/graph/grad_manager.cpp +++ b/src/core/impl/graph/grad_manager.cpp @@ -133,6 +133,15 @@ void cg::register_grad_func(Typeinfo *opr_type, OprGradFunc grad) { opr_type->name); } +OprGradFunc* cg::lookup_grad_func(Typeinfo *opr_type) { + auto giter = static_data().grad_func_registry.find(opr_type); + if (giter != static_data().grad_func_registry.end()) { + return &giter->second; + } else { + return nullptr; + } +} + class GradManager::StreamStrongPropInfer { DepOprIter m_opr_iter; ThinHashSet m_strong_oprs; diff --git a/src/core/impl/graph/operator_node.cpp b/src/core/impl/graph/operator_node.cpp index 3be33fcb70b74aebe590fc3b62a2c25d1260cbbe..bef29dbd9352301c8935e1c7f27f18f25c62ae25 100644 --- a/src/core/impl/graph/operator_node.cpp +++ b/src/core/impl/graph/operator_node.cpp @@ -101,6 +101,11 @@ OperatorNodeBase::~OperatorNodeBase() noexcept { } void OperatorNodeBase::execute(ExecEnv &env) { + if (owner_graph()->options().imperative_proxy_graph) { + do_execute(env); + return; + } + owner_graph()->event().signal_inplace(this, &env); // dispatch waiting commands diff --git a/src/core/impl/graph/var_node.cpp b/src/core/impl/graph/var_node.cpp index 8a2b990455f59eff926f0183b75655ab2abab785..1e00f8f08178df1bdb05e95d91d78845f678f6d5 100644 --- a/src/core/impl/graph/var_node.cpp +++ b/src/core/impl/graph/var_node.cpp @@ -230,6 +230,9 @@ VarNode& VarNode::format(TensorFormat format) { bool VarNode::set_fwd_in2out_readonly( VarNode *input, const SubTensorSpec &sub) { + if (owner_graph()->options().imperative_proxy_graph) { + return false; + } return static_cast(owner_graph()) ->var_node_mem_manager().fwd_in2out_readonly(input, sub, this); } @@ -242,6 +245,7 @@ VarNode& VarNode::set_fwd_in2out_writable(VarNode *input) 
{ VarNode& VarNode::set_fwd_in2out_writable_force(VarNode *input) { + mgb_assert(!owner_graph()->options().imperative_proxy_graph); static_cast(owner_graph()) ->var_node_mem_manager().fwd_in2out_writable_force(input, this); return *this; diff --git a/src/core/include/megbrain/graph/cg.h b/src/core/include/megbrain/graph/cg.h index 3f103fe0f84bb85c3d3e5dae61feb710d38ed6b2..4f47d2f654c5f90b44cde58265ca927b2b9599f7 100644 --- a/src/core/include/megbrain/graph/cg.h +++ b/src/core/include/megbrain/graph/cg.h @@ -440,6 +440,8 @@ class ComputingGraph : public std::enable_shared_from_this, bool eager_evaluation = false; #endif + bool imperative_proxy_graph = false; + //! add extra deps for the comp seq if a specific var is dependent ThinHashMap extra_vardeps; diff --git a/src/core/include/megbrain/graph/grad_impl.h b/src/core/include/megbrain/graph/grad_impl.h index f34e707a94ca40c58a5db9e830e9b7bd999a4cff..ba4800a9b0233e8e8334b716e5a82482350fa823 100644 --- a/src/core/include/megbrain/graph/grad_impl.h +++ b/src/core/include/megbrain/graph/grad_impl.h @@ -73,6 +73,11 @@ namespace cg { */ void register_grad_func(Typeinfo *opr_type, OprGradFunc grad); + /*! + * \brief lookup grad func for an operator type + */ + OprGradFunc* lookup_grad_func(Typeinfo *opr_type); + /*! * \brief add a callback to be invoked when grad of given var is computed * diff --git a/src/core/include/megbrain/graph/operator_node.h b/src/core/include/megbrain/graph/operator_node.h index 73fa06b7583fc86dfc9820ce8e7be15b059d524b..64a593a1ace83b006af57774f27428dfa5f6390b 100644 --- a/src/core/include/megbrain/graph/operator_node.h +++ b/src/core/include/megbrain/graph/operator_node.h @@ -69,6 +69,10 @@ class OperatorNodeConfig final: public Hashable { return *this; } + const Maybe& name() const { + return m_name; + } + /*! 
* \brief update instance ID * diff --git a/src/core/include/megbrain/graph/var_node.h b/src/core/include/megbrain/graph/var_node.h index 2ab3aad98bd36b39aac08d302296f5cff2b26b24..93182edc6da13ca96911140c2a19dde61201298e 100644 --- a/src/core/include/megbrain/graph/var_node.h +++ b/src/core/include/megbrain/graph/var_node.h @@ -22,6 +22,10 @@ #include namespace mgb { +namespace imperative { + class ProxyGraph; +} // namespace imperative + namespace cg { namespace static_infer { class StaticInferManagerImpl; @@ -576,6 +580,7 @@ class VarNode final: public GraphNodeBase { friend class VarDevMemDefragmenter; friend class EagerEvalManager; friend class MemAllocPlan; + friend class imperative::ProxyGraph; }; enum class VarNode::Flag: uint32_t { diff --git a/src/megbrain_build_config.h.in b/src/megbrain_build_config.h.in index 9eb19797d50127284b7e31ec50a9c8bc73b75f84..55afbe61c37f3da9f2d39862b71b7dab78785ae2 100644 --- a/src/megbrain_build_config.h.in +++ b/src/megbrain_build_config.h.in @@ -29,6 +29,8 @@ #cmakedefine01 MGB_ENABLE_FBS_SERIALIZATION #cmakedefine01 MGB_IS_DEV +#cmakedefine01 MGB_ENABLE_IMPERATIVE + // DNN related flags // Platform macro's #cmakedefine01 MEGDNN_WITH_CUDA diff --git a/src/opr/impl/dnn/batch_norm.cpp b/src/opr/impl/dnn/batch_norm.cpp index 1279823e76f37e67c4f8b94ef2e4a5bbb135e8b7..41fd87ebbb7f25d1ce844b9f69973327f7f39aa9 100644 --- a/src/opr/impl/dnn/batch_norm.cpp +++ b/src/opr/impl/dnn/batch_norm.cpp @@ -40,29 +40,37 @@ BatchNormForward::BatchNormForward(VarNode *x, Super{x->owner_graph(), config, "batch_norm", {x, scale, bias, mean, variance}} { - auto check_dest = [&](VarNode* dest) { - auto dest_opr = dest->owner_opr(); - mgb_throw_if(!(dest_opr->same_type() || - dest_opr->same_type()), - GraphError, - "mean&variance in BatchNorm must be SharedDeviceTensor/VolatileSharedDeviceTensor; " - "got %s{%s} actually", - dest_opr->cname(), dest_opr->dyn_typeinfo()->name); - }; - check_dest(mean); - check_dest(variance); + 
if(owner_graph()->options().imperative_proxy_graph) { + m_force_inplace = false; + } + + if (m_force_inplace) { + auto check_dest = [&](VarNode* dest) { + auto dest_opr = dest->owner_opr(); + mgb_throw_if(!(dest_opr->same_type() || + dest_opr->same_type()), + GraphError, + "mean and variance in BatchNorm must be SharedDeviceTensor " + "or VolatileSharedDeviceTensor; got %s{%s} actually", + dest_opr->cname(), dest_opr->dyn_typeinfo()->name); + }; + check_dest(mean); + check_dest(variance); + } init_megdnn_opr(*this, param); add_input({x, scale, bias, mean, variance}); - output(0)-> - set_fwd_in2out_writable_force(input(3)). - add_flag(VarNode::Flag::NO_MEM_RECLAIM); + if (m_force_inplace) { + output(0)-> + set_fwd_in2out_writable_force(input(3)). + add_flag(VarNode::Flag::NO_MEM_RECLAIM); - output(1)-> - set_fwd_in2out_writable_force(input(4)). - add_flag(VarNode::Flag::NO_MEM_RECLAIM); + output(1)-> + set_fwd_in2out_writable_force(input(4)). + add_flag(VarNode::Flag::NO_MEM_RECLAIM); + } } BatchNormForward::BatchNormForward(VarNode *x, @@ -129,17 +137,40 @@ BatchNormForward::do_make_node_prop() const { void BatchNormForward::scn_do_execute() { auto &&x = input(0)->dev_tensor(); + auto &&y = output(4)->dev_tensor(); + mgb_assert(x.layout().is_contiguous() && + y.layout().is_contiguous()); +#if MGB_ENABLE_IMPERATIVE + if (input().size() == 5) { // need running mean/variance + auto &&o0 = output(0)->dev_tensor(), + &&o1 = output(1)->dev_tensor(), + &&i0 = input(3)->dev_tensor(), + &&i1 = input(4)->dev_tensor(); + mgb_assert(o0.raw_ptr() && o1.raw_ptr()); // non-empty tensor + mgb_assert(o0.comp_node() == i0.comp_node() && + o1.comp_node() == i1.comp_node() && + o0.layout().eq_layout(i0.layout()) && + o1.layout().eq_layout(i1.layout())); + if (!m_force_inplace) { + if (o0.raw_ptr() != i0.raw_ptr()) { + o0.copy_from_fixlayout(i0); + } + if (o1.raw_ptr() != i1.raw_ptr()) { + o1.copy_from_fixlayout(i1); + } + } else { + mgb_assert(o0.raw_ptr() == i0.raw_ptr() + && 
o1.raw_ptr() == i1.raw_ptr()); + } + } +#endif auto scale = input(1)->dev_tensor().as_megdnn(); auto bias = input(2)->dev_tensor().as_megdnn(); auto mean = output(0)->dev_tensor().as_megdnn(); auto variance = output(1)->dev_tensor().as_megdnn(); auto save_mean = output(2)->dev_tensor().as_megdnn(); auto save_variance = output(3)->dev_tensor().as_megdnn(); - auto &&y = output(4)->dev_tensor(); - auto workspace = intl::get_megdnn_workspace_from_var( - output().back()); - mgb_assert(x.layout().is_contiguous() && - y.layout().is_contiguous()); + auto workspace = intl::get_megdnn_workspace_from_var(output().back()); megdnn_opr()->exec(x.as_megdnn(), scale, bias, mean, variance, save_mean, save_variance, y.as_megdnn(), workspace); } @@ -191,6 +222,14 @@ void BatchNormForward::init_output_dtype() { } } +void BatchNormForward::mem_plan_fwd_in2out_writable() { + if (!m_force_inplace && input().size() == 5) { + // TODO: testing + output(0)->set_fwd_in2out_writable(input(3)); + output(1)->set_fwd_in2out_writable(input(4)); + } +} + MGB_IMPL_OPR_GRAD(BatchNormForward) { mgb_assert(wrt_idx < 5); if (wrt_idx < 3) { diff --git a/src/opr/impl/internal/megdnn_opr_wrapper.cpp b/src/opr/impl/internal/megdnn_opr_wrapper.cpp index 3bcd479b5535fead29e1da84c69d3289620c8f98..82e66a0d296da89f984e1214284b7846f5d0e3b6 100644 --- a/src/opr/impl/internal/megdnn_opr_wrapper.cpp +++ b/src/opr/impl/internal/megdnn_opr_wrapper.cpp @@ -271,17 +271,26 @@ WorkspaceLimitGetter::get_impl(ComputingGraph *graph) { size_t WorkspaceLimitGetter::get_workspace_limit( ComputingGraph *graph, CompNode cn, size_t old_limit) { + if (graph->options().imperative_proxy_graph) { + return old_limit; + } if (!graph->options().seq_opt.enable_mem_reuse_alloc) return old_limit; return get_impl(graph)->get_workspace_limit(cn, old_limit); } bool WorkspaceLimitGetter::is_prealloc_run(ComputingGraph* graph) { + if (graph->options().imperative_proxy_graph) { + return false; + } return 
graph->options().seq_opt.enable_mem_reuse_alloc && get_impl(graph)->is_prealloc_run(); } VarNode* WorkspaceLimitGetter::register_to_graph(ComputingGraph *graph) { + if (graph->options().imperative_proxy_graph) { + return nullptr; + } auto maker = [graph](){ return std::make_shared(graph); }; diff --git a/src/opr/include/megbrain/opr/dnn/batch_norm.h b/src/opr/include/megbrain/opr/dnn/batch_norm.h index 27f2cbb0b99bc52fe671fca401a7861dfc5268c3..558f9132aae6d2a6f39fb9eda935cb60a0972c9e 100644 --- a/src/opr/include/megbrain/opr/dnn/batch_norm.h +++ b/src/opr/include/megbrain/opr/dnn/batch_norm.h @@ -75,6 +75,10 @@ MGB_DEFINE_OPR_CLASS(BatchNormForward, const TensorShapeArray &output_shapes) const override; void init_output_static_infer_desc() override; void init_output_dtype() override; + void mem_plan_fwd_in2out_writable() override; + + // if set to True, running mean/variance will be updated inplace + bool m_force_inplace = true; }; using BatchNorm = BatchNormForward;