diff --git a/lite/src/mge/network_impl.cpp b/lite/src/mge/network_impl.cpp
index 109b3f6d227b35e30810b526ed6458ead84bb200..47e3204540fb6a05b6faff3f1a8656f67678c3d3 100644
--- a/lite/src/mge/network_impl.cpp
+++ b/lite/src/mge/network_impl.cpp
@@ -210,9 +210,6 @@ void NetworkImplDft::use_tensorrt() {
 //! set the callback in async model
 void NetworkImplDft::set_async_callback(const AsyncCallback& callback) {
     LITE_ASSERT(!m_is_cpu_inplace_mode, "cpu inplace mode not support async mode");
-    LITE_ASSERT(
-            m_user_config->options.comp_node_seq_record_level == 0,
-            "record mode not support async mode");
     LITE_ASSERT(
             m_user_config->device_type == LiteDeviceType::LITE_CPU ||
                     m_user_config->device_type == LiteDeviceType::LITE_CUDA,
diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp
index 6854e4087416684bae67a5c6b6672fad4ea4342a..c37c6247290c6b74605f4ceb3005b076e1a99525 100644
--- a/src/core/impl/comp_node/comp_node.cpp
+++ b/src/core/impl/comp_node/comp_node.cpp
@@ -659,21 +659,4 @@ void CompNode::ImplBase::add_callback(megdnn::thin_function<void()>&&) {
             locator().to_string().c_str());
 }
 
-void CompNode::ImplBase::enable_dispatch() {
-    mgb_throw(
-            MegBrainError,
-            "Unsupported add callback to "
-            "comp node %s",
-            locator().to_string().c_str());
-}
-
-void CompNode::ImplBase::disable_dispatch(bool* flag) {
-    MGB_MARK_USED_VAR(flag);
-    mgb_throw(
-            MegBrainError,
-            "Unsupported add callback to "
-            "comp node %s",
-            locator().to_string().c_str());
-}
-
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/core/impl/comp_node/cpu/comp_node.cpp b/src/core/impl/comp_node/cpu/comp_node.cpp
index 5dbb9c596b439376958d1a40a585caaa607e94d5..6ec87c6cbdc516a9f7c37209ea285c5bb2374f0d 100644
--- a/src/core/impl/comp_node/cpu/comp_node.cpp
+++ b/src/core/impl/comp_node/cpu/comp_node.cpp
@@ -810,12 +810,6 @@ public:
                 task();
             }
         }
-
-    void enable_dispatch() override { m_env.cpu_env().enable_dispatch(); }
-
-    void disable_dispatch(bool* flag) override {
-        m_env.cpu_env().disable_dispatch(flag);
-    }
 };
 MGB_DYN_TYPE_OBJ_FINAL_IMPL(CompNodeRecorderImpl);
 #if MGB_HAVE_THREAD
diff --git a/src/core/impl/comp_node_env.cpp b/src/core/impl/comp_node_env.cpp
index b4421ed127ce289416e711143492dc17316f4e67..9a28f6415e74b089f16e2645d9b1e3b0bc789c32 100644
--- a/src/core/impl/comp_node_env.cpp
+++ b/src/core/impl/comp_node_env.cpp
@@ -474,35 +474,4 @@ void CompNodeEnv::on_bad_device_type(DeviceType expected) const {
 
 MGB_VERSION_SYMBOL3(MEGDNN, MEGDNN_MAJOR, MEGDNN_MINOR, MEGDNN_PATCH);
 
-void CompNodeEnv::CpuEnv::enable_dispatch() {
-    do_task_inplace = nullptr;
-}
-
-void CompNodeEnv::CpuEnv::disable_dispatch(bool* flag) {
-    do_task_inplace = flag;
-}
-
-void CompNodeEnv::CpuEnv::dispatch(Task&& task) const {
-    if (do_task_inplace && *do_task_inplace) {
-        task();
-    } else {
-        dispatcher->dispatch(std::move(task));
-    }
-}
-
-void CompNodeEnv::CpuEnv::dispatch(
-        MultiThreadingTask&& task, size_t parallelism) const {
-    if (do_task_inplace && *do_task_inplace) {
-        for (size_t i = 0; i < parallelism; ++i) {
-            task(i, 0);
-        }
-    } else {
-        dispatcher->dispatch(std::move(task), parallelism);
-    }
-}
-
-#if MGB_HAVE_THREAD
-MGB_THREAD_LOCAL_PTR(bool) CompNodeEnv::CpuEnv::do_task_inplace = nullptr;
-#endif
-
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/src/core/impl/graph/cg_impl.cpp b/src/core/impl/graph/cg_impl.cpp
index add192779c6609bde3be6d0531791f48cba79609..900cce4982891e488b1d329595816a68fcd23d04 100644
--- a/src/core/impl/graph/cg_impl.cpp
+++ b/src/core/impl/graph/cg_impl.cpp
@@ -168,32 +168,12 @@ MGB_DEFINE_OPR_CLASS(
         ComputingGraphImpl::CallbackCaller, SingleCNOperatorNodeBase) // {
     std::vector<std::vector<ComputingGraph::Callback>> m_cb;
 
-    //! CallbackCaller supports change memory address in output tensor record mode(only
-    //! on CPU). The whole callback will be dispatched(like dispatching tensor copy
-    //! instead of dispatching memcpy).
-    //! Side effect: sync() is not supported in callback anymore. Users should call
-    //! func->wait() instead out of callback to sync data from Device to Host.
-    //! Note : only record level 1 supports change memory address in output tensor.
-    //! HostTensor captured in callback should not on cpu default.
     void scn_do_execute() override {
         for (size_t i = 0; i < input().size(); ++i) {
             auto&& in = input(i)->dev_tensor();
             for (auto&& callback : m_cb[i]) {
-                if (this->owner_graph()->options().comp_node_seq_record_level == 1 &&
-                    in.comp_node().device_type() == CompNode::DeviceType::CPU &&
-                    in.comp_node() != CompNode::default_cpu()) {
-                    auto record_cb = [&in, &callback]() {
-                        auto comp_node = in.comp_node();
-                        bool do_task_inplace = true;
-                        comp_node.disable_dispatch(&do_task_inplace);
-                        callback(const_cast<DeviceTensorND&>(in));
-                        comp_node.enable_dispatch();
-                    };
-                    in.comp_node().add_callback(record_cb);
-                } else {
-                    // const cast for backward API compatibility
-                    callback(const_cast<DeviceTensorND&>(in));
-                }
+                // const cast for backward API compatibility
+                callback(const_cast<DeviceTensorND&>(in));
             }
         }
     }
diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h
index d672a8039bf7c6f46d102acedd82082b43419e55..dfd52193bc3289749838b6dc213f41f415e04426 100644
--- a/src/core/include/megbrain/comp_node.h
+++ b/src/core/include/megbrain/comp_node.h
@@ -426,16 +426,6 @@ public:
         return m_impl->add_callback(std::move(cb));
     }
 
-    /*!
-     * enable dispatcher
-     */
-    void enable_dispatch() { m_impl->enable_dispatch(); }
-
-    /*!
-     * disable dispatcher so that task will be done inplace
-     */
-    void disable_dispatch(bool* flag) { m_impl->disable_dispatch(flag); }
-
    enum class Flag : uint32_t {
         //! Whether computing recorder is supported on this comp node (i.e.
         //! whether non-zero comp_node_seq_record_level is allowed)
@@ -562,10 +552,6 @@ protected:
 
     virtual void add_callback(megdnn::thin_function<void()>&&);
 
-    virtual void enable_dispatch();
-
-    virtual void disable_dispatch(bool* flag);
-
     virtual uint64_t get_uid() {
         mgb_throw(MegBrainError, "get_uid is not impl yet");
     };
diff --git a/src/core/include/megbrain/comp_node_env.h b/src/core/include/megbrain/comp_node_env.h
index f8f2f013de71c48b272a2e5734d4a5023efec0dd..4d051729b80a77d559e71b436b7843a5893adabb 100644
--- a/src/core/include/megbrain/comp_node_env.h
+++ b/src/core/include/megbrain/comp_node_env.h
@@ -503,20 +503,15 @@ public:
         using AffinityCallBack = thin_function<void(size_t)>;
 
         std::shared_ptr<CPUDispatcher> dispatcher;
-#if MGB_HAVE_THREAD
-        static MGB_THREAD_LOCAL_PTR(bool) do_task_inplace;
-#else
-        bool* do_task_inplace = nullptr;
-#endif
-
-        MGE_WIN_DECLSPEC_FUC void enable_dispatch();
-        MGE_WIN_DECLSPEC_FUC void disable_dispatch(bool* flag);
-
-        MGE_WIN_DECLSPEC_FUC void dispatch(Task&& task) const;
+        MGE_WIN_DECLSPEC_FUC void dispatch(Task&& task) const {
+            dispatcher->dispatch(std::move(task));
+        }
 
         MGE_WIN_DECLSPEC_FUC void dispatch(
-                MultiThreadingTask&& task, size_t parallelism) const;
+                MultiThreadingTask&& task, size_t parallelism) const {
+            dispatcher->dispatch(std::move(task), parallelism);
+        }
 
         void set_affinity(AffinityCallBack&& cb) const {
             dispatcher->set_affinity(std::move(cb));
@@ -529,12 +524,6 @@ public:
         return m_cpu_env;
     }
 
-    CpuEnv& cpu_env() {
-        if (mgb_unlikely(m_property.type != DeviceType::CPU))
-            on_bad_device_type(DeviceType::CPU);
-        return m_cpu_env;
-    }
-
     //! init this as a cpu env
     void init_cpu(const CpuEnv& env, CompNode comp_node);
 
diff --git a/src/core/test/comp_node_helper.cpp b/src/core/test/comp_node_helper.cpp
index 43c0fcca3cc01fa8eea6f3a4299a91a21c906db9..a3d84cfa48a458e61b5286d63a9af64df72e26c9 100644
--- a/src/core/test/comp_node_helper.cpp
+++ b/src/core/test/comp_node_helper.cpp
@@ -44,7 +44,7 @@ void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
         graph->options().fake_next_exec = true;
         graph->options().var_sanity_check_first_run = false;
     }
-    auto func = graph->compile({make_callback_copy(z, host_z, false)});
+    auto func = graph->compile({make_callback_copy(z, host_z)});
     if (fake_first) {
         func->execute();  // first exec
     }
@@ -55,8 +55,6 @@ void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
         }
         host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
         func->execute();
-        func->wait();
-        host_z.sync();
         auto expect = eval_conv_cpu(*host_x, *host_y, param);
         MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3) << "iter " << iter;
     }
@@ -72,28 +70,6 @@ void run_comp_seq_rec_basic(CompNode cn, bool fake_first) {
     ASSERT_EQ(executed[2], change);
     // create new recorder, exec with recorder
     ASSERT_EQ(executed[3], change + 1);
-
-    //! then we change host_z's ptr each time and check result
-    HostTensorND host_iter;
-    host_iter.copy_from(host_z);
-    std::vector<std::shared_ptr<HostTensorND>> m_hosts(10);
-    for (size_t i = 0; i < 10; i++) {
-        m_hosts[i] = gen(host_z.shape(), host_z.comp_node());
-    }
-    iter = 0;
-    for (; iter < 10; ++iter) {
-        auto host_tmp = m_hosts[iter];
-        auto host_z_storage = host_z.storage();
-        auto origin_ptr = host_z_storage.raw_storage();
-        host_z_storage.reset(
-                host_z.comp_node(), host_z_storage.size(),
-                host_tmp->storage().raw_storage());
-        auto changed_ptr = host_z_storage.raw_storage();
-        ASSERT_TRUE(origin_ptr != changed_ptr);
-        func->execute();
-        func->wait();
-        MGB_ASSERT_TENSOR_NEAR(host_iter, host_z, 1e-3) << "iter " << iter;
-    }
 }
 
 void run_comp_seq_rec_basic_level2(CompNode cn) {
@@ -178,7 +154,7 @@ void run_comp_seq_rec_dyn_elemwise(CompNode cn, bool fake_first) {
         w = opr::Elemwise::make({x, y, z}, opr::Elemwise::Mode::FUSE_MUL_ADD3);
 
     HostTensorND host_w;
-    auto func = graph->compile({make_callback_copy(w, host_w, false)});
+    auto func = graph->compile({make_callback_copy(w, host_w)});
     if (fake_first) {
         func->execute();
     }
@@ -190,30 +166,9 @@ void run_comp_seq_rec_dyn_elemwise(CompNode cn, bool fake_first) {
         }
         host_x->copy_from(*gen(host_x->shape(), cn));
         func->execute();
-        func->wait();
         auto expect = check();
         MGB_ASSERT_TENSOR_EQ(expect, host_w) << "iter " << i;
     }
-    //! then we change host_z's ptr each time and check result
-    HostTensorND host_iter;
-    host_iter.copy_from(host_w);
-    std::vector<std::shared_ptr<HostTensorND>> m_hosts(10);
-    for (size_t i = 0; i < 10; i++) {
-        m_hosts[i] = gen(host_w.shape(), host_w.comp_node());
-    }
-    for (size_t iter = 0; iter < 10; ++iter) {
-        auto host_tmp = m_hosts[iter];
-        auto host_w_storage = host_w.storage();
-        auto origin_ptr = host_w_storage.raw_storage();
-        host_w_storage.reset(
-                host_w.comp_node(), host_w_storage.size(),
-                host_tmp->storage().raw_storage());
-        auto changed_ptr = host_w_storage.raw_storage();
-        ASSERT_TRUE(origin_ptr != changed_ptr);
-        func->execute();
-        func->wait();
-        MGB_ASSERT_TENSOR_EQ(host_iter, host_w) << "iter " << iter;
-    }
 }
 
 void run_level2(CompNode cn, bool use_multi_holder) {
@@ -426,9 +381,6 @@ void run(CompNode cn) {
         HostTensorND host_y;
         graph->options().var_sanity_check_first_run = false;
         graph->options().comp_node_seq_record_level = level;
-        if (level == 1) {
-            sync = false;
-        }
         auto cb = [&](const DeviceTensorND& dv) {
             host_y.copy_from(dv);
             if (sync) {
@@ -466,9 +418,6 @@ void run(CompNode cn) {
         HostTensorND host_y;
         graph->options().var_sanity_check_first_run = false;
         graph->options().comp_node_seq_record_level = level;
-        if (level == 1) {
-            sync = false;
-        }
         auto cb = [&](const DeviceTensorND& dv) {
             host_y.copy_from(dv);
             if (sync) {
@@ -479,8 +428,8 @@ void run(CompNode cn) {
         if (level == 2) {
             ComputingGraph::assert_destroy(graph);
         }
-        for (int k = 0; k < 3; ++k) {
-            host_x->copy_from(*gen(host_x->shape(), cn));
+        for (int i = 0; i < 3; ++i) {
+            host_x->copy_from(*gen(host_x->shape()));
             HostTensorND expect{host_x->comp_node(), {5, 4}};
             auto px = host_x->ptr<float>(), py = expect.ptr<float>();
             for (int i = 0; i < 5; ++i) {
@@ -555,16 +504,14 @@ void run(CompNode cn) {
              y = opr::Host2DeviceCopy::make(*graph, host_y),
              z = opr::Convolution::make(x, y, param);
         graph->options().comp_node_seq_record_level = 1;
-        return graph->compile({make_callback_copy(z, host_z_v[graph_id], false)});
+        return graph->compile({make_callback_copy(z, host_z_v[graph_id])});
     };
     funcs.push_back(gen_graph(0));
     funcs.push_back(gen_graph(1));
     for (int iter = 0; iter < 10; ++iter) {
         host_x->copy_from_fixlayout(*gen(host_x->shape(), cn));
         funcs[0]->execute();
-        funcs[0]->wait();
         funcs[1]->execute();
-        funcs[1]->wait();
         auto expect = eval_conv_cpu(*host_x, *host_y, param);
         MGB_ASSERT_TENSOR_NEAR(expect, host_z_v[0], 1e-3) << "iter " << iter;
         MGB_ASSERT_TENSOR_NEAR(expect, host_z_v[1], 1e-3) << "iter " << iter;
diff --git a/src/core/test/graph/misc.cpp b/src/core/test/graph/misc.cpp
index 1378f03996fceee403333711ef1c92da059a812d..aa0d7c808a089aae41d05960770b11aada64ccaa 100644
--- a/src/core/test/graph/misc.cpp
+++ b/src/core/test/graph/misc.cpp
@@ -1375,17 +1375,13 @@ TEST(TestGraph, CompNodeFinalize) {
             graph->options().var_sanity_check_first_run = false;
             graph->options().comp_node_seq_record_level = rec;
         }
-        auto sync = (rec != 1);
-        auto func = graph->compile({make_callback_copy(z, host_z, sync)});
+        auto func = graph->compile({make_callback_copy(z, host_z)});
         if (rec == 2) {
             ComputingGraph::assert_destroy(graph);
         }
         for (int i = 0; i < 5; ++i) {
             host_x->copy_from(*gen({1}, cn));
             func->execute();
-            if (!sync) {
-                func->wait();
-            }
             MGB_ASSERT_FLOAT_EQ(
                     host_x->ptr<float>()[0] + host_y->ptr<float>()[0],
                     host_z.ptr<float>()[0]);
@@ -1937,7 +1933,6 @@ void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) {
 #endif
     graph->options().graph_opt.weight_preprocess = true;
     graph->options().comp_node_seq_record_level = record_level;
-    auto sync = (record_level != 1);
     auto mkvar = [&](const char* name, const TensorShape& shp) {
         return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
     };
@@ -1975,17 +1970,11 @@ void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) {
     });
 
     HostTensorND host_y;
-    auto func = graph->compile({make_callback_copy(y, host_y, sync)});
+    auto func = graph->compile({make_callback_copy(y, host_y)});
     //! flag the no need memory of var
     func->execute();
-    if (!sync) {
-        func->wait();
-    }
     //! free the no need memory of var
     func->execute();
-    if (!sync) {
-        func->wait();
-    }
     auto check = [&](SymbolVar v) {
         ASSERT_TRUE(v.node()->contain_flag(VarNode::Flag::MEMORY_NO_NEED));
         ASSERT_TRUE(v.node()->dev_tensor().empty());
diff --git a/src/core/test/graph/multi_thread.cpp b/src/core/test/graph/multi_thread.cpp
index 5cf48a4f13bfb9d27780466b99c2d47d96df274d..5f52650db3f357e2d37c7d3715b0f9edb349a46f 100644
--- a/src/core/test/graph/multi_thread.cpp
+++ b/src/core/test/graph/multi_thread.cpp
@@ -213,13 +213,9 @@ TEST(TestGraph, MultiThreadRecorder) {
             z = opr::Convolution::make(x, y, param);
        graph->options().comp_node_seq_record_level = record_level;
        graph->options().var_sanity_check_first_run = false;
-        auto sync = (record_level != 1);
-        auto func = graph->compile({make_callback_copy(z, host_z, sync)});
+        auto func = graph->compile({make_callback_copy(z, host_z)});
         for (int i = 0; i < 5; i++) {
             func->execute();
-            if (!sync) {
-                func->wait();
-            }
         }
         auto expect = eval_conv_cpu(*host_x, *host_y, param);
         MGB_ASSERT_TENSOR_NEAR(expect, host_z, 1e-3);
diff --git a/src/plugin/test/opr_io_dump.cpp b/src/plugin/test/opr_io_dump.cpp
index c9466973e4ab9f6bb73a09805d032a0855abc4de..7ca987ef3eaba655f7e3222e8597cf240aaeebe1 100644
--- a/src/plugin/test/opr_io_dump.cpp
+++ b/src/plugin/test/opr_io_dump.cpp
@@ -50,7 +50,6 @@ void run_test(CompNode cn, const PluginMaker& plugin_maker) {
         graph->options().var_sanity_check_first_run = false;
         graph->options().comp_node_seq_record_level = record;
         graph->options().graph_opt_level = 0;
-        auto sync = (record != 1);
         auto plug = plugin_maker(graph.get(), record);
 
         // make a non-contiguous value, also introduce some shape dependencies
@@ -77,14 +76,11 @@ void run_test(CompNode cn, const PluginMaker& plugin_maker) {
         cg::DepOprIter{cb_rename}.add(y);
 
         HostTensorND host_y;
-        auto func = graph->compile({make_callback_copy(y, host_y, sync)});
+        auto func = graph->compile({make_callback_copy(y, host_y)});
         if (record == 2) {
             ComputingGraph::assert_destroy(graph);
         }
         func->execute();
-        if (!sync) {
-            func->wait();
-        }
         plug->flush_lazy();
 
         MGB_ASSERT_TENSOR_EQ(make_expect(), host_y);
@@ -95,16 +91,10 @@ void run_test(CompNode cn, const PluginMaker& plugin_maker) {
             *host_x = *gen(host_x->shape(), cn);
         }
         func->execute();
-        if (!sync) {
-            func->wait();
-        }
         MGB_ASSERT_TENSOR_EQ(make_expect(), host_y);
 
         for (int i = 0; i < 2; ++i) {
             host_x->copy_from(*gen(host_x->shape(), cn));
             func->execute();
-            if (!sync) {
-                func->wait();
-            }
             MGB_ASSERT_TENSOR_EQ(make_expect(), host_y);
         }