From dc0ab9b64ea15a043a96b192f426eabd02dddc97 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 8 Aug 2022 15:25:07 +0800 Subject: [PATCH] feat(lite): replace warp when src is discrete input GitOrigin-RevId: 2bf7980ac6373b691081ab7be9975ec6fa57f8ae --- lite/include/lite/network.h | 17 ++ lite/lite-c/include/lite-c/network_c.h | 17 ++ lite/lite-c/src/network.cpp | 20 ++- lite/pylite/megenginelite/network.py | 33 ++++ lite/pylite/test/test_network.py | 42 +++++ lite/src/mge/network_impl.cpp | 182 +++++++++++++++++++++ lite/src/mge/network_impl.h | 17 ++ lite/src/network.cpp | 17 ++ lite/src/network_impl_base.h | 16 ++ lite/test/test_network.cpp | 90 ++++++++++ lite/test/test_network_c.cpp | 42 +++++ src/core/include/megbrain/graph/helper.h | 2 +- src/gopt/include/megbrain/gopt/framework.h | 11 +- src/opr/impl/imgproc.cpp | 12 -- src/opr/include/megbrain/opr/imgproc.h | 2 +- 15 files changed, 500 insertions(+), 20 deletions(-) diff --git a/lite/include/lite/network.h b/lite/include/lite/network.h index b06efa0d5..2b7e5abf8 100644 --- a/lite/include/lite/network.h +++ b/lite/include/lite/network.h @@ -117,6 +117,9 @@ struct LITE_API Options { * * @param auto_optimize_inference lite will detect the device information add * set the options heuristically + * + * @param discrete_input_name configure which input is composed of discrete + * multiple tensors */ struct LITE_API Config { bool has_compression = false; @@ -126,6 +129,7 @@ struct LITE_API Config { std::string bare_model_cryption_name = {}; Options options = {}; bool auto_optimize_inference = false; + std::string discrete_input_name = {}; }; /*! @@ -289,9 +293,22 @@ public: std::shared_ptr get_io_tensor( std::string io_name, LiteTensorPhase phase = LiteTensorPhase::LITE_IO); + /** @brief get the network input tensors which input consists of discrete multiple + * tensors, layout (1, c, h, w) + * + * @param io_name the name of the tensor + * @param phase indicate the tensor is input tensor + */ + std::vector> get_io_tensors( + std::string io_name, LiteTensorPhase phase = LiteTensorPhase::LITE_INPUT); + //! get the network input tensor by index std::shared_ptr get_input_tensor(size_t index); + //! get the network input tensors which input consists of discrete multiple tensors + //! by index + std::vector> get_input_tensors(size_t index); + //! get the network output tensor by index std::shared_ptr get_output_tensor(size_t index); diff --git a/lite/lite-c/include/lite-c/network_c.h b/lite/lite-c/include/lite-c/network_c.h index 7634304a3..8b7316a74 100644 --- a/lite/lite-c/include/lite-c/network_c.h +++ b/lite/lite-c/include/lite-c/network_c.h @@ -103,6 +103,9 @@ extern LITE_API const LiteOptions default_option; *\param auto_optimize_inference lite will detect the device information add * set the options heuristically + * + * \param discrete_input_name configure which input is composed of discrete + * multiple tensors */ typedef struct LiteConfig { int has_compression; @@ -112,6 +115,7 @@ typedef struct LiteConfig { const char* bare_model_cryption_name; LiteOptions options; int auto_optimize_inference; + const char* discrete_input_name; } LiteConfig; //! 
get default config @@ -298,6 +302,19 @@ LITE_API int LITE_get_io_tensor( LiteNetwork network, const char* io_name, LiteTensorPhase phase, LiteTensor* tensor); +/** + * \brief get the n'th tensor in the network input tensors whose input + * consists of discrete multiple tensors and name is io_name, layout (1, c, h, w) + * \param[in] network The loaded model + * \param[in] io_name The input name + * \param[in] n_idx The index of tensor + * \param[in] phase The tensor phase + * \param[out] tensor The IO tensor get from the network + */ +LITE_API int LITE_get_io_tensors( + LiteNetwork network, const char* io_name, size_t n_idx, LiteTensorPhase phase, + LiteTensor* tensor); + /** * \brief get the input tensor name in the order in loaded model * \param[in] network The loaded model diff --git a/lite/lite-c/src/network.cpp b/lite/lite-c/src/network.cpp index 8325c73c3..0c8ec8a4b 100644 --- a/lite/lite-c/src/network.cpp +++ b/lite/lite-c/src/network.cpp @@ -43,7 +43,8 @@ LiteConfig default_config_t = { .backend = LiteBackend::LITE_DEFAULT, .bare_model_cryption_name = nullptr, .options = default_option, - .auto_optimize_inference = false}; + .auto_optimize_inference = false, + .discrete_input_name = nullptr}; LiteConfig* default_config() { return &default_config_t; } @@ -135,6 +136,9 @@ lite::Config convert_to_lite_config(const LiteConfig c_config) { lite_config.options.enable_nchw64 = c_config.options.enable_nchw64; lite_config.auto_optimize_inference = c_config.auto_optimize_inference; + if (c_config.discrete_input_name) { + lite_config.discrete_input_name = c_config.discrete_input_name; + } return lite_config; } @@ -274,6 +278,20 @@ int LITE_get_io_tensor( LITE_CAPI_END(); } +int LITE_get_io_tensors( + LiteNetwork network, const char* io_name, size_t n_idx, LiteTensorPhase phase, + LiteTensor* tensor) { + LITE_CAPI_BEGIN(); + LITE_ASSERT(network, "The network pass to LITE api is null"); + auto io_tensors = + static_cast(network)->get_io_tensors(io_name, phase); + LITE_ASSERT( + n_idx < io_tensors.size(), "n_idx should be less than %zu", + io_tensors.size()); + *tensor = io_tensors[n_idx].get(); + LITE_CAPI_END(); +} + int LITE_get_input_name(const LiteNetwork network, size_t index, const char** name) { LITE_CAPI_BEGIN(); LITE_ASSERT(network && name, "The network pass to LITE api is null"); diff --git a/lite/pylite/megenginelite/network.py b/lite/pylite/megenginelite/network.py index 57c984ab4..390e7627c 100644 --- a/lite/pylite/megenginelite/network.py +++ b/lite/pylite/megenginelite/network.py @@ -173,6 +173,8 @@ class LiteConfig(Structure): auto_optimize_inference: lite will detect the device information add set the options heuristically + discrete_input_name: configure which input is composed of discrete multiple tensors + Examples: .. 
code-block:: @@ -193,6 +195,7 @@ class LiteConfig(Structure): ("_bare_model_cryption_name", c_char_p), ("options", LiteOptions), ("auto_optimize_inference", c_int), + ("discrete_input_name", c_char_p), ] def __init__(self, device_type=LiteDeviceType.LITE_CPU, option=None): @@ -207,6 +210,7 @@ class LiteConfig(Structure): self.has_compression = 0 self.backend = LiteBackend.LITE_DEFAULT self.auto_optimize_inference = 0 + self.discrete_input_name = c_char_p(b"") @property def bare_model_cryption_name(self): @@ -229,6 +233,7 @@ class LiteConfig(Structure): "bare_model_cryption_name": self.bare_model_cryption_name, "options": self.options, "auto_optimize_inference": self.auto_optimize_inference, + "discrete_input_name": self.discrete_input_name, } return data.__repr__() @@ -536,6 +541,10 @@ class _NetworkAPI(_LiteCObjBase): [c_char_p, c_size_t, LiteConfig, POINTER(_LiteNetworkIO)], ), ("LITE_extra_configure", [_Cnetwork, LiteExtraConfig]), + ( + "LITE_get_io_tensors", + [_Cnetwork, c_char_p, c_size_t, c_int, POINTER(_Ctensor)], + ), ] @@ -736,6 +745,30 @@ class LiteNetwork(object): tensor.update() return tensor + def get_io_tensors(self, name, n_idx, phase=LiteTensorPhase.LITE_INPUT): + """ + get the n_idx'th tensor in the network input tensors whose + input consists of discrete multiple tensors and tensor name is name + + Args: + name: the name of input tensor + n_idx: the tensor index + phase: the type of LiteTensor, this is useful to separate input tensor with the same name + + Returns: + the tensors with given name and type + """ + if type(name) == str: + c_name = c_char_p(name.encode("utf-8")) + else: + c_name = c_char_p(name) + tensor = LiteTensor(physic_construct=False) + self._api.LITE_get_io_tensors( + self._network, c_name, n_idx, phase, byref(tensor._tensor) + ) + tensor.update() + return tensor + def get_input_name(self, index): """ get the input name by the index in the network diff --git a/lite/pylite/test/test_network.py b/lite/pylite/test/test_network.py index 336c84244..8e0469924 100644 --- a/lite/pylite/test/test_network.py +++ b/lite/pylite/test/test_network.py @@ -500,3 +500,45 @@ class TestNetwork(TestShuffleNet): os.remove(fast_run_cache) os.remove(global_layout_transform_model) + + +class TestDiscreteInputNet(unittest.TestCase): + source_dir = os.getenv("LITE_TEST_RESOURCE") + data0_path = os.path.join(source_dir, "data0.npy") + data1_path = os.path.join(source_dir, "data1.npy") + data2_path = os.path.join(source_dir, "data2.npy") + model_path = os.path.join(source_dir, "test_discrete_input.mge") + data0 = np.load(data0_path) + data1 = np.load(data1_path) + data2 = np.load(data2_path) + + def do_forward(self, network, times=3): + data_name = network.get_input_name(1) + datas = [] + datas.append(network.get_io_tensors(data_name, 0)) + datas.append(network.get_io_tensors(data_name, 1)) + datas.append(network.get_io_tensors(data_name, 2)) + + datas[0].set_data_by_copy(self.data0) + datas[1].set_data_by_copy(self.data1) + datas[2].set_data_by_copy(self.data2) + for i in range(times): + network.forward() + network.wait() + + +class TestDiscreteInput(TestDiscreteInputNet): + def test_discrete_input(self): + config = LiteConfig() + config.discrete_input_name = "data".encode("utf-8") + input_io = LiteIO( + "data", + is_host=True, + io_type=LiteIOType.LITE_IO_VALUE, + layout=LiteLayout([3, 3, 224, 224]), + ) + ios = LiteNetworkIO() + ios.add_input(input_io) + network = LiteNetwork(config, ios) + network.load(self.model_path) + self.do_forward(network) diff --git 
a/lite/src/mge/network_impl.cpp b/lite/src/mge/network_impl.cpp
index 5aec5beb8..a37c9d455 100644
--- a/lite/src/mge/network_impl.cpp
+++ b/lite/src/mge/network_impl.cpp
@@ -13,6 +13,7 @@
 #include "megbrain/comp_node_env.h"
 #include "megbrain/graph.h"
 #include "megbrain/graph/cg.h"
+#include "megbrain/opr/imgproc.h"
 #include "megbrain/opr/io.h"
 #include "megbrain/opr/tensor_manip.h"
 #include "megbrain/tensor.h"
@@ -259,6 +260,88 @@ void NetworkImplDft::make_output_spec() {
     }
 }
 
+void NetworkImplDft::replace_src_discrete_input_opr_pass() {
+    mgb::ThinHashMap<mgb::SymbolVar, mgb::SymbolVar> out_var_map;
+
+    auto dest_with_extra_deps =
+            get_dest_vars_with_extra_deps(m_load_result.output_var_list);
+    gopt::SubGraph graph{dest_with_extra_deps};
+    auto rewriter = graph.make_rewriter();
+
+    auto on_opr = [&](mgb::cg::OperatorNodeBase* opr) {
+        if (opr->same_type<mgb::opr::WarpPerspective>()) {
+            bool is_h2d = true;
+            if (opr->input(0)->owner_opr()->same_type<mgb::opr::Host2DeviceCopy>())
+                is_h2d = true;
+            else if (opr->input(0)
+                             ->owner_opr()
+                             ->same_type<mgb::opr::VolatileSharedDeviceTensor>())
+                is_h2d = false;
+            else
+                return;
+
+            SymbolVarArray srcs;
+            if (is_h2d) {
+                auto h2d = opr->input(0)->owner_opr();
+                for (auto&& inp : get_io_tensors(m_user_config->discrete_input_name)) {
+                    auto val = TensorHelper::implement(inp)
+                                       ->cast_final_safe<TensorImplDft>()
+                                       .m_host_tensor;
+                    LITE_ASSERT(val);
+                    srcs.push_back(mgb::opr::Host2DeviceCopy::make(
+                            *m_load_result.graph, val, h2d->config()));
+                }
+            } else {
+                auto volatiled = opr->input(0)->owner_opr();
+                for (auto&& inp : get_io_tensors(m_user_config->discrete_input_name)) {
+                    auto val = TensorHelper::implement(inp)
+                                       ->cast_final_safe<TensorImplDft>()
+                                       .m_dev_tensor;
+                    LITE_ASSERT(val);
+                    srcs.push_back(mgb::opr::VolatileSharedDeviceTensor::make(
+                            *m_load_result.graph, val, volatiled->config()));
+                }
+            }
+
+            auto& warp = opr->cast_final<mgb::opr::WarpPerspective>();
+            SymbolVar new_out;
+            if (opr->input().size() == 3) {
+                new_out = mgb::opr::WarpPerspective::make(
+                        srcs, warp.input(1), warp.input(2), warp.param(),
+                        warp.config());
+            } else {
+                LITE_ASSERT(opr->input().size() == 4);
+                new_out = mgb::opr::WarpPerspective::make(
+                        srcs, warp.input(1), warp.input(2), warp.input(3), warp.param(),
+                        warp.config());
+            }
+            rewriter.replace_var(
+                    warp.output(0), new_out.node(),
+                    "replace WarpPerspective to WarpPerspective multi src version.");
+        } else {
+            rewriter.auto_replace_outputs(opr);
+        }
+    };
+    graph.iter(on_opr);
+    rewriter.apply_inplace();
+    auto new_ovar = graph.endpoint_vars();
+    new_ovar.resize(m_load_result.output_var_list.size());
+
+    for (size_t i = 0; i < new_ovar.size(); ++i) {
+        out_var_map[m_load_result.output_var_list[i]] = new_ovar[i];
+    }
+    for (auto&& i : m_load_result.output_var_map) {
+        i.second = out_var_map.at(i.second);
+    }
+    for (auto&& i : m_load_result.output_var_map_id) {
+        i.second = out_var_map.at(i.second);
+    }
+    for (size_t i = 0; i < m_load_result.output_var_list.size(); i++) {
+        new_ovar[i].rename(m_load_result.output_var_list[i].node()->name());
+    }
+    m_load_result.output_var_list = std::move(new_ovar);
+}
+
 void NetworkImplDft::replace_dev_input_pass() {
     mgb::CompNode::Locator locator;
     m_load_config.comp_node_mapper(locator);
@@ -528,6 +611,8 @@ void NetworkImplDft::configure_after_loaded() {
 
 void NetworkImplDft::compile_graph() {
     replace_dev_input_pass();
+    if (!m_user_config->discrete_input_name.empty())
+        replace_src_discrete_input_opr_pass();
     make_output_spec();
     m_execute_func = m_load_result.graph_compile(m_output_spec);
 }
@@ -691,6 +776,11 @@ void NetworkImplDft::update_input() {
             m_network_io->inputs.push_back(io_in);
         }
     }
+
+    if (!m_user_config->discrete_input_name.empty()) {
+        update_input_lite_tensors();
+    }
+
     //! delete the IO that is not the network
     for (auto it = m_network_io->inputs.begin(); it != m_network_io->inputs.end();) {
         if (it->lite_tensor == nullptr) {
@@ -702,6 +792,79 @@ void NetworkImplDft::update_input() {
     }
 }
 
+void NetworkImplDft::update_input_lite_tensors() {
+    auto device_type = m_user_config->device_type;
+    auto device_id = m_compnode_locator.device;
+    auto stream_id = m_compnode_locator.stream;
+
+    for (auto&& in_tensor_iter : m_load_result.tensor_map) {
+        if (in_tensor_iter.first != m_user_config->discrete_input_name) {
+            continue;
+        }
+        bool found = false;
+        for (auto&& config_in : m_network_io->inputs) {
+            if (in_tensor_iter.first == config_in.name) {
+                found = true;
+                size_t bs = in_tensor_iter.second->shape(0);
+                auto shape = in_tensor_iter.second->shape();
+                shape.shape[0] = 1;
+                if (config_in.config_layout.ndim) {
+                    bs = config_in.config_layout.shapes[0];
+                    shape.shape[1] = config_in.config_layout.shapes[1];
+                    shape.shape[2] = config_in.config_layout.shapes[2];
+                    shape.shape[3] = config_in.config_layout.shapes[3];
+                }
+                HostTensorND tensor(
+                        in_tensor_iter.second->comp_node(), shape,
+                        in_tensor_iter.second->dtype(),
+                        in_tensor_iter.second->format());
+                for (size_t i = 0; i < bs; ++i) {
+                    if (config_in.is_host) {
+                        config_in.lite_tensors.push_back(std::make_shared<Tensor>(
+                                device_id, stream_id, device_type, true));
+                        TensorHelper::implement(config_in.lite_tensors[i])
+                                ->cast_final_safe<TensorImplDft>()
+                                .m_host_tensor = std::make_shared<HostTensorND>(tensor);
+                        config_in.lite_tensors[i]->update_from_implement();
+                    } else {
+                        config_in.lite_tensors.push_back(std::make_shared<Tensor>(
+                                device_id, stream_id, device_type));
+                        config_in.lite_tensors[i]->set_layout(
+                                to_lite_layout(tensor.layout()));
+                    }
+                    TensorHelper::implement(config_in.lite_tensors[i])
+                            ->cast_final_safe<TensorImplDft>()
+                            .m_record_reset =
+                            m_user_config->options.comp_node_seq_record_level > 0;
+                }
+            }
+        }
+        if (!found) {
+            size_t bs = in_tensor_iter.second->shape(0);
+            auto shape = in_tensor_iter.second->shape();
+            shape.shape[0] = 1;
+            HostTensorND tensor(
+                    in_tensor_iter.second->comp_node(), shape,
+                    in_tensor_iter.second->dtype(), in_tensor_iter.second->format());
+            IOInner io_in;
+            io_in.name = in_tensor_iter.first;
+            for (size_t i = 0; i < bs; ++i) {
+                io_in.lite_tensors.push_back(std::make_shared<Tensor>(
+                        device_id, stream_id, device_type, true));
+                TensorHelper::implement(io_in.lite_tensors[i])
+                        ->cast_final_safe<TensorImplDft>()
+                        .m_host_tensor = std::make_shared<HostTensorND>(tensor);
+                TensorHelper::implement(io_in.lite_tensors[i])
+                        ->cast_final_safe<TensorImplDft>()
+                        .m_record_reset =
+                        m_user_config->options.comp_node_seq_record_level > 0;
+                io_in.lite_tensors[i]->update_from_implement();
+            }
+            m_network_io->inputs.push_back(io_in);
+        }
+    }
+}
+
 void NetworkImplDft::update_output() {
     auto device_type = m_user_config->device_type;
     auto device_id = m_compnode_locator.device;
@@ -855,10 +1018,29 @@ std::shared_ptr<Tensor> NetworkImplDft::get_io_tensor(
     return nullptr;
 }
 
+std::vector<std::shared_ptr<Tensor>> NetworkImplDft::get_io_tensors(
+        std::string io_name, LiteTensorPhase phase) {
+    if (phase == LiteTensorPhase::LITE_INPUT) {
+        for (auto&& config_in : m_network_io->inputs) {
+            if (io_name == config_in.name &&
+                config_in.name == m_user_config->discrete_input_name) {
+                return config_in.lite_tensors;
+            }
+        }
+    }
+    LITE_THROW(mgb::ssprintf(
+            "tensor name must be %s input tensor name.", io_name.c_str()));
+    return {};
+}
+
 std::shared_ptr<Tensor> NetworkImplDft::get_input_tensor(size_t index) {
     return get_io_tensor(get_input_name(index));
 }
 
+std::vector<std::shared_ptr<Tensor>> NetworkImplDft::get_input_tensors(size_t index) {
+    return get_io_tensors(get_input_name(index));
+}
+
 std::shared_ptr<Tensor> NetworkImplDft::get_output_tensor(size_t index) {
     return get_io_tensor(get_output_name(index));
 }
diff --git a/lite/src/mge/network_impl.h b/lite/src/mge/network_impl.h
index 02999e23e..bef304bcd 100644
--- a/lite/src/mge/network_impl.h
+++ b/lite/src/mge/network_impl.h
@@ -57,9 +57,19 @@ public:
             std::string io_name,
             LiteTensorPhase phase = LiteTensorPhase::LITE_IO) override;
 
+    //! get the network input tensors which input consists of discrete multiple tensors,
+    //! layout (1, c, h, w)
+    std::vector<std::shared_ptr<Tensor>> get_io_tensors(
+            std::string io_name,
+            LiteTensorPhase phase = LiteTensorPhase::LITE_INPUT) override;
+
     //! get the input tensor by index in the load_result tensormap
     std::shared_ptr<Tensor> get_input_tensor(size_t index) override;
 
+    //! get the network input tensors which input consists of discrete multiple tensors
+    //! by index
+    std::vector<std::shared_ptr<Tensor>> get_input_tensors(size_t index) override;
+
     //! get the output tensor by index in the load_result output_var_list
     std::shared_ptr<Tensor> get_output_tensor(size_t index) override;
 
@@ -190,6 +200,11 @@ private:
    //! VolatileSharedDeviceTensor Opr
    void replace_dev_input_pass();
 
+    //! if the input to the network is a list of tensors, this pass will replace
+    //! the opr that supports the input of a list of tensors with the corresponding
+    //! version, currently only WarpPerspective is supported
+    void replace_src_discrete_input_opr_pass();
+
    //! check whether the model is cross compnode
    void cross_compnode_model_detect();
 
@@ -199,6 +214,8 @@ private:
    void update_input();
    void update_output();
+    //! initialize lite_tensors when the input is composed of discrete multiple tensors
+    void update_input_lite_tensors();
 
    //! when the model info have loaded, update the config according the model
    //! info, finaly use it in compute graph
diff --git a/lite/src/network.cpp b/lite/src/network.cpp
index 6ee7ff4af..fad1343a6 100644
--- a/lite/src/network.cpp
+++ b/lite/src/network.cpp
@@ -127,6 +127,15 @@ std::shared_ptr<Tensor> Network::get_io_tensor(
     LITE_ERROR_HANDLER_END
 }
 
+std::vector<std::shared_ptr<Tensor>> Network::get_io_tensors(
+        std::string name, LiteTensorPhase phase) {
+    LITE_ERROR_HANDLER_BEGIN
+    LITE_ASSERT(m_loaded, "get_io_tensor should be used after model loaded.");
+    LITE_CHECK_NON_NULL_POINTER(m_impl);
+    return m_impl->get_io_tensors(name, phase);
+    LITE_ERROR_HANDLER_END
+}
+
 std::shared_ptr<Tensor> Network::get_input_tensor(size_t index) {
     LITE_ERROR_HANDLER_BEGIN
     LITE_ASSERT(m_loaded, "get_input_tensor should be used after model loaded.");
@@ -135,6 +144,14 @@ std::shared_ptr<Tensor> Network::get_input_tensor(size_t index) {
     LITE_ERROR_HANDLER_END
 }
 
+std::vector<std::shared_ptr<Tensor>> Network::get_input_tensors(size_t index) {
+    LITE_ERROR_HANDLER_BEGIN
+    LITE_ASSERT(m_loaded, "get_input_tensor should be used after model loaded.");
+    LITE_CHECK_NON_NULL_POINTER(m_impl);
+    return m_impl->get_input_tensors(index);
+    LITE_ERROR_HANDLER_END
+}
+
 std::shared_ptr<Tensor> Network::get_output_tensor(size_t index) {
     LITE_ERROR_HANDLER_BEGIN
     LITE_ASSERT(m_loaded, "get_output_tensor should be used after model loaded.");
diff --git a/lite/src/network_impl_base.h b/lite/src/network_impl_base.h
index 0760c81a7..dd1d3c751 100644
--- a/lite/src/network_impl_base.h
+++ b/lite/src/network_impl_base.h
@@ -42,6 +42,9 @@ public:
     bool have_sync = false;
     //! Real input and output data location
     std::shared_ptr<Tensor> lite_tensor = nullptr;
+    //! If the input consists of discrete multiple tensors, lite_tensors is the real
+    //! 
input data location + std::vector> lite_tensors; IOInner() = default; IOInner(const IO& io) { @@ -86,9 +89,22 @@ public: virtual std::shared_ptr get_io_tensor( std::string io_name, LiteTensorPhase phase = LiteTensorPhase::LITE_IO) = 0; + //! get the network input tensors which input consists of discrete multiple tensors, + //! layout (1, c, h, w) + virtual std::vector> get_io_tensors( + std::string io_name, LiteTensorPhase phase = LiteTensorPhase::LITE_INPUT) { + return {}; + } + //! get the input tensor by index in the load_result tensormap virtual std::shared_ptr get_input_tensor(size_t index) = 0; + //! get the network input tensors which input consists of discrete multiple tensors + //! by index + virtual std::vector> get_input_tensors(size_t index) { + return {}; + } + //! get the output tensor by index in the load_result output_var_list virtual std::shared_ptr get_output_tensor(size_t index) = 0; diff --git a/lite/test/test_network.cpp b/lite/test/test_network.cpp index ec3133324..d252b3c44 100644 --- a/lite/test/test_network.cpp +++ b/lite/test/test_network.cpp @@ -1387,6 +1387,96 @@ TEST(TestNetWork, DeviceAsyncExec) { } #endif + +TEST(TestNetWork, Discrete_Input) { + auto data = get_input_data("./data_b3.npy"); + auto data_0 = get_input_data("./data0.npy"); + auto data_1 = get_input_data("./data1.npy"); + auto data_2 = get_input_data("./data2.npy"); + std::string model_path = "./test_discrete_input.mge"; + + Config config; + config.device_type = LiteDeviceType::LITE_CUDA; + + std::shared_ptr network0 = std::make_shared(config); + network0->load_model(model_path); + + std::shared_ptr data_tensor = network0->get_io_tensor("data"); + data_tensor->share_memory_with(*data); + + network0->forward(); + network0->wait(); + std::shared_ptr output_tensor0 = network0->get_output_tensor(0); + + config.discrete_input_name = "data"; + NetworkIO ios; + bool is_host = true; + Layout d_ly{{3, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT}; + ios.inputs.push_back({"data", is_host, LiteIOType::LITE_IO_VALUE, d_ly}); + + std::shared_ptr network1 = std::make_shared(config, ios); + network1->load_model(model_path); + + std::vector> data_tensors = + network1->get_io_tensors("data"); + data_tensors[0]->share_memory_with(*data_0); + data_tensors[1]->share_memory_with(*data_1); + data_tensors[2]->share_memory_with(*data_2); + + network1->forward(); + network1->wait(); + std::shared_ptr output_tensor1 = network1->get_output_tensor(0); + + compare_lite_tensor(output_tensor0, output_tensor1); +} + +TEST(TestNetWork, Discrete_Input_Device) { + auto data = get_input_data("./data_b3.npy"); + auto data_0 = get_input_data("./data0.npy"); + auto data_1 = get_input_data("./data1.npy"); + auto data_2 = get_input_data("./data2.npy"); + std::string model_path = "./test_discrete_input.mge"; + + Config config; + config.device_type = LiteDeviceType::LITE_CUDA; + + std::shared_ptr network0 = std::make_shared(config); + network0->load_model(model_path); + + std::shared_ptr data_tensor = network0->get_io_tensor("data"); + data_tensor->share_memory_with(*data); + + network0->forward(); + network0->wait(); + std::shared_ptr output_tensor0 = network0->get_output_tensor(0); + + config.discrete_input_name = "data"; + NetworkIO ios; + bool is_host = false; + Layout d_ly{{3, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT}; + ios.inputs.push_back({"data", is_host, LiteIOType::LITE_IO_VALUE, d_ly}); + + std::shared_ptr network1 = std::make_shared(config, ios); + network1->load_model(model_path); + + std::vector> data_tensors = + 
network1->get_io_tensors("data"); + auto d0_cuda = Tensor(LiteDeviceType::LITE_CUDA, d_ly); + auto d1_cuda = Tensor(LiteDeviceType::LITE_CUDA, d_ly); + auto d2_cuda = Tensor(LiteDeviceType::LITE_CUDA, d_ly); + d0_cuda.copy_from(*data_0); + d1_cuda.copy_from(*data_1); + d2_cuda.copy_from(*data_2); + data_tensors[0]->share_memory_with(d0_cuda); + data_tensors[1]->share_memory_with(d1_cuda); + data_tensors[2]->share_memory_with(d2_cuda); + + network1->forward(); + network1->wait(); + std::shared_ptr output_tensor1 = network1->get_output_tensor(0); + + compare_lite_tensor(output_tensor0, output_tensor1); +} #endif #if MGB_ATLAS || MGB_CAMBRICON diff --git a/lite/test/test_network_c.cpp b/lite/test/test_network_c.cpp index fcc5ff9a1..53fe3ee35 100644 --- a/lite/test/test_network_c.cpp +++ b/lite/test/test_network_c.cpp @@ -290,6 +290,48 @@ TEST(TestCapiNetWork, GetAllNameAhead) { ASSERT_TRUE(ios_mem.outputs->config_layout.shapes[1] == 1000); } +TEST(TestCapiNetWork, Discrete_Input) { + std::vector> datas; + datas.push_back(lite::get_input_data("./data0.npy")); + datas.push_back(lite::get_input_data("./data1.npy")); + datas.push_back(lite::get_input_data("./data2.npy")); + size_t data_length_in_byte = datas[0]->get_tensor_total_size_in_byte(); + + LiteIO input_io = default_io; + input_io.is_host = true; + input_io.name = "data"; + LiteLayout d_ly; + d_ly.ndim = 4; + d_ly.data_type = LiteDataType::LITE_FLOAT; + std::vector input_shape = {3, 3, 224, 224}; + for (size_t i = 0; i < d_ly.ndim; i++) { + d_ly.shapes[i] = input_shape[i]; + } + input_io.config_layout = d_ly; + + LiteNetworkIO network_io = *default_network_io(); + network_io.inputs = &input_io; + network_io.input_size = 1; + + LiteConfig c_config = *default_config(); + c_config.discrete_input_name = "data"; + LiteNetwork c_network; + LITE_CAPI_CHECK(LITE_make_network(&c_network, c_config, network_io)); + std::string model_path = "./test_discrete_input.mge"; + LITE_CAPI_CHECK(LITE_load_model_from_path(c_network, model_path.c_str())); + + std::vector c_data_tensors(3, nullptr); + for (size_t i = 0; i < 3; i++) { + LITE_CAPI_CHECK(LITE_get_io_tensors( + c_network, "data", i, LITE_INPUT, &c_data_tensors[i])); + LITE_CAPI_CHECK(LITE_reset_tensor_memory( + c_data_tensors[i], datas[i]->get_memory_ptr(), data_length_in_byte)); + } + + ForwardNetwork; + LITE_CAPI_CHECK(LITE_destroy_network(c_network)); +} + #if LITE_BUILD_WITH_RKNPU static int GetTop( diff --git a/src/core/include/megbrain/graph/helper.h b/src/core/include/megbrain/graph/helper.h index b01825b1c..f8c3a571c 100644 --- a/src/core/include/megbrain/graph/helper.h +++ b/src/core/include/megbrain/graph/helper.h @@ -381,7 +381,7 @@ public: }; //! shortcut for calling ExtraDependencyMerger -SymbolVarArray get_dest_vars_with_extra_deps( +MGE_WIN_DECLSPEC_FUC SymbolVarArray get_dest_vars_with_extra_deps( const SymbolVarArray& dest_vars, SpecialOprStat* sopr_stat = nullptr); } // namespace cg diff --git a/src/gopt/include/megbrain/gopt/framework.h b/src/gopt/include/megbrain/gopt/framework.h index 6ab56d7fe..f484bdfd7 100644 --- a/src/gopt/include/megbrain/gopt/framework.h +++ b/src/gopt/include/megbrain/gopt/framework.h @@ -44,13 +44,14 @@ public: //! rewrite vars in a graph class Rewriter; - SubGraph(const SymbolVarArray& endpoint_vars); + MGE_WIN_DECLSPEC_FUC SubGraph(const SymbolVarArray& endpoint_vars); //! get the associated ComputingGraph ComputingGraph* comp_graph() const { return m_comp_graph; } //! 
iterate in topology order - void iter(const Callback& cb, std::shared_ptr = nullptr) const; + MGE_WIN_DECLSPEC_FUC void iter( + const Callback& cb, std::shared_ptr = nullptr) const; //! make a Rewriter bound to this graph inline Rewriter make_rewriter(); @@ -99,7 +100,7 @@ public: * \return new operator that uses new inputs; it would be * opr if no input is changed */ - OperatorNodeBase* auto_replace_outputs(OperatorNodeBase* opr); + MGE_WIN_DECLSPEC_FUC OperatorNodeBase* auto_replace_outputs(OperatorNodeBase* opr); //! get current var: if var has been replaced, return its //! new value; otherwise return var itself @@ -119,11 +120,11 @@ public: * * \param msg see OptState::on_var_replaced */ - void replace_var(VarNode* src, VarNode* dst, const char* msg); + MGE_WIN_DECLSPEC_FUC void replace_var(VarNode* src, VarNode* dst, const char* msg); //! apply this rewriter to the owner graph and modify owner //! SubGraph inplace - void apply_inplace() const; + MGE_WIN_DECLSPEC_FUC void apply_inplace() const; }; SubGraph::Rewriter SubGraph::make_rewriter() { return {this}; diff --git a/src/opr/impl/imgproc.cpp b/src/opr/impl/imgproc.cpp index 4b5f0529c..a4bd021df 100644 --- a/src/opr/impl/imgproc.cpp +++ b/src/opr/impl/imgproc.cpp @@ -160,18 +160,6 @@ void WarpPerspectiveForward::outshape_by_symvar_do_get_output_shape( "out2d=%s", imgshp.to_string().c_str(), matshp.to_string().c_str(), oshp2d.to_string().c_str()); - if (input().size() - m_srcs_size == 2) { - mgb_assert( - m_srcs_size == matshp[0], "batchsize mismatch: img=%zu mat=%zu", - m_srcs_size, matshp[0]); - } else { - mgb_assert(input().size() - m_srcs_size == 3); - mat_idx_shp = shpinfo.shape_inp_shp.at(m_srcs_size + 1); - mgb_assert( - mat_idx_shp[0] == matshp[0] && mat_idx_shp.ndim == 1, - "invalid mat_idx shape: mat=%zu mat_idx=%s", matshp[0], - mat_idx_shp.to_string().c_str()); - } size_t height_idx = 0; if (param().format == Param::Format::NCHW) { height_idx = 2; diff --git a/src/opr/include/megbrain/opr/imgproc.h b/src/opr/include/megbrain/opr/imgproc.h index f272f2f25..cb392146d 100644 --- a/src/opr/include/megbrain/opr/imgproc.h +++ b/src/opr/include/megbrain/opr/imgproc.h @@ -22,7 +22,7 @@ namespace opr { * Impl note: this operator might have 3 or 4 inputs depending on whether * \p mat_idx is given */ -MGB_DEFINE_OPR_CLASS( +MGB_DEFINE_OPR_CLASS_WITH_EXPORT( WarpPerspectiveForward, intl::WorkspaceSizeInfer>>) // { -- GitLab
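
For reference, a minimal C++ usage sketch of the discrete-input path introduced by this patch, adapted from the new TestNetWork.Discrete_Input test case. The input name "data", the model path and the (3, 3, 224, 224) layout are placeholders, and the model is assumed to feed the named input directly into a WarpPerspective operator, which is the only operator the new pass currently rewrites:

#include <memory>

#include "lite/network.h"
#include "lite/tensor.h"

using namespace lite;

int main() {
    // Enable the discrete-input path for the input named "data" (placeholder name).
    Config config;
    config.device_type = LiteDeviceType::LITE_CUDA;
    config.discrete_input_name = "data";

    // Describe the combined layout of the discrete input: a batch of 3 images here.
    NetworkIO ios;
    bool is_host = true;
    Layout layout{{3, 3, 224, 224}, 4, LiteDataType::LITE_FLOAT};
    ios.inputs.push_back({"data", is_host, LiteIOType::LITE_IO_VALUE, layout});

    auto network = std::make_shared<Network>(config, ios);
    network->load_model("./test_discrete_input.mge");  // placeholder model path

    // One (1, c, h, w) tensor per batch element is returned for the discrete input.
    auto parts = network->get_io_tensors("data");
    for (auto& part : parts) {
        // Fill each part before running, e.g. part->share_memory_with(host_tensor)
        // or part->copy_from(host_tensor).
    }

    network->forward();
    network->wait();
    auto output = network->get_output_tensor(0);
    return 0;
}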