diff --git a/lite/example/cpp_example/example.h b/lite/example/cpp_example/example.h
index 929fff9328917453ebeae71583d61f2992c3de51..aaafdd387b6f5edcfe05895bd70cc819b3542906 100644
--- a/lite/example/cpp_example/example.h
+++ b/lite/example/cpp_example/example.h
@@ -67,7 +67,8 @@ bool config_user_allocator(const Args& args);
 bool register_cryption_method(const Args& args);
 bool update_cryption_key(const Args& args);
 bool async_forward(const Args& args);
-
+bool set_input_callback(const Args& args);
+bool set_output_callback(const Args& args);
 #if LITE_WITH_CUDA
 bool device_input(const Args& args);
 bool device_input_output(const Args& args);
diff --git a/lite/example/cpp_example/main.cpp b/lite/example/cpp_example/main.cpp
index cffb245bf705fd253e3aa47245e93e3efb7ad490..0100b230c016f1d860b73bfc5290fa1c9dbcac39 100644
--- a/lite/example/cpp_example/main.cpp
+++ b/lite/example/cpp_example/main.cpp
@@ -160,6 +160,8 @@ REGIST_EXAMPLE("reset_input", reset_input);
 REGIST_EXAMPLE("reset_input_output", reset_input_output);
 REGIST_EXAMPLE("config_user_allocator", config_user_allocator);
 REGIST_EXAMPLE("async_forward", async_forward);
+REGIST_EXAMPLE("set_input_callback", set_input_callback);
+REGIST_EXAMPLE("set_output_callback", set_output_callback);
 REGIST_EXAMPLE("basic_c_interface", basic_c_interface);
 REGIST_EXAMPLE("device_io_c_interface", device_io_c_interface);
diff --git a/lite/example/cpp_example/mge/basic.cpp b/lite/example/cpp_example/mge/basic.cpp
index 6dfafc2e72b04ba860bc392a5abb48ef7f4f5410..55e20270f9c0cf03b6519d71693025cf5b1c2686 100644
--- a/lite/example/cpp_example/mge/basic.cpp
+++ b/lite/example/cpp_example/mge/basic.cpp
@@ -365,6 +365,144 @@ bool lite::example::async_forward(const Args& args) {
     printf("max=%e, sum=%e\n", max, sum);
     return true;
 }
+
+bool lite::example::set_input_callback(const Args& args) {
+    std::string network_path = args.model_path;
+    std::string input_path = args.input_path;
+    Config config;
+    config.options.var_sanity_check_first_run = false;
+
+    //! create and load the network
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+
+    network->load_model(network_path);
+
+    //! set input data to input tensor
+    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
+    //! copy or forward data to network
+    size_t length = input_tensor->get_tensor_total_size_in_byte();
+    void* dst_ptr = input_tensor->get_memory_ptr();
+    auto src_tensor = parse_npy(input_path);
+    void* src = src_tensor->get_memory_ptr();
+    memcpy(dst_ptr, src, length);
+
+    //! set input callback
+    volatile bool finished = false;
+    network->set_start_callback(
+            [&finished](const std::unordered_map<
+                        std::string, std::pair<IO, std::shared_ptr<Tensor>>>& inputs) {
+#if !__DEPLOY_ON_XP_SP2__
+                std::cout << "worker thread_id:" << std::this_thread::get_id()
+                          << std::endl;
+#endif
+                for (auto&& item : inputs) {
+                    std::cout << "input name: " << item.first
+                              << " input dim: " << item.second.second->get_layout().ndim
+                              << std::endl;
+                }
+                finished = true;
+            });
+
+#if !__DEPLOY_ON_XP_SP2__
+    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
+#endif
+
+    //! forward
+    network->forward();
+    size_t count = 0;
+    while (finished == false) {
+        count++;
+    }
+    printf("Forward finish, count is %zu\n", count);
+    //! wait until forward is done before reading the output
+    network->wait();
+
+    //! get the output data or read tensor set in network_in
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+    void* out_data = output_tensor->get_memory_ptr();
+    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
+                        output_tensor->get_layout().get_elem_size();
+    printf("length=%zu\n", out_length);
+    float max = -1.0f;
+    float sum = 0.0f;
+    for (size_t i = 0; i < out_length; i++) {
+        float data = static_cast<float*>(out_data)[i];
+        sum += data;
+        if (max < data)
+            max = data;
+    }
+    printf("max=%e, sum=%e\n", max, sum);
+    return true;
+}
+
+bool lite::example::set_output_callback(const Args& args) {
+    std::string network_path = args.model_path;
+    std::string input_path = args.input_path;
+    Config config;
+    config.options.var_sanity_check_first_run = false;
+
+    //! create and load the network
+    std::shared_ptr<Network> network = std::make_shared<Network>(config);
+
+    network->load_model(network_path);
+
+    //! set input data to input tensor
+    std::shared_ptr<Tensor> input_tensor = network->get_input_tensor(0);
+    //! copy or forward data to network
+    size_t length = input_tensor->get_tensor_total_size_in_byte();
+    void* dst_ptr = input_tensor->get_memory_ptr();
+    auto src_tensor = parse_npy(input_path);
+    void* src = src_tensor->get_memory_ptr();
+    memcpy(dst_ptr, src, length);
+
+    //! set output callback
+    volatile bool finished = false;
+    network->set_finish_callback(
+            [&finished](const std::unordered_map<
+                        std::string, std::pair<IO, std::shared_ptr<Tensor>>>& outputs) {
+#if !__DEPLOY_ON_XP_SP2__
+                std::cout << "worker thread_id:" << std::this_thread::get_id()
+                          << std::endl;
+#endif
+                for (auto&& item : outputs) {
+                    std::cout << "output name: " << item.first
+                              << " output dim: " << item.second.second->get_layout().ndim
+                              << std::endl;
+                }
+                finished = true;
+            });
+
+#if !__DEPLOY_ON_XP_SP2__
+    std::cout << "out thread_id:" << std::this_thread::get_id() << std::endl;
+#endif
+
+    //! forward
+    network->forward();
+    network->wait();
+    size_t count = 0;
+    while (finished == false) {
+        count++;
+    }
+    printf("Forward finish, count is %zu\n", count);
+
+    //! get the output data or read tensor set in network_in
+    std::shared_ptr<Tensor> output_tensor = network->get_output_tensor(0);
+    void* out_data = output_tensor->get_memory_ptr();
+    size_t out_length = output_tensor->get_tensor_total_size_in_byte() /
+                        output_tensor->get_layout().get_elem_size();
+    printf("length=%zu\n", out_length);
+    float max = -1.0f;
+    float sum = 0.0f;
+    for (size_t i = 0; i < out_length; i++) {
+        float data = static_cast<float*>(out_data)[i];
+        sum += data;
+        if (max < data)
+            max = data;
+    }
+    printf("max=%e, sum=%e\n", max, sum);
+    return true;
+}
+
 #endif
 
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
diff --git a/lite/lite-c/include/lite-c/network_c.h b/lite/lite-c/include/lite-c/network_c.h
index 64ca7a377f7358eebb86afff97acc7ffde0623b6..ff838d0e815e2bfd53182c63fc6b24f08aff9246 100644
--- a/lite/lite-c/include/lite-c/network_c.h
+++ b/lite/lite-c/include/lite-c/network_c.h
@@ -184,6 +184,8 @@ typedef int (*LiteThreadAffinityCallback)(int thread_id);
 
 typedef int (*LiteAsyncCallback)();
 
+typedef int (*LiteAsyncCallbackWithData)(void* user_data);
+
 /*!
 * \brief the start/finish callback function
 * \param unordered_map map from the io tensor name to the pair of which is the
@@ -193,9 +195,17 @@ typedef int (*LiteAsyncCallback)();
 typedef int (*LiteStartCallback)(
         const LiteIO* inputs, const LiteTensor* input_tensors, size_t size);
 
+typedef int (*LiteStartCallbackWithData)(
+        const LiteIO* inputs, const LiteTensor* input_tensors, size_t size,
+        void* user_data);
+
 typedef int (*LiteFinishCallback)(
         const LiteIO* outputs, const LiteTensor* output_tensors, size_t size);
 
+typedef int (*LiteFinishCallbackWithData)(
+        const LiteIO* outputs, const LiteTensor* output_tensors, size_t size,
+        void* user_data);
+
 /*!
 * \brief The network is construct form a model, implement model load, init,
 * forward, and display some model information
@@ -442,6 +452,19 @@ LITE_API int LITE_set_network_algo_workspace_limit(
 LITE_API int LITE_set_async_callback(
         LiteNetwork network, const LiteAsyncCallback async_callback);
 
+/**
+ * \brief set the network to forward in async mode and set the async callback
+ * function
+ * \param[in] network The loaded model
+ * \param[in] async_callback the callback invoked when the network finishes
+ * forwarding
+ * \param[in] user_data user-defined data passed back to the callback when
+ * forward finishes
+ */
+LITE_API int LITE_set_async_callback_with_userdata(
+        LiteNetwork network, const LiteAsyncCallbackWithData async_callback,
+        void* user_data);
+
 /**
  * \brief set the start forward callback function, which will be execute beform
  * forward, this can be used to check network input or dump model inputs
@@ -453,6 +476,20 @@ LITE_API int LITE_set_async_callback(
 LITE_API int LITE_set_start_callback(
         LiteNetwork network, const LiteStartCallback start_callback);
 
+/**
+ * \brief set the start forward callback function, which will be executed
+ * before forward; this can be used to check network inputs or dump model
+ * inputs for debugging
+ * \param[in] network The loaded model
+ * \param[in] start_callback the callback invoked when the network starts
+ * forwarding
+ * \param[in] user_data user-defined data passed back to the callback when
+ * forward starts
+ */
+LITE_API int LITE_set_start_callback_with_userdata(
+        LiteNetwork network, const LiteStartCallbackWithData start_callback,
+        void* user_data);
+
 /**
  * \brief set the finish forward callback function, which will be execute after
  * forward, this can be used to dump model outputs for debug
@@ -463,6 +500,19 @@ LITE_API int LITE_set_start_callback(
 LITE_API int LITE_set_finish_callback(
         LiteNetwork network, const LiteFinishCallback finish_callback);
 
+/**
+ * \brief set the finish forward callback function, which will be executed
+ * after forward; this can be used to dump model outputs for debugging
+ * \param[in] network The loaded model
+ * \param[in] finish_callback the callback invoked when the network finishes
+ * forwarding
+ * \param[in] user_data user-defined data passed back to the callback when
+ * forward finishes
+ */
+LITE_API int LITE_set_finish_callback_with_userdata(
+        LiteNetwork network, const LiteFinishCallbackWithData finish_callback,
+        void* user_data);
+
 /**
  * \brief set threads affinity callback
  * \param[in] network The loaded model
diff --git a/lite/lite-c/src/network.cpp b/lite/lite-c/src/network.cpp
index d9419df6028967cc0cc90419102bb947dc2f1e93..51df08cf99c10316d36628fff4013afb1af11542 100644
--- a/lite/lite-c/src/network.cpp
+++ b/lite/lite-c/src/network.cpp
@@ -355,6 +355,22 @@ int LITE_set_async_callback(
     LITE_CAPI_END();
 }
 
+int LITE_set_async_callback_with_userdata(
+        LiteNetwork network, LiteAsyncCallbackWithData async_callback,
+        void* user_data) {
+    LITE_CAPI_BEGIN();
+    LITE_ASSERT(network, "The network pass to LITE api is null");
+    LITE_ASSERT(async_callback, "The ptr pass to LITE api is null");
+
+    auto lite_async_callback = [async_callback, user_data]() -> void {
+        async_callback(user_data);
+    };
+    static_cast<lite::Network*>(network)->set_async_callback(
+            std::move(lite_async_callback));
+
+    LITE_CAPI_END();
+}
+
 int LITE_set_start_callback(
         LiteNetwork network, const LiteStartCallback start_callback) {
     LITE_CAPI_BEGIN();
@@ -381,6 +397,34 @@ int LITE_set_start_callback(
     LITE_CAPI_END();
 }
 
+int LITE_set_start_callback_with_userdata(
+        LiteNetwork network, const LiteStartCallbackWithData start_callback,
+        void* user_data) {
+    LITE_CAPI_BEGIN();
+    LITE_ASSERT(network, "The network pass to LITE api is null");
+    auto lite_start_callback =
+            [start_callback,
+             user_data](const std::unordered_map<
+                        std::string,
+                        std::pair<IO, std::shared_ptr<Tensor>>>& inputs_map)
+            -> void {
+        std::vector<LiteIO> ios;
+        std::vector<LiteTensor> io_tensors;
+        size_t nr_io = 0;
+        for (const auto& io : inputs_map) {
+            nr_io++;
+            auto&& lite_io = io.second.first;
+            ios.push_back(
+                    {lite_io.name.c_str(), lite_io.is_host, lite_io.io_type,
+                     convert_to_clayout(lite_io.config_layout)});
+            io_tensors.push_back(io.second.second.get());
+        }
+        start_callback(ios.data(), io_tensors.data(), nr_io, user_data);
+    };
+    static_cast<lite::Network*>(network)->set_start_callback(lite_start_callback);
+    LITE_CAPI_END();
+}
+
 int LITE_set_finish_callback(
         LiteNetwork network, const LiteFinishCallback finish_callback) {
     LITE_CAPI_BEGIN();
@@ -407,6 +451,34 @@ int LITE_set_finish_callback(
     LITE_CAPI_END();
 }
 
+int LITE_set_finish_callback_with_userdata(
+        LiteNetwork network, const LiteFinishCallbackWithData finish_callback,
+        void* user_data) {
+    LITE_CAPI_BEGIN();
+    LITE_ASSERT(network, "The network pass to LITE api is null");
+    auto lite_finish_callback =
+            [finish_callback,
+             user_data](const std::unordered_map<
+                        std::string,
+                        std::pair<IO, std::shared_ptr<Tensor>>>&
+                                outputs_map) -> void {
+        std::vector<LiteIO> ios;
+        std::vector<LiteTensor> io_tensors;
+        size_t nr_io = 0;
+        for (const auto& io : outputs_map) {
+            nr_io++;
+            auto&& lite_io = io.second.first;
+            ios.push_back(
+                    {lite_io.name.c_str(), lite_io.is_host, lite_io.io_type,
+                     convert_to_clayout(lite_io.config_layout)});
+            io_tensors.push_back(io.second.second.get());
+        }
+        finish_callback(ios.data(), io_tensors.data(), nr_io, user_data);
+    };
+    static_cast<lite::Network*>(network)->set_finish_callback(lite_finish_callback);
+    LITE_CAPI_END();
+}
+
 int LITE_enable_profile_performance(
         LiteNetwork network, const char* profile_json_file_path) {
     LITE_CAPI_BEGIN();
diff --git a/lite/test/test_network_c.cpp b/lite/test/test_network_c.cpp
index 21b79f63182beb82bf5c13660a0cd774a93b4ad7..0c16857b9f25821aa869103359942fb9cb05cbe9 100644
--- a/lite/test/test_network_c.cpp
+++ b/lite/test/test_network_c.cpp
@@ -74,11 +74,21 @@ int multi_thread_affinity(int id) {
 };
 
 volatile bool finished = false;
-int finish_callback() {
+int async_callback() {
     finished = true;
     return 0;
 }
 
+volatile bool finished_with_data = false;
+int async_callback_with_data(void* user_data) {
+    if (user_data != NULL) {
+        std::cout << "async_callback user_data addr=" << std::hex << user_data
+                  << std::endl;
+    }
+    finished_with_data = true;
+    return 0;
+}
+
 volatile bool start_checked = false;
 int start_callback(const LiteIO* inputs, const LiteTensor* input_tensors, size_t size) {
     start_checked = true;
@@ -96,6 +106,29 @@ int start_callback(const LiteIO* inputs, const LiteTensor* input_tensors, size_t size) {
     return 0;
 }
 
+volatile bool start_checked_with_data = false;
+int start_callback_with_data(
+        const LiteIO* inputs, const LiteTensor* input_tensors, size_t size,
+        void* user_data) {
+    start_checked_with_data = true;
+    auto check_func = [&]() {
+        if (user_data != NULL) {
+            std::cout << "start_callback user_data addr=" << std::hex << user_data
+                      << std::endl;
+        }
+        ASSERT_EQ(size, 1);
+        ASSERT_EQ(std::string(inputs->name), "data");
+        LiteLayout layout;
+        LITE_get_tensor_layout(*input_tensors, &layout);
+        ASSERT_EQ(layout.ndim, 4);
+        ASSERT_EQ(layout.shapes[1], 3);
+        ASSERT_EQ(layout.shapes[2], 224);
+        ASSERT_EQ(layout.shapes[3], 224);
+    };
+    check_func();
+    return 0;
+}
+
 volatile bool finish_checked = false;
 int finish_callback(
         const LiteIO* outputs, const LiteTensor* output_tensors, size_t size) {
@@ -113,6 +146,28 @@ int finish_callback(
     return 0;
 }
 
+volatile bool finish_checked_with_data = false;
+int finish_callback_with_data(
+        const LiteIO* outputs, const LiteTensor* output_tensors, size_t size,
+        void* user_data) {
+    finish_checked_with_data = true;
+    auto check_func = [&]() {
+        if (user_data != NULL) {
+            std::cout << "finish_callback user_data addr=" << std::hex << user_data
+                      << std::endl;
+        }
+        ASSERT_EQ(size, 1);
+        ASSERT_EQ(
+                std::string(outputs->name),
+                "TRUE_DIV(EXP[12065],reduce0[12067])[12077]");
+        LiteLayout layout;
+        LITE_get_tensor_layout(*output_tensors, &layout);
+        ASSERT_EQ(layout.shapes[1], 1000);
+    };
+    check_func();
+    return 0;
+}
+
 }  // namespace
 
 #define LITE_CAPI_CHECK(_expr) \
@@ -671,6 +726,21 @@ TEST(TestCapiNetWork, StartCallBack) {
     LITE_CAPI_CHECK(LITE_destroy_network(c_network));
 }
 
+TEST(TestCapiNetWork, StartCallBackWithData) {
+    ForwardMgb;
+    MakeNetwork;
+    LoadNetwork;
+    size_t user_data = 1;
+    LITE_CAPI_CHECK(LITE_set_start_callback_with_userdata(
+            c_network, start_callback_with_data, &user_data));
+    SetInput;
+    ForwardNetwork;
+    GetOutput;
+    CompareResult;
+    ASSERT_TRUE(start_checked_with_data);
+    LITE_CAPI_CHECK(LITE_destroy_network(c_network));
+}
+
 TEST(TestCapiNetWork, FinishCallBack) {
     ForwardMgb;
     MakeNetwork;
@@ -684,6 +754,21 @@ TEST(TestCapiNetWork, FinishCallBack) {
     LITE_CAPI_CHECK(LITE_destroy_network(c_network));
 }
 
+TEST(TestCapiNetWork, FinishCallBackWithData) {
+    ForwardMgb;
+    MakeNetwork;
+    LoadNetwork;
+    size_t user_data = 1;
+    LITE_CAPI_CHECK(LITE_set_finish_callback_with_userdata(
+            c_network, finish_callback_with_data, &user_data));
+    SetInput;
+    ForwardNetwork;
+    GetOutput;
+    CompareResult;
+    ASSERT_TRUE(finish_checked_with_data);
+    LITE_CAPI_CHECK(LITE_destroy_network(c_network));
+}
+
 TEST(TestCapiNetWork, BasicCryptAes) {
     ForwardMgb;
 
@@ -723,7 +808,7 @@ TEST(TestCapiNetWork, AsyncExec) {
     LiteConfig c_config = *default_config();
     c_config.options.var_sanity_check_first_run = false;
     LITE_CAPI_CHECK(LITE_make_network(&c_network, c_config, *default_network_io()));
-    LITE_CAPI_CHECK(LITE_set_async_callback(c_network, finish_callback));
+    LITE_CAPI_CHECK(LITE_set_async_callback(c_network, async_callback));
     LoadNetwork;
     SetInput;
 
@@ -740,6 +825,32 @@ TEST(TestCapiNetWork, AsyncExec) {
     LITE_CAPI_CHECK(LITE_destroy_network(c_network));
 }
 
+TEST(TestCapiNetWork, AsyncExecWithData) {
+    finished_with_data = false;
+    ForwardMgb;
+    LiteNetwork c_network;
+    LiteConfig c_config = *default_config();
+    c_config.options.var_sanity_check_first_run = false;
+    LITE_CAPI_CHECK(LITE_make_network(&c_network, c_config, *default_network_io()));
+    size_t user_data = 1;
+    LITE_CAPI_CHECK(LITE_set_async_callback_with_userdata(
+            c_network, async_callback_with_data, &user_data));
+    LoadNetwork;
+    SetInput;
+
+    LITE_forward(c_network);
+    size_t count = 0;
+    while (finished_with_data == false) {
+        count++;
+    }
+    ASSERT_GT(count, 0);
+    finished_with_data = false;
+
+    GetOutput;
+    CompareResult;
+    LITE_CAPI_CHECK(LITE_destroy_network(c_network));
+}
+
 TEST(TestCapiNetWork, OutputShapeOnly) {
     ForwardMgb;
     LiteNetwork c_network;