diff --git a/src/tensorrt/impl/tensorrt_opr.cpp b/src/tensorrt/impl/tensorrt_opr.cpp index 91f5cc3bf0d273ed6de8a5d83ed7670a4305ab25..f7647a0cc7fcaf2604bd00fee4f33e158bdc6bf0 100644 --- a/src/tensorrt/impl/tensorrt_opr.cpp +++ b/src/tensorrt/impl/tensorrt_opr.cpp @@ -50,17 +50,6 @@ void TensorRTProfiler::print_layer_times() { printf("Total time: %4.3fms\n", total_time); } -std::shared_ptr TensorRTProfiler::to_json() { - using namespace json; - auto prof_arr = Array::make(); - for (auto&& rec : profile) { - auto&& item = Array::make(); - item->add(String::make(rec.first)); - item->add(Number::make(rec.second)); - prof_arr->add(item); - } - return prof_arr; -} #endif // MGB_ENABLE_JSON @@ -168,7 +157,7 @@ void TensorRTOpr::GpuAllocator::free(void* memory) { void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, nvinfer1::ICudaEngine* engine, - size_t batch) { + size_t batch, bool use_trt_profiler) { auto comp_node = opr->comp_node(); // ICudaEngine is bound to the currently active device @@ -180,22 +169,11 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, comp_node_check.to_string().c_str(), comp_node.to_string().c_str()); } -#if MGB_ENABLE_JSON - auto pf_holder_pair = - opr->owner_graph() - ->options() - .user_data.get_user_data(); - if (m_has_profiler && !pf_holder_pair.second) { - m_context.reset(); - m_has_profiler = false; - } -#endif auto workspace_ptr = opr->output().back()->dev_tensor().raw_ptr(); bool should_reinit_device_memory = !m_context || m_device_workspace_memory_ptr != workspace_ptr; if (!m_context) { m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}}; - m_has_profiler = false; } m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1); bool is_trt_opr = false; @@ -235,11 +213,7 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, bool exec_success = false; -#if MGB_ENABLE_JSON - if (!pf_holder_pair.second) { - mgb_assert(!m_has_profiler, - "Invalid state of TensorRTRuntimeOpr: should not have " - "profiler."); + if (!use_trt_profiler) { #if NV_TENSOR_RT_VERSION >= 6001 if (is_trt_opr) exec_success = m_context->enqueueV2(m_trt_iobuf.data(), @@ -255,7 +229,6 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, } else { TensorRTProfiler trt_profiler; m_context->setProfiler(&trt_profiler); - m_has_profiler = true; // TensorRT documentation stated that IExecutionContext->execute // "Synchronously execute inference on a batch", and it does not take a // cudaStream_t, we expect it do a device synchronize. But it seems like @@ -272,24 +245,9 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, exec_success = m_context->execute(batch, m_trt_iobuf.data()); #endif mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname()); - pf_holder_pair.first[0]->id2object_map[opr] = trt_profiler.to_json(); printf("TRT profile info of opr %s:\n", opr->name().c_str()); trt_profiler.print_layer_times(); } -#else -#if NV_TENSOR_RT_VERSION >= 6001 - if (is_trt_opr) - exec_success = m_context->enqueueV2(m_trt_iobuf.data(), - env.cuda_env().stream, nullptr); - else - exec_success = m_context->enqueue(batch, m_trt_iobuf.data(), - env.cuda_env().stream, nullptr); -#else - exec_success = m_context->enqueue(batch, m_trt_iobuf.data(), - env.cuda_env().stream, nullptr); -#endif - mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname()); -#endif } /* ========================== TensorRTOpr ========================== */ diff --git a/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h b/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h index 04f0086a7ad71033b38c5e21589f6be2c098397e..7d520b89451ca0365ff95e4c9c18e4696b48da5d 100644 --- a/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h +++ b/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h @@ -50,11 +50,11 @@ class TensorRTManager { std::vector m_trt_iobuf; TensorRTUniquePtr m_context; void* m_device_workspace_memory_ptr; - bool m_has_profiler; public: void exec(cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, - nvinfer1::ICudaEngine* engine, size_t batch = 1); + nvinfer1::ICudaEngine* engine, size_t batch = 1, + bool use_trt_profiler = false); void clear_trt_context() { m_context.reset(); } diff --git a/src/tensorrt/test/tensorrt.cpp b/src/tensorrt/test/tensorrt.cpp index c4797601e525e64f00e8807dde754f17baa45123..31ca8dc8533ad38f2b75924b931360f45f1047d7 100644 --- a/src/tensorrt/test/tensorrt.cpp +++ b/src/tensorrt/test/tensorrt.cpp @@ -28,50 +28,6 @@ using namespace mgb; using namespace nvinfer1; using namespace opr; -TEST(TestOprTensorRT, Profile) { - REQUIRE_GPU(1); - intl::ConcatConvTensorRTNetwork net; - - auto p = net.create_trt_network(true); - - auto y2 = TensorRTOpr::make(TensorRTOpr::to_shared_ptr_builder(p.first), - TensorRTOpr::to_shared_ptr_network(p.second), - intl::TensorRTGraphFeatureBits::NCHW_FLOAT, {}, - {net.x0, net.x1})[0]; - - HostTensorND host_z1; - HostTensorND host_z2; - auto func = net.graph->compile({make_callback_copy(net.y, host_z1), - make_callback_copy(y2, host_z2)}); - { - mgb::GraphProfiler profiler(net.graph.get()); - - func->execute(); - - profiler.to_json()->writeto_fpath( - output_file("TestOprTensorRT.Profile.FromProfiler.json")); - auto prof_obj = *static_cast(profiler.to_json().get()); - - auto record_obj = - *static_cast(prof_obj["opr_internal_pf"].get()); - auto opr_prof_arr = *static_cast( - record_obj[y2.node()->owner_opr()->id_str()].get()); - for (auto item_arr : opr_prof_arr.get_impl()) { - auto layer_info_arr = *static_cast(item_arr.get()); - auto layer_time = - *static_cast(layer_info_arr[1].get()); - - mgb_assert(layer_time.get_impl() > 0, "Error occured in json."); - } - - MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); - } - // Run it again after profiler is not in existance. - func->execute(); - - MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); -} - TEST(TestOprTensorRT, Basic) { REQUIRE_GPU(1); intl::SimpleTensorRTNetwork net; diff --git a/src/tensorrt/test/tensorrt_runtime.cpp b/src/tensorrt/test/tensorrt_runtime.cpp index debd27bbda4581a66ddc3040b1eaf1efa2c44136..bc1132f36ecaad71bda0d55c5fbed99b7a2a6a11 100644 --- a/src/tensorrt/test/tensorrt_runtime.cpp +++ b/src/tensorrt/test/tensorrt_runtime.cpp @@ -10,7 +10,6 @@ */ #include "megbrain/comp_node_env.h" -#include "megbrain/plugin/profiler.h" #include "megbrain/test/autocheck.h" #include "megbrain/test/helper.h" #include "megbrain/test/megdnn_helper.h" @@ -102,69 +101,6 @@ TEST(TestOprTensorRT, ConcatRuntimeBasic) { MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); } -TEST(TestOprTensorRT, RuntimeProfile) { - REQUIRE_GPU(1); - intl::ConcatConvTensorRTNetwork net; - SymbolVar y2; - { - auto p = net.create_trt_network(false); - TensorRTUniquePtr trt_net{p.second, {}}; - TensorRTUniquePtr builder{p.first, {}}; - builder->setMaxBatchSize(5); -#if NV_TENSOR_RT_VERSION >= 6001 - TensorRTUniquePtr build_config{ - builder->createBuilderConfig()}; - auto cuda_engine = - builder->buildEngineWithConfig(*trt_net, *build_config); -#else - auto cuda_engine = builder->buildCudaEngine(*trt_net); -#endif - TensorRTUniquePtr mem{cuda_engine->serialize(), {}}; - - FILE* fout = fopen(output_file("trt_cuda_engine").c_str(), "wb"); - auto wr = fwrite(mem->data(), 1, mem->size(), fout); - mgb_assert(wr == mem->size()); - fclose(fout); - - y2 = TensorRTRuntimeOpr::make( - TensorRTRuntimeOpr::to_shared_ptr_engine(cuda_engine), {}, - {net.x0, net.x1})[0]; - } - - HostTensorND host_z1; - HostTensorND host_z2; - auto func = net.graph->compile({make_callback_copy(net.y, host_z1), - make_callback_copy(y2, host_z2)}); - - { - mgb::GraphProfiler profiler(net.graph.get()); - - func->execute(); - - profiler.to_json()->writeto_fpath(output_file( - "TestOprTensorRT.RuntimeProfile.FromProfiler.json")); - - auto prof_obj = *static_cast(profiler.to_json().get()); - auto record_obj = - *static_cast(prof_obj["opr_internal_pf"].get()); - auto opr_prof_arr = *static_cast( - record_obj[y2.node()->owner_opr()->id_str()].get()); - for (auto item_arr : opr_prof_arr.get_impl()) { - auto layer_info_arr = *static_cast(item_arr.get()); - auto layer_time = - *static_cast(layer_info_arr[1].get()); - - mgb_assert(layer_time.get_impl() > 0, "Error occured in json."); - } - - MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); - } - // Run it again after profiler is not in existance. - func->execute(); - - MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); -} - TEST(TestOprTensorRT, RuntimeChangeBatchSize) { REQUIRE_GPU(1); intl::SimpleTensorRTNetwork net;