diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc index b879067d2f2f6294c50e0adb21f9399a7c36698a..efc144394128d542a17f19160cb4fd84a9e028d4 100644 --- a/paddle/fluid/inference/analysis/subgraph_splitter.cc +++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc @@ -309,6 +309,7 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); } void SubGraphFuse::ReplaceNodesWithSubGraphs() { auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)(); for (auto &subgraph : subgraphs) { + if (subgraph.size() <= 3) continue; std::unordered_set subgraph_uniq(subgraph.begin(), subgraph.end()); // replace this sub-graph with the first node. Two steps: 1. Create a Block // Node that contains this subgraph 2. Mark the nodes inside the sub-graph diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 508ef1ce40aa0882a0f39a85f97511fd9ea2a8a5..9c057affca9c4b6b3fcd4574a587af1b78145b5c 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -85,3 +85,11 @@ if (WITH_ANAKIN AND WITH_MKL) # only needed in CI DEPS inference_anakin_api_shared dynload_cuda SERIAL) endif() endif() + +if(WITH_GPU AND TENSORRT_FOUND) + set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt") + inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz") + cc_test(test_trt_models SRCS trt_models_tester.cc + ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models + DEPS paddle_inference_tensorrt_subgraph_engine) +endif() diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..79ee9b23a94da6b6b038c89b595b169e24da564c --- /dev/null +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -0,0 +1,105 
@@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+#include "paddle/fluid/inference/analysis/analyzer.h"
+#include "paddle/fluid/inference/api/paddle_inference_api.h"
+
+namespace paddle {
+
+DEFINE_string(dirname, "", "Directory of the inference model.");
+
+// Config for the native predictor: GPU 0, 0.7 of GPU memory, --dirname model.
+NativeConfig GetConfigNative() {
+  NativeConfig config;
+  config.model_dir = FLAGS_dirname;
+  config.fraction_of_gpu_memory = 0.7;
+  config.use_gpu = true;
+  config.device = 0;
+  return config;
+}
+
+// Config for the TensorRT predictor: GPU 0, 0.1 of GPU memory,
+// max_batch_size 3, --dirname model.
+TensorRTConfig GetConfigTRT() {
+  TensorRTConfig config;
+  config.model_dir = FLAGS_dirname;
+  config.use_gpu = true;
+  config.fraction_of_gpu_memory = 0.1;
+  config.device = 0;
+  config.max_batch_size = 3;
+  return config;
+}
+
+// Runs the model in `model_dirname` through the native and the TensorRT
+// predictor with the same input and expects the single output of each to
+// match element-wise within 1e-3. The input is a zero-filled
+// batch_size x 3 x 224 x 224 float tensor whose first element is 1.
+void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
+  // data[0] is written below, so the input must hold at least one element.
+  ASSERT_GT(batch_size, 0);
+
+  NativeConfig config0 = GetConfigNative();
+  config0.model_dir = model_dirname;
+
+  TensorRTConfig config1 = GetConfigTRT();
+  config1.model_dir = model_dirname;
+  config1.max_batch_size = batch_size;
+
+  // NOTE(review): the template arguments were missing from the patch as
+  // received; restored from paddle_inference_api.h -- please confirm.
+  auto predictor0 =
+      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
+  auto predictor1 =
+      CreatePaddlePredictor<TensorRTConfig,
+                            PaddleEngineKind::kAutoMixedTensorRT>(config1);
+
+  // Prepare inputs. A std::vector owns the buffer so it is freed on every exit
+  // path, including the early returns taken by failing ASSERT_* checks.
+  const int height = 224;
+  const int width = 224;
+  const size_t num_input_elements =
+      static_cast<size_t>(batch_size) * 3 * height * width;
+  std::vector<float> data(num_input_elements, 0.0f);
+  data[0] = 1.0f;
+
+  // PaddleBuf(void*, size_t) only wraps the pointer (see
+  // paddle_inference_api.h), so `data` must stay alive until both Run() calls
+  // have returned.
+  PaddleTensor tensor;
+  tensor.name = "input_0";
+  tensor.shape = std::vector<int>({batch_size, 3, height, width});
+  tensor.data = PaddleBuf(static_cast<void *>(data.data()),
+                          sizeof(float) * num_input_elements);
+  tensor.dtype = PaddleDType::FLOAT32;
+  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+
+  // Run both predictors on the same feeds.
+  std::vector<PaddleTensor> outputs0;
+  std::vector<PaddleTensor> outputs1;
+  CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));
+  CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));
+
+  // Each predictor must produce exactly one output.
+  ASSERT_EQ(outputs0.size(), 1UL);
+  ASSERT_EQ(outputs1.size(), 1UL);
+
+  const size_t num_elements = outputs0.front().data.length() / sizeof(float);
+  const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
+  EXPECT_EQ(num_elements, num_elements1);
+
+  auto *data0 = static_cast<float *>(outputs0.front().data.data());
+  auto *data1 = static_cast<float *>(outputs1.front().data.data());
+
+  // Compare only the common prefix so a size mismatch (already reported by the
+  // EXPECT_EQ above) cannot cause an out-of-bounds read.
+  ASSERT_GT(num_elements, 0UL);
+  for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
+    EXPECT_NEAR(data0[i], data1[i], 1e-3);
+  }
+}
+
+// Compares TensorRT with the native predictor, at batch size 1, for each model
+// sub-directory of --dirname listed below.
+TEST(trt_models_test, main) {
+  const std::vector<std::string> infer_models = {"mobilenet", "resnet50",
+                                                 "resnext50"};
+  for (const auto &model_dir : infer_models) {
+    CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + model_dir);
+  }
+}
+}  // namespace paddle