From 188581801682fd799698f6f170ce1d4b4951ccba Mon Sep 17 00:00:00 2001
From: Liu Yiqun
Date: Wed, 21 Mar 2018 02:41:26 +0000
Subject: [PATCH] Add multi-thread inference example.

---
 .../tests/book/test_inference_fit_a_line.cc | 66 ++++++++++++++++
 .../tests/test_multi_thread_helper.h         | 78 +++++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100644 paddle/fluid/inference/tests/test_multi_thread_helper.h

diff --git a/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc b/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc
index 9ab808efec3..e8224be2d49 100644
--- a/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc
@@ -12,6 +12,7 @@ limitations under the License. */
 
 #include <gtest/gtest.h>
 #include "gflags/gflags.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
+#include "paddle/fluid/inference/tests/test_multi_thread_helper.h"
 
 DEFINE_string(dirname, "", "Directory of the inference model.");
@@ -40,6 +41,7 @@
   cpu_fetchs1.push_back(&output1);
 
   // Run inference on CPU
+  LOG(INFO) << "--- CPU Runs: ---";
   TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
   LOG(INFO) << output1.dims();
 
@@ -49,9 +51,73 @@
   cpu_fetchs2.push_back(&output2);
 
   // Run inference on CUDA GPU
+  LOG(INFO) << "--- GPU Runs: ---";
   TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
   LOG(INFO) << output2.dims();
 
   CheckError<float>(output1, output2);
 #endif
 }
+
+TEST(multi_thread_inference, fit_a_line) {
+  if (FLAGS_dirname.empty()) {
+    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
+  }
+
+  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
+  std::string dirname = FLAGS_dirname;
+
+  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
+  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
+
+  int num_threads = 2;
+
+  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_feeds;
+  cpu_feeds.resize(num_threads);
+  for (int i = 0; i < num_threads; ++i) {
+    auto* input = new paddle::framework::LoDTensor();
+    // The second dim of the input tensor should be 13
+    // The input data should be >= 0
+    int64_t batch_size = 10;
+    SetupTensor<float>(*input,
+                       {batch_size, 13},
+                       static_cast<float>(0),
+                       static_cast<float>(10));
+    cpu_feeds[i].push_back(input);
+  }
+
+  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs1;
+  cpu_fetchs1.resize(num_threads);
+  for (int i = 0; i < num_threads; ++i) {
+    auto* output = new paddle::framework::LoDTensor();
+    cpu_fetchs1[i].push_back(output);
+  }
+
+  // Run inference on CPU
+  LOG(INFO) << "--- CPU Runs (Multi Thread): ---";
+  TestMultiThreadInference<paddle::platform::CPUPlace>(
+      dirname, cpu_feeds, cpu_fetchs1, num_threads);
+
+#ifdef PADDLE_WITH_CUDA
+  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs2;
+  cpu_fetchs2.resize(num_threads);
+  for (int i = 0; i < num_threads; ++i) {
+    auto* output = new paddle::framework::LoDTensor();
+    cpu_fetchs2[i].push_back(output);
+  }
+
+  // Run inference on CUDA GPU
+  LOG(INFO) << "--- GPU Runs (Multi Thread): ---";
+  TestMultiThreadInference<paddle::platform::CUDAPlace>(
+      dirname, cpu_feeds, cpu_fetchs2, num_threads);
+
+  for (int i = 0; i < num_threads; ++i) {
+    delete cpu_fetchs2[i][0];
+  }
+#endif
+
+  for (int i = 0; i < num_threads; ++i) {
+    delete cpu_feeds[i][0];
+    delete cpu_fetchs1[i][0];
+  }
+}
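Unlike the single-thread test, which validates the GPU result against the CPU result with CheckError<float>(output1, output2), the multi-thread test above frees cpu_fetchs2 without comparing it to cpu_fetchs1. If that cross-check is wanted, a per-thread comparison along the following lines could be added right before the deletes in the #ifdef PADDLE_WITH_CUDA block. This is only a sketch, assuming CheckError<float> from test_helper.h accepts the two LoDTensors the same way the single-thread call does:

    // Sketch only: compare each thread's CPU output against its GPU output,
    // mirroring CheckError<float>(output1, output2) in the single-thread test.
    for (int i = 0; i < num_threads; ++i) {
      CheckError<float>(*cpu_fetchs1[i][0], *cpu_fetchs2[i][0]);
    }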
diff --git a/paddle/fluid/inference/tests/test_multi_thread_helper.h b/paddle/fluid/inference/tests/test_multi_thread_helper.h
new file mode 100644
index 00000000000..54e203833ba
--- /dev/null
+++ b/paddle/fluid/inference/tests/test_multi_thread_helper.h
@@ -0,0 +1,78 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <thread>
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/inference/io.h"
+
+void ThreadedRunInference(
+    std::unique_ptr<paddle::framework::ProgramDesc>& inference_program,
+    paddle::framework::Executor& executor,
+    paddle::framework::Scope* scope,
+    const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
+    std::vector<paddle::framework::LoDTensor*>& cpu_fetchs) {
+  // 3. Get the feed_target_names and fetch_target_names
+  const std::vector<std::string>& feed_target_names =
+      inference_program->GetFeedTargetNames();
+  const std::vector<std::string>& fetch_target_names =
+      inference_program->GetFetchTargetNames();
+
+  // 4. Prepare inputs: set up maps for feed targets
+  std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
+  for (size_t i = 0; i < feed_target_names.size(); ++i) {
+    // Please make sure that cpu_feeds[i] is right for feed_target_names[i]
+    feed_targets[feed_target_names[i]] = cpu_feeds[i];
+  }
+
+  // 5. Define Tensor to get the outputs: set up maps for fetch targets
+  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
+  for (size_t i = 0; i < fetch_target_names.size(); ++i) {
+    fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
+  }
+
+  // 6. Run the inference program
+  executor.Run(*inference_program, scope, feed_targets, fetch_targets);
+}
+
+template <typename Place>
+void TestMultiThreadInference(
+    const std::string& dirname,
+    const std::vector<std::vector<paddle::framework::LoDTensor*>>& cpu_feeds,
+    std::vector<std::vector<paddle::framework::LoDTensor*>>& cpu_fetchs,
+    const int num_threads) {
+  // 1. Define place, executor, scope
+  auto place = Place();
+  auto executor = paddle::framework::Executor(place);
+  auto* scope = new paddle::framework::Scope();
+
+  // 2. Initialize the inference_program and load parameters
+  std::unique_ptr<paddle::framework::ProgramDesc> inference_program =
+      paddle::inference::Load(executor, *scope, dirname);
+
+  std::vector<std::thread*> threads;
+  for (int i = 0; i < num_threads; ++i) {
+    threads.push_back(new std::thread(ThreadedRunInference,
+                                      std::ref(inference_program),
+                                      std::ref(executor),
+                                      scope,
+                                      std::ref(cpu_feeds[i]),
+                                      std::ref(cpu_fetchs[i])));
+  }
+  for (int i = 0; i < num_threads; ++i) {
+    threads[i]->join();
+    delete threads[i];
+  }
+
+  delete scope;
+}
--
GitLab
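A note on the threading pattern in test_multi_thread_helper.h: std::thread copies its arguments by default, so the shared inference_program, executor, and the per-thread feed/fetch vectors are wrapped in std::ref so that ThreadedRunInference receives real references rather than copies. The standalone snippet below illustrates the same spawn-with-std::ref-then-join pattern; Worker, model_dir, and results are placeholder names invented for this example and are not part of the patch. It also shows that value-semantic std::thread objects stored in a std::vector<std::thread> can replace the new/delete pairs used above.

    #include <iostream>
    #include <string>
    #include <thread>
    #include <vector>

    // Stand-in for ThreadedRunInference: its reference parameters must be
    // forwarded with std::ref/std::cref, otherwise std::thread copies them.
    void Worker(const std::string& model_dir, int thread_id,
                std::vector<int>& results) {
      results[thread_id] = thread_id;  // each thread writes only its own slot
      std::cout << "thread " << thread_id << " used model " << model_dir << "\n";
    }

    int main() {
      const std::string model_dir = "path/to/your/model";
      const int num_threads = 2;
      std::vector<int> results(num_threads, -1);

      std::vector<std::thread> threads;
      threads.reserve(num_threads);
      for (int i = 0; i < num_threads; ++i) {
        // std::cref/std::ref pass references instead of copies, mirroring the
        // std::ref(...) arguments used in TestMultiThreadInference.
        threads.emplace_back(Worker, std::cref(model_dir), i, std::ref(results));
      }
      for (auto& t : threads) {
        t.join();  // join every worker before the shared containers are destroyed
      }
      return 0;
    }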