diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc index 049731c7216e542dedcf8754eef79f0a672291d6..77d17fbbccca0292e21acd5e8fa90448527b95c0 100644 --- a/paddle/fluid/framework/program_desc.cc +++ b/paddle/fluid/framework/program_desc.cc @@ -85,9 +85,9 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) { } const std::vector ProgramDesc::GetFeedTargetNames() { - BlockDesc *global_block = blocks_[0].get(); + auto &global_block = Block(0); std::vector feed_target_names; - for (auto *op : global_block->AllOps()) { + for (auto *op : global_block.AllOps()) { if (op->Type() == kFeedOpType) { feed_target_names.insert(feed_target_names.begin(), op->Output("Out")[0]); } @@ -96,9 +96,9 @@ const std::vector ProgramDesc::GetFeedTargetNames() { } const std::vector ProgramDesc::GetFetchTargetNames() { - BlockDesc *global_block = blocks_[0].get(); + auto &global_block = Block(0); std::vector fetch_target_names; - for (auto *op : global_block->AllOps()) { + for (auto *op : global_block.AllOps()) { if (op->Type() == kFetchOpType) { fetch_target_names.push_back(op->Input("X")[0]); } @@ -106,5 +106,43 @@ const std::vector ProgramDesc::GetFetchTargetNames() { return fetch_target_names; } +void ProgramDesc::SetFeedHolderName(const std::string &feed_holder_name) { + auto *global_block = MutableBlock(0); + int index = 0; + for (auto *op : global_block->AllOps()) { + if (op->Type() == kFeedOpType) { + // Unify the input's name of all feed_ops to feed_holder_name + global_block->RemoveVar(op->Input("X")[0]); + op->SetInput("X", {feed_holder_name}); + op->SetAttr("col", {index}); + op->CheckAttrs(); + index++; + } + } + + auto *feed_holder = global_block->Var(feed_holder_name); + feed_holder->SetType(proto::VarType::FEED_MINIBATCH); + feed_holder->SetPersistable(true); +} + +void ProgramDesc::SetFetchHolderName(const std::string &fetch_holder_name) { + auto *global_block = MutableBlock(0); + int index = 0; + for (auto *op : global_block->AllOps()) { + if (op->Type() == kFetchOpType) { + // Unify the output's name of all fetch_ops to fetch_holder_name + global_block->RemoveVar(op->Output("Out")[0]); + op->SetOutput("Out", {fetch_holder_name}); + op->SetAttr("col", {index}); + op->CheckAttrs(); + index++; + } + } + + auto *fetch_holder = global_block->Var(fetch_holder_name); + fetch_holder->SetType(proto::VarType::FETCH_LIST); + fetch_holder->SetPersistable(true); +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/program_desc.h b/paddle/fluid/framework/program_desc.h index 538a0372116e6f90fd2fae5f00097b8efc5dcb5c..4288081be72c44c0fc3584b50c41a270eac9e204 100644 --- a/paddle/fluid/framework/program_desc.h +++ b/paddle/fluid/framework/program_desc.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/framework.pb.h" @@ -52,9 +53,26 @@ class ProgramDesc { proto::ProgramDesc *Proto(); + // The output variable of feed_op is referenced as feed_target. + // This function is used to collect the output variable's name of all + // feed_ops. const std::vector GetFeedTargetNames(); + + // The input variable of fetch_op is referenced as fetch_target. + // This function is used to collect the input variable's name of all + // fetch_ops. const std::vector GetFetchTargetNames(); + // The input variable of feed_op that holds input Tensor provided by users is + // referenced as feed_holder. + // This function is used to change or unify the feed_holder variables' name. + void SetFeedHolderName(const std::string &feed_holder_name); + + // The output variable of fetch_op that holds output Tensor needed by users is + // referenced as fetch_holder. + // This function is used to change or unify the fetch_holder variables' name. + void SetFetchHolderName(const std::string &fetch_holder_name); + private: proto::ProgramDesc desc_; diff --git a/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc b/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc index 3e77dc166c355bc141563eda4705ca8d75226ac4..2c5b66a32903f4ffdedb074b31aec53ae6cacaf3 100644 --- a/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc +++ b/paddle/fluid/inference/tests/book/test_inference_fit_a_line.cc @@ -12,6 +12,7 @@ limitations under the License. */ #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" +#include "paddle/fluid/inference/tests/test_multi_thread_helper.h" DEFINE_string(dirname, "", "Directory of the inference model."); @@ -26,32 +27,63 @@ TEST(inference, fit_a_line) { // 0. Call `paddle::framework::InitDevices()` initialize all the devices // In unittests, this is done in paddle/testing/paddle_gtest_main.cc - paddle::framework::LoDTensor input; - // The second dim of the input tensor should be 13 - // The input data should be >= 0 - int64_t batch_size = 10; - SetupTensor(&input, {batch_size, 13}, static_cast(0), - static_cast(10)); - std::vector cpu_feeds; - cpu_feeds.push_back(&input); + for (int num_threads : {1, 2}) { + std::vector> cpu_feeds; + cpu_feeds.resize(num_threads); + for (int i = 0; i < num_threads; ++i) { + auto* input = new paddle::framework::LoDTensor(); + // The second dim of the input tensor should be 13 + // The input data should be >= 0 + int64_t batch_size = 10; + SetupTensor(input, {batch_size, 13}, static_cast(0), + static_cast(10)); + cpu_feeds[i].push_back(input); + } - paddle::framework::LoDTensor output1; - std::vector cpu_fetchs1; - cpu_fetchs1.push_back(&output1); + std::vector> cpu_fetchs1; + cpu_fetchs1.resize(num_threads); + for (int i = 0; i < num_threads; ++i) { + auto* output = new paddle::framework::LoDTensor(); + cpu_fetchs1[i].push_back(output); + } - // Run inference on CPU - TestInference(dirname, cpu_feeds, cpu_fetchs1); - LOG(INFO) << output1.dims(); + // Run inference on CPU + LOG(INFO) << "--- CPU Runs (num_threads: " << num_threads << "): ---"; + if (num_threads == 1) { + TestInference(dirname, cpu_feeds[0], + cpu_fetchs1[0]); + } else { + TestMultiThreadInference( + dirname, cpu_feeds, cpu_fetchs1, num_threads); + } #ifdef PADDLE_WITH_CUDA - paddle::framework::LoDTensor output2; - std::vector cpu_fetchs2; - cpu_fetchs2.push_back(&output2); + std::vector> cpu_fetchs2; + cpu_fetchs2.resize(num_threads); + for (int i = 0; i < num_threads; ++i) { + auto* output = new paddle::framework::LoDTensor(); + cpu_fetchs2[i].push_back(output); + } - // Run inference on CUDA GPU - TestInference(dirname, cpu_feeds, cpu_fetchs2); - LOG(INFO) << output2.dims(); + // Run inference on CUDA GPU + LOG(INFO) << "--- GPU Runs (num_threads: " << num_threads << "): ---"; + if (num_threads == 1) { + TestInference(dirname, cpu_feeds[0], + cpu_fetchs2[0]); + } else { + TestMultiThreadInference( + dirname, cpu_feeds, cpu_fetchs2, num_threads); + } - CheckError(output1, output2); + for (int i = 0; i < num_threads; ++i) { + CheckError(*cpu_fetchs1[i][0], *cpu_fetchs2[i][0]); + delete cpu_fetchs2[i][0]; + } #endif + + for (int i = 0; i < num_threads; ++i) { + delete cpu_feeds[i][0]; + delete cpu_fetchs1[i][0]; + } + } // num_threads-loop } diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index aae34ceda07fea6e881cf61b3755ec45d1d6f2dc..064e400f0c750872ab2142c5fc8e28dd3da85b1a 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -25,7 +25,8 @@ limitations under the License. */ template void SetupTensor(paddle::framework::LoDTensor* input, paddle::framework::DDim dims, T lower, T upper) { - std::mt19937 rng(100); // An arbitrarily chosen but fixed seed. + static unsigned int seed = 100; + std::mt19937 rng(seed++); std::uniform_real_distribution uniform_dist(0, 1); T* input_ptr = input->mutable_data(dims, paddle::platform::CPUPlace()); diff --git a/paddle/fluid/inference/tests/test_multi_thread_helper.h b/paddle/fluid/inference/tests/test_multi_thread_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..56745f115db231d4350da72b7de7967175ac9fe8 --- /dev/null +++ b/paddle/fluid/inference/tests/test_multi_thread_helper.h @@ -0,0 +1,90 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include // NOLINT +#include +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/inference/io.h" + +void ThreadedRunInference( + const std::unique_ptr& inference_program, + paddle::framework::Executor* executor, paddle::framework::Scope* scope, + const int thread_id, + const std::vector& cpu_feeds, + const std::vector& cpu_fetchs) { + auto copy_program = std::unique_ptr( + new paddle::framework::ProgramDesc(*inference_program)); + + std::string feed_holder_name = "feed_" + paddle::string::to_string(thread_id); + std::string fetch_holder_name = + "fetch_" + paddle::string::to_string(thread_id); + copy_program->SetFeedHolderName(feed_holder_name); + copy_program->SetFetchHolderName(fetch_holder_name); + + // 3. Get the feed_target_names and fetch_target_names + const std::vector& feed_target_names = + copy_program->GetFeedTargetNames(); + const std::vector& fetch_target_names = + copy_program->GetFetchTargetNames(); + + // 4. Prepare inputs: set up maps for feed targets + std::map feed_targets; + for (size_t i = 0; i < feed_target_names.size(); ++i) { + // Please make sure that cpu_feeds[i] is right for feed_target_names[i] + feed_targets[feed_target_names[i]] = cpu_feeds[i]; + } + + // 5. Define Tensor to get the outputs: set up maps for fetch targets + std::map fetch_targets; + for (size_t i = 0; i < fetch_target_names.size(); ++i) { + fetch_targets[fetch_target_names[i]] = cpu_fetchs[i]; + } + + // 6. Run the inference program + executor->Run(*copy_program, scope, feed_targets, fetch_targets, true, + feed_holder_name, fetch_holder_name); +} + +template +void TestMultiThreadInference( + const std::string& dirname, + const std::vector>& cpu_feeds, + const std::vector>& cpu_fetchs, + const int num_threads) { + // 1. Define place, executor, scope + auto place = Place(); + auto executor = paddle::framework::Executor(place); + auto* scope = new paddle::framework::Scope(); + + // 2. Initialize the inference_program and load parameters + std::unique_ptr inference_program = + paddle::inference::Load(executor, *scope, dirname); + + std::vector threads; + for (int i = 0; i < num_threads; ++i) { + threads.push_back(new std::thread( + ThreadedRunInference, std::ref(inference_program), &executor, scope, i, + std::ref(cpu_feeds[i]), std::ref(cpu_fetchs[i]))); + } + for (int i = 0; i < num_threads; ++i) { + threads[i]->join(); + delete threads[i]; + } + + delete scope; +}