Commit e24172eb authored by Liu Yiqun

Simplify the inference unittest of fit_a_line and add some comments.

Parent bdb21f6b
...
@@ -53,10 +53,24 @@ class ProgramDesc {
 
   proto::ProgramDesc *Proto();
 
+  // The output variable of feed_op is referenced as feed_target.
+  // This function is used to collect the output variable's name of all
+  // feed_ops.
   const std::vector<std::string> GetFeedTargetNames();
+
+  // The input variable of fetch_op is referenced as fetch_target.
+  // This function is used to collect the input variable's name of all
+  // fetch_ops.
   const std::vector<std::string> GetFetchTargetNames();
 
+  // The input variable of feed_op that holds input Tensor provided by users is
+  // referenced as feed_holder.
+  // This function is used to change or unify the feed_holder variables' name.
   void SetFeedHolderName(const std::string &feed_holder_name);
+
+  // The output variable of fetch_op that holds output Tensor needed by users is
+  // referenced as fetch_holder.
+  // This function is used to change or unify the fetch_holder variables' name.
   void SetFetchHolderName(const std::string &fetch_holder_name);
 
  private:
...
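Taken together, these four accessors supply the feed/fetch plumbing used by the test below: look up the target names from the program, pick holder names, then bind user tensors by target name when calling the executor. A minimal sketch of that call pattern follows; the include paths, the RunInferenceSketch name, the map-based feed/fetch types, and the exact Run(...) signature are assumptions based on this diff, not API it confirms.

// Sketch only: header paths and the exact Run(...) signature are assumptions.
#include <map>
#include <string>
#include <vector>

#include "paddle/framework/executor.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/program_desc.h"
#include "paddle/framework/scope.h"

void RunInferenceSketch(paddle::framework::Executor* executor,
                        paddle::framework::ProgramDesc* program,
                        paddle::framework::Scope* scope,
                        const paddle::framework::LoDTensor& input,
                        paddle::framework::LoDTensor* output) {
  // Collect the output variable names of all feed_ops (feed_targets) and
  // the input variable names of all fetch_ops (fetch_targets).
  const std::vector<std::string> feed_names = program->GetFeedTargetNames();
  const std::vector<std::string> fetch_names = program->GetFetchTargetNames();

  // Unify the holder variable names; per-thread names would keep several
  // threads that share one scope from overwriting each other's holders.
  program->SetFeedHolderName("feed");
  program->SetFetchHolderName("fetch");

  // Bind user tensors to the feed/fetch targets by name.
  std::map<std::string, const paddle::framework::LoDTensor*> feed_targets;
  feed_targets[feed_names[0]] = &input;
  std::map<std::string, paddle::framework::LoDTensor*> fetch_targets;
  fetch_targets[fetch_names[0]] = output;

  // The trailing bool matches the call shape added in the last hunk of this
  // commit; its parameter name is an assumption (see the note there).
  executor->Run(*program, scope, feed_targets, fetch_targets, true,
                "feed", "fetch");
}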
...
@@ -27,96 +27,63 @@ TEST(inference, fit_a_line) {
   // 0. Call `paddle::framework::InitDevices()` initialize all the devices
   // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
 
-  paddle::framework::LoDTensor input;
-  // The second dim of the input tensor should be 13
-  // The input data should be >= 0
-  int64_t batch_size = 10;
-  SetupTensor<float>(&input, {batch_size, 13}, static_cast<float>(0),
-                     static_cast<float>(10));
-  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
-  cpu_feeds.push_back(&input);
-
-  paddle::framework::LoDTensor output1;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
-  cpu_fetchs1.push_back(&output1);
-
-  // Run inference on CPU
-  LOG(INFO) << "--- CPU Runs: ---";
-  TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds, cpu_fetchs1);
-  LOG(INFO) << output1.dims();
-
-#ifdef PADDLE_WITH_CUDA
-  paddle::framework::LoDTensor output2;
-  std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
-  cpu_fetchs2.push_back(&output2);
-
-  // Run inference on CUDA GPU
-  LOG(INFO) << "--- GPU Runs: ---";
-  TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
-  LOG(INFO) << output2.dims();
-
-  CheckError<float>(output1, output2);
-#endif
-}
-
-TEST(multi_thread_inference, fit_a_line) {
-  if (FLAGS_dirname.empty()) {
-    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
-  }
-
-  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
-  std::string dirname = FLAGS_dirname;
-
-  // 0. Call `paddle::framework::InitDevices()` initialize all the devices
-  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
-
-  int num_threads = 2;
-
-  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_feeds;
-  cpu_feeds.resize(num_threads);
-  for (int i = 0; i < num_threads; ++i) {
-    auto* input = new paddle::framework::LoDTensor();
-    // The second dim of the input tensor should be 13
-    // The input data should be >= 0
-    int64_t batch_size = 10;
-    SetupTensor<float>(input, {batch_size, 13}, static_cast<float>(0),
-                       static_cast<float>(10));
-    cpu_feeds[i].push_back(input);
-  }
-
-  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs1;
-  cpu_fetchs1.resize(num_threads);
-  for (int i = 0; i < num_threads; ++i) {
-    auto* output = new paddle::framework::LoDTensor();
-    cpu_fetchs1[i].push_back(output);
-  }
-
-  // Run inference on CPU
-  LOG(INFO) << "--- CPU Runs (Multi Thread): ---";
-  TestMultiThreadInference<paddle::platform::CPUPlace>(
-      dirname, cpu_feeds, cpu_fetchs1, num_threads);
-
-#ifdef PADDLE_WITH_CUDA
-  std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs2;
-  cpu_fetchs2.resize(num_threads);
-  for (int i = 0; i < num_threads; ++i) {
-    auto* output = new paddle::framework::LoDTensor();
-    cpu_fetchs2[i].push_back(output);
-  }
-
-  // Run inference on CUDA GPU
-  LOG(INFO) << "--- GPU Runs (Multi Thread): ---";
-  TestMultiThreadInference<paddle::platform::CUDAPlace>(
-      dirname, cpu_feeds, cpu_fetchs2, num_threads);
-
-  for (int i = 0; i < num_threads; ++i) {
-    CheckError<float>(*cpu_fetchs1[i][0], *cpu_fetchs2[i][0]);
-    delete cpu_fetchs2[i][0];
-  }
-#endif
-
-  for (int i = 0; i < num_threads; ++i) {
-    delete cpu_feeds[i][0];
-    delete cpu_fetchs1[i][0];
-  }
+  for (int num_threads : {1, 2}) {
+    std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_feeds;
+    cpu_feeds.resize(num_threads);
+    for (int i = 0; i < num_threads; ++i) {
+      auto* input = new paddle::framework::LoDTensor();
+      // The second dim of the input tensor should be 13
+      // The input data should be >= 0
+      int64_t batch_size = 10;
+      SetupTensor<float>(input, {batch_size, 13}, static_cast<float>(0),
+                         static_cast<float>(10));
+      cpu_feeds[i].push_back(input);
+    }
+
+    std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs1;
+    cpu_fetchs1.resize(num_threads);
+    for (int i = 0; i < num_threads; ++i) {
+      auto* output = new paddle::framework::LoDTensor();
+      cpu_fetchs1[i].push_back(output);
+    }
+
+    // Run inference on CPU
+    LOG(INFO) << "--- CPU Runs (num_threads: " << num_threads << "): ---";
+    if (num_threads == 1) {
+      TestInference<paddle::platform::CPUPlace>(dirname, cpu_feeds[0],
+                                                cpu_fetchs1[0]);
+    } else {
+      TestMultiThreadInference<paddle::platform::CPUPlace>(
+          dirname, cpu_feeds, cpu_fetchs1, num_threads);
+    }
+
+#ifdef PADDLE_WITH_CUDA
+    std::vector<std::vector<paddle::framework::LoDTensor*>> cpu_fetchs2;
+    cpu_fetchs2.resize(num_threads);
+    for (int i = 0; i < num_threads; ++i) {
+      auto* output = new paddle::framework::LoDTensor();
+      cpu_fetchs2[i].push_back(output);
+    }
+
+    // Run inference on CUDA GPU
+    LOG(INFO) << "--- GPU Runs (num_threads: " << num_threads << "): ---";
+    if (num_threads == 1) {
+      TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds[0],
+                                                 cpu_fetchs2[0]);
+    } else {
+      TestMultiThreadInference<paddle::platform::CUDAPlace>(
+          dirname, cpu_feeds, cpu_fetchs2, num_threads);
+    }
+
+    for (int i = 0; i < num_threads; ++i) {
+      CheckError<float>(*cpu_fetchs1[i][0], *cpu_fetchs2[i][0]);
+      delete cpu_fetchs2[i][0];
+    }
+#endif
+
+    for (int i = 0; i < num_threads; ++i) {
+      delete cpu_feeds[i][0];
+      delete cpu_fetchs1[i][0];
+    }
+  }  // num_threads-loop
 }
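For num_threads == 2 the merged test dispatches through TestMultiThreadInference, whose body lives in a test header outside this diff. A minimal sketch of the fan-out it presumably performs follows, assuming one feed/fetch slot per thread as the vectors above suggest; FanOutInference and run_one_thread are hypothetical names, not the real helper.

// Sketch only: approximates the per-thread dispatch of
// TestMultiThreadInference; the real helper is not part of this diff.
#include <functional>
#include <thread>
#include <vector>

void FanOutInference(int num_threads,
                     const std::function<void(int)>& run_one_thread) {
  std::vector<std::thread> threads;
  threads.reserve(num_threads);
  for (int i = 0; i < num_threads; ++i) {
    // Thread i consumes cpu_feeds[i] and fills cpu_fetchs[i]; giving each
    // thread its own feed/fetch holder names (see SetFeedHolderName /
    // SetFetchHolderName above) keeps threads sharing one scope from
    // overwriting each other's holder variables.
    threads.emplace_back(run_one_thread, i);
  }
  for (auto& t : threads) {
    t.join();
  }
}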
...
@@ -56,7 +56,7 @@ void ThreadedRunInference(
   }
 
   // 6. Run the inference program
-  executor->Run(*copy_program, scope, feed_targets, fetch_targets,
+  executor->Run(*copy_program, scope, feed_targets, fetch_targets, true,
                 feed_holder_name, fetch_holder_name);
 }
...
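The only change in this hunk is the extra `true` slotted into `Executor::Run` between the fetch targets and the holder names. The parameter's declaration is not visible in this diff; judging by the call shape it is most likely a flag such as `create_vars` that controls whether Run creates the program's variables in the scope before executing. A hedged reading of the updated call:

// Assumed parameter name; only the call shape is confirmed by this diff.
executor->Run(*copy_program, scope, feed_targets, fetch_targets,
              /*create_vars=*/true, feed_holder_name, fetch_holder_name);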