Commit 400f5e7c authored by tensor-tang

add threads test

Parent ce20dfa2
@@ -14,6 +14,7 @@ limitations under the License. */
#include <sys/time.h>
#include <time.h>
#include <thread> // NOLINT
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/fluid/inference/tests/test_helper.h"
@@ -31,76 +32,74 @@ TEST(inference, understand_sentiment) {
LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
std::string dirname = FLAGS_dirname;
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices.
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor words;
/*
paddle::framework::LoD lod{{0, 83}};
int64_t word_dict_len = 198392;
SetupLoDTensor(&words, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
*/
std::vector<int64_t> srcdata{
784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, 1550,
198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, 396, 6463,
6463, 1550, 198391, 6463, 22564, 1612, 291, 68, 164, 784,
784, 1550, 198391, 6463, 13659, 3362, 42468, 6189, 2209, 198391,
6463, 2209, 2209, 198391, 6463, 2209, 1062, 3029, 1831, 3029,
1065, 2281, 100, 11216, 1110, 56, 10869, 9811, 100, 198391,
6463, 100, 9280, 100, 288, 40031, 1680, 1335, 100, 1550,
9280, 7265, 244, 1550, 198391, 6463, 1550, 198391, 6463, 42468,
4376, 10251, 10760};
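// LoD {{0, srcdata.size()}} marks the whole input as a single sequence.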
paddle::framework::LoD lod{{0, srcdata.size()}};
words.set_lod(lod);
int64_t* pdata = words.mutable_data<int64_t>(
{static_cast<int64_t>(srcdata.size()), 1}, paddle::platform::CPUPlace());
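// The tensor has shape {srcdata.size(), 1}, so numel() equals srcdata.size()
// and the memcpy below fills the buffer exactly.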
memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t));
LOG(INFO) << "number of input size:" << words.numel();
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&words);
paddle::framework::LoDTensor output1;
std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
const bool model_combined = false;
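// The two bool template arguments appear to mirror FLAGS_prepare_vars and
// FLAGS_prepare_context, i.e. whether TestInference creates scope variables
// itself and whether it prepares the executor context once for reuse; see
// test_helper.h for the authoritative semantics.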
if (FLAGS_prepare_vars) {
if (FLAGS_prepare_context) {
TestInference<paddle::platform::CPUPlace, false, true>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
FLAGS_use_mkldnn);
} else {
TestInference<paddle::platform::CPUPlace, false, false>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
FLAGS_use_mkldnn);
}
} else {
if (FLAGS_prepare_context) {
TestInference<paddle::platform::CPUPlace, true, true>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
FLAGS_use_mkldnn);
} else {
TestInference<paddle::platform::CPUPlace, true, false>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
FLAGS_use_mkldnn);
}
}
LOG(INFO) << output1.lod();
LOG(INFO) << output1.dims();
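// Repeat the same inference concurrently: num_threads workers each run the
// full feed/infer cycle work_per_thread times.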
int total_work = 100;
int num_threads = 10;
int work_per_thread = total_work / num_threads;
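// Integer division: 100 / 10 gives each thread 10 iterations; a remainder
// (if total_work were not a multiple of num_threads) would be dropped.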
std::vector<std::unique_ptr<std::thread>> infer_threads;
for (int i = 0; i < num_threads; ++i) {
infer_threads.emplace_back(new std::thread([&, i]() {
for (int j = 0; j < work_per_thread; ++j) {
// 0. Call `paddle::framework::InitDevices()` to initialize all the devices.
// In unittests, this is done in paddle/testing/paddle_gtest_main.cc
paddle::framework::LoDTensor words;
/*
paddle::framework::LoD lod{{0, 83}};
int64_t word_dict_len = 198392;
SetupLoDTensor(&words, lod, static_cast<int64_t>(0),
static_cast<int64_t>(word_dict_len - 1));
*/
std::vector<int64_t> srcdata{
784, 784, 1550, 6463, 56, 75693, 6189, 784, 784,
1550, 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297,
396, 6463, 6463, 1550, 198391, 6463, 22564, 1612, 291,
68, 164, 784, 784, 1550, 198391, 6463, 13659, 3362,
42468, 6189, 2209, 198391, 6463, 2209, 2209, 198391, 6463,
2209, 1062, 3029, 1831, 3029, 1065, 2281, 100, 11216,
1110, 56, 10869, 9811, 100, 198391, 6463, 100, 9280,
100, 288, 40031, 1680, 1335, 100, 1550, 9280, 7265,
244, 1550, 198391, 6463, 1550, 198391, 6463, 42468, 4376,
10251, 10760};
paddle::framework::LoD lod{{0, srcdata.size()}};
words.set_lod(lod);
int64_t* pdata = words.mutable_data<int64_t>(
{static_cast<int64_t>(srcdata.size()), 1},
paddle::platform::CPUPlace());
memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t));
LOG(INFO) << "number of input elements: " << words.numel();
std::vector<paddle::framework::LoDTensor*> cpu_feeds;
cpu_feeds.push_back(&words);
paddle::framework::LoDTensor output1;
std::vector<paddle::framework::LoDTensor*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1);
// Run inference on CPU
if (FLAGS_prepare_vars) {
  if (FLAGS_prepare_context) {
    TestInference<paddle::platform::CPUPlace, false, true>(
        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
        FLAGS_use_mkldnn);
  } else {
    TestInference<paddle::platform::CPUPlace, false, false>(
        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
        FLAGS_use_mkldnn);
  }
} else {
  if (FLAGS_prepare_context) {
    TestInference<paddle::platform::CPUPlace, true, true>(
        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
        FLAGS_use_mkldnn);
  } else {
    TestInference<paddle::platform::CPUPlace, true, false>(
        dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined,
        FLAGS_use_mkldnn);
  }
}
LOG(INFO) << output1.lod();
LOG(INFO) << output1.dims();
#ifdef PADDLE_WITH_CUDA
// Run inference on CUDA GPU and compare against the CPU result.
paddle::framework::LoDTensor output2;
std::vector<paddle::framework::LoDTensor*> cpu_fetchs2;
cpu_fetchs2.push_back(&output2);
TestInference<paddle::platform::CUDAPlace>(dirname, cpu_feeds, cpu_fetchs2);
LOG(INFO) << output2.lod();
LOG(INFO) << output2.dims();
CheckError<float>(output1, output2);
#endif
}
}));
}
// Wait for all inference threads to finish; destroying a joinable
// std::thread calls std::terminate, so the test must join before
// infer_threads goes out of scope.
for (int i = 0; i < num_threads; ++i) {
  infer_threads[i]->join();
}
}
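For reference, here is the spawn/join pattern the new test follows, reduced to a self-contained sketch (run_sharded and the run callback are illustrative names, not part of this commit):

#include <memory>
#include <thread>  // NOLINT
#include <vector>

// Splits total_work evenly across num_threads workers; run() stands in for
// one inference call and receives the global work index.
void run_sharded(int total_work, int num_threads, void (*run)(int)) {
  int work_per_thread = total_work / num_threads;
  std::vector<std::unique_ptr<std::thread>> threads;
  for (int i = 0; i < num_threads; ++i) {
    threads.emplace_back(new std::thread([=]() {
      for (int j = 0; j < work_per_thread; ++j) {
        run(i * work_per_thread + j);
      }
    }));
  }
  // Join before the unique_ptrs destroy the (still joinable) threads.
  for (auto& t : threads) t->join();
}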