Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
80e882a3
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
80e882a3
编写于
6月 07, 2018
作者:
T
tensor-tang
提交者:
GitHub
6月 07, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #11247 from tensor-tang/infer_api
Infer multi-threads API Demo and UT
上级
9141bee1
e030741d
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
204 addition
and
3 deletion
+204
-3
paddle/contrib/inference/demo/simple_on_word2vec.cc
paddle/contrib/inference/demo/simple_on_word2vec.cc
+55
-1
paddle/contrib/inference/test_paddle_inference_api_impl.cc
paddle/contrib/inference/test_paddle_inference_api_impl.cc
+149
-2
未找到文件。
paddle/contrib/inference/demo/simple_on_word2vec.cc
浏览文件 @
80e882a3
...
...
@@ -19,8 +19,8 @@ limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <thread>
#include "paddle/contrib/inference/paddle_inference_api.h"
namespace
paddle
{
namespace
demo
{
...
...
@@ -61,13 +61,67 @@ void Main(bool use_gpu) {
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
)[
i
];
}
// TODO(Superjomn): this should be freed automatically
free
(
outputs
[
0
].
data
.
data
);
}
}
// Demo: runs the word2vec inference model from `num_threads` threads at once.
//
// A single "main" predictor is created from a NativeConfig; every worker
// thread clones it and runs a few batches of dummy input, logging the first
// few output values.
//
// num_threads: number of worker threads to spawn.
// use_gpu:     forwarded to NativeConfig::use_gpu.
//
// NOTE(review): the original comment here said "Multi-threads only support on
// CPU", yet the GPU tests in this file call this function with use_gpu = true.
// Confirm which of the two is correct.
void MainThreads(int num_threads, bool use_gpu) {
  // 0. Create PaddlePredictor with a config.
  NativeConfig config;
  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
  config.use_gpu = use_gpu;
  config.fraction_of_gpu_memory = 0.15;
  config.device = 0;
  auto main_predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);

  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_threads; ++tid) {
    // main_predictor is captured by reference; this is safe because every
    // thread is joined before this function returns.
    threads.emplace_back([&, tid]() {
      // 1. clone a predictor which shares the same parameters
      auto predictor = main_predictor->Clone();
      constexpr int num_batches = 3;
      for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
        // 2. Dummy Input Data
        int64_t data[4] = {1, 2, 3, 4};
        PaddleBuf buf{.data = data, .length = sizeof(data)};
        PaddleTensor tensor{.name = "",
                            .shape = std::vector<int>({4, 1}),
                            .data = buf,
                            .dtype = PaddleDType::INT64};
        std::vector<PaddleTensor> inputs(4, tensor);
        std::vector<PaddleTensor> outputs;

        // 3. Run
        CHECK(predictor->Run(inputs, &outputs));

        // 4. Get output.
        ASSERT_EQ(outputs.size(), 1UL);
        LOG(INFO) << "TID: " << tid << ", "
                  << "output buffer size: " << outputs.front().data.length;
        const size_t num_elements =
            outputs.front().data.length / sizeof(float);
        // Spell out the template argument: `std::min(5UL, num_elements)` only
        // compiles on platforms where size_t happens to be `unsigned long`.
        const size_t num_to_log = std::min<size_t>(5, num_elements);
        // The outputs' buffers are in CPU memory.
        for (size_t i = 0; i < num_to_log; i++) {
          LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
        }
        // The output buffer is released manually by the caller.
        free(outputs[0].data.data);
      }
    });
  }

  // Wait for every worker before main_predictor goes out of scope.
  for (auto& worker : threads) {
    worker.join();
  }
}
// Single-threaded word2vec demo with GPU disabled.
TEST(demo, word2vec_cpu) {
  const bool use_gpu = false;
  Main(use_gpu);
}
// Threaded word2vec demo with a single worker thread, GPU disabled.
TEST(demo_multi_threads, word2vec_cpu_1) {
  const int num_threads = 1;
  const bool use_gpu = false;
  MainThreads(num_threads, use_gpu);
}
// Threaded word2vec demo with four worker threads, GPU disabled.
TEST(demo_multi_threads, word2vec_cpu_4) {
  const int num_threads = 4;
  const bool use_gpu = false;
  MainThreads(num_threads, use_gpu);
}
#ifdef PADDLE_WITH_CUDA
// Single-threaded word2vec demo with GPU enabled.
TEST(demo, word2vec_gpu) {
  const bool use_gpu = true;
  Main(use_gpu);
}
// Threaded word2vec demo with a single worker thread, GPU enabled.
TEST(demo_multi_threads, word2vec_gpu_1) {
  const int num_threads = 1;
  const bool use_gpu = true;
  MainThreads(num_threads, use_gpu);
}
// Threaded word2vec demo with four worker threads, GPU enabled.
TEST(demo_multi_threads, word2vec_gpu_4) {
  const int num_threads = 4;
  const bool use_gpu = true;
  MainThreads(num_threads, use_gpu);
}
#endif
}
// namespace demo
...
...
paddle/contrib/inference/test_paddle_inference_api_impl.cc
浏览文件 @
80e882a3
...
...
@@ -15,6 +15,8 @@ limitations under the License. */
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <thread>
#include "gflags/gflags.h"
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
#include "paddle/fluid/inference/tests/test_helper.h"
...
...
@@ -45,14 +47,19 @@ NativeConfig GetConfig() {
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
LOG
(
INFO
)
<<
"dirname "
<<
config
.
model_dir
;
config
.
fraction_of_gpu_memory
=
0.15
;
#ifdef PADDLE_WITH_CUDA
config
.
use_gpu
=
true
;
#else
config
.
use_gpu
=
false
;
#endif
config
.
device
=
0
;
return
config
;
}
TEST
(
paddle_inference_api_impl
,
word2vec
)
{
void
MainWord2Vec
(
bool
use_gpu
)
{
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
config
.
use_gpu
=
use_gpu
;
framework
::
LoDTensor
first_word
,
second_word
,
third_word
,
fourth_word
;
framework
::
LoD
lod
{{
0
,
1
}};
...
...
@@ -100,11 +107,12 @@ TEST(paddle_inference_api_impl, word2vec) {
free
(
outputs
[
0
].
data
.
data
);
}
TEST
(
paddle_inference_api_impl
,
image_classification
)
{
void
MainImageClassification
(
bool
use_gpu
)
{
int
batch_size
=
2
;
bool
use_mkldnn
=
false
;
bool
repeat
=
false
;
NativeConfig
config
=
GetConfig
();
config
.
use_gpu
=
use_gpu
;
config
.
model_dir
=
FLAGS_dirname
+
"image_classification_resnet.inference.model"
;
...
...
@@ -149,4 +157,143 @@ TEST(paddle_inference_api_impl, image_classification) {
free
(
data
);
}
// Multi-threaded word2vec check.
//
// Builds `num_jobs` independent inputs (four single-word LoDTensors each),
// computes a reference result per job with TestInference<CPUPlace>, then
// starts one thread per job. Each thread clones the main predictor, runs its
// job, checks that every output value lies strictly inside (-1, 1) and
// compares the output element-wise against the reference.
//
// use_gpu: forwarded to NativeConfig::use_gpu for the predictor under test
//          (the reference is always computed with platform::CPUPlace).
void MainThreadsWord2Vec(bool use_gpu) {
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);

  // prepare inputs data and reference results
  constexpr int num_jobs = 3;
  std::vector<std::vector<framework::LoDTensor>> jobs(num_jobs);
  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
  std::vector<framework::LoDTensor> refs(num_jobs);
  for (size_t i = 0; i < jobs.size(); ++i) {
    // each job has 4 words
    jobs[i].resize(4);
    for (size_t j = 0; j < 4; ++j) {
      framework::LoD lod{{0, 1}};
      int64_t dict_size = 2073;  // The size of dictionary
      SetupLoDTensor(&jobs[i][j], lod, static_cast<int64_t>(0), dict_size - 1);
      paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i][j]));
    }

    // get reference result of each job
    std::vector<paddle::framework::LoDTensor*> ref_feeds;
    std::vector<paddle::framework::LoDTensor*> ref_fetches(1, &refs[i]);
    for (auto& word : jobs[i]) {
      ref_feeds.push_back(&word);
    }
    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
  }

  // create threads and each thread run 1 job
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_jobs; ++tid) {
    // Locals are captured by reference; all threads are joined below before
    // any of them goes out of scope.
    threads.emplace_back([&, tid]() {
      auto predictor = main_predictor->Clone();
      auto& local_inputs = paddle_tensor_feeds[tid];
      std::vector<PaddleTensor> local_outputs;
      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));

      // check outputs range
      ASSERT_EQ(local_outputs.size(), 1UL);
      const size_t len = local_outputs[0].data.length;
      float* data = static_cast<float*>(local_outputs[0].data.data);
      for (size_t j = 0; j < len / sizeof(float); ++j) {
        ASSERT_LT(data[j], 1.0);
        ASSERT_GT(data[j], -1.0);
      }

      // check outputs correctness
      float* ref_data = refs[tid].data<float>();
      const int64_t num_ref = refs[tid].numel();
      const int64_t num_out = static_cast<int64_t>(len / sizeof(float));
      EXPECT_EQ(num_ref, num_out);
      // EXPECT_EQ is non-fatal, so on a size mismatch execution continues.
      // Compare only the elements both buffers actually hold to avoid reading
      // past the end of the shorter one.
      const int64_t num_cmp = num_ref < num_out ? num_ref : num_out;
      for (int64_t i = 0; i < num_cmp; ++i) {
        EXPECT_NEAR(ref_data[i], data[i], 1e-3);
      }
      // The output buffer is released manually by the caller.
      free(data);
    });
  }
  for (auto& worker : threads) {
    worker.join();
  }
}
// Multi-threaded image-classification check.
//
// Builds `num_jobs` input tensors (batch size 1) for the
// "image_classification_resnet.inference.model" model, computes a reference
// result per job with TestInference<CPUPlace>, then starts one thread per job.
// Each thread clones the main predictor, runs its job and compares the output
// element-wise against the reference.
//
// use_gpu: forwarded to NativeConfig::use_gpu for the predictor under test
//          (the reference is always computed with platform::CPUPlace).
void MainThreadsImageClassification(bool use_gpu) {
  constexpr int num_jobs = 4;  // each job run 1 batch
  constexpr int batch_size = 1;
  NativeConfig config = GetConfig();
  config.use_gpu = use_gpu;
  config.model_dir =
      FLAGS_dirname + "image_classification_resnet.inference.model";
  auto main_predictor = CreatePaddlePredictor<NativeConfig>(config);

  std::vector<framework::LoDTensor> jobs(num_jobs);
  std::vector<std::vector<PaddleTensor>> paddle_tensor_feeds(num_jobs);
  std::vector<framework::LoDTensor> refs(num_jobs);
  for (size_t i = 0; i < jobs.size(); ++i) {
    // prepare inputs
    std::vector<std::vector<int64_t>> feed_target_shapes =
        GetFeedTargetShapes(config.model_dir, /*is_combined*/ false);
    // Overwrite the leading dimension of the first feed with the batch size.
    feed_target_shapes[0][0] = batch_size;
    framework::DDim input_dims = framework::make_ddim(feed_target_shapes[0]);
    SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f);
    paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i]));

    // get reference result of each job
    std::vector<framework::LoDTensor*> ref_feeds(1, &jobs[i]);
    std::vector<framework::LoDTensor*> ref_fetches(1, &refs[i]);
    TestInference<platform::CPUPlace>(config.model_dir, ref_feeds, ref_fetches);
  }

  // create threads and each thread run 1 job
  std::vector<std::thread> threads;
  for (int tid = 0; tid < num_jobs; ++tid) {
    // Locals are captured by reference; all threads are joined below before
    // any of them goes out of scope.
    threads.emplace_back([&, tid]() {
      auto predictor = main_predictor->Clone();
      auto& local_inputs = paddle_tensor_feeds[tid];
      std::vector<PaddleTensor> local_outputs;
      ASSERT_TRUE(predictor->Run(local_inputs, &local_outputs));

      // check outputs correctness
      ASSERT_EQ(local_outputs.size(), 1UL);
      const size_t len = local_outputs[0].data.length;
      float* data = static_cast<float*>(local_outputs[0].data.data);
      float* ref_data = refs[tid].data<float>();
      const int64_t num_ref = refs[tid].numel();
      // Cast to int64_t so both sides of the comparison share a signed type,
      // matching MainThreadsWord2Vec.
      const int64_t num_out = static_cast<int64_t>(len / sizeof(float));
      EXPECT_EQ(num_ref, num_out);
      // EXPECT_EQ is non-fatal, so on a size mismatch execution continues.
      // Compare only the elements both buffers actually hold to avoid reading
      // past the end of the shorter one.
      const int64_t num_cmp = num_ref < num_out ? num_ref : num_out;
      for (int64_t i = 0; i < num_cmp; ++i) {
        EXPECT_NEAR(ref_data[i], data[i], 1e-3);
      }
      // The output buffer is released manually by the caller.
      free(data);
    });
  }
  for (auto& worker : threads) {
    worker.join();
  }
}
// Single-threaded word2vec check with GPU disabled.
TEST(inference_api_native, word2vec_cpu) {
  const bool use_gpu = false;
  MainWord2Vec(use_gpu);
}
// Multi-threaded word2vec check with GPU disabled.
TEST(inference_api_native, word2vec_cpu_threads) {
  const bool use_gpu = false;
  MainThreadsWord2Vec(use_gpu);
}
// Single-threaded image-classification check with GPU disabled.
// This previously called MainThreadsImageClassification, which made it a
// duplicate of image_classification_cpu_threads and left
// MainImageClassification without a CPU test.
TEST(inference_api_native, image_classification_cpu) {
  MainImageClassification(false /*use_gpu*/);
}
// Multi-threaded image-classification check with GPU disabled.
TEST(inference_api_native, image_classification_cpu_threads) {
  const bool use_gpu = false;
  MainThreadsImageClassification(use_gpu);
}
#ifdef PADDLE_WITH_CUDA
// Single-threaded word2vec check with GPU enabled.
TEST(inference_api_native, word2vec_gpu) {
  const bool use_gpu = true;
  MainWord2Vec(use_gpu);
}
// Multi-threaded word2vec check with GPU enabled.
TEST(inference_api_native, word2vec_gpu_threads) {
  const bool use_gpu = true;
  MainThreadsWord2Vec(use_gpu);
}
// Single-threaded image-classification check with GPU enabled.
// This previously called MainThreadsImageClassification, which made it a
// duplicate of image_classification_gpu_threads and left
// MainImageClassification without a GPU test.
TEST(inference_api_native, image_classification_gpu) {
  MainImageClassification(true /*use_gpu*/);
}
// Multi-threaded image-classification check with GPU enabled.
TEST(inference_api_native, image_classification_gpu_threads) {
  const bool use_gpu = true;
  MainThreadsImageClassification(use_gpu);
}
#endif
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录