Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e90afec4
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e90afec4
编写于
11月 23, 2018
作者:
T
Tao Luo
提交者:
GitHub
11月 23, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #14543 from luotao1/threads
add thread related inference api
上级
64ca3d17
116979a4
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
44 additions
and
18 deletions
+44
-18
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+2
-0
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+9
-2
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+2
-0
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+1
-2
paddle/fluid/inference/api/paddle_analysis_config.h
paddle/fluid/inference/api/paddle_analysis_config.h
+1
-1
paddle/fluid/inference/api/paddle_api.h
paddle/fluid/inference/api/paddle_api.h
+13
-0
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+1
-0
paddle/fluid/inference/tests/api/config_printer.h
paddle/fluid/inference/tests/api/config_printer.h
+2
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+11
-9
paddle/fluid/operators/math/fc_compute.h
paddle/fluid/operators/math/fc_compute.h
+1
-3
paddle/fluid/platform/cpu_helper.cc
paddle/fluid/platform/cpu_helper.cc
+1
-1
未找到文件。
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
e90afec4
...
...
@@ -46,6 +46,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
prog_file
=
other
.
prog_file
;
param_file
=
other
.
param_file
;
specify_input_name
=
other
.
specify_input_name
;
cpu_math_library_num_threads_
=
other
.
cpu_math_library_num_threads_
;
// fields from this.
enable_ir_optim
=
other
.
enable_ir_optim
;
use_feed_fetch_ops
=
other
.
use_feed_fetch_ops
;
...
...
@@ -72,6 +73,7 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
prog_file
=
other
.
prog_file
;
param_file
=
other
.
param_file
;
specify_input_name
=
other
.
specify_input_name
;
cpu_math_library_num_threads_
=
other
.
cpu_math_library_num_threads_
;
// fields from this.
enable_ir_optim
=
other
.
enable_ir_optim
;
use_feed_fetch_ops
=
other
.
use_feed_fetch_ops
;
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
e90afec4
...
...
@@ -35,7 +35,6 @@
#include "paddle/fluid/platform/profiler.h"
DECLARE_bool
(
profile
);
DECLARE_int32
(
paddle_num_threads
);
namespace
paddle
{
...
...
@@ -67,7 +66,7 @@ bool AnalysisPredictor::Init(
#endif
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
FLAGS_paddle_num_threads
);
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
()
);
if
(
!
PrepareScope
(
parent_scope
))
{
return
false
;
...
...
@@ -160,6 +159,14 @@ bool AnalysisPredictor::PrepareExecutor() {
return
true
;
}
void
AnalysisPredictor
::
SetMkldnnThreadID
(
int
tid
)
{
#ifdef PADDLE_WITH_MKLDNN
platform
::
set_cur_thread_id
(
tid
);
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MKLDNN"
;
#endif
}
bool
AnalysisPredictor
::
Run
(
const
std
::
vector
<
PaddleTensor
>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
output_data
,
int
batch_size
)
{
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
e90afec4
...
...
@@ -69,6 +69,8 @@ class AnalysisPredictor : public PaddlePredictor {
// Accessor: returns the pointer obtained from scope_.get().
framework::Scope *scope() { return this->scope_.get(); }
// Accessor: returns a reference to the object that inference_program_
// dereferences to. No null check is performed here.
framework::ProgramDesc &program() { return *this->inference_program_; }
// Passes `tid` to platform::set_cur_thread_id in PADDLE_WITH_MKLDNN builds;
// in other builds the definition only logs an error.
void
SetMkldnnThreadID
(
int
tid
);
protected:
bool
PrepareProgram
(
const
std
::
shared_ptr
<
framework
::
ProgramDesc
>
&
program
);
bool
PrepareScope
(
const
std
::
shared_ptr
<
framework
::
Scope
>
&
parent_scope
);
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
e90afec4
...
...
@@ -28,7 +28,6 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
DEFINE_bool
(
profile
,
false
,
"Turn on profiler for fluid"
);
DECLARE_int32
(
paddle_num_threads
);
namespace
paddle
{
namespace
{
...
...
@@ -76,7 +75,7 @@ bool NativePaddlePredictor::Init(
#endif
// no matter with or without MKLDNN
paddle
::
platform
::
SetNumThreads
(
FLAGS_paddle_num_threads
);
paddle
::
platform
::
SetNumThreads
(
config_
.
cpu_math_library_num_threads
()
);
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
...
...
paddle/fluid/inference/api/paddle_analysis_config.h
浏览文件 @
e90afec4
...
...
@@ -51,9 +51,9 @@ struct AnalysisConfig : public NativeConfig {
int
max_batch_size
=
1
);
bool
use_tensorrt
()
const
{
return
use_tensorrt_
;
}
void
EnableMKLDNN
();
// NOTE: this is just for internal development; please do not use it.
// NOT stable yet.
void
EnableMKLDNN
();
bool
use_mkldnn
()
const
{
return
use_mkldnn_
;
}
friend
class
::
paddle
::
AnalysisPredictor
;
...
...
paddle/fluid/inference/api/paddle_api.h
浏览文件 @
e90afec4
...
...
@@ -186,6 +186,19 @@ struct NativeConfig : public PaddlePredictor::Config {
// Specify the variable's name of each input if input tensors don't follow the
// `feeds` and `fetches` of the phase `save_inference_model`.
bool
specify_input_name
{
false
};
// Set and get the number of cpu math library threads.
void
SetCpuMathLibraryNumThreads
(
int
cpu_math_library_num_threads
)
{
cpu_math_library_num_threads_
=
cpu_math_library_num_threads
;
}
int
cpu_math_library_num_threads
()
const
{
return
cpu_math_library_num_threads_
;
}
protected:
// number of cpu math library (such as MKL, OpenBlas) threads for each
// instance.
int
cpu_math_library_num_threads_
{
1
};
};
// A factory to help create different predictors.
...
...
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
e90afec4
...
...
@@ -27,6 +27,7 @@ void SetConfig(AnalysisConfig *cfg) {
cfg
->
device
=
0
;
cfg
->
enable_ir_optim
=
true
;
cfg
->
specify_input_name
=
true
;
cfg
->
SetCpuMathLibraryNumThreads
(
FLAGS_paddle_num_threads
);
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
...
...
paddle/fluid/inference/tests/api/config_printer.h
浏览文件 @
e90afec4
...
...
@@ -53,6 +53,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
os
<<
GenSpaces
(
num_spaces
)
<<
"param_file: "
<<
config
.
param_file
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"specify_input_name: "
<<
config
.
specify_input_name
<<
"
\n
"
;
os
<<
GenSpaces
(
num_spaces
)
<<
"cpu_num_threads: "
<<
config
.
cpu_math_library_num_threads
()
<<
"
\n
"
;
num_spaces
--
;
os
<<
GenSpaces
(
num_spaces
)
<<
"}
\n
"
;
return
os
;
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
e90afec4
...
...
@@ -42,6 +42,7 @@ DEFINE_bool(use_analysis, true,
"Running the inference program in analysis mode."
);
DECLARE_bool
(
profile
);
DECLARE_int32
(
paddle_num_threads
);
namespace
paddle
{
namespace
inference
{
...
...
@@ -206,22 +207,23 @@ void TestMultiThreadPrediction(
int
batch_size
=
FLAGS_batch_size
;
int
num_times
=
FLAGS_repeat
;
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
predictors
.
emplace_back
(
CreateTestPredictor
(
config
,
use_analysis
));
for
(
int
tid
=
1
;
tid
<
num_threads
;
++
tid
)
{
predictors
.
emplace_back
(
predictors
.
front
()
->
Clone
());
}
auto
main_predictor
=
CreateTestPredictor
(
config
,
use_analysis
);
size_t
total_time
{
0
};
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
#ifdef PADDLE_WITH_MKLDNN
platform
::
set_cur_thread_id
(
static_cast
<
int
>
(
tid
)
+
1
);
#endif
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
std
::
vector
<
PaddleTensor
>
outputs_tid
;
auto
&
predictor
=
predictors
[
tid
];
// To ensure the threads are bound correctly,
// please clone inside the thread pool.
auto
predictor
=
main_predictor
->
Clone
();
#ifdef PADDLE_WITH_MKLDNN
if
(
use_analysis
)
{
static_cast
<
AnalysisPredictor
*>
(
predictor
.
get
())
->
SetMkldnnThreadID
(
static_cast
<
int
>
(
tid
)
+
1
);
}
#endif
// warmup run
LOG
(
INFO
)
<<
"Running thread "
<<
tid
<<
", warm up run..."
;
...
...
paddle/fluid/operators/math/fc_compute.h
浏览文件 @
e90afec4
...
...
@@ -17,8 +17,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
DECLARE_int32
(
paddle_num_threads
);
namespace
paddle
{
namespace
operators
{
namespace
math
{
...
...
@@ -43,7 +41,7 @@ inline void FCCompute(const BlasT<DeviceContext, T>& blas, const int M,
.
template
Get
<
jitkernel
::
VAddKernel
<
T
>
>
(
N
);
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for
if (FLAGS_paddle_num_threads > 1)
#pragma omp parallel for
#endif
for
(
int
i
=
0
;
i
<
M
;
i
++
)
{
T
*
dst
=
Y
+
i
*
N
;
...
...
paddle/fluid/platform/cpu_helper.cc
浏览文件 @
e90afec4
...
...
@@ -41,7 +41,7 @@ void SetNumThreads(int num_threads) {
#elif defined(PADDLE_WITH_MKLML)
int
real_num_threads
=
num_threads
>
1
?
num_threads
:
1
;
platform
::
dynload
::
MKL_Set_Num_Threads
(
real_num_threads
);
omp_set_num_threads
(
num_threads
);
omp_set_num_threads
(
real_
num_threads
);
#else
PADDLE_ENFORCE
(
false
,
"To be implemented."
);
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录