PaddlePaddle / Paddle

Commit 312fe0ec
Authored Jan 16, 2019 by nhzlx

    add trt int8 calibration support

    fix comments test=develop

Parent c1264e99
Showing 15 changed files with 158 additions and 98 deletions.
paddle/fluid/inference/analysis/argument.h                            +2   -1
paddle/fluid/inference/analysis/helper.cc                             +8   -0
paddle/fluid/inference/analysis/helper.h                              +6   -2
paddle/fluid/inference/analysis/ir_pass_manager.cc                    +7   -3
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc   +25  -12
paddle/fluid/inference/api/analysis_config.cc                         +6   -6
paddle/fluid/inference/api/analysis_predictor.cc                      +28  -21
paddle/fluid/inference/api/analysis_predictor.h                       +13  -0
paddle/fluid/inference/api/paddle_analysis_config.h                   +6   -2
paddle/fluid/inference/tensorrt/engine.cc                             +1   -1
paddle/fluid/inference/tensorrt/engine.h                              +3   -3
paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc                +17  -14
paddle/fluid/inference/tensorrt/trt_int8_calibrator.h                 +14  -14
paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc                 +1   -2
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h                  +21  -17
paddle/fluid/inference/analysis/argument.h

@@ -28,6 +28,7 @@
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/platform/variant.h"

 namespace paddle {
@@ -128,7 +129,7 @@ struct Argument {
   DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);
-  DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode,
-                      std::string);
+  DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode,
+                      contrib::AnalysisConfig::Precision);

   // The program transformed by IR analysis phase.
   DECL_ARGUMENT_UNIQUE_FIELD(ir_analyzed_program, IrAnalyzedProgram,
paddle/fluid/inference/analysis/helper.cc

@@ -36,6 +36,14 @@ void SetAttr<int>(framework::proto::OpDesc *op, const std::string &name,
   attr->set_i(data);
 }
 template <>
+void SetAttr<bool>(framework::proto::OpDesc *op, const std::string &name,
+                   const bool &data) {
+  auto *attr = op->add_attrs();
+  attr->set_name(name);
+  attr->set_type(paddle::framework::proto::AttrType::BOOLEAN);
+  attr->set_b(data);
+}
+template <>
 void SetAttr<int64_t>(framework::proto::OpDesc *op, const std::string &name,
                       const int64_t &data) {
   auto *attr = op->add_attrs();
paddle/fluid/inference/analysis/helper.h

@@ -156,7 +156,7 @@ static bool PathExists(const std::string &path) {
   return false;
 }

-static std::string SplitPath(const std::string path) {
+static std::string GetDirRoot(const std::string path) {
   char sep = '/';

 #ifdef _WIN32
@@ -167,10 +167,14 @@ static std::string SplitPath(const std::string path) {
   if (i != std::string::npos) {
     return (path.substr(0, i));
   }
   return path;
 }

+static std::string GetTrtCalibPath(const std::string &model_root,
+                                   const std::string &engine_key) {
+  return model_root + "/trt_calib_" + engine_key;
+}
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
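The two helpers above derive the model directory from a file path and compose the on-disk location of a calibration table from that directory and an engine key. Below is a standalone sketch of their behavior; the main() driver and sample paths are illustrative, the elided body of GetDirRoot is assumed to locate the last separator with std::string::rfind, and the _WIN32 branch is omitted:

// Sketch of the path helpers above; driver and paths are illustrative only.
#include <iostream>
#include <string>

static std::string GetDirRoot(const std::string path) {
  char sep = '/';
  auto i = path.rfind(sep);  // assumption: the elided lookup uses rfind
  if (i != std::string::npos) {
    return path.substr(0, i);
  }
  return path;
}

static std::string GetTrtCalibPath(const std::string &model_root,
                                   const std::string &engine_key) {
  return model_root + "/trt_calib_" + engine_key;
}

int main() {
  std::string dir = GetDirRoot("/models/resnet50/__model__");  // "/models/resnet50"
  std::cout << GetTrtCalibPath(dir, "1234567890") << "\n";
  // prints: /models/resnet50/trt_calib_1234567890
}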
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -71,13 +71,17 @@ void IRPassManager::CreatePasses(Argument *argument,
          "program",
          new framework::ProgramDesc *(
              const_cast<framework::ProgramDesc *>(&argument->main_program())));
-      pass->Set("precision_mode",
-                new std::string(
-                    argument->tensorrt_precision_mode()));
+      bool enable_int8 = false;
+      if (argument->tensorrt_precision_mode() ==
+          contrib::AnalysisConfig::Precision::kInt8)
+        enable_int8 = true;
+
+      pass->Set("enable_int8", new bool(enable_int8));
+      pass->Set("model_dir", new std::string(argument->model_path()));
     }

     // graph_ = pass->Apply(std::move(graph_));
     pre_pass = pass_name;

     passes_.emplace_back(std::move(pass));
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

@@ -13,6 +13,7 @@
 // limitations under the License.

 #include <algorithm>
+#include <set>
 #include <string>
 #include <vector>
@@ -93,8 +94,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
   }

   // collect inputs
-  std::unordered_set<std::string> input_names;
-  std::unordered_set<std::string> input_names_with_id;
+  std::set<std::string> input_names;
+  std::set<std::string> input_names_with_id;
   for (auto *x : node->inputs) {
     input_names.insert(x->Name());
     input_names_with_id.insert(x->Name() + std::to_string(x->id()));
@@ -102,8 +103,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
   op_desc->SetInput(
       "Xs", std::vector<std::string>(input_names.begin(), input_names.end()));

-  std::unordered_set<std::string> output_names;
-  std::unordered_set<std::string> output_names_with_id;
+  std::set<std::string> output_names;
+  std::set<std::string> output_names_with_id;
   for (auto *x : node->outputs) {
     output_names.insert(x->Name());
     output_names_with_id.insert(x->Name() + std::to_string(x->id()));
@@ -203,28 +204,40 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
                  "the block has no var-desc");
   PADDLE_ENFORCE(!output_mapping.empty());
   op_desc->SetBlockAttr("sub_block", new_block);
   SetAttr(op_desc->Proto(), "subgraph",
           block_desc.Proto()->SerializeAsString());
   // Set attrs
   SetAttr(op_desc->Proto(), "max_batch_size", Get<int>("max_batch_size"));
   SetAttr(op_desc->Proto(), "workspace_size", Get<int>("workspace_size"));
   SetAttr(op_desc->Proto(), "parameters", ExtractParameters(graph->Nodes()));
   SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);

-  std::string engine_key = std::to_string(
-      std::hash<std::string>()(block_desc.Proto()->SerializeAsString()));
-  std::string precision_mode = Get<std::string>("precision_mode");
+  auto enable_int8 = Get<bool>("enable_int8");
   SetAttr(op_desc->Proto(), "calibration_data", std::string(""));
-  std::string trt_calib_file =
-      Get<std::string>("model_dir") + "/trt_calib_" + engine_key;
-  if (precision_mode == "INT8" && FileExists(trt_calib_file)) {
+
+  // we use the subgraph's inputs and outputs to generate the engine key.
+  std::string engine_hash_key = "";
+  for (auto name : input_names_with_id) {
+    engine_hash_key += name;
+  }
+  for (auto name : output_names_with_id) {
+    engine_hash_key += name;
+  }
+  auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
+
+  auto trt_calib_file =
+      GetTrtCalibPath(Get<std::string>("model_dir"), engine_key);
+  VLOG(3) << "engine key: " << engine_key;
+  if (enable_int8 && FileExists(trt_calib_file)) {
+    VLOG(3) << "Calibration table file: " << trt_calib_file
+            << " is found here";
     std::ifstream infile(trt_calib_file, std::ios::in);
     std::stringstream buffer;
     buffer << infile.rdbuf();
     std::string calibration_data(buffer.str());
     SetAttr(op_desc->Proto(), "calibration_data", calibration_data);
   }
-  SetAttr(op_desc->Proto(), "precision_mode", precision_mode);
+  SetAttr(op_desc->Proto(), "enable_int8", enable_int8);
   SetAttr(op_desc->Proto(), "engine_key", engine_key);
 }
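With this change the engine key is derived from the subgraph's input and output names rather than from the serialized block, and switching input_names_with_id/output_names_with_id from std::unordered_set to std::set makes the concatenation order, and therefore the hash, deterministic across runs. A self-contained sketch of the keying scheme, with illustrative names:

// Sketch of the engine-key scheme above: concatenate the ordered
// input/output names and hash the result. Names are illustrative.
#include <functional>
#include <iostream>
#include <set>
#include <string>

int main() {
  std::set<std::string> input_names_with_id{"x0", "y1"};  // illustrative
  std::set<std::string> output_names_with_id{"out2"};     // illustrative
  std::string engine_hash_key;
  for (const auto &name : input_names_with_id) engine_hash_key += name;
  for (const auto &name : output_names_with_id) engine_hash_key += name;
  auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
  std::cout << "engine key: " << engine_key << "\n";
}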
paddle/fluid/inference/api/analysis_config.cc

@@ -122,13 +122,13 @@ void contrib::AnalysisConfig::EnableMKLDNN() {
 #endif
 }

-void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size,
-                                                   int max_batch_size,
-                                                   int min_subgraph_size,
-                                                   std::string precision_mode) {
+void contrib::AnalysisConfig::EnableTensorRtEngine(
+    int workspace_size, int max_batch_size, int min_subgraph_size,
+    contrib::AnalysisConfig::Precision precision_mode) {
   use_tensorrt_ = true;
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
   tensorrt_min_subgraph_size_ = min_subgraph_size;
   tensorrt_precision_mode_ = precision_mode;

   Update();
 }
@@ -149,7 +149,7 @@ void contrib::AnalysisConfig::Update() {
           << "TensorRT engine is not available when EnableGpu() not activated.";
     } else {
       // Append after the infer_clean pass.
-      pass_builder()->InsertPass(1, "tensorrt_subgraph_pass");
+      pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
     }
   }
@@ -180,7 +180,7 @@ std::string contrib::AnalysisConfig::SerializeInfoCache() {
   ss << use_tensorrt_;
   ss << tensorrt_workspace_size_;
   ss << tensorrt_max_batchsize_;
-  ss << tensorrt_precision_mode_;
+  ss << tensorrt_min_subgraph_size_;

   ss << use_mkldnn_;
   ss << enable_ir_optim_;
paddle/fluid/inference/api/analysis_predictor.cc

@@ -30,9 +30,9 @@
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #if PADDLE_WITH_TENSORRT
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
 #endif
 #include "paddle/fluid/inference/analysis/helper.h"
-#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
 #include "paddle/fluid/inference/utils/singleton.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/cpu_helper.h"
@@ -46,8 +46,8 @@ namespace paddle {
 using contrib::AnalysisConfig;
 using inference::Singleton;
 using inference::tensorrt::TRTInt8Calibrator;
-using inference::tensorrt::TRTCalibratorRes;
-using inference::tensorrt::TRTCalibratorResManager;
+using inference::tensorrt::TRTCalibratorEngine;
+using inference::tensorrt::TRTCalibratorEngineManager;

 namespace {
 bool IsPersistable(const framework::VarDesc *var) {
@@ -334,7 +334,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
                        !config_.params_file().empty(),
                    "Either model_dir or (param_file, prog_file) should be set.");
     PADDLE_ENFORCE(!config_.prog_file().empty());
-    std::string dir = inference::analysis::SplitPath(config_.prog_file());
+    std::string dir = inference::analysis::GetDirRoot(config_.prog_file());

     argument_.SetModelPath(dir);
     argument_.SetModelProgramPath(config_.prog_file());
@@ -562,6 +562,7 @@ bool AnalysisPredictor::LoadParameters() {
   return true;
 }

+#if PADDLE_WITH_TENSORRT
 bool AnalysisPredictor::SaveTrtCalibToDisk() {
   PADDLE_ENFORCE(config_.tensorrt_engine_enabled(),
                  "This func can be invoked only in trt mode");
@@ -570,44 +571,50 @@ bool AnalysisPredictor::SaveTrtCalibToDisk() {
     if (op_desc->Type() == "tensorrt_engine") {
       std::string engine_name =
           boost::get<std::string>(op_desc->GetAttr("engine_key"));
-      if (!Singleton<TRTCalibratorResManager>::Global().Has(engine_name)) {
+      if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_name)) {
         LOG(ERROR) << "You should run the predictor(with trt) on the real data "
                       "to generate calibration info";
         return false;
       }
-      TRTCalibratorRes *calib_res =
-          Singleton<TRTCalibratorResManager>::Global().Get(engine_name);
+      TRTCalibratorEngine *calib_engine =
+          Singleton<TRTCalibratorEngineManager>::Global().Get(engine_name);
       LOG(INFO) << "Wait for calib threads done.";
-      calib_res->calib_->waitAndSetDone();
+      calib_engine->calib_->waitAndSetDone();
       LOG(INFO) << "Finish wait.";
-      calib_res->thr_->join();
-      std::string calibration_data =
-          calib_res->calib_->getCalibrationTableAsString();
+      calib_engine->thr_->join();
+      std::string calibration_table_data =
+          calib_engine->calib_->getCalibrationTableAsString();

-      if (calibration_data.size() == 0) {
+      if (calibration_table_data.empty()) {
         LOG(ERROR) << "the calibration table is empty.";
         return false;
       }

-      std::string calibration_data_path =
-          argument_.model_path() + "/trt_calib_" + engine_name;
-      std::ofstream ofile(calibration_data_path, std::ios::out);
-      LOG(INFO) << "Write Paddle-TRT INT8 calibration data to file "
-                << calibration_data_path;
-      ofile << calibration_data;
+      std::string calibration_table_data_path =
+          inference::analysis::GetTrtCalibPath(argument_.model_path(),
+                                               engine_name);
+      std::ofstream ofile(calibration_table_data_path, std::ios::out);
+      LOG(INFO) << "Write Paddle-TRT INT8 calibration table data to file "
+                << calibration_table_data_path;
+      ofile << calibration_table_data;
       ofile.close();
     }
   }
   // Free all calibrator resources.
-  Singleton<TRTCalibratorResManager>::Global().DeleteALL();
+  Singleton<TRTCalibratorEngineManager>::Global().DeleteALL();
   return true;
 }
+#endif

 AnalysisPredictor::~AnalysisPredictor() {
 #if PADDLE_WITH_TENSORRT
   if (config_.tensorrt_engine_enabled() &&
-      config_.tensorrt_precision_mode_ == "INT8" &&
-      Singleton<TRTCalibratorResManager>::Global().Has()) {
+      config_.tensorrt_precision_mode_ == AnalysisConfig::Precision::kInt8 &&
+      Singleton<TRTCalibratorEngineManager>::Global().Has()) {
     SaveTrtCalibToDisk();
   }
 #endif
   if (FLAGS_profile) {
     platform::DisableProfiler(platform::EventSortingKey::kTotal,
                               "./profile.log");
paddle/fluid/inference/api/analysis_predictor.h

@@ -91,7 +91,20 @@ class AnalysisPredictor : public PaddlePredictor {
   void GetFetchOne(const framework::LoDTensor &fetchs,
                    PaddleTensor *output_data);

+#if PADDLE_WITH_TENSORRT
+  // When we use the Paddle-TRT INT8 engine, we need to generate the
+  // calibration table data first. The calibration table contains the range
+  // for each op's input and output; the whole process can be divided into
+  // several steps:
+  //
+  // 1. Build a 32-bit engine, run it on the calibration set, and record a
+  //    histogram for each tensor of the distribution of activation values.
+  // 2. Build a calibration table from the histograms.
+  //
+  // After step 2, we need to store the calibration table on disk.
+  bool SaveTrtCalibToDisk();
+#endif
+
   ~AnalysisPredictor();
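The comment above describes a two-phase workflow: run the predictor once on calibration data to build the table, then run again with the table on disk so the subgraph pass can embed it. A hedged sketch of the phase-1 driver from the caller's side; the model path, the EnableUseGpu call, and the batch construction are assumptions, not part of this commit:

// Illustrative phase-1 driver for the workflow described above.
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void GenerateTrtInt8CalibTable(
    const std::vector<std::vector<paddle::PaddleTensor>> &calib_set) {
  paddle::contrib::AnalysisConfig config("/path/to/model_dir");  // placeholder
  config.EnableUseGpu(100 /*MB*/, 0);  // assumed GPU-enabling call
  config.EnableTensorRtEngine(
      1 << 20, 1, 3, paddle::contrib::AnalysisConfig::Precision::kInt8);
  auto predictor = paddle::CreatePaddlePredictor(config);
  std::vector<paddle::PaddleTensor> outputs;
  for (const auto &inputs : calib_set) {
    predictor->Run(inputs, &outputs);  // feeds TRTInt8Calibrator::setBatch
  }
  // Destroying the predictor triggers SaveTrtCalibToDisk(), which writes
  // <model_dir>/trt_calib_<engine_key>; rerun with the same config to use it.
}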
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -42,6 +42,10 @@ struct AnalysisConfig {
   explicit AnalysisConfig(const std::string &model_dir);
   explicit AnalysisConfig(const std::string &prog_file,
                           const std::string &params_file);
+  enum class Precision {
+    kFloat32 = 0,
+    kInt8,
+  };

   /** Set model with a directory.
    */
@@ -136,7 +140,7 @@ struct AnalysisConfig {
    */
   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1, int min_subgraph_size = 3,
-                            std::string precision = "FP32");
+                            Precision precision = Precision::kFloat32);

   /** A boolean state telling whether the TensorRT engine is used.
    */
   bool tensorrt_engine_enabled() const { return use_tensorrt_; }
@@ -232,7 +236,7 @@ struct AnalysisConfig {
   // We set this variable to control the minimum number of nodes in the
   // subgraph, 3 as default value.
   int tensorrt_min_subgraph_size_{3};
-  std::string tensorrt_precision_mode_;
+  Precision tensorrt_precision_mode_;
   bool use_mkldnn_{false};
   std::unordered_set<std::string> mkldnn_enabled_op_types_;
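Replacing the string-typed precision with the enum makes the INT8 selection type-checked at the call site. A minimal sketch, assuming only the declarations above; model setup is left to the caller:

// Select the INT8 path through the new enum; arguments mirror the declared
// defaults except the precision.
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

void EnableTrtInt8(paddle::contrib::AnalysisConfig *config) {
  config->EnableTensorRtEngine(
      1 << 20, /*max_batch_size=*/1, /*min_subgraph_size=*/3,
      paddle::contrib::AnalysisConfig::Precision::kInt8);
}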
paddle/fluid/inference/tensorrt/engine.cc

@@ -70,7 +70,7 @@ void TensorRTEngine::FreezeNetwork() {
   // build engine.
   infer_builder_->setMaxBatchSize(max_batch_);
   infer_builder_->setMaxWorkspaceSize(max_workspace_);
-  if (precision_mode_ == "INT8") {
+  if (enable_int8_) {
     infer_builder_->setInt8Mode(true);
     PADDLE_ENFORCE(
         calibrator_ != nullptr,
paddle/fluid/inference/tensorrt/engine.h

@@ -58,14 +58,14 @@ class TensorRTEngine : public EngineBase {
   TensorRTEngine(int max_batch, int max_workspace,
                  cudaStream_t *stream = nullptr, int device = 0,
-                 std::string precision_mode = "FP32",
+                 bool enable_int8 = false,
                  TRTInt8Calibrator *calibrator = nullptr,
                  nvinfer1::ILogger &logger = NaiveLogger::Global())
       : max_batch_(max_batch),
         max_workspace_(max_workspace),
         stream_(stream ? stream : &default_stream_),
         device_(device),
-        precision_mode_(precision_mode),
+        enable_int8_(enable_int8),
         calibrator_(calibrator),
         logger_(logger) {
     freshDeviceId();
@@ -168,7 +168,7 @@ class TensorRTEngine : public EngineBase {
   // The specific GPU id that the TensorRTEngine bounded to.
   int device_;
-  std::string precision_mode_;
+  bool enable_int8_;
   TRTInt8Calibrator *calibrator_;
   // batch size of the current data, will be updated on each execution.
   int batch_size_{-1};
paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc

@@ -25,11 +25,7 @@ int TRTInt8Calibrator::getBatchSize() const { return batch_size_; }
 TRTInt8Calibrator::TRTInt8Calibrator(
     const std::unordered_map<std::string, size_t>& buffers, int batch_size,
     std::string engine_name, const platform::Place place)
-    : batch_size_(batch_size),
-      calib_running_(true),
-      data_is_set_(false),
-      done_(false),
-      engine_name_(engine_name) {
+    : batch_size_(batch_size), engine_name_(engine_name) {
   int i = 0;
   VLOG(4) << "Init a new calibrator: " << engine_name_;
   for (const auto it : buffers) {
@@ -62,28 +58,32 @@ void TRTInt8Calibrator::waitAndSetDone() {
   }
 }

+// There might be more than one input for the trt subgraph,
+// so we use a map to store the input information.
 bool TRTInt8Calibrator::setBatch(
     const std::unordered_map<std::string, void*>& data) {
   VLOG(3) << "set batch: " << engine_name_;
   std::unique_lock<std::mutex> lk(mut_);
+  // There is a producer and a consumer. The producer sets the batch data and
+  // the consumer gets the batch data. The size of the data pool is one.
+  // So the producer has to wait for the consumer to finish processing before
+  // it can set the data.
   while ((calib_running_ || data_is_set_) && (!done_)) cond_.wait(lk);
+  // done_ is set to true by waitAndSetDone once all calibration data
+  // has been processed.
   if (done_) return false;

-  for (const auto it : data) {
+  // Sets the batch.
+  for (const auto& it : data) {
     auto dataptr = data_buffers_.find(it.first);
     if (dataptr == data_buffers_.end()) {
       LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first
                  << "' does not match with the buffer names";
     }
     const auto& d = dataptr->second;
-    auto status =
-        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice);
-    if (status != cudaSuccess) {
-      LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first
-                 << "' failed with " << status;
-    }
+    PADDLE_ENFORCE(
+        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice),
+        "Fail to cudaMemcpy %s for %s", engine_name_, it.first);
   }

   data_is_set_ = true;
@@ -95,9 +95,12 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names,
                                  int num_bindings) {
   VLOG(4) << "get batch: " << engine_name_;
   std::unique_lock<std::mutex> lk(mut_);
+  // The consumer has just finished processing a batch,
+  // so the producer can set the data again.
   calib_running_ = false;
   cond_.notify_all();

+  // As long as there is data in the pool, the consumer can get it.
   while (!data_is_set_ && !done_) cond_.wait(lk);
   if (done_) return false;
@@ -123,7 +126,7 @@ void TRTInt8Calibrator::setDone() {
   cond_.notify_all();
 }

-const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) {
+const void* TRTInt8Calibrator::readCalibrationCache(size_t& length) {
   if (calibration_table_.empty()) return nullptr;
   length = calibration_table_.size();
   return calibration_table_.data();
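The new comments in setBatch/getBatch describe a one-slot producer/consumer handshake: calib_running_ marks the consumer busy, data_is_set_ marks an unconsumed batch, and done_ drains the pipeline. Below is a self-contained model of the same pattern, independent of TensorRT and CUDA; the names mirror the members above, and a plain int stands in for the device buffers:

// Minimal model of the setBatch/getBatch handshake: a one-slot pool
// guarded by a mutex and condition variable.
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

std::mutex mut;
std::condition_variable cond;
bool calib_running = true;  // consumer busy with the current slot
bool data_is_set = false;   // slot holds unconsumed data
bool done = false;          // no more batches will arrive
int slot = 0;

bool SetBatch(int v) {  // producer side (mirrors setBatch)
  std::unique_lock<std::mutex> lk(mut);
  while ((calib_running || data_is_set) && !done) cond.wait(lk);
  if (done) return false;
  slot = v;
  data_is_set = true;
  cond.notify_all();
  return true;
}

bool GetBatch(int *v) {  // consumer side (mirrors getBatch)
  std::unique_lock<std::mutex> lk(mut);
  calib_running = false;  // previous slot fully processed
  cond.notify_all();
  while (!data_is_set && !done) cond.wait(lk);
  if (done) return false;
  *v = slot;
  data_is_set = false;
  calib_running = true;  // start processing the new slot
  cond.notify_all();
  return true;
}

void WaitAndSetDone() {  // mirrors waitAndSetDone
  std::unique_lock<std::mutex> lk(mut);
  while ((calib_running || data_is_set) && !done) cond.wait(lk);
  done = true;
  cond.notify_all();
}

int main() {
  std::thread consumer([] {
    int v;
    while (GetBatch(&v)) std::cout << "batch " << v << "\n";
  });
  for (int i = 0; i < 3; ++i) SetBatch(i);
  WaitAndSetDone();
  consumer.join();
}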
paddle/fluid/inference/tensorrt/trt_int8_calibrator.h

@@ -21,8 +21,8 @@
 #include <utility>
 #include <vector>

-#include "NvInfer.h"
-#include "cuda_runtime_api.h"
+#include <NvInfer.h>
+#include <cuda_runtime_api.h>
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/platform/place.h"
@@ -60,9 +60,9 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
  private:
   const int batch_size_;
-  bool calib_running_;
-  bool data_is_set_;
-  bool done_;
+
+  bool calib_running_{true};
+  bool data_is_set_{false};
+  bool done_{false};

   std::mutex mut_;
   std::condition_variable cond_;
@@ -74,9 +74,9 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
   std::string calibration_table_;
 };

-class TRTCalibratorRes {
+class TRTCalibratorEngine {
  public:
-  TRTCalibratorRes() {}
+  TRTCalibratorEngine() {}
   std::unique_ptr<TRTInt8Calibrator> calib_;
   std::unique_ptr<std::thread> thr_;
   std::unique_ptr<TensorRTEngine> engine_;
@@ -84,7 +84,7 @@ class TRTCalibratorEngine {
 /*
  * Manager to control the TensorRT Int8 calibration creation and deletion.
  */
-class TRTCalibratorResManager {
+class TRTCalibratorEngineManager {
  public:
   bool Has() const { return res_.size() > 0; }
   bool Has(const std::string& name) const {
@@ -93,22 +93,22 @@ class TRTCalibratorEngineManager {
   }

   // Get Int8Calibrator via name
-  TRTCalibratorRes* Get(const std::string& name) const {
+  TRTCalibratorEngine* Get(const std::string& name) const {
     return res_.at(name).get();
   }

   // Look up or create a calibrator.
-  TRTCalibratorRes* LookupOrCreate(const std::string& engine_name) {
+  TRTCalibratorEngine* LookupOrCreate(const std::string& engine_name) {
     if (res_.count(engine_name) == 0) {
-      auto* p = new TRTCalibratorRes();
+      auto* p = new TRTCalibratorEngine;
       res_[engine_name].reset(p);
     }
     return res_.at(engine_name).get();
   }

   // Create an Int8Calibrator
-  TRTCalibratorRes* Create(const std::string& engine_name) {
-    auto* p = new TRTCalibratorRes();
+  TRTCalibratorEngine* Create(const std::string& engine_name) {
+    auto* p = new TRTCalibratorEngine;
     res_[engine_name].reset(p);
     return p;
   }
@@ -120,7 +120,7 @@ class TRTCalibratorEngineManager {
   }

  private:
-  std::unordered_map<std::string, std::unique_ptr<TRTCalibratorRes>> res_;
+  std::unordered_map<std::string, std::unique_ptr<TRTCalibratorEngine>> res_;
 };

 }  // namespace tensorrt
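TRTCalibratorEngineManager is used as a process-wide singleton keyed by engine name: the op creates one entry per engine key during calibration, and the predictor frees everything after the tables are saved. A sketch of that lifecycle using only calls that appear in this commit; the engine_key value is illustrative:

// Sketch of the manager discipline used by TensorRTEngineOp and
// AnalysisPredictor; engine_key is illustrative.
#include <string>
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
#include "paddle/fluid/inference/utils/singleton.h"

using paddle::inference::Singleton;
using paddle::inference::tensorrt::TRTCalibratorEngine;
using paddle::inference::tensorrt::TRTCalibratorEngineManager;
using paddle::inference::tensorrt::TRTInt8Calibrator;

void CalibratorLifecycle(const std::string &engine_key) {
  auto &manager = Singleton<TRTCalibratorEngineManager>::Global();
  if (!manager.Has(engine_key)) {
    // Op side: create the per-engine entry; the op then fills in
    // calib_, thr_, and engine_.
    TRTCalibratorEngine *calib_res = manager.Create(engine_key);
    (void)calib_res;
  }
  // Feed batches through the calibrator during the calibration run.
  TRTInt8Calibrator *calib = manager.Get(engine_key)->calib_.get();
  (void)calib;
  // Predictor side: after the tables are written to disk, free everything.
  manager.DeleteALL();
}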
paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc

@@ -36,8 +36,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<int>("max_batch_size", "the maximum batch size.");
     AddAttr<int>("workspace_size", "the workspace size.");
     AddAttr<framework::BlockDesc *>("sub_block", "the trt block");
-    AddAttr<std::string>("precision_mode",
-                         "the precision mode: 'FP32', 'INT8' ");
+    AddAttr<bool>("enable_int8", "whether to switch to int8 mode");
     AddComment("TensorRT engine operator.");
   }
 };
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h

@@ -65,8 +65,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
 using inference::Singleton;
 using inference::tensorrt::TensorRTEngine;
 using inference::tensorrt::TRTInt8Calibrator;
-using inference::tensorrt::TRTCalibratorRes;
-using inference::tensorrt::TRTCalibratorResManager;
+using inference::tensorrt::TRTCalibratorEngine;
+using inference::tensorrt::TRTCalibratorEngineManager;

 class TensorRTEngineOp : public framework::OperatorBase {
  private:
@@ -76,7 +76,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   int max_batch_size_;
   int workspace_size_;
   std::unique_ptr<TRTInt8Calibrator> calibrator_;
-  std::string precision_mode_;
+  bool enable_int8_;
   std::string calibration_data_;
   std::string engine_key_;
   bool calibration_mode_;
@@ -90,7 +90,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     input_names_ = Inputs("Xs");
     max_batch_size_ = Attr<int>("max_batch_size");
     workspace_size_ = Attr<int>("workspace_size");
-    precision_mode_ = Attr<std::string>("precision_mode");
+    enable_int8_ = Attr<bool>("enable_int8");
     calibration_data_ = Attr<std::string>("calibration_data");
     engine_key_ = Attr<std::string>("engine_key");
@@ -98,17 +98,19 @@ class TensorRTEngineOp : public framework::OperatorBase {
     for (const auto &param : params) {
       param_names_.insert(param);
     }
-    calibration_mode_ =
-        (precision_mode_ == "INT8" && calibration_data_.size() == 0);
+    // calibration_mode is true when we need to
+    // generate the calibration table data.
+    calibration_mode_ = (enable_int8_ && calibration_data_.size() == 0);

-    if (precision_mode_ == "INT8" && calibration_data_.size()) {
+    VLOG(4) << "calibration_mode: " << calibration_mode_;
+    if (enable_int8_ && calibration_data_.size()) {
       calibrator_.reset(new TRTInt8Calibrator(calibration_data_));
     }
   }

  protected:
-  void RunNative(const framework::Scope &scope,
-                 const platform::Place &dev_place) const {
+  void RunNativeImpl(const framework::Scope &scope,
+                     const platform::Place &dev_place) const {
     framework::Executor executor(dev_place);
     auto *block = Attr<framework::BlockDesc *>("sub_block");
     auto *program = block->Program();
@@ -128,12 +130,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
   void RunCalibration(const framework::Scope &scope,
                       const platform::Place &dev_place) const {
-    // Create calibrator here.
+    // This process builds a 32-bit trt engine, runs it on the calibration
+    // set, and records a histogram for each tensor of the distribution of
+    // activation values.
     LOG(INFO) << "Running calibration trt int8 ...";
     int runtime_batch = 1;
-    if (!Singleton<TRTCalibratorResManager>::Global().Has(engine_key_)) {
-      TRTCalibratorRes *calib_res =
-          Singleton<TRTCalibratorResManager>::Global().Create(engine_key_);
+    if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_key_)) {
+      TRTCalibratorEngine *calib_res =
+          Singleton<TRTCalibratorEngineManager>::Global().Create(engine_key_);
       std::unordered_map<std::string, size_t> calib_buffers;
       for (auto &x : input_names_) {
         if (param_names_.count(x)) continue;
@@ -148,7 +152,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       calib_res->thr_.reset(new std::thread([&]() {
         calib_res->engine_.reset(new TensorRTEngine(
             max_batch_size_, workspace_size_, nullptr,
-            boost::get<platform::CUDAPlace>(dev_place).device, precision_mode_,
+            boost::get<platform::CUDAPlace>(dev_place).device, enable_int8_,
             calib_res->calib_.get()));
         VLOG(3) << "start the calib trt engine thread";
         Prepare(scope, dev_place, calib_res->engine_.get());
@@ -156,7 +160,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     }

     TRTInt8Calibrator *temp_calibrator =
-        Singleton<TRTCalibratorResManager>::Global()
+        Singleton<TRTCalibratorEngineManager>::Global()
             .Get(engine_key_)
             ->calib_.get();
     std::unordered_map<std::string, void *> calib_data;
@@ -168,7 +172,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       calib_data.emplace(x, t.data<void>());
     }
     temp_calibrator->setBatch(calib_data);
-    RunNative(scope, dev_place);
+    RunNativeImpl(scope, dev_place);
   }

   void RunTrt(const framework::Scope &scope,
@@ -178,7 +182,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
       trt_engine_.reset(new TensorRTEngine(
           max_batch_size_, workspace_size_, nullptr,
-          boost::get<platform::CUDAPlace>(dev_place).device, precision_mode_,
+          boost::get<platform::CUDAPlace>(dev_place).device, enable_int8_,
           calibrator_.get()));
       Prepare(scope, dev_place, trt_engine_.get());
     }
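Taken together, enable_int8_ and calibration_data_ put the op in one of three modes: calibration (INT8 requested but no table yet), INT8 inference (table embedded by the subgraph pass), or plain FP32. A standalone sketch of that decision, with plain types standing in for the op attributes:

// Sketch of how TensorRTEngineOp chooses its mode from the two attributes.
#include <iostream>
#include <string>

const char *ChooseMode(bool enable_int8, const std::string &calibration_data) {
  // calibration_mode_ = (enable_int8_ && calibration_data_.size() == 0)
  if (enable_int8 && calibration_data.empty())
    return "RunCalibration";                          // phase 1: build table
  if (enable_int8)
    return "RunTrt with TRTInt8Calibrator(table)";    // phase 2: INT8 engine
  return "RunTrt in FP32";
}

int main() {
  std::cout << ChooseMode(true, "") << "\n";       // no table yet
  std::cout << ChooseMode(true, "table") << "\n";  // table embedded by pass
  std::cout << ChooseMode(false, "") << "\n";
}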