Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
f2a778c9
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f2a778c9
编写于
10月 12, 2022
作者:
Z
zhoutianzi666
提交者:
GitHub
10月 12, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle-TRT]support shape tensor is the input of trt-subgraph (#46482)
上级
5303b66b
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
378 addition
and
55 deletion
+378
-55
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+9
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+14
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+84
-37
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+1
-0
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+29
-0
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+18
-0
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
+137
-0
paddle/fluid/inference/utils/io_utils.cc
paddle/fluid/inference/utils/io_utils.cc
+38
-3
paddle/fluid/inference/utils/io_utils.h
paddle/fluid/inference/utils/io_utils.h
+8
-9
paddle/fluid/inference/utils/io_utils_tester.cc
paddle/fluid/inference/utils/io_utils_tester.cc
+25
-5
paddle/fluid/inference/utils/shape_range_info.proto
paddle/fluid/inference/utils/shape_range_info.proto
+3
-0
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+12
-0
未找到文件。
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
f2a778c9
...
...
@@ -77,6 +77,15 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"optim_input_shape"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(
argument
->
optim_input_shape
()));
// For now, shape tensor values are not explicitly set by the user;
// they are collected through the CollectShapeRangeInfo API.
pass
->
Set
(
"max_shape_tensor"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
());
pass
->
Set
(
"min_shape_tensor"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
());
pass
->
Set
(
"optim_shape_tensor"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
());
// tuned trt dynamic_shape
pass
->
Set
(
"trt_tuned_dynamic_shape"
,
new
bool
(
argument
->
tensorrt_tuned_dynamic_shape
()));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
f2a778c9
...
...
@@ -317,6 +317,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
auto
opt_input_shape
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"optim_input_shape"
);
auto
min_shape_tensor
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"min_shape_tensor"
);
auto
max_shape_tensor
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"max_shape_tensor"
);
auto
opt_shape_tensor
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"optim_shape_tensor"
);
auto
allow_build_at_runtime
=
Get
<
bool
>
(
"trt_allow_build_at_runtime"
);
auto
shape_range_info_path
=
Get
<
std
::
string
>
(
"trt_shape_range_info_path"
);
auto
trt_tuned_dynamic_shape
=
Get
<
bool
>
(
"trt_tuned_dynamic_shape"
);
...
...
@@ -326,7 +333,10 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
inference
::
DeserializeShapeRangeInfo
(
shape_range_info_path
,
&
min_input_shape
,
&
max_input_shape
,
&
opt_input_shape
);
&
opt_input_shape
,
&
min_shape_tensor
,
&
max_shape_tensor
,
&
opt_shape_tensor
);
}
// The following procedure is used to rename all the intermediate
...
...
@@ -511,6 +521,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
min_input_shape
,
max_input_shape
,
opt_input_shape
,
min_shape_tensor
,
max_shape_tensor
,
opt_shape_tensor
,
disable_trt_plugin_fp16
,
static_cast
<
phi
::
DataType
>
(
Get
<
int
>
(
"model_precision"
)));
trt_engine
->
SetUseOSS
(
Get
<
bool
>
(
"use_varseqlen"
));
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
f2a778c9
...
...
@@ -1747,10 +1747,39 @@ void AnalysisPredictor::CollectShapeRangeInfo() {
if
(
!
var
->
IsType
<
phi
::
DenseTensor
>
())
{
continue
;
}
framework
::
DDim
dim
=
var
->
Get
<
phi
::
DenseTensor
>
().
dims
();
auto
tensor
=
var
->
Get
<
phi
::
DenseTensor
>
();
framework
::
DDim
dim
=
tensor
.
dims
();
std
::
vector
<
int32_t
>
shape
(
dim
.
size
());
for
(
size_t
i
=
0
;
i
<
shape
.
size
();
++
i
)
shape
[
i
]
=
dim
[
i
];
shape_info_
[
name
].
emplace_back
(
shape
);
// We need to collect the value range of shape tensors for Paddle-TRT's use.
// Note that this way of identifying shape tensors is based on the
// assumption that every shape tensor in the model has at most 7 elements.
// This simple heuristic may misidentify some tensors as shape tensors,
// but that does not matter.
auto
is_shape_tensor
=
tensor
.
numel
()
<=
7
&&
tensor
.
numel
()
>=
1
;
if
(
tensor
.
dtype
()
==
paddle
::
experimental
::
DataType
::
INT32
&&
is_shape_tensor
)
{
std
::
vector
<
int
>
int32_host
(
tensor
.
numel
());
if
(
tensor
.
place
()
==
platform
::
CPUPlace
())
{
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
int32_host
.
data
(),
platform
::
CPUPlace
(),
tensor
.
data
<
int
>
(),
tensor
.
numel
()
*
sizeof
(
int
));
}
else
if
(
tensor
.
place
()
==
platform
::
CUDAPlace
())
{
#if defined(PADDLE_WITH_CUDA)
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
int32_host
.
data
(),
platform
::
CUDAPlace
(),
tensor
.
data
<
int
>
(),
tensor
.
numel
()
*
sizeof
(
int
),
nullptr
);
#endif
}
shape_tensor_value_
[
name
].
emplace_back
(
int32_host
);
}
}
}
...
...
@@ -1758,43 +1787,61 @@ void AnalysisPredictor::StatisticShapeRangeInfo() {
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_shapes
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
max_shapes
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
opt_shapes
;
for
(
auto
it
:
shape_info_
)
{
auto
name
=
it
.
first
;
auto
shapes
=
it
.
second
;
std
::
vector
<
int32_t
>
min_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
max_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
opt_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
auto
ShapeMaxFreq
=
[](
const
std
::
map
<
int32_t
,
int32_t
>
&
m
)
->
int32_t
{
std
::
vector
<
std
::
pair
<
int32_t
,
int32_t
>>
counter
;
for
(
auto
&
it
:
m
)
counter
.
push_back
(
it
);
std
::
sort
(
counter
.
begin
(),
counter
.
end
(),
[](
std
::
pair
<
int32_t
,
int32_t
>
&
a
,
std
::
pair
<
int32_t
,
int32_t
>
&
b
)
{
return
a
.
second
>
b
.
second
;
});
return
counter
[
0
].
first
;
};
for
(
size_t
d
=
0
;
d
<
shapes
[
0
].
size
();
++
d
)
{
std
::
map
<
int32_t
,
int32_t
>
counter
;
for
(
size_t
i
=
0
;
i
<
shapes
.
size
();
++
i
)
{
counter
[
shapes
[
i
][
d
]]
+=
1
;
if
(
shapes
[
i
][
d
]
<
min_shape
[
d
])
min_shape
[
d
]
=
shapes
[
i
][
d
];
if
(
shapes
[
i
][
d
]
>
max_shape
[
d
])
max_shape
[
d
]
=
shapes
[
i
][
d
];
}
opt_shape
[
d
]
=
ShapeMaxFreq
(
counter
);
}
min_shapes
[
name
]
=
min_shape
;
max_shapes
[
name
]
=
max_shape
;
opt_shapes
[
name
]
=
opt_shape
;
}
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_values
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
max_values
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
opt_values
;
auto
extract_min_max_opt
=
[](
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
min_data
,
decltype
(
min_data
)
max_data
,
decltype
(
min_data
)
opt_data
,
decltype
(
shape_info_
)
shape_data
)
{
for
(
auto
it
:
shape_data
)
{
auto
name
=
it
.
first
;
auto
shapes
=
it
.
second
;
std
::
vector
<
int32_t
>
min_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
max_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
opt_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
auto
ShapeMaxFreq
=
[](
const
std
::
map
<
int32_t
,
int32_t
>
&
m
)
->
int32_t
{
std
::
vector
<
std
::
pair
<
int32_t
,
int32_t
>>
counter
;
for
(
auto
&
it
:
m
)
counter
.
push_back
(
it
);
std
::
sort
(
counter
.
begin
(),
counter
.
end
(),
[](
std
::
pair
<
int32_t
,
int32_t
>
&
a
,
std
::
pair
<
int32_t
,
int32_t
>
&
b
)
{
return
a
.
second
>
b
.
second
;
});
return
counter
[
0
].
first
;
};
for
(
size_t
d
=
0
;
d
<
shapes
[
0
].
size
();
++
d
)
{
std
::
map
<
int32_t
,
int32_t
>
counter
;
for
(
size_t
i
=
0
;
i
<
shapes
.
size
();
++
i
)
{
counter
[
shapes
[
i
][
d
]]
+=
1
;
if
(
shapes
[
i
][
d
]
<
min_shape
[
d
])
min_shape
[
d
]
=
shapes
[
i
][
d
];
if
(
shapes
[
i
][
d
]
>
max_shape
[
d
])
max_shape
[
d
]
=
shapes
[
i
][
d
];
}
opt_shape
[
d
]
=
ShapeMaxFreq
(
counter
);
}
inference
::
SerializeShapeRangeInfo
(
config_
.
shape_range_info_path
(),
min_shapes
,
max_shapes
,
opt_shapes
);
min_data
[
name
]
=
min_shape
;
max_data
[
name
]
=
max_shape
;
opt_data
[
name
]
=
opt_shape
;
}
};
extract_min_max_opt
(
min_shapes
,
max_shapes
,
opt_shapes
,
shape_info_
);
extract_min_max_opt
(
min_values
,
max_values
,
opt_values
,
shape_tensor_value_
);
inference
::
SerializeShapeRangeInfo
(
config_
.
shape_range_info_path
(),
min_shapes
,
max_shapes
,
opt_shapes
,
min_values
,
max_values
,
opt_values
);
}
bool
AnalysisPredictor
::
LoadProgramDesc
()
{
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
f2a778c9
...
...
@@ -514,6 +514,7 @@ class AnalysisPredictor : public PaddlePredictor {
bool
status_is_cloned_
{
false
};
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int32_t
>>>
shape_info_
;
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int32_t
>>>
shape_tensor_value_
;
static
int
clone_num_
;
bool
private_context_
{
false
};
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
f2a778c9
...
...
@@ -275,6 +275,35 @@ void TensorRTEngine::FreezeNetwork() {
nvinfer1
::
OptProfileSelector
::
kOPT
,
Vec2TRT_Dims
(
optim_input_shape_
[
input
.
first
],
input
.
first
,
true
));
}
for
(
int
input_id
=
0
;
input_id
<
network
()
->
getNbInputs
();
input_id
++
)
{
auto
input_name
=
network
()
->
getInput
(
input_id
)
->
getName
();
if
(
!
itensor_map_
.
count
(
input_name
))
continue
;
if
(
!
GetITensor
(
input_name
)
->
isShapeTensor
())
continue
;
PADDLE_ENFORCE_EQ
(
min_shape_tensor_
.
count
(
input_name
)
&&
max_shape_tensor_
.
count
(
input_name
)
&&
optim_shape_tensor_
.
count
(
input_name
),
true
,
platform
::
errors
::
InvalidArgument
(
"Fail to find min/max/optim shape value for TRT "
"network's shape tensor input named %s."
,
input_name
));
auto
min_vec
=
min_shape_tensor_
.
at
(
input_name
);
optim_profiles_
[
i
]
->
setShapeValues
(
input_name
,
nvinfer1
::
OptProfileSelector
::
kMIN
,
min_vec
.
data
(),
min_vec
.
size
());
optim_profiles_
[
i
]
->
setShapeValues
(
input_name
,
nvinfer1
::
OptProfileSelector
::
kMAX
,
max_shape_tensor_
[
input_name
].
data
(),
min_vec
.
size
());
optim_profiles_
[
i
]
->
setShapeValues
(
input_name
,
nvinfer1
::
OptProfileSelector
::
kOPT
,
optim_shape_tensor_
[
input_name
].
data
(),
min_vec
.
size
());
}
infer_builder_config_
->
addOptimizationProfile
(
optim_profiles_
[
i
]);
}
if
(
WithFp16
()
&&
disable_trt_plugin_fp16
())
{
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
f2a778c9
...
...
@@ -217,6 +217,9 @@ class TensorRTEngine {
const
ShapeMapType
min_input_shape
=
{},
const
ShapeMapType
max_input_shape
=
{},
const
ShapeMapType
optim_input_shape
=
{},
const
ShapeMapType
min_shape_tensor
=
{},
const
ShapeMapType
max_shape_tensor
=
{},
const
ShapeMapType
optim_shape_tensor
=
{},
bool
disable_trt_plugin_fp16
=
false
,
phi
::
DataType
model_precision
=
phi
::
DataType
::
FLOAT32
,
nvinfer1
::
ILogger
&
logger
=
NaiveLogger
::
Global
())
...
...
@@ -228,6 +231,9 @@ class TensorRTEngine {
min_input_shape_
(
min_input_shape
),
max_input_shape_
(
max_input_shape
),
optim_input_shape_
(
optim_input_shape
),
min_shape_tensor_
(
min_shape_tensor
),
max_shape_tensor_
(
max_shape_tensor
),
optim_shape_tensor_
(
optim_shape_tensor
),
disable_trt_plugin_fp16_
(
disable_trt_plugin_fp16
),
model_precision_
(
model_precision
),
logger_
(
logger
)
{
...
...
@@ -443,6 +449,9 @@ class TensorRTEngine {
ShapeMapType
min_input_shape
()
{
return
min_input_shape_
;
}
ShapeMapType
max_input_shape
()
{
return
max_input_shape_
;
}
ShapeMapType
optim_input_shape
()
{
return
optim_input_shape_
;
}
ShapeMapType
min_shape_tensor
()
{
return
min_shape_tensor_
;
}
ShapeMapType
max_shape_tensor
()
{
return
max_shape_tensor_
;
}
ShapeMapType
optim_shape_tensor
()
{
return
optim_shape_tensor_
;
}
bool
AdjustDynamicShapeRange
(
const
ShapeMapType
&
runtime_input_shape
,
std
::
vector
<
std
::
string
>*
changed
)
{
...
...
@@ -641,6 +650,9 @@ class TensorRTEngine {
ShapeMapType
min_input_shape_
;
ShapeMapType
max_input_shape_
;
ShapeMapType
optim_input_shape_
;
ShapeMapType
min_shape_tensor_
;
ShapeMapType
max_shape_tensor_
;
ShapeMapType
optim_shape_tensor_
;
bool
disable_trt_plugin_fp16_
{
false
};
phi
::
DataType
model_precision_
{
phi
::
DataType
::
FLOAT32
};
bool
use_varseqlen_
{
false
};
...
...
@@ -741,6 +753,9 @@ class TRTEngineManager {
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
min_input_shape
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
optim_input_shape
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
min_shape_tensor
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_shape_tensor
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
optim_shape_tensor
=
{},
bool
disable_trt_plugin_fp16
=
false
,
phi
::
DataType
model_precision
=
phi
::
DataType
::
FLOAT32
,
nvinfer1
::
ILogger
&
logger
=
NaiveLogger
::
Global
())
{
...
...
@@ -752,6 +767,9 @@ class TRTEngineManager {
min_input_shape
,
max_input_shape
,
optim_input_shape
,
min_shape_tensor
,
max_shape_tensor
,
optim_shape_tensor
,
disable_trt_plugin_fp16
,
model_precision
,
logger
);
...
...
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
浏览文件 @
f2a778c9
...
...
@@ -31,6 +31,137 @@ namespace paddle {
namespace
inference
{
namespace
tensorrt
{
/// Test fixture that builds a dynamic-shape TensorRTEngine whose network has
/// one regular input ("input") and one shape-tensor input ("shape").
///
/// SetUp() creates a GPU context on CUDAPlace(0), wires its allocators, and
/// constructs the engine with min/max/optim ranges for both the input shape
/// and the shape-tensor values. TearDown() releases the engine.
class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Build a GPU context on device 0 and attach the allocators it needs.
    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
    ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                           .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                           .get());
    ctx_->SetHostAllocator(
        paddle::memory::allocation::AllocatorFacade::Instance()
            .GetAllocator(paddle::platform::CPUPlace())
            .get());
    ctx_->SetZeroAllocator(
        paddle::memory::allocation::AllocatorFacade::Instance()
            .GetZeroAllocator(platform::CUDAPlace(0))
            .get());
    ctx_->SetPinnedAllocator(
        paddle::memory::allocation::AllocatorFacade::Instance()
            .GetAllocator(paddle::platform::CUDAPinnedPlace())
            .get());
    ctx_->PartialInitWithAllocator();

    // Dimension ranges for the regular input tensor "input".
    std::map<std::string, std::vector<int>> min_input_shape = {
        {"input", {1, 32}}};
    std::map<std::string, std::vector<int>> max_input_shape = {
        {"input", {18, 32}}};
    std::map<std::string, std::vector<int>> optim_input_shape = {
        {"input", {18, 32}}};
    // Value ranges for the shape-tensor input "shape".
    std::map<std::string, std::vector<int>> min_input_value = {
        {"shape", {1, 8, 4}}};
    std::map<std::string, std::vector<int>> max_input_value = {
        {"shape", {18, 8, 4}}};
    std::map<std::string, std::vector<int>> optim_input_value = {
        {"shape", {18, 8, 4}}};

    engine_ = new TensorRTEngine(16,
                                 1 << 10,
                                 AnalysisConfig::Precision::kFloat32,
                                 nullptr,
                                 0,
                                 min_input_shape,
                                 max_input_shape,
                                 optim_input_shape,
                                 min_input_value,
                                 max_input_value,
                                 optim_input_value,
                                 false,
                                 phi::DataType::FLOAT32,
                                 NaiveLogger::Global());
    engine_->InitNetwork();
  }

  void TearDown() override {
    // engine_ starts as nullptr, so this is safe even when SetUp() failed
    // before the engine was created (deleting a null pointer is a no-op).
    // NOTE(review): ctx_ is allocated in SetUp() but is not released here;
    // confirm whether it can be deleted safely once the fixture's tensors
    // are gone.
    delete engine_;
    engine_ = nullptr;
  }

  /// Copies `input` into the fixture's input tensor and resizes the output
  /// tensor to `output_shape`.
  void PrepareInputOutput(const std::vector<float> &input,
                          std::vector<int> output_shape) {
    paddle::framework::TensorFromVector(input, *ctx_, &input_);
    output_.Resize(phi::make_ddim(output_shape));
  }

  /// Copies `input` into the fixture's shape tensor.
  void PrepareShapeInput(const std::vector<int> &input) {
    paddle::framework::TensorFromVector(input, *ctx_, &shape_);
  }

  /// Copies the fixture's output tensor into `output`.
  void GetOutput(std::vector<float> *output) {
    paddle::framework::TensorToVector(output_, *ctx_, output);
  }

 protected:
  framework::LoDTensor input_;
  framework::LoDTensor shape_;
  framework::LoDTensor output_;
  // Initialised to nullptr so TearDown() never inspects an indeterminate
  // pointer when SetUp() exits early.
  TensorRTEngine *engine_{nullptr};
  phi::GPUContext *ctx_{nullptr};
};
// Builds a network consisting of a single shuffle layer whose second input is
// the shape tensor "shape", runs it with shape values {8, 8, 4} on an 8x32
// input, and checks the first output elements and the output binding
// dimensions.
TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
  std::vector<void *> buffers(3);
  std::cout << "with_dynamic_shape: " << engine_->with_dynamic_shape()
            << std::endl;

  auto *x = engine_->DeclareInput(
      "input", nvinfer1::DataType::kFLOAT, nvinfer1::Dims2{-1, 32});
  // The shape input is a 1-D INT32 tensor with three elements.
  nvinfer1::Dims shape_dim;
  shape_dim.nbDims = 1;
  shape_dim.d[0] = 3;
  auto *shape =
      engine_->DeclareInput("shape", nvinfer1::DataType::kINT32, shape_dim);

  auto layer = engine_->network()->addShuffle(*x);
  // Validate the layer before it is first dereferenced.
  PADDLE_ENFORCE_NOT_NULL(
      layer,
      platform::errors::InvalidArgument("TRT shuffle layer building failed."));
  layer->setInput(1, *shape);
  engine_->DeclareOutput(layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 3);

  std::vector<float> x_v(8 * 32);
  for (int i = 0; i < 8 * 32; i++) {
    x_v[i] = i % (8 * 32);
  }

  std::vector<int> shape_v = {8, 8, 4};
  PrepareInputOutput(x_v, {8, 8, 4});
  PrepareShapeInput(shape_v);
  engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
  engine_->context()->setBindingDimensions(1, shape_dim);
  // The shape values are passed to the execution context from host memory.
  engine_->context()->setInputShapeBinding(1, shape_v.data());

  auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
  auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

  buffers[0] = reinterpret_cast<void *>(x_gpu_data);
  buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
  buffers[2] = reinterpret_cast<void *>(y_gpu_data);

  engine_->Execute(-1, &buffers, ctx_->stream());
  cudaStreamSynchronize(ctx_->stream());

  std::vector<float> y_cpu;
  GetOutput(&y_cpu);
  ASSERT_EQ(y_cpu[0], 0);
  ASSERT_EQ(y_cpu[1], 1);

  auto dims = engine_->context()->getBindingDimensions(2);
  ASSERT_EQ(dims.nbDims, 3);
  ASSERT_EQ(dims.d[0], 8);
  ASSERT_EQ(dims.d[1], 8);
  ASSERT_EQ(dims.d[2], 4);
}
class
TensorRTDynamicEngineTest
:
public
::
testing
::
Test
{
protected:
void
SetUp
()
override
{
...
...
@@ -67,6 +198,9 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
min_input_shape
,
max_input_shape
,
optim_input_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
false
,
phi
::
DataType
::
FLOAT32
,
NaiveLogger
::
Global
());
...
...
@@ -241,6 +375,9 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
min_input_shape
,
max_input_shape
,
optim_input_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
false
,
phi
::
DataType
::
FLOAT32
,
NaiveLogger
::
Global
());
...
...
paddle/fluid/inference/utils/io_utils.cc
浏览文件 @
f2a778c9
...
...
@@ -182,7 +182,10 @@ void SerializeShapeRangeInfo(
const
std
::
string
&
path
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
min_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
max_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
opt_shape
)
{
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
opt_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
min_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
max_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
opt_value
)
{
paddle
::
inference
::
proto
::
ShapeRangeInfos
shape_range_infos
;
for
(
auto
it
:
min_shape
)
{
auto
*
s
=
shape_range_infos
.
add_shape_range_info
();
...
...
@@ -192,10 +195,18 @@ void SerializeShapeRangeInfo(
s
->
add_max_shape
(
max_shape
.
at
(
it
.
first
)[
i
]);
s
->
add_opt_shape
(
opt_shape
.
at
(
it
.
first
)[
i
]);
}
// If it.first is a shape tensor, we should collect values from it.
if
(
min_value
.
count
(
it
.
first
))
{
for
(
size_t
i
=
0
;
i
<
min_value
.
at
(
it
.
first
).
size
();
++
i
)
{
s
->
add_min_value
(
min_value
.
at
(
it
.
first
)[
i
]);
s
->
add_max_value
(
max_value
.
at
(
it
.
first
)[
i
]);
s
->
add_opt_value
(
opt_value
.
at
(
it
.
first
)[
i
]);
}
}
}
inference
::
SerializeShapeRangeInfo
(
path
,
shape_range_infos
);
}
void
DeserializeShapeRangeInfo
(
const
std
::
string
&
path
,
paddle
::
inference
::
proto
::
ShapeRangeInfos
*
info
)
{
int
fd
=
open
(
path
.
c_str
(),
O_RDONLY
);
...
...
@@ -213,7 +224,10 @@ void DeserializeShapeRangeInfo(
const
std
::
string
&
path
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
min_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
max_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
opt_shape
)
{
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
opt_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
min_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
max_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
opt_value
)
{
paddle
::
inference
::
proto
::
ShapeRangeInfos
shape_range_infos
;
DeserializeShapeRangeInfo
(
path
,
&
shape_range_infos
);
for
(
int
i
=
0
;
i
<
shape_range_infos
.
shape_range_info_size
();
++
i
)
{
...
...
@@ -236,6 +250,26 @@ void DeserializeShapeRangeInfo(
opt_shape
->
insert
(
std
::
make_pair
(
name
,
tmp
));
}
}
for
(
int
i
=
0
;
i
<
shape_range_infos
.
shape_range_info_size
();
++
i
)
{
auto
info
=
shape_range_infos
.
shape_range_info
(
i
);
auto
name
=
info
.
name
();
if
(
min_value
->
count
(
name
)
||
max_value
->
count
(
name
)
||
opt_value
->
count
(
name
))
{
continue
;
}
else
{
std
::
vector
<
int32_t
>
tmp
(
info
.
min_value_size
());
for
(
size_t
k
=
0
;
k
<
tmp
.
size
();
++
k
)
tmp
[
k
]
=
info
.
min_value
(
k
);
min_value
->
insert
(
std
::
make_pair
(
name
,
tmp
));
tmp
.
resize
(
info
.
max_value_size
());
for
(
size_t
k
=
0
;
k
<
tmp
.
size
();
++
k
)
tmp
[
k
]
=
info
.
max_value
(
k
);
max_value
->
insert
(
std
::
make_pair
(
name
,
tmp
));
tmp
.
resize
(
info
.
opt_value_size
());
for
(
size_t
k
=
0
;
k
<
tmp
.
size
();
++
k
)
tmp
[
k
]
=
info
.
opt_value
(
k
);
opt_value
->
insert
(
std
::
make_pair
(
name
,
tmp
));
}
}
}
void
UpdateShapeRangeInfo
(
...
...
@@ -264,6 +298,7 @@ void UpdateShapeRangeInfo(
}
}
}
inference
::
SerializeShapeRangeInfo
(
path
,
shape_range_infos
);
}
...
...
paddle/fluid/inference/utils/io_utils.h
浏览文件 @
f2a778c9
...
...
@@ -42,23 +42,22 @@ void SerializePDTensorsToFile(const std::string& path,
const
std
::
vector
<
PaddleTensor
>&
tensors
);
void
DeserializePDTensorsToFile
(
const
std
::
string
&
path
,
std
::
vector
<
PaddleTensor
>*
tensors
);
void
SerializeShapeRangeInfo
(
const
std
::
string
&
path
,
const
paddle
::
inference
::
proto
::
ShapeRangeInfos
&
info
);
void
SerializeShapeRangeInfo
(
const
std
::
string
&
path
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
min_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
max_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
opt_shape
);
void
DeserializeShapeRangeInfo
(
const
std
::
string
&
path
,
paddle
::
inference
::
proto
::
ShapeRangeInfos
*
info
);
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
opt_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
min_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
max_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
opt_value
);
void
DeserializeShapeRangeInfo
(
const
std
::
string
&
path
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
min_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
max_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
opt_shape
);
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
opt_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
min_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
max_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
opt_value
);
void
UpdateShapeRangeInfo
(
const
std
::
string
&
path
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
min_shape
,
...
...
paddle/fluid/inference/utils/io_utils_tester.cc
浏览文件 @
f2a778c9
...
...
@@ -100,28 +100,48 @@ TEST(infer_io_utils, tensors) {
TEST
(
shape_info_io
,
read_and_write
)
{
const
std
::
string
path
=
"test_shape_info_io"
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_shape
,
max_shape
,
opt_shape
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_value
,
max_value
,
opt_value
;
min_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
112
,
112
}));
max_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
opt_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
min_value
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
112
,
112
}));
max_value
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
opt_value
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
paddle
::
inference
::
SerializeShapeRangeInfo
(
path
,
min_shape
,
max_shape
,
opt_shape
);
path
,
min_shape
,
max_shape
,
opt_shape
,
min_value
,
max_value
,
opt_value
);
min_shape
.
clear
();
max_shape
.
clear
();
opt_shape
.
clear
();
min_value
.
clear
();
max_value
.
clear
();
opt_value
.
clear
();
opt_shape
.
insert
(
std
::
make_pair
(
"test2"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
paddle
::
inference
::
DeserializeShapeRangeInfo
(
path
,
&
min_shape
,
&
max_shape
,
&
opt_shape
);
paddle
::
inference
::
DeserializeShapeRangeInfo
(
path
,
&
min_shape
,
&
max_shape
,
&
opt_shape
,
&
min_value
,
&
max_value
,
&
opt_value
);
min_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
56
,
56
}));
std
::
vector
<
std
::
string
>
names
{
"test1"
};
paddle
::
inference
::
UpdateShapeRangeInfo
(
path
,
min_shape
,
max_shape
,
opt_shape
,
names
);
ASSERT_THROW
(
paddle
::
inference
::
DeserializeShapeRangeInfo
(
"no_exists_file"
,
&
min_shape
,
&
max_shape
,
&
opt_shape
);
ASSERT_THROW
(
paddle
::
inference
::
DeserializeShapeRangeInfo
(
"no_exists_file"
,
&
min_shape
,
&
max_shape
,
&
opt_shape
,
&
min_value
,
&
max_value
,
&
opt_value
);
,
paddle
::
platform
::
EnforceNotMet
);
}
paddle/fluid/inference/utils/shape_range_info.proto
浏览文件 @
f2a778c9
...
...
@@ -23,6 +23,9 @@ message ShapeRangeInfos {
repeated
int32
min_shape
=
2
;
repeated
int32
max_shape
=
3
;
repeated
int32
opt_shape
=
4
;
repeated
int32
min_value
=
5
;
repeated
int32
max_value
=
6
;
repeated
int32
opt_value
=
7
;
}
repeated
ShapeRangeInfo
shape_range_info
=
1
;
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
f2a778c9
...
...
@@ -554,6 +554,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
#if IS_TRT_VERSION_GE(6000)
trt_context
->
setBindingDimensions
(
bind_index
,
inference
::
tensorrt
::
Vec2TRT_Dims
(
t_shape
,
x
,
true
));
// If this x is a shape tensor, we need to call setInputShapeBinding.
if
(
engine
->
engine
()
->
isShapeBinding
(
bind_index
)
&&
engine
->
engine
()
->
bindingIsInput
(
bind_index
))
{
std
::
vector
<
int
>
shape_v
(
t
.
numel
());
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
shape_v
.
data
(),
platform
::
CUDAPlace
(),
t
.
data
<
int32_t
>
(),
t
.
numel
()
*
sizeof
(
int
),
nullptr
);
trt_context
->
setInputShapeBinding
(
bind_index
,
shape_v
.
data
());
}
#endif
}
runtime_batch
=
t_shape
[
0
];
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录