Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
f2a778c9
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f2a778c9
编写于
10月 12, 2022
作者:
Z
zhoutianzi666
提交者:
GitHub
10月 12, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Paddle-TRT]support shape tensor is the input of trt-subgraph (#46482)
上级
5303b66b
变更
12
隐藏空白更改
内联
并排
Showing
12 changed files
with
378 additions
and
55 deletions
+378
-55
paddle/fluid/inference/analysis/ir_pass_manager.cc
paddle/fluid/inference/analysis/ir_pass_manager.cc
+9
-0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+14
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+84
-37
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+1
-0
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+29
-0
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+18
-0
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
+137
-0
paddle/fluid/inference/utils/io_utils.cc
paddle/fluid/inference/utils/io_utils.cc
+38
-3
paddle/fluid/inference/utils/io_utils.h
paddle/fluid/inference/utils/io_utils.h
+8
-9
paddle/fluid/inference/utils/io_utils_tester.cc
paddle/fluid/inference/utils/io_utils_tester.cc
+25
-5
paddle/fluid/inference/utils/shape_range_info.proto
paddle/fluid/inference/utils/shape_range_info.proto
+3
-0
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+12
-0
未找到文件。
paddle/fluid/inference/analysis/ir_pass_manager.cc
浏览文件 @
f2a778c9
...
...
@@ -77,6 +77,15 @@ void IRPassManager::CreatePasses(Argument *argument,
pass
->
Set
(
"optim_input_shape"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(
argument
->
optim_input_shape
()));
// For now, shape tensor values are not set explicitly by the user;
// they are collected through the CollectShapeRangeInfo API.
pass
->
Set
(
"max_shape_tensor"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
());
pass
->
Set
(
"min_shape_tensor"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
());
pass
->
Set
(
"optim_shape_tensor"
,
new
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
());
// tuned trt dynamic_shape
pass
->
Set
(
"trt_tuned_dynamic_shape"
,
new
bool
(
argument
->
tensorrt_tuned_dynamic_shape
()));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
f2a778c9
...
...
@@ -317,6 +317,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
auto
opt_input_shape
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"optim_input_shape"
);
auto
min_shape_tensor
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"min_shape_tensor"
);
auto
max_shape_tensor
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"max_shape_tensor"
);
auto
opt_shape_tensor
=
Get
<
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>>
(
"optim_shape_tensor"
);
auto
allow_build_at_runtime
=
Get
<
bool
>
(
"trt_allow_build_at_runtime"
);
auto
shape_range_info_path
=
Get
<
std
::
string
>
(
"trt_shape_range_info_path"
);
auto
trt_tuned_dynamic_shape
=
Get
<
bool
>
(
"trt_tuned_dynamic_shape"
);
...
...
@@ -326,7 +333,10 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
inference
::
DeserializeShapeRangeInfo
(
shape_range_info_path
,
&
min_input_shape
,
&
max_input_shape
,
&
opt_input_shape
);
&
opt_input_shape
,
&
min_shape_tensor
,
&
max_shape_tensor
,
&
opt_shape_tensor
);
}
// The following procedure is used to rename all the intermediate
...
...
@@ -511,6 +521,9 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
min_input_shape
,
max_input_shape
,
opt_input_shape
,
min_shape_tensor
,
max_shape_tensor
,
opt_shape_tensor
,
disable_trt_plugin_fp16
,
static_cast
<
phi
::
DataType
>
(
Get
<
int
>
(
"model_precision"
)));
trt_engine
->
SetUseOSS
(
Get
<
bool
>
(
"use_varseqlen"
));
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
f2a778c9
...
...
@@ -1747,10 +1747,39 @@ void AnalysisPredictor::CollectShapeRangeInfo() {
if
(
!
var
->
IsType
<
phi
::
DenseTensor
>
())
{
continue
;
}
framework
::
DDim
dim
=
var
->
Get
<
phi
::
DenseTensor
>
().
dims
();
auto
tensor
=
var
->
Get
<
phi
::
DenseTensor
>
();
framework
::
DDim
dim
=
tensor
.
dims
();
std
::
vector
<
int32_t
>
shape
(
dim
.
size
());
for
(
size_t
i
=
0
;
i
<
shape
.
size
();
++
i
)
shape
[
i
]
=
dim
[
i
];
shape_info_
[
name
].
emplace_back
(
shape
);
// We need to collect the value range of every shape tensor for Paddle-TRT.
// Note that this way of identifying shape tensors relies on the assumption
// that every shape tensor in the model has at most 7 elements.
// It is a simple heuristic that may misidentify some tensors as shape
// tensors, but that does not matter.
auto
is_shape_tensor
=
tensor
.
numel
()
<=
7
&&
tensor
.
numel
()
>=
1
;
if
(
tensor
.
dtype
()
==
paddle
::
experimental
::
DataType
::
INT32
&&
is_shape_tensor
)
{
std
::
vector
<
int
>
int32_host
(
tensor
.
numel
());
if
(
tensor
.
place
()
==
platform
::
CPUPlace
())
{
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
int32_host
.
data
(),
platform
::
CPUPlace
(),
tensor
.
data
<
int
>
(),
tensor
.
numel
()
*
sizeof
(
int
));
}
else
if
(
tensor
.
place
()
==
platform
::
CUDAPlace
())
{
#if defined(PADDLE_WITH_CUDA)
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
int32_host
.
data
(),
platform
::
CUDAPlace
(),
tensor
.
data
<
int
>
(),
tensor
.
numel
()
*
sizeof
(
int
),
nullptr
);
#endif
}
shape_tensor_value_
[
name
].
emplace_back
(
int32_host
);
}
}
}
...
...
@@ -1758,43 +1787,61 @@ void AnalysisPredictor::StatisticShapeRangeInfo() {
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_shapes
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
max_shapes
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
opt_shapes
;
for
(
auto
it
:
shape_info_
)
{
auto
name
=
it
.
first
;
auto
shapes
=
it
.
second
;
std
::
vector
<
int32_t
>
min_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
max_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
opt_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
auto
ShapeMaxFreq
=
[](
const
std
::
map
<
int32_t
,
int32_t
>
&
m
)
->
int32_t
{
std
::
vector
<
std
::
pair
<
int32_t
,
int32_t
>>
counter
;
for
(
auto
&
it
:
m
)
counter
.
push_back
(
it
);
std
::
sort
(
counter
.
begin
(),
counter
.
end
(),
[](
std
::
pair
<
int32_t
,
int32_t
>
&
a
,
std
::
pair
<
int32_t
,
int32_t
>
&
b
)
{
return
a
.
second
>
b
.
second
;
});
return
counter
[
0
].
first
;
};
for
(
size_t
d
=
0
;
d
<
shapes
[
0
].
size
();
++
d
)
{
std
::
map
<
int32_t
,
int32_t
>
counter
;
for
(
size_t
i
=
0
;
i
<
shapes
.
size
();
++
i
)
{
counter
[
shapes
[
i
][
d
]]
+=
1
;
if
(
shapes
[
i
][
d
]
<
min_shape
[
d
])
min_shape
[
d
]
=
shapes
[
i
][
d
];
if
(
shapes
[
i
][
d
]
>
max_shape
[
d
])
max_shape
[
d
]
=
shapes
[
i
][
d
];
}
opt_shape
[
d
]
=
ShapeMaxFreq
(
counter
);
}
min_shapes
[
name
]
=
min_shape
;
max_shapes
[
name
]
=
max_shape
;
opt_shapes
[
name
]
=
opt_shape
;
}
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_values
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
max_values
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
opt_values
;
auto
extract_min_max_opt
=
[](
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
min_data
,
decltype
(
min_data
)
max_data
,
decltype
(
min_data
)
opt_data
,
decltype
(
shape_info_
)
shape_data
)
{
for
(
auto
it
:
shape_data
)
{
auto
name
=
it
.
first
;
auto
shapes
=
it
.
second
;
std
::
vector
<
int32_t
>
min_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
max_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
std
::
vector
<
int32_t
>
opt_shape
(
shapes
[
0
].
begin
(),
shapes
[
0
].
end
());
auto
ShapeMaxFreq
=
[](
const
std
::
map
<
int32_t
,
int32_t
>
&
m
)
->
int32_t
{
std
::
vector
<
std
::
pair
<
int32_t
,
int32_t
>>
counter
;
for
(
auto
&
it
:
m
)
counter
.
push_back
(
it
);
std
::
sort
(
counter
.
begin
(),
counter
.
end
(),
[](
std
::
pair
<
int32_t
,
int32_t
>
&
a
,
std
::
pair
<
int32_t
,
int32_t
>
&
b
)
{
return
a
.
second
>
b
.
second
;
});
return
counter
[
0
].
first
;
};
for
(
size_t
d
=
0
;
d
<
shapes
[
0
].
size
();
++
d
)
{
std
::
map
<
int32_t
,
int32_t
>
counter
;
for
(
size_t
i
=
0
;
i
<
shapes
.
size
();
++
i
)
{
counter
[
shapes
[
i
][
d
]]
+=
1
;
if
(
shapes
[
i
][
d
]
<
min_shape
[
d
])
min_shape
[
d
]
=
shapes
[
i
][
d
];
if
(
shapes
[
i
][
d
]
>
max_shape
[
d
])
max_shape
[
d
]
=
shapes
[
i
][
d
];
}
opt_shape
[
d
]
=
ShapeMaxFreq
(
counter
);
}
inference
::
SerializeShapeRangeInfo
(
config_
.
shape_range_info_path
(),
min_shapes
,
max_shapes
,
opt_shapes
);
min_data
[
name
]
=
min_shape
;
max_data
[
name
]
=
max_shape
;
opt_data
[
name
]
=
opt_shape
;
}
};
extract_min_max_opt
(
min_shapes
,
max_shapes
,
opt_shapes
,
shape_info_
);
extract_min_max_opt
(
min_values
,
max_values
,
opt_values
,
shape_tensor_value_
);
inference
::
SerializeShapeRangeInfo
(
config_
.
shape_range_info_path
(),
min_shapes
,
max_shapes
,
opt_shapes
,
min_values
,
max_values
,
opt_values
);
}
bool
AnalysisPredictor
::
LoadProgramDesc
()
{
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
f2a778c9
...
...
@@ -514,6 +514,7 @@ class AnalysisPredictor : public PaddlePredictor {
bool
status_is_cloned_
{
false
};
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int32_t
>>>
shape_info_
;
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
vector
<
int32_t
>>>
shape_tensor_value_
;
static
int
clone_num_
;
bool
private_context_
{
false
};
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
f2a778c9
...
...
@@ -275,6 +275,35 @@ void TensorRTEngine::FreezeNetwork() {
nvinfer1
::
OptProfileSelector
::
kOPT
,
Vec2TRT_Dims
(
optim_input_shape_
[
input
.
first
],
input
.
first
,
true
));
}
for
(
int
input_id
=
0
;
input_id
<
network
()
->
getNbInputs
();
input_id
++
)
{
auto
input_name
=
network
()
->
getInput
(
input_id
)
->
getName
();
if
(
!
itensor_map_
.
count
(
input_name
))
continue
;
if
(
!
GetITensor
(
input_name
)
->
isShapeTensor
())
continue
;
PADDLE_ENFORCE_EQ
(
min_shape_tensor_
.
count
(
input_name
)
&&
max_shape_tensor_
.
count
(
input_name
)
&&
optim_shape_tensor_
.
count
(
input_name
),
true
,
platform
::
errors
::
InvalidArgument
(
"Fail to find min/max/optim shape value for TRT "
"network's shape tensor input named %s."
,
input_name
));
auto
min_vec
=
min_shape_tensor_
.
at
(
input_name
);
optim_profiles_
[
i
]
->
setShapeValues
(
input_name
,
nvinfer1
::
OptProfileSelector
::
kMIN
,
min_vec
.
data
(),
min_vec
.
size
());
optim_profiles_
[
i
]
->
setShapeValues
(
input_name
,
nvinfer1
::
OptProfileSelector
::
kMAX
,
max_shape_tensor_
[
input_name
].
data
(),
min_vec
.
size
());
optim_profiles_
[
i
]
->
setShapeValues
(
input_name
,
nvinfer1
::
OptProfileSelector
::
kOPT
,
optim_shape_tensor_
[
input_name
].
data
(),
min_vec
.
size
());
}
infer_builder_config_
->
addOptimizationProfile
(
optim_profiles_
[
i
]);
}
if
(
WithFp16
()
&&
disable_trt_plugin_fp16
())
{
...
...
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
f2a778c9
...
...
@@ -217,6 +217,9 @@ class TensorRTEngine {
const
ShapeMapType
min_input_shape
=
{},
const
ShapeMapType
max_input_shape
=
{},
const
ShapeMapType
optim_input_shape
=
{},
const
ShapeMapType
min_shape_tensor
=
{},
const
ShapeMapType
max_shape_tensor
=
{},
const
ShapeMapType
optim_shape_tensor
=
{},
bool
disable_trt_plugin_fp16
=
false
,
phi
::
DataType
model_precision
=
phi
::
DataType
::
FLOAT32
,
nvinfer1
::
ILogger
&
logger
=
NaiveLogger
::
Global
())
...
...
@@ -228,6 +231,9 @@ class TensorRTEngine {
min_input_shape_
(
min_input_shape
),
max_input_shape_
(
max_input_shape
),
optim_input_shape_
(
optim_input_shape
),
min_shape_tensor_
(
min_shape_tensor
),
max_shape_tensor_
(
max_shape_tensor
),
optim_shape_tensor_
(
optim_shape_tensor
),
disable_trt_plugin_fp16_
(
disable_trt_plugin_fp16
),
model_precision_
(
model_precision
),
logger_
(
logger
)
{
...
...
@@ -443,6 +449,9 @@ class TensorRTEngine {
// Returns min_input_shape_ by value, i.e. the caller receives a copy.
ShapeMapType
min_input_shape
()
{
return
min_input_shape_
;
}
// Returns max_input_shape_ by value, i.e. the caller receives a copy.
ShapeMapType
max_input_shape
()
{
return
max_input_shape_
;
}
// Returns optim_input_shape_ by value, i.e. the caller receives a copy.
ShapeMapType
optim_input_shape
()
{
return
optim_input_shape_
;
}
// Returns min_shape_tensor_ by value, i.e. the caller receives a copy.
ShapeMapType
min_shape_tensor
()
{
return
min_shape_tensor_
;
}
// Returns max_shape_tensor_ by value, i.e. the caller receives a copy.
ShapeMapType
max_shape_tensor
()
{
return
max_shape_tensor_
;
}
// Returns optim_shape_tensor_ by value, i.e. the caller receives a copy.
ShapeMapType
optim_shape_tensor
()
{
return
optim_shape_tensor_
;
}
bool
AdjustDynamicShapeRange
(
const
ShapeMapType
&
runtime_input_shape
,
std
::
vector
<
std
::
string
>*
changed
)
{
...
...
@@ -641,6 +650,9 @@ class TensorRTEngine {
ShapeMapType
min_input_shape_
;
ShapeMapType
max_input_shape_
;
ShapeMapType
optim_input_shape_
;
ShapeMapType
min_shape_tensor_
;
ShapeMapType
max_shape_tensor_
;
ShapeMapType
optim_shape_tensor_
;
bool
disable_trt_plugin_fp16_
{
false
};
phi
::
DataType
model_precision_
{
phi
::
DataType
::
FLOAT32
};
bool
use_varseqlen_
{
false
};
...
...
@@ -741,6 +753,9 @@ class TRTEngineManager {
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
min_input_shape
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_input_shape
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
optim_input_shape
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
min_shape_tensor
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
max_shape_tensor
=
{},
const
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
optim_shape_tensor
=
{},
bool
disable_trt_plugin_fp16
=
false
,
phi
::
DataType
model_precision
=
phi
::
DataType
::
FLOAT32
,
nvinfer1
::
ILogger
&
logger
=
NaiveLogger
::
Global
())
{
...
...
@@ -752,6 +767,9 @@ class TRTEngineManager {
min_input_shape
,
max_input_shape
,
optim_input_shape
,
min_shape_tensor
,
max_shape_tensor
,
optim_shape_tensor
,
disable_trt_plugin_fp16
,
model_precision
,
logger
);
...
...
paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
浏览文件 @
f2a778c9
...
...
@@ -31,6 +31,137 @@ namespace paddle {
namespace
inference
{
namespace
tensorrt
{
// Test fixture that owns a GPU context and a TensorRTEngine constructed with
// both dynamic input-shape ranges (for the tensor named "input") and
// shape-tensor value ranges (for the tensor named "shape").
//
// SetUp() creates the context and the engine and calls InitNetwork();
// TearDown() releases both. The tensors input_, shape_ and output_ are the
// device-side buffers the tests bind to the engine.
class TensorRTDynamicShapeValueEngineTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Build a GPU context on device 0 and wire up its allocators before
    // finishing initialization.
    ctx_ = new phi::GPUContext(platform::CUDAPlace(0));
    ctx_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                           .GetAllocator(platform::CUDAPlace(0), ctx_->stream())
                           .get());
    ctx_->SetHostAllocator(
        paddle::memory::allocation::AllocatorFacade::Instance()
            .GetAllocator(paddle::platform::CPUPlace())
            .get());
    ctx_->SetZeroAllocator(
        paddle::memory::allocation::AllocatorFacade::Instance()
            .GetZeroAllocator(platform::CUDAPlace(0))
            .get());
    ctx_->SetPinnedAllocator(
        paddle::memory::allocation::AllocatorFacade::Instance()
            .GetAllocator(paddle::platform::CUDAPinnedPlace())
            .get());
    ctx_->PartialInitWithAllocator();

    // Dimension ranges for the regular tensor input.
    std::map<std::string, std::vector<int>> min_input_shape = {
        {"input", {1, 32}}};
    std::map<std::string, std::vector<int>> max_input_shape = {
        {"input", {18, 32}}};
    std::map<std::string, std::vector<int>> optim_input_shape = {
        {"input", {18, 32}}};
    // Value ranges for the shape-tensor input (its contents, not its dims).
    std::map<std::string, std::vector<int>> min_input_value = {
        {"shape", {1, 8, 4}}};
    std::map<std::string, std::vector<int>> max_input_value = {
        {"shape", {18, 8, 4}}};
    std::map<std::string, std::vector<int>> optim_input_value = {
        {"shape", {18, 8, 4}}};

    engine_ = new TensorRTEngine(16,
                                 1 << 10,
                                 AnalysisConfig::Precision::kFloat32,
                                 nullptr,
                                 0,
                                 min_input_shape,
                                 max_input_shape,
                                 optim_input_shape,
                                 min_input_value,
                                 max_input_value,
                                 optim_input_value,
                                 false,
                                 phi::DataType::FLOAT32,
                                 NaiveLogger::Global());
    engine_->InitNetwork();
  }

  void TearDown() override {
    // Release the engine before the context that was created ahead of it.
    // Deleting a null pointer is a no-op, so this is safe even when SetUp()
    // exited before either object was created.
    delete engine_;
    engine_ = nullptr;
    delete ctx_;
    ctx_ = nullptr;
  }

  // Copies `input` into input_ through ctx_ and resizes output_ to
  // `output_shape`.
  void PrepareInputOutput(const std::vector<float> &input,
                          std::vector<int> output_shape) {
    paddle::framework::TensorFromVector(input, *ctx_, &input_);
    output_.Resize(phi::make_ddim(output_shape));
  }

  // Copies the shape-tensor values in `input` into shape_ through ctx_.
  void PrepareShapeInput(const std::vector<int> &input) {
    paddle::framework::TensorFromVector(input, *ctx_, &shape_);
  }

  // Copies the contents of output_ into `*output` through ctx_.
  void GetOutput(std::vector<float> *output) {
    paddle::framework::TensorToVector(output_, *ctx_, output);
  }

 protected:
  framework::LoDTensor input_;
  framework::LoDTensor shape_;
  framework::LoDTensor output_;
  // Owned by the fixture; null until SetUp() assigns them so that TearDown()
  // never reads an indeterminate pointer.
  TensorRTEngine *engine_ = nullptr;
  phi::GPUContext *ctx_ = nullptr;
};
// Builds a network with a single shuffle (reshape) layer whose target shape
// comes from a shape-tensor input, runs it with input dims {8, 32} and shape
// values {8, 8, 4}, and checks the first output values and the output dims.
TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
  std::vector<void *> buffers(3);
  std::cout << "with_dynamic_shape: " << engine_->with_dynamic_shape()
            << std::endl;

  auto *x = engine_->DeclareInput(
      "input", nvinfer1::DataType::kFLOAT, nvinfer1::Dims2{-1, 32});

  // The shape tensor is a 1-D INT32 tensor holding three values.
  nvinfer1::Dims shape_dim;
  shape_dim.nbDims = 1;
  shape_dim.d[0] = 3;
  auto *shape =
      engine_->DeclareInput("shape", nvinfer1::DataType::kINT32, shape_dim);

  auto *layer = engine_->network()->addShuffle(*x);
  // Validate the layer before it is dereferenced by setInput() below.
  PADDLE_ENFORCE_NOT_NULL(
      layer,
      platform::errors::InvalidArgument("TRT shuffle layer building failed."));
  layer->setInput(1, *shape);

  engine_->DeclareOutput(layer, 0, "y");
  engine_->FreezeNetwork();
  ASSERT_EQ(engine_->engine()->getNbBindings(), 3);

  // Fill the input with 0, 1, 2, ... so the reshaped output is easy to check.
  std::vector<float> x_v(8 * 32);
  for (int i = 0; i < 8 * 32; i++) {
    x_v[i] = i;
  }

  std::vector<int> shape_v = {8, 8, 4};
  PrepareInputOutput(x_v, {8, 8, 4});
  PrepareShapeInput(shape_v);

  engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
  engine_->context()->setBindingDimensions(1, shape_dim);
  // The shape-tensor values are handed to the context from host memory.
  engine_->context()->setInputShapeBinding(1, shape_v.data());

  auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
  auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
  auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());
  buffers[0] = reinterpret_cast<void *>(x_gpu_data);
  buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
  buffers[2] = reinterpret_cast<void *>(y_gpu_data);

  engine_->Execute(-1, &buffers, ctx_->stream());
  // Fail the test instead of reading stale data if the stream reports an error.
  ASSERT_EQ(cudaStreamSynchronize(ctx_->stream()), cudaSuccess);

  std::vector<float> y_cpu;
  GetOutput(&y_cpu);
  // Guard the element accesses below against an unexpectedly short output.
  ASSERT_GE(y_cpu.size(), 2U);
  ASSERT_EQ(y_cpu[0], 0);
  ASSERT_EQ(y_cpu[1], 1);

  auto dims = engine_->context()->getBindingDimensions(2);
  ASSERT_EQ(dims.nbDims, 3);
  ASSERT_EQ(dims.d[0], 8);
  ASSERT_EQ(dims.d[1], 8);
  ASSERT_EQ(dims.d[2], 4);
}
class
TensorRTDynamicEngineTest
:
public
::
testing
::
Test
{
protected:
void
SetUp
()
override
{
...
...
@@ -67,6 +198,9 @@ class TensorRTDynamicEngineTest : public ::testing::Test {
min_input_shape
,
max_input_shape
,
optim_input_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
false
,
phi
::
DataType
::
FLOAT32
,
NaiveLogger
::
Global
());
...
...
@@ -241,6 +375,9 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test {
min_input_shape
,
max_input_shape
,
optim_input_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
(),
false
,
phi
::
DataType
::
FLOAT32
,
NaiveLogger
::
Global
());
...
...
paddle/fluid/inference/utils/io_utils.cc
浏览文件 @
f2a778c9
...
...
@@ -182,7 +182,10 @@ void SerializeShapeRangeInfo(
const
std
::
string
&
path
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
min_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
max_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
opt_shape
)
{
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
opt_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
min_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
max_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
&
opt_value
)
{
paddle
::
inference
::
proto
::
ShapeRangeInfos
shape_range_infos
;
for
(
auto
it
:
min_shape
)
{
auto
*
s
=
shape_range_infos
.
add_shape_range_info
();
...
...
@@ -192,10 +195,18 @@ void SerializeShapeRangeInfo(
s
->
add_max_shape
(
max_shape
.
at
(
it
.
first
)[
i
]);
s
->
add_opt_shape
(
opt_shape
.
at
(
it
.
first
)[
i
]);
}
// If it.first is a shape tensor, we should collect values from it.
if
(
min_value
.
count
(
it
.
first
))
{
for
(
size_t
i
=
0
;
i
<
min_value
.
at
(
it
.
first
).
size
();
++
i
)
{
s
->
add_min_value
(
min_value
.
at
(
it
.
first
)[
i
]);
s
->
add_max_value
(
max_value
.
at
(
it
.
first
)[
i
]);
s
->
add_opt_value
(
opt_value
.
at
(
it
.
first
)[
i
]);
}
}
}
inference
::
SerializeShapeRangeInfo
(
path
,
shape_range_infos
);
}
void
DeserializeShapeRangeInfo
(
const
std
::
string
&
path
,
paddle
::
inference
::
proto
::
ShapeRangeInfos
*
info
)
{
int
fd
=
open
(
path
.
c_str
(),
O_RDONLY
);
...
...
@@ -213,7 +224,10 @@ void DeserializeShapeRangeInfo(
const
std
::
string
&
path
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
min_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
max_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
opt_shape
)
{
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
opt_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
min_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
max_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
*
opt_value
)
{
paddle
::
inference
::
proto
::
ShapeRangeInfos
shape_range_infos
;
DeserializeShapeRangeInfo
(
path
,
&
shape_range_infos
);
for
(
int
i
=
0
;
i
<
shape_range_infos
.
shape_range_info_size
();
++
i
)
{
...
...
@@ -236,6 +250,26 @@ void DeserializeShapeRangeInfo(
opt_shape
->
insert
(
std
::
make_pair
(
name
,
tmp
));
}
}
for
(
int
i
=
0
;
i
<
shape_range_infos
.
shape_range_info_size
();
++
i
)
{
auto
info
=
shape_range_infos
.
shape_range_info
(
i
);
auto
name
=
info
.
name
();
if
(
min_value
->
count
(
name
)
||
max_value
->
count
(
name
)
||
opt_value
->
count
(
name
))
{
continue
;
}
else
{
std
::
vector
<
int32_t
>
tmp
(
info
.
min_value_size
());
for
(
size_t
k
=
0
;
k
<
tmp
.
size
();
++
k
)
tmp
[
k
]
=
info
.
min_value
(
k
);
min_value
->
insert
(
std
::
make_pair
(
name
,
tmp
));
tmp
.
resize
(
info
.
max_value_size
());
for
(
size_t
k
=
0
;
k
<
tmp
.
size
();
++
k
)
tmp
[
k
]
=
info
.
max_value
(
k
);
max_value
->
insert
(
std
::
make_pair
(
name
,
tmp
));
tmp
.
resize
(
info
.
opt_value_size
());
for
(
size_t
k
=
0
;
k
<
tmp
.
size
();
++
k
)
tmp
[
k
]
=
info
.
opt_value
(
k
);
opt_value
->
insert
(
std
::
make_pair
(
name
,
tmp
));
}
}
}
void
UpdateShapeRangeInfo
(
...
...
@@ -264,6 +298,7 @@ void UpdateShapeRangeInfo(
}
}
}
inference
::
SerializeShapeRangeInfo
(
path
,
shape_range_infos
);
}
...
...
paddle/fluid/inference/utils/io_utils.h
浏览文件 @
f2a778c9
...
...
@@ -42,23 +42,22 @@ void SerializePDTensorsToFile(const std::string& path,
const
std
::
vector
<
PaddleTensor
>&
tensors
);
void
DeserializePDTensorsToFile
(
const
std
::
string
&
path
,
std
::
vector
<
PaddleTensor
>*
tensors
);
void
SerializeShapeRangeInfo
(
const
std
::
string
&
path
,
const
paddle
::
inference
::
proto
::
ShapeRangeInfos
&
info
);
void
SerializeShapeRangeInfo
(
const
std
::
string
&
path
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
min_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
max_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
opt_shape
);
void
DeserializeShapeRangeInfo
(
const
std
::
string
&
path
,
paddle
::
inference
::
proto
::
ShapeRangeInfos
*
info
);
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
opt_shape
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
min_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
max_value
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
opt_value
);
void
DeserializeShapeRangeInfo
(
const
std
::
string
&
path
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
min_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
max_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
opt_shape
);
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
opt_shape
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
min_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
max_value
,
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>*
opt_value
);
void
UpdateShapeRangeInfo
(
const
std
::
string
&
path
,
const
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>&
min_shape
,
...
...
paddle/fluid/inference/utils/io_utils_tester.cc
浏览文件 @
f2a778c9
...
...
@@ -100,28 +100,48 @@ TEST(infer_io_utils, tensors) {
TEST
(
shape_info_io
,
read_and_write
)
{
const
std
::
string
path
=
"test_shape_info_io"
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_shape
,
max_shape
,
opt_shape
;
std
::
map
<
std
::
string
,
std
::
vector
<
int32_t
>>
min_value
,
max_value
,
opt_value
;
min_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
112
,
112
}));
max_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
opt_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
min_value
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
112
,
112
}));
max_value
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
opt_value
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
paddle
::
inference
::
SerializeShapeRangeInfo
(
path
,
min_shape
,
max_shape
,
opt_shape
);
path
,
min_shape
,
max_shape
,
opt_shape
,
min_value
,
max_value
,
opt_value
);
min_shape
.
clear
();
max_shape
.
clear
();
opt_shape
.
clear
();
min_value
.
clear
();
max_value
.
clear
();
opt_value
.
clear
();
opt_shape
.
insert
(
std
::
make_pair
(
"test2"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
224
,
224
}));
paddle
::
inference
::
DeserializeShapeRangeInfo
(
path
,
&
min_shape
,
&
max_shape
,
&
opt_shape
);
paddle
::
inference
::
DeserializeShapeRangeInfo
(
path
,
&
min_shape
,
&
max_shape
,
&
opt_shape
,
&
min_value
,
&
max_value
,
&
opt_value
);
min_shape
.
insert
(
std
::
make_pair
(
"test1"
,
std
::
vector
<
int32_t
>
{
1
,
3
,
56
,
56
}));
std
::
vector
<
std
::
string
>
names
{
"test1"
};
paddle
::
inference
::
UpdateShapeRangeInfo
(
path
,
min_shape
,
max_shape
,
opt_shape
,
names
);
ASSERT_THROW
(
paddle
::
inference
::
DeserializeShapeRangeInfo
(
"no_exists_file"
,
&
min_shape
,
&
max_shape
,
&
opt_shape
);
ASSERT_THROW
(
paddle
::
inference
::
DeserializeShapeRangeInfo
(
"no_exists_file"
,
&
min_shape
,
&
max_shape
,
&
opt_shape
,
&
min_value
,
&
max_value
,
&
opt_value
);
,
paddle
::
platform
::
EnforceNotMet
);
}
paddle/fluid/inference/utils/shape_range_info.proto
浏览文件 @
f2a778c9
...
...
@@ -23,6 +23,9 @@ message ShapeRangeInfos {
repeated
int32
min_shape
=
2
;
repeated
int32
max_shape
=
3
;
repeated
int32
opt_shape
=
4
;
repeated
int32
min_value
=
5
;
repeated
int32
max_value
=
6
;
repeated
int32
opt_value
=
7
;
}
repeated
ShapeRangeInfo
shape_range_info
=
1
;
...
...
paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
浏览文件 @
f2a778c9
...
...
@@ -554,6 +554,18 @@ class TensorRTEngineOp : public framework::OperatorBase {
#if IS_TRT_VERSION_GE(6000)
trt_context
->
setBindingDimensions
(
bind_index
,
inference
::
tensorrt
::
Vec2TRT_Dims
(
t_shape
,
x
,
true
));
// If this x is a shape tensor, we need to call setInputShapeBinding
if
(
engine
->
engine
()
->
isShapeBinding
(
bind_index
)
&&
engine
->
engine
()
->
bindingIsInput
(
bind_index
))
{
std
::
vector
<
int
>
shape_v
(
t
.
numel
());
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
shape_v
.
data
(),
platform
::
CUDAPlace
(),
t
.
data
<
int32_t
>
(),
t
.
numel
()
*
sizeof
(
int
),
nullptr
);
trt_context
->
setInputShapeBinding
(
bind_index
,
shape_v
.
data
());
}
#endif
}
runtime_batch
=
t_shape
[
0
];
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录