Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
86919910
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
86919910
编写于
3月 18, 2022
作者:
W
Wilber
提交者:
GitHub
3月 18, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Trt engine (#40649)
上级
e3b2a035
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
550 addition
and
65 deletion
+550
-65
paddle/infrt/backends/tensorrt/test_trt_engine.cc
paddle/infrt/backends/tensorrt/test_trt_engine.cc
+167
-0
paddle/infrt/dialect/infrt/ir/infrt_dialect.cc
paddle/infrt/dialect/infrt/ir/infrt_dialect.cc
+1
-3
paddle/infrt/dialect/tensorrt/trt_ops.td
paddle/infrt/dialect/tensorrt/trt_ops.td
+33
-0
paddle/infrt/host_context/mlir_to_runtime_translate.cc
paddle/infrt/host_context/mlir_to_runtime_translate.cc
+11
-4
paddle/infrt/kernel/tensor_kernels.cc
paddle/infrt/kernel/tensor_kernels.cc
+2
-2
paddle/infrt/kernel/tensorrt/trt_helper.h
paddle/infrt/kernel/tensorrt/trt_helper.h
+66
-0
paddle/infrt/kernel/tensorrt/trt_kernels.cc
paddle/infrt/kernel/tensorrt/trt_kernels.cc
+66
-56
paddle/infrt/kernel/tensorrt/trt_layers.h
paddle/infrt/kernel/tensorrt/trt_layers.h
+104
-0
paddle/infrt/tests/dialect/tensorrt/disabled_trt.mlir
paddle/infrt/tests/dialect/tensorrt/disabled_trt.mlir
+0
-0
paddle/infrt/tests/dialect/tensorrt/disabled_trt_conv.mlir
paddle/infrt/tests/dialect/tensorrt/disabled_trt_conv.mlir
+54
-0
paddle/infrt/tests/dialect/tensorrt/disabled_trt_fc.mlir
paddle/infrt/tests/dialect/tensorrt/disabled_trt_fc.mlir
+46
-0
未找到文件。
paddle/infrt/backends/tensorrt/test_trt_engine.cc
浏览文件 @
86919910
...
...
@@ -82,9 +82,176 @@ TrtUniquePtr<nvinfer1::INetworkDefinition> ConstructNetwork(
return
network
;
}
TrtUniquePtr
<
nvinfer1
::
INetworkDefinition
>
ConstructFCNetwork
(
nvinfer1
::
IBuilder
*
builder
,
nvinfer1
::
Dims
dims
,
bool
is_static_shape
)
{
TrtUniquePtr
<
nvinfer1
::
INetworkDefinition
>
network
;
if
(
is_static_shape
)
{
network
.
reset
(
builder
->
createNetworkV2
(
0U
));
}
else
{
auto
networkFlags
=
1U
<<
static_cast
<
uint32_t
>
(
nvinfer1
::
NetworkDefinitionCreationFlag
::
kEXPLICIT_BATCH
);
network
.
reset
(
builder
->
createNetworkV2
(
networkFlags
));
}
ITensor
*
data
=
network
->
addInput
(
model_input
,
nvinfer1
::
DataType
::
kFLOAT
,
dims
);
CHECK_NOTNULL
(
data
);
nvinfer1
::
Weights
kernel_weights
;
kernel_weights
.
type
=
nvinfer1
::
DataType
::
kFLOAT
;
kernel_weights
.
count
=
7840
;
std
::
vector
<
float
>
weight_data
(
kernel_weights
.
count
);
for
(
size_t
i
=
0
;
i
<
weight_data
.
size
();
++
i
)
{
weight_data
[
i
]
=
i
%
255
*
0.02
f
;
}
kernel_weights
.
values
=
weight_data
.
data
();
auto
*
layer
=
network
->
addFullyConnected
(
*
data
,
10
,
kernel_weights
,
nvinfer1
::
Weights
{});
CHECK_NOTNULL
(
layer
);
auto
*
out
=
layer
->
getOutput
(
0
);
out
->
setName
(
model_output
);
network
->
markOutput
(
*
out
);
return
network
;
}
TrtUniquePtr
<
nvinfer1
::
INetworkDefinition
>
ConstructConvNetwork
(
nvinfer1
::
IBuilder
*
builder
,
nvinfer1
::
Dims
dims
,
bool
is_static_shape
)
{
TrtUniquePtr
<
nvinfer1
::
INetworkDefinition
>
network
;
if
(
is_static_shape
)
{
network
.
reset
(
builder
->
createNetworkV2
(
0U
));
}
else
{
auto
networkFlags
=
1U
<<
static_cast
<
uint32_t
>
(
nvinfer1
::
NetworkDefinitionCreationFlag
::
kEXPLICIT_BATCH
);
network
.
reset
(
builder
->
createNetworkV2
(
networkFlags
));
}
ITensor
*
data
=
network
->
addInput
(
model_input
,
nvinfer1
::
DataType
::
kFLOAT
,
dims
);
CHECK_NOTNULL
(
data
);
nvinfer1
::
Weights
kernel_weights
,
bias_weights
;
kernel_weights
.
type
=
nvinfer1
::
DataType
::
kFLOAT
;
bias_weights
.
type
=
nvinfer1
::
DataType
::
kFLOAT
;
kernel_weights
.
count
=
81
;
bias_weights
.
count
=
3
;
std
::
vector
<
float
>
weight_data
(
kernel_weights
.
count
);
for
(
size_t
i
=
0
;
i
<
weight_data
.
size
();
++
i
)
{
weight_data
[
i
]
=
i
*
0.02
f
;
}
std
::
vector
<
float
>
bias_data
(
bias_weights
.
count
);
for
(
size_t
i
=
0
;
i
<
bias_data
.
size
();
++
i
)
{
bias_data
[
i
]
=
i
*
0.5
f
;
}
kernel_weights
.
values
=
weight_data
.
data
();
bias_weights
.
values
=
bias_data
.
data
();
nvinfer1
::
Dims
ksize
;
ksize
.
nbDims
=
2
;
ksize
.
d
[
0
]
=
3
;
ksize
.
d
[
1
]
=
3
;
auto
*
layer
=
network
->
addConvolutionNd
(
*
data
,
3
,
ksize
,
kernel_weights
,
bias_weights
);
CHECK_NOTNULL
(
layer
);
auto
*
out
=
layer
->
getOutput
(
0
);
out
->
setName
(
model_output
);
network
->
markOutput
(
*
out
);
return
network
;
}
// sigmoid(x) = 1 / (1 + exp(-x))
inline
float
sigmoid
(
float
x
)
{
return
1.
f
/
(
1.
f
+
exp
(
-
1
*
x
));
}
TEST
(
trt
,
run_fc_static
)
{
TrtEngine
engine
(
0
);
auto
net
=
ConstructFCNetwork
(
engine
.
GetTrtBuilder
(),
nvinfer1
::
Dims3
{
1
,
28
,
28
},
true
);
BuildOptions
build_options
;
build_options
.
max_batch
=
4
;
build_options
.
workspace
=
1024
;
engine
.
Build
(
std
::
move
(
net
),
build_options
);
InferenceOptions
inference_options
;
inference_options
.
batch
=
1
;
phi
::
GPUPlace
place
;
phi
::
GPUContext
context
;
context
.
PartialInitWithoutAllocator
();
context
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
place
,
context
.
stream
())
.
get
());
context
.
PartialInitWithAllocator
();
phi
::
DenseTensorMeta
meta
(
phi
::
DataType
::
FLOAT32
,
phi
::
make_ddim
({
inference_options
.
batch
,
1
,
28
,
28
}));
phi
::
DenseTensor
input
;
input
.
set_meta
(
meta
);
context
.
Alloc
<
float
>
(
&
input
,
input
.
numel
()
*
sizeof
(
float
));
std
::
vector
<
float
>
host_data
(
inference_options
.
batch
*
1
*
28
*
28
,
0
);
for
(
size_t
i
=
0
;
i
<
host_data
.
size
();
++
i
)
{
host_data
[
i
]
=
i
%
100
*
0.016
f
;
}
paddle
::
memory
::
Copy
(
place
,
input
.
data
<
float
>
(),
phi
::
CPUPlace
(),
host_data
.
data
(),
sizeof
(
float
)
*
host_data
.
size
(),
context
.
stream
());
std
::
unordered_map
<
std
::
string
,
phi
::
DenseTensor
*>
inputs
;
inputs
.
emplace
(
std
::
make_pair
(
model_input
,
&
input
));
engine
.
PrepareOutputHandle
(
"output_0"
);
engine
.
SetUpInference
(
inference_options
,
inputs
);
engine
.
GetEngineInfo
();
engine
.
Run
(
context
);
cudaStreamSynchronize
(
context
.
stream
());
}
TEST
(
trt
,
run_conv_static
)
{
TrtEngine
engine
(
0
);
auto
net
=
ConstructConvNetwork
(
engine
.
GetTrtBuilder
(),
nvinfer1
::
Dims3
{
3
,
28
,
28
},
true
);
BuildOptions
build_options
;
build_options
.
max_batch
=
4
;
build_options
.
workspace
=
1024
;
engine
.
Build
(
std
::
move
(
net
),
build_options
);
InferenceOptions
inference_options
;
inference_options
.
batch
=
1
;
phi
::
GPUPlace
place
;
phi
::
GPUContext
context
;
context
.
PartialInitWithoutAllocator
();
context
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
place
,
context
.
stream
())
.
get
());
context
.
PartialInitWithAllocator
();
phi
::
DenseTensorMeta
meta
(
phi
::
DataType
::
FLOAT32
,
phi
::
make_ddim
({
inference_options
.
batch
,
3
,
28
,
28
}));
phi
::
DenseTensor
input
;
input
.
set_meta
(
meta
);
context
.
Alloc
<
float
>
(
&
input
,
input
.
numel
()
*
sizeof
(
float
));
std
::
vector
<
float
>
host_data
(
inference_options
.
batch
*
3
*
28
*
28
,
0
);
for
(
size_t
i
=
0
;
i
<
host_data
.
size
();
++
i
)
{
host_data
[
i
]
=
i
%
100
*
0.016
f
;
}
paddle
::
memory
::
Copy
(
place
,
input
.
data
<
float
>
(),
phi
::
CPUPlace
(),
host_data
.
data
(),
sizeof
(
float
)
*
host_data
.
size
(),
context
.
stream
());
std
::
unordered_map
<
std
::
string
,
phi
::
DenseTensor
*>
inputs
;
inputs
.
emplace
(
std
::
make_pair
(
model_input
,
&
input
));
engine
.
PrepareOutputHandle
(
"output_0"
);
engine
.
SetUpInference
(
inference_options
,
inputs
);
engine
.
GetEngineInfo
();
engine
.
Run
(
context
);
cudaStreamSynchronize
(
context
.
stream
());
}
TEST
(
trt
,
run_static
)
{
TrtEngine
static_trt_engine
(
0
);
auto
net
=
ConstructNetwork
(
...
...
paddle/infrt/dialect/infrt/ir/infrt_dialect.cc
浏览文件 @
86919910
...
...
@@ -142,9 +142,6 @@ mlir::Type InfrtDialect::parseType(::mlir::DialectAsmParser &parser) const {
return
infrt
::
DenseTensorListType
::
get
(
parser
.
getContext
());
}
if
(
keyword
==
"dense_tensor_map"
)
{
return
DenseTensorMapType
::
get
(
parser
.
getContext
());
}
// Todo: parse other type
return
mlir
::
Type
();
}
...
...
@@ -181,6 +178,7 @@ void InfrtDialect::printType(::mlir::Type type,
if
(
type
.
isa
<
infrt
::
DenseTensorListType
>
())
{
os
<<
"tensor_list"
;
return
;
}
// print DenseTensorType, for example: !infrt.dense_tensor<CPU, FP32, NCHW>
if
(
type
.
isa
<
DenseTensorMapType
>
())
{
...
...
paddle/infrt/dialect/tensorrt/trt_ops.td
浏览文件 @
86919910
...
...
@@ -60,6 +60,39 @@ def TRT_ActivationOp : TRT_Op<"Activation", [NoSideEffect]> {
let results = (outs DenseTensor:$output);
}
def TRT_FullyConnectedOp : TRT_Op<"FullyConnected", [NoSideEffect]> {
let summary = "TensorRT IFullyConnectedLayer";
let description = [{
TensorRT IFullyConnectedLayer
}];
let arguments = (ins
DenseTensor:$input_tensor,
DenseTensor:$kernel_weights,
DenseTensor:$bias_weights,
SI32Attr:$out_channel_num
);
let results = (outs
DenseTensor:$output_tensor
);
}
def TRT_ConvolutionOp : TRT_Op<"Convolution", [NoSideEffect]> {
let summary = "TensorRT IConvolutionLayer";
let description = [{
TensorRT IConvolutionLayer
}];
let arguments = (ins
DenseTensor:$input_tensor,
DenseTensor:$kernel_weights,
DenseTensor:$bias_weights,
SI32Attr:$out_channel_num,
I32ArrayAttr:$kernel_size
);
let results = (outs
DenseTensor:$output_tensor
);
}
def TRT_ElementWiseOp : TRT_Op<"ElementWise", [NoSideEffect]> {
let summary = "TensorRT IElementWiseLayer";
let description = [{
...
...
paddle/infrt/host_context/mlir_to_runtime_translate.cc
浏览文件 @
86919910
...
...
@@ -298,14 +298,21 @@ bool MlirToRuntimeTranslator::EmitGeneralOp(
// add a naive implement.
for
(
int
i
=
0
,
e
=
op
->
getNumOperands
();
i
<
e
;
++
i
)
{
auto
operand
=
op
->
getOperand
(
i
);
Value
*
arg_value
{
nullptr
};
if
(
operand
.
isa
<
mlir
::
BlockArgument
>
())
{
mlir
::
BlockArgument
arg
=
operand
.
dyn_cast
<
mlir
::
BlockArgument
>
();
Value
*
arg_value
=
GetValue
(
arg
);
if
(
arg_value
->
is_type
<
phi
::
DenseTensor
>
())
{
impl_
->
runtime
->
FeedInArgs
(
std
::
make_pair
(
std
::
to_string
(
i
),
ValueRef
(
arg_value
)));
arg_value
=
GetValue
(
arg
);
}
else
{
arg_value
=
GetValue
(
operand
);
if
(
!
arg_value
)
{
auto
upstream_op
=
operand
.
getDefiningOp
();
arg_value
=
GetOpResult
(
upstream_op
);
}
}
if
(
arg_value
->
is_type
<
phi
::
DenseTensor
>
())
{
impl_
->
runtime
->
FeedInArgs
(
std
::
make_pair
(
std
::
to_string
(
i
),
ValueRef
(
arg_value
)));
}
}
#else
CHECK
(
false
)
<<
"should not reach here"
;
...
...
paddle/infrt/kernel/tensor_kernels.cc
浏览文件 @
86919910
...
...
@@ -146,8 +146,8 @@ void RegisterTensorKernels(host_context::KernelRegistry *registry) {
// TensorList related methods.
#ifdef INFRT_WITH_PHI
registry
->
AddKernel
(
"dt.tensor_list_get_tensor"
,
INFRT_KERNEL
(
TensorListGetTensor
)
);
registry
->
AddKernel
WithAttrs
(
"dt.tensor_list_get_tensor"
,
INFRT_KERNEL
(
TensorListGetTensor
),
{
"id"
}
);
registry
->
AddKernel
(
"dt.tensor_list_get_size"
,
INFRT_KERNEL
(
TensorListGetSize
));
#endif
...
...
paddle/infrt/kernel/tensorrt/trt_helper.h
0 → 100644
浏览文件 @
86919910
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <NvInferRuntime.h>
#include <NvInferRuntimeCommon.h>
#include "glog/logging.h"
#include "llvm/Support/ErrorHandling.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/dense_tensor.h"
namespace
infrt
{
namespace
kernel
{
namespace
tensorrt
{
static
nvinfer1
::
DataType
TensorTypeToWeightType
(
phi
::
DataType
tensor_type
)
{
switch
(
tensor_type
)
{
case
phi
::
DataType
::
FLOAT32
:
return
nvinfer1
::
DataType
::
kFLOAT
;
case
phi
::
DataType
::
INT32
:
return
nvinfer1
::
DataType
::
kINT32
;
case
phi
::
DataType
::
FLOAT16
:
return
nvinfer1
::
DataType
::
kHALF
;
default:
llvm_unreachable
(
"should not reach here"
);
}
}
static
nvinfer1
::
Dims
ArrayAttrToNvDims
(
const
mlir
::
ArrayAttr
&
int_array_attr
)
{
nvinfer1
::
Dims
dims
;
dims
.
nbDims
=
int_array_attr
.
size
();
CHECK
(
!
int_array_attr
.
empty
());
CHECK
(
int_array_attr
[
0
].
getType
().
isIntOrIndex
());
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
++
i
)
{
dims
.
d
[
i
]
=
int_array_attr
[
i
].
cast
<
mlir
::
IntegerAttr
>
().
getInt
();
}
return
dims
;
}
static
nvinfer1
::
Weights
TensorToWeights
(
phi
::
DenseTensor
*
tensor
)
{
CHECK_NOTNULL
(
tensor
);
nvinfer1
::
Weights
ret
;
ret
.
type
=
TensorTypeToWeightType
(
tensor
->
dtype
());
ret
.
count
=
tensor
->
numel
();
ret
.
values
=
tensor
->
data
();
return
ret
;
}
}
// namespace tensorrt
}
// namespace kernel
}
// namespace infrt
paddle/infrt/kernel/tensorrt/trt_kernels.cc
浏览文件 @
86919910
...
...
@@ -21,13 +21,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "paddle/infrt/kernel/tensorrt/trt_helper.h"
#include "paddle/infrt/kernel/tensorrt/trt_layers.h"
#include "paddle/infrt/backends/tensorrt/trt_engine.h"
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/infrt/dialect/tensorrt/trt_ops.h"
#include "paddle/infrt/host_context/symbol_table.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
namespace
infrt
{
...
...
@@ -35,8 +41,7 @@ namespace kernel {
namespace
tensorrt
{
::
infrt
::
backends
::
tensorrt
::
TrtEngine
CreateTrtEngine
(
MlirOperationWithInfrtSymbol
create_engine_op
/*, input_tensors, output_tensors, weights*/
)
{
MlirOperationWithInfrtSymbol
create_engine_op
)
{
// TODO(wilber): The device_id needs to get from mlir.
int
device_id
=
0
;
backends
::
tensorrt
::
TrtEngine
engine
(
device_id
);
...
...
@@ -51,6 +56,7 @@ namespace tensorrt {
// TODO(wilber): The build option shoule be fiiled from mlir info.
backends
::
tensorrt
::
BuildOptions
options
;
options
.
max_batch
=
4
;
options
.
workspace
=
1024
;
// Parse mlir Region which only has one block.
mlir
::
Operation
&
operation
=
*
create_engine_op
.
operation
;
...
...
@@ -62,8 +68,9 @@ namespace tensorrt {
auto
&
region
=
operation
.
getRegion
(
0
);
auto
&
block
=
region
.
getBlocks
().
front
();
llvm
::
DenseMap
<
mlir
::
Value
,
nvinfer1
::
ITensor
*>
map_info
;
std
::
unordered_map
<
std
::
string
,
phi
::
DenseTensor
*>
trt_bind_inputs
;
ValueToITensorMap
value_to_trt_tensor_map
;
ValueToTensorMap
value_to_tensor_map
;
for
(
auto
index_operand
:
llvm
::
enumerate
(
operation
.
getOperands
()))
{
mlir
::
Value
operand
=
index_operand
.
value
();
...
...
@@ -73,69 +80,72 @@ namespace tensorrt {
auto
*
v
=
symbol_table
->
GetValue
(
std
::
to_string
(
idx
));
CHECK_NOTNULL
(
v
);
auto
*
t
=
&
v
->
get
<
phi
::
DenseTensor
>
();
trt_bind_inputs
[
input_name
]
=
t
;
value_to_tensor_map
[
operand
]
=
t
;
// TODO(wilber): get input info from mlir.
// TODO(wilber): input dims, now only support static_shape, and just remove
// the first dimension.
// the first dimension. If the first dim is not -1, maybe we can pass the
// origin dims.
// TODO(wilber): now only suppot float input.
nvinfer1
::
Dims
dims
;
dims
.
nbDims
=
t
->
dims
().
size
()
-
1
;
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
++
i
)
{
dims
.
d
[
i
]
=
t
->
dims
()[
i
+
1
];
}
auto
*
in
=
network
->
addInput
(
input_name
.
c_str
(),
nvinfer1
::
DataType
::
kFLOAT
,
dims
);
map_info
[
operand
]
=
in
;
}
// TODO(wilber): Find a way to add layer.
for
(
auto
&
inner_op
:
block
.
without_terminator
())
{
if
(
inner_op
.
getName
().
getStringRef
()
==
"trt.Activation"
)
{
trt
::
ActivationOp
act_op
=
llvm
::
dyn_cast
<
trt
::
ActivationOp
>
(
inner_op
);
auto
in_arg
=
act_op
.
getOperand
();
if
(
!
map_info
.
count
(
in_arg
))
{
CHECK
(
false
)
<<
"map_info not has in_arg."
;
if
(
operand
.
isa
<
mlir
::
BlockArgument
>
())
{
// TODO(wilber): A trick: the weights are CPU tensor and inputs are GPU
// tensor, so we treat all GPU tensors as inputs to trt.
if
(
t
->
place
().
GetType
()
==
phi
::
AllocationType
::
GPU
)
{
trt_bind_inputs
[
input_name
]
=
t
;
nvinfer1
::
Dims
dims
;
dims
.
nbDims
=
t
->
dims
().
size
()
-
1
;
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
++
i
)
{
dims
.
d
[
i
]
=
t
->
dims
()[
i
+
1
];
}
auto
*
in
=
network
->
addInput
(
input_name
.
c_str
(),
nvinfer1
::
DataType
::
kFLOAT
,
dims
);
value_to_trt_tensor_map
[
operand
]
=
in
;
}
nvinfer1
::
ActivationType
act_type
=
static_cast
<
nvinfer1
::
ActivationType
>
(
act_op
.
activation_type
());
auto
*
act_layer
=
network
->
addActivation
(
*
map_info
[
in_arg
],
act_type
);
act_layer
->
setAlpha
(
act_op
.
alpha
().
convertToFloat
());
act_layer
->
setBeta
(
act_op
.
beta
().
convertToFloat
());
for
(
size_t
i
=
0
;
i
<
act_op
->
getNumResults
();
++
i
)
{
nvinfer1
::
ITensor
*
act_out_tensor
=
act_layer
->
getOutput
(
i
);
mlir
::
Value
act_out
=
act_op
->
getResult
(
i
);
map_info
[
act_out
]
=
act_out_tensor
;
}
else
{
// TODO(wilber): Replace with the op name that generates the weights.
if
(
operand
.
getDefiningOp
()
->
getName
().
getStringRef
()
!=
"phi_dt.create_dense_tensor.cpu"
)
{
trt_bind_inputs
[
input_name
]
=
t
;
nvinfer1
::
Dims
dims
;
dims
.
nbDims
=
t
->
dims
().
size
()
-
1
;
for
(
int
i
=
0
;
i
<
dims
.
nbDims
;
++
i
)
{
dims
.
d
[
i
]
=
t
->
dims
()[
i
+
1
];
}
auto
*
in
=
network
->
addInput
(
input_name
.
c_str
(),
nvinfer1
::
DataType
::
kFLOAT
,
dims
);
value_to_trt_tensor_map
[
operand
]
=
in
;
}
}
// if (inner_op.getName().getStringRef() == "trt.Constant") {
// trt::ConstantOp op = llvm::dyn_cast<trt::ConstantOp>(inner_op);
// mlir::Value op_out = op.getResult();
// std::vector<float> weight_data{1};
// auto* layer = network->addConstant(nvinfer1::Dims2(1, 1),
// nvinfer1::Weights{nvinfer1::DataType::kFLOAT, weight_data.data(), 1});
// auto* op_out_tenor = layer->getOutput(0);
// map_info[op_out] = op_out_tenor;
// }
}
for
(
auto
&
inner_op
:
block
.
without_terminator
())
{
for
(
mlir
::
Value
v
:
inner_op
.
getResults
())
{
for
(
mlir
::
Operation
*
user
:
v
.
getUsers
())
{
if
(
user
->
getName
().
getStringRef
()
==
"infrt.return"
)
{
if
(
!
map_info
.
count
(
v
))
{
CHECK
(
false
)
<<
"map_info not has value"
;
}
network
->
markOutput
(
*
map_info
[
v
]);
}
}
// TODO(wilber): Find a way to add layer.
for
(
auto
&
operation
:
block
.
without_terminator
())
{
if
(
trt
::
ActivationOp
op
=
llvm
::
dyn_cast
<
trt
::
ActivationOp
>
(
operation
))
{
ActivationFunc
(
op
,
network
.
get
(),
value_to_trt_tensor_map
,
value_to_tensor_map
);
}
else
if
(
trt
::
FullyConnectedOp
op
=
llvm
::
dyn_cast
<
trt
::
FullyConnectedOp
>
(
operation
))
{
FcFunc
(
op
,
network
.
get
(),
value_to_trt_tensor_map
,
value_to_tensor_map
);
}
else
if
(
trt
::
ConvolutionOp
op
=
llvm
::
dyn_cast
<
trt
::
ConvolutionOp
>
(
operation
))
{
ConvFunc
(
op
,
network
.
get
(),
value_to_trt_tensor_map
,
value_to_tensor_map
);
}
else
{
CHECK
(
false
)
<<
"not supported operation."
;
}
}
// std::unordered_map<std::string, phi::DenseTensor*> trt_bind_outputs;
mlir
::
Operation
*
ret
=
block
.
getTerminator
();
for
(
unsigned
int
i
=
0
;
i
<
ret
->
getNumOperands
();
++
i
)
{
mlir
::
Value
arg
=
ret
->
getOperand
(
i
);
CHECK
(
map_info
.
count
(
arg
));
map_info
[
arg
]
->
setName
((
"output_"
+
std
::
to_string
(
i
)).
c_str
());
for
(
auto
index_operand
:
llvm
::
enumerate
(
block
.
getTerminator
()
->
getOperands
()))
{
mlir
::
Value
arg
=
index_operand
.
value
();
CHECK
(
value_to_trt_tensor_map
.
count
(
arg
));
// TODO(wilber): A trick that we name trt output tensor's name as output_0,
// output_1, ...
value_to_trt_tensor_map
[
arg
]
->
setName
(
(
"output_"
+
std
::
to_string
(
index_operand
.
index
())).
c_str
());
network
->
markOutput
(
*
value_to_trt_tensor_map
[
arg
]);
}
for
(
int
i
=
0
;
i
<
network
->
getNbOutputs
();
++
i
)
{
engine
.
PrepareOutputHandle
(
network
->
getOutput
(
i
)
->
getName
());
...
...
paddle/infrt/kernel/tensorrt/trt_layers.h
0 → 100644
浏览文件 @
86919910
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <NvInfer.h>
#include <mlir/IR/Operation.h>
#include <string>
#include "paddle/infrt/dialect/tensorrt/trt_ops.h"
#include "paddle/infrt/kernel/tensorrt/trt_helper.h"
#include "paddle/phi/core/dense_tensor.h"
namespace
infrt
{
namespace
kernel
{
namespace
tensorrt
{
using
ValueToTensorMap
=
llvm
::
DenseMap
<
mlir
::
Value
,
phi
::
DenseTensor
*>
;
using
ValueToITensorMap
=
llvm
::
DenseMap
<
mlir
::
Value
,
nvinfer1
::
ITensor
*>
;
inline
void
ActivationFunc
(
trt
::
ActivationOp
&
act_op
,
// NOLINT
nvinfer1
::
INetworkDefinition
*
network
,
ValueToITensorMap
&
value_to_trt_tensor_map
,
// NOLINT
ValueToTensorMap
&
value_to_tensor_map
)
{
// NOLINT
auto
in_arg
=
act_op
.
getOperand
();
CHECK
(
value_to_trt_tensor_map
.
count
(
in_arg
))
<<
"value_to_trt_tensor_map not has in_arg."
;
nvinfer1
::
ActivationType
act_type
=
static_cast
<
nvinfer1
::
ActivationType
>
(
act_op
.
activation_type
());
auto
*
act_layer
=
network
->
addActivation
(
*
value_to_trt_tensor_map
[
in_arg
],
act_type
);
act_layer
->
setAlpha
(
act_op
.
alpha
().
convertToFloat
());
act_layer
->
setBeta
(
act_op
.
beta
().
convertToFloat
());
for
(
size_t
i
=
0
;
i
<
act_op
->
getNumResults
();
++
i
)
{
nvinfer1
::
ITensor
*
act_out_tensor
=
act_layer
->
getOutput
(
i
);
mlir
::
Value
act_out
=
act_op
->
getResult
(
i
);
value_to_trt_tensor_map
[
act_out
]
=
act_out_tensor
;
}
}
inline
void
ConvFunc
(
trt
::
ConvolutionOp
&
op
,
// NOLINT
nvinfer1
::
INetworkDefinition
*
network
,
ValueToITensorMap
&
value_to_trt_tensor_map
,
// NOLINT
ValueToTensorMap
&
value_to_tensor_map
)
{
// NOLINT
mlir
::
Value
input_tensor_repr
=
op
.
input_tensor
();
int
out_channel_num
=
op
.
out_channel_num
();
auto
size_attrs
=
op
.
kernel_size
();
nvinfer1
::
Dims
dims
=
ArrayAttrToNvDims
(
size_attrs
);
auto
kernel_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
kernel_weights
()]);
auto
bias_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
bias_weights
()]);
auto
*
layer
=
network
->
addConvolutionNd
(
*
value_to_trt_tensor_map
[
input_tensor_repr
],
out_channel_num
,
dims
,
kernel_weights
,
bias_weights
);
CHECK_NOTNULL
(
layer
);
mlir
::
Value
out_repr
=
op
.
output_tensor
();
nvinfer1
::
ITensor
*
out_tensor
=
layer
->
getOutput
(
0
);
value_to_trt_tensor_map
[
out_repr
]
=
out_tensor
;
}
inline
void
FcFunc
(
trt
::
FullyConnectedOp
&
op
,
// NOLINT
nvinfer1
::
INetworkDefinition
*
network
,
ValueToITensorMap
&
value_to_trt_tensor_map
,
// NOLINT
ValueToTensorMap
&
value_to_tensor_map
)
{
// NOLINT
mlir
::
Value
input_tensor_repr
=
op
.
input_tensor
();
CHECK
(
value_to_trt_tensor_map
.
count
(
input_tensor_repr
));
auto
kernel_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
kernel_weights
()]);
auto
bias_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
bias_weights
()]);
int
out_channel_num
=
op
.
out_channel_num
();
auto
*
layer
=
network
->
addFullyConnected
(
*
value_to_trt_tensor_map
[
input_tensor_repr
],
out_channel_num
,
kernel_weights
,
bias_weights
);
mlir
::
Value
out_repr
=
op
.
output_tensor
();
nvinfer1
::
ITensor
*
out_tensor
=
layer
->
getOutput
(
0
);
value_to_trt_tensor_map
[
out_repr
]
=
out_tensor
;
}
}
// namespace tensorrt
}
// namespace kernel
}
// namespace infrt
paddle/infrt/tests/dialect/disabled_trt.mlir
→
paddle/infrt/tests/dialect/
tensorrt/
disabled_trt.mlir
浏览文件 @
86919910
文件已移动
paddle/infrt/tests/dialect/tensorrt/disabled_trt_conv.mlir
0 → 100644
浏览文件 @
86919910
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @run_trt
func @run_trt(%input_tensor : !infrt.dense_tensor<GPU, FP32, NCHW>, %kernel_weight : !infrt.dense_tensor<CPU, FP32, NCHW>, %kernel_bias : !infrt.dense_tensor<CPU, FP32, NCHW>, %gpu_ctx : !phi.context<GPU>) {
%a = "trt.create_engine"(%input_tensor, %kernel_weight, %kernel_bias) ({
%1 = "trt.Activation"(%input_tensor) {activation_type = 1 : si32, alpha = 1.0 : f32, beta = 6.0 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.Convolution"(%input_tensor, %kernel_weight, %kernel_bias) {out_channel_num = 3 : si32, kernel_size = [3:i32, 3:i32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
"infrt.return"(%1, %2) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
}) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
"trt.inspect_engine"(%a) {} : (!trt.engine) -> ()
%res = "trt.compute"(%a, %gpu_ctx) {} : (!trt.engine, !phi.context<GPU>) -> (!infrt.tensor_list)
%size = "dt.tensor_list_get_size"(%res) {} : (!infrt.tensor_list) -> (i32)
"infrt.print.i32"(%size) {} : (i32) -> ()
%ts0 = "dt.tensor_list_get_tensor"(%res) {id = 0 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts0) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%ts1 = "dt.tensor_list_get_tensor"(%res) {id = 1 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts1) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
infrt.return
}
// CHECK-LABEL: @main
func @main() {
%gpu_ctx = "phi_dt.create_context.gpu" (): () -> !phi.context<GPU>
%cpu_ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_dense_tensor.gpu" (%gpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[1:i64, 3:i64, 28:i64, 28:i64], lod=[0:i64]}: (!phi.context<GPU>) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%input_tensor) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
// "phi_dt.print_tensor" (%input_tensor) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%kernel_weight = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[3:i64, 3:i64, 3:i64, 3:i64], lod=[0:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_weight) {value=[1.:f32, 2.:f32, 3.:f32, 4.:f32, 5.:f32, 6.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
// "phi_dt.print_tensor" (%kernel_weight) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%kernel_bias = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[3:i64], lod=[0:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_bias) {value=[1.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
// "phi_dt.print_tensor" (%kernel_bias) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
infrt.call @run_trt(%input_tensor, %kernel_weight, %kernel_bias, %gpu_ctx) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !phi.context<GPU>) -> ()
infrt.return
}
paddle/infrt/tests/dialect/tensorrt/disabled_trt_fc.mlir
0 → 100644
浏览文件 @
86919910
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @main
func @main() {
%ctx = "phi_dt.create_context.gpu" (): () -> !phi.context<GPU>
%cpu_ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_dense_tensor.gpu" (%ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[1:i64, 3:i64, 1:i64, 1:i64], lod=[1:i64]}: (!phi.context<GPU>) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%input_tensor) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
//"phi_dt.print_tensor" (%input_tensor) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%kernel_weight = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[2:i64, 3:i64], lod=[1:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_weight) {value=[1.:f32, 2.:f32, 3.:f32, 4.:f32, 5.:f32, 6.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
//"phi_dt.print_tensor" (%kernel_weight) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%kernel_bias = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[2:i64], lod=[1:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_bias) {value=[1.:f32, 2.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
//"phi_dt.print_tensor" (%kernel_bias) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%engine = "trt.create_engine"(%input_tensor, %kernel_weight, %kernel_bias) ({
%1 = "trt.Activation"(%input_tensor) {activation_type = 1 : si32, alpha = 1.0 : f32, beta = 6.0 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.FullyConnected"(%input_tensor, %kernel_weight, %kernel_bias) {out_channel_num = 2 : si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
"infrt.return"(%1, %2) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
}) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
%res = "trt.compute"(%engine, %ctx) {} : (!trt.engine, !phi.context<GPU>) -> (!infrt.tensor_list)
%size = "dt.tensor_list_get_size"(%res) {} : (!infrt.tensor_list) -> (i32)
"infrt.print.i32"(%size) {} : (i32) -> ()
%ts0 = "dt.tensor_list_get_tensor"(%res) {id = 0 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts0) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%ts1 = "dt.tensor_list_get_tensor"(%res) {id = 1 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts1) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
infrt.return
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录