Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2012aeb6
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2012aeb6
编写于
4月 02, 2022
作者:
W
Wilber
提交者:
GitHub
4月 02, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add trt pool and ut (#41258)
上级
ad0c106c
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
289 addition
and
119 deletion
+289
-119
paddle/infrt/dialect/tensorrt/convert.h
paddle/infrt/dialect/tensorrt/convert.h
+155
-1
paddle/infrt/dialect/tensorrt/pd_lower_to_trt.td
paddle/infrt/dialect/tensorrt/pd_lower_to_trt.td
+2
-1
paddle/infrt/dialect/tensorrt/trt_op_converter_pass.cc
paddle/infrt/dialect/tensorrt/trt_op_converter_pass.cc
+0
-27
paddle/infrt/dialect/tensorrt/trt_ops.td
paddle/infrt/dialect/tensorrt/trt_ops.td
+4
-1
paddle/infrt/kernel/tensorrt/trt_helper.h
paddle/infrt/kernel/tensorrt/trt_helper.h
+5
-5
paddle/infrt/kernel/tensorrt/trt_kernels.cc
paddle/infrt/kernel/tensorrt/trt_kernels.cc
+3
-0
paddle/infrt/kernel/tensorrt/trt_layers.h
paddle/infrt/kernel/tensorrt/trt_layers.h
+54
-2
paddle/infrt/tests/dialect/tensorrt/disabled_trt.mlir
paddle/infrt/tests/dialect/tensorrt/disabled_trt.mlir
+0
-37
paddle/infrt/tests/dialect/tensorrt/disabled_trt_activation.mlir
...infrt/tests/dialect/tensorrt/disabled_trt_activation.mlir
+21
-0
paddle/infrt/tests/dialect/tensorrt/disabled_trt_fc.mlir
paddle/infrt/tests/dialect/tensorrt/disabled_trt_fc.mlir
+24
-45
paddle/infrt/tests/dialect/tensorrt/disabled_trt_pool.mlir
paddle/infrt/tests/dialect/tensorrt/disabled_trt_pool.mlir
+21
-0
未找到文件。
paddle/infrt/dialect/tensorrt/convert.h
浏览文件 @
2012aeb6
...
...
@@ -14,17 +14,49 @@
#pragma once
#include <glog/logging.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/include/mlir/IR/Attributes.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/BuiltinAttributes.h>
#include <mlir/IR/PatternMatch.h>
#include <mlir/Transforms/DialectConversion.h>
#include "paddle/infrt/dialect/infrt/common/types.h"
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#include "paddle/infrt/dialect/pd/ir/pd_ops.h"
#include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h"
#include "paddle/infrt/dialect/tensorrt/trt_ops.h"
#include "paddle/infrt/kernel/tensorrt/trt_helper.h"
namespace
infrt
{
namespace
trt
{
#ifdef INFRT_WITH_TRT
#define STRING_TO_ENUM_TYPE(enum_type) enum_type
#define STRING_TO_ENUM_VALUE(enum_value) enum_value
#include <NvInfer.h>
#else // INFRT_WITH_TRT
#define STRING_TO_ENUM_TYPE(enum_type) std::string
#define STRING_TO_ENUM_VALUE(enum_value) #enum_value
#endif // INFRT_WITH_TRT
template
<
typename
T
>
::
mlir
::
IntegerAttr
createNvinferEnumAttr
(
::
mlir
::
PatternRewriter
&
rewriter
,
// NOLINT
T
enum_value
)
{
return
rewriter
.
getSI32IntegerAttr
((
int32_t
)
enum_value
);
}
template
<
>
::
mlir
::
IntegerAttr
createNvinferEnumAttr
<
std
::
string
>
(
::
mlir
::
PatternRewriter
&
rewriter
,
std
::
string
enum_value
)
{
// NOLINT
(
void
)
enum_value
;
return
rewriter
.
getSI32IntegerAttr
(
-
1
);
}
static
mlir
::
Value
createTRTConv2dOp
(
mlir
::
PatternRewriter
&
rewriter
,
// NOLINT
mlir
::
Operation
*
op
)
{
auto
conv_op
=
::
llvm
::
dyn_cast
<
infrt
::
pd
::
Conv2dOp
>
(
op
);
...
...
@@ -205,5 +237,127 @@ static mlir::Value createTRTShuffledOp(
return
rewriter
.
create
<
trt
::
ShuffleOp
>
(
op
->
getLoc
(),
resultTypes
,
operands
,
attributes
);
}
inline
mlir
::
IntegerAttr
CreatePoolingType
(
mlir
::
PatternRewriter
&
builder
,
// NOLINT
mlir
::
StringAttr
pool_type
)
{
// pool_type.
auto
ptype
=
pool_type
.
str
();
if
(
ptype
==
"max"
)
{
return
createNvinferEnumAttr
(
builder
,
nvinfer1
::
PoolingType
::
kMAX
);
}
else
if
(
ptype
==
"avg"
)
{
return
createNvinferEnumAttr
(
builder
,
nvinfer1
::
PoolingType
::
kAVERAGE
);
}
else
{
llvm_unreachable
(
"unknown pool_type."
);
return
{};
}
}
inline
mlir
::
IntegerAttr
CreatePaddingMode
(
mlir
::
PatternRewriter
&
builder
,
// NOLINT
mlir
::
StringAttr
padding_algorithm
,
mlir
::
BoolAttr
ceil_mode
)
{
// TODO(Inference): Phi pool kernel seems not process ceil_mode.
auto
padding_algo
=
padding_algorithm
.
str
();
if
(
padding_algo
==
"SAME"
)
{
return
createNvinferEnumAttr
(
builder
,
nvinfer1
::
PaddingMode
::
kSAME_UPPER
);
}
if
(
ceil_mode
.
getValue
()
&&
padding_algo
!=
"SAME"
)
{
return
createNvinferEnumAttr
(
builder
,
nvinfer1
::
PaddingMode
::
kEXPLICIT_ROUND_UP
);
}
else
{
return
createNvinferEnumAttr
(
builder
,
nvinfer1
::
PaddingMode
::
kEXPLICIT_ROUND_DOWN
);
}
}
inline
::
llvm
::
SmallVector
<::
mlir
::
Value
,
4
>
CreatePaddleTrtPoolingOp
(
mlir
::
PatternRewriter
&
builder
,
// NOLINT
mlir
::
Value
input
,
mlir
::
StringAttr
pool_type
,
mlir
::
ArrayAttr
ksize
,
mlir
::
BoolAttr
global_pooling
,
mlir
::
ArrayAttr
strides
,
mlir
::
ArrayAttr
paddings
,
mlir
::
BoolAttr
exclusive
,
mlir
::
BoolAttr
adaptive
,
mlir
::
BoolAttr
ceil_mode
,
mlir
::
StringAttr
data_format
,
mlir
::
StringAttr
padding_algorithm
)
{
::
llvm
::
SmallVector
<::
mlir
::
Value
,
4
>
tblgen_repl_values
;
// TODO(inference): Support NHWC.
if
(
data_format
.
str
()
!=
"NCHW"
)
{
CHECK
(
false
)
<<
"The pool2d converter now only support NCHW."
;
}
// TODO(Wilber): How to support dynamic shape?
auto
*
input_producer
=
input
.
getDefiningOp
();
// Process pool_type.
auto
pool_type_attr
=
CreatePoolingType
(
builder
,
pool_type
);
// Update padding.
auto
padding_algorithm_str
=
padding_algorithm
.
str
();
auto
paddings_attr
=
paddings
;
if
(
padding_algorithm_str
==
"EXPLICIT"
)
{
// Do nothing on paddings.
}
else
if
(
padding_algorithm_str
==
"SAME"
)
{
// We should process this case in trt network build phase.
}
else
if
(
padding_algorithm_str
==
"VALID"
)
{
// Set padding to zero.
paddings_attr
=
builder
.
getI32ArrayAttr
({
0
,
0
});
}
else
{
CHECK
(
false
)
<<
"Unknown padding_algotithm."
;
}
// if global_pooling == true or adaptive == true, padding will be ignored
if
(
global_pooling
.
getValue
()
||
adaptive
.
getValue
())
{
paddings_attr
=
builder
.
getI32ArrayAttr
({
0
,
0
});
}
// if global_pooling == true, then we should update kernel size to input dims.
if
(
global_pooling
.
getValue
()
==
true
)
{
// Update ksize to input dims.
}
// The adaptive logic should be processed when we get the context of
// INetworkDefinition, so we place the logic in infrt runtime(trt compile
// time).
// The `exclusive` may be a naive attr, which can be forward to trt.
auto
padding_mode_attr
=
CreatePaddingMode
(
builder
,
padding_algorithm
,
ceil_mode
);
if
(
global_pooling
.
getValue
()
==
true
)
{
CHECK
(
false
)
<<
"Temporarily not support global_pool"
;
return
tblgen_repl_values
;
}
PoolingOp
pool_op
;
{
auto
ods_loc
=
builder
.
getFusedLoc
({
input_producer
->
getLoc
()});
builder
.
create
<
PoolingOp
>
(
ods_loc
,
input
.
getType
(),
input
,
pool_type_attr
,
ksize
,
strides
,
paddings_attr
,
padding_mode_attr
,
exclusive
,
adaptive
,
padding_algorithm
);
}
for
(
auto
v
:
::
llvm
::
SmallVector
<::
mlir
::
Value
,
4
>
{
pool_op
.
getODSResults
(
0
)})
{
tblgen_repl_values
.
push_back
(
v
);
}
return
tblgen_repl_values
;
}
}
// namespace trt
}
// namespace infrt
paddle/infrt/dialect/tensorrt/pd_lower_to_trt.td
浏览文件 @
2012aeb6
...
...
@@ -31,9 +31,10 @@ def PD2TRT_Conv2d_Lower : Pat<
(PD_Conv2dOp:$old_value $Input, $Filter, $strides, $paddings, $padding_algorithm, $groups, $dilations, $data_format),
(createTRTConv2dOp $old_value)>;
def createTrtPoolingOp : NativeCodeCall<"::infrt::trt::CreatePaddleTrtPoolingOp($_builder, $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10)">;
def PD2TRT_Pooling_Lower : Pat<
(PD_Pool2dOp $Input, $pooling_type, $ksize, $global_pooling, $strides, $paddings, $exclusive, $adaptive, $ceil_mode, $data_format, $padding_algorithm),
(
TRT_PoolingOp $Input, (INFRT_createI32Attr<"0">)/*kmax*/, $ksize, $strides, $paddings
, $padding_algorithm)>;
(
createTrtPoolingOp $Input, $pooling_type, $ksize, $global_pooling, $strides, $paddings, $exclusive, $adaptive, $ceil_mode, $data_format
, $padding_algorithm)>;
def PD2TRT_MatrixMultipl_Lower : Pat<
(PD_MulOp $Input1, $Input2, $x_num_col_dims, $y_num_col_dims),
...
...
paddle/infrt/dialect/tensorrt/trt_op_converter_pass.cc
浏览文件 @
2012aeb6
...
...
@@ -28,33 +28,6 @@
namespace
infrt
{
namespace
trt
{
#ifdef INFRT_WITH_TRT
#define STRING_TO_ENUM_TYPE(enum_type) enum_type
#define STRING_TO_ENUM_VALUE(enum_value) enum_value
#include <NvInfer.h>
#else // INFRT_WITH_TRT
#define STRING_TO_ENUM_TYPE(enum_type) std::string
#define STRING_TO_ENUM_VALUE(enum_value) #enum_value
#endif // INFRT_WITH_TRT
template
<
typename
T
>
::
mlir
::
IntegerAttr
createNvinferEnumAttr
(
::
mlir
::
PatternRewriter
&
rewriter
,
// NOLINT
T
enum_value
)
{
return
rewriter
.
getSI32IntegerAttr
((
int32_t
)
enum_value
);
}
template
<
>
::
mlir
::
IntegerAttr
createNvinferEnumAttr
<
std
::
string
>
(
::
mlir
::
PatternRewriter
&
rewriter
,
std
::
string
enum_value
)
{
// NOLINT
(
void
)
enum_value
;
return
rewriter
.
getSI32IntegerAttr
(
-
1
);
}
#include "paddle/infrt/dialect/tensorrt/pd_lower_to_trt.cpp.inc" // NOLINT
struct
PD2TRT_GraphLower
:
public
::
mlir
::
RewritePattern
{
...
...
paddle/infrt/dialect/tensorrt/trt_ops.td
浏览文件 @
2012aeb6
...
...
@@ -101,7 +101,10 @@ def TRT_PoolingOp : TRT_Op<"Pooling", [NoSideEffect]> {
I32ArrayAttr:$window_size,
I32ArrayAttr:$strides,
I32ArrayAttr:$paddings,
StrAttr:$padding_mode
I32Attr:$padding_mode,
BoolAttr:$exclusive,
BoolAttr:$adaptive,
StrAttr:$padding_algorithm
);
let results = (outs
DenseTensor:$output_tensor
...
...
paddle/infrt/kernel/tensorrt/trt_helper.h
浏览文件 @
2012aeb6
...
...
@@ -28,13 +28,13 @@ namespace infrt {
namespace
kernel
{
namespace
tensorrt
{
static
nvinfer1
::
DataType
TensorTypeToWeightType
(
phi
::
DataType
tensor_type
)
{
static
nvinfer1
::
DataType
TensorTypeToWeightType
(
::
phi
::
DataType
tensor_type
)
{
switch
(
tensor_type
)
{
case
phi
::
DataType
::
FLOAT32
:
case
::
phi
::
DataType
::
FLOAT32
:
return
nvinfer1
::
DataType
::
kFLOAT
;
case
phi
::
DataType
::
INT32
:
case
::
phi
::
DataType
::
INT32
:
return
nvinfer1
::
DataType
::
kINT32
;
case
phi
::
DataType
::
FLOAT16
:
case
::
phi
::
DataType
::
FLOAT16
:
return
nvinfer1
::
DataType
::
kHALF
;
default:
llvm_unreachable
(
"should not reach here"
);
...
...
@@ -52,7 +52,7 @@ static nvinfer1::Dims ArrayAttrToNvDims(const mlir::ArrayAttr& int_array_attr) {
return
dims
;
}
static
nvinfer1
::
Weights
TensorToWeights
(
phi
::
DenseTensor
*
tensor
)
{
static
nvinfer1
::
Weights
TensorToWeights
(
::
phi
::
DenseTensor
*
tensor
)
{
CHECK_NOTNULL
(
tensor
);
nvinfer1
::
Weights
ret
;
ret
.
type
=
TensorTypeToWeightType
(
tensor
->
dtype
());
...
...
paddle/infrt/kernel/tensorrt/trt_kernels.cc
浏览文件 @
2012aeb6
...
...
@@ -129,6 +129,7 @@ namespace tensorrt {
// TODO(wilber): Find a way to add layer.
for
(
auto
&
operation
:
block
.
without_terminator
())
{
VLOG
(
1
)
<<
"process "
<<
operation
.
getName
().
getStringRef
().
str
()
<<
" ..."
;
if
(
trt
::
ActivationOp
op
=
llvm
::
dyn_cast
<
trt
::
ActivationOp
>
(
operation
))
{
ActivationFunc
(
op
,
network
.
get
(),
value_to_trt_tensor_map
,
value_to_tensor_map
);
...
...
@@ -138,6 +139,8 @@ namespace tensorrt {
}
else
if
(
trt
::
ConvolutionOp
op
=
llvm
::
dyn_cast
<
trt
::
ConvolutionOp
>
(
operation
))
{
ConvFunc
(
op
,
network
.
get
(),
value_to_trt_tensor_map
,
value_to_tensor_map
);
}
else
if
(
trt
::
PoolingOp
op
=
llvm
::
dyn_cast
<
trt
::
PoolingOp
>
(
operation
))
{
PoolFunc
(
op
,
network
.
get
(),
value_to_trt_tensor_map
,
value_to_tensor_map
);
}
else
{
CHECK
(
false
)
<<
"not supported operation."
;
}
...
...
paddle/infrt/kernel/tensorrt/trt_layers.h
浏览文件 @
2012aeb6
...
...
@@ -15,13 +15,15 @@
#pragma once
#include <NvInfer.h>
#include <llvm/ADT/StringRef.h>
#include <mlir/IR/BuiltinAttributes.h>
#include <mlir/IR/Operation.h>
#include <mlir/IR/Value.h>
#include <string>
#include "paddle/infrt/dialect/tensorrt/trt_ops.h"
#include "paddle/infrt/kernel/tensorrt/trt_helper.h"
#include "paddle/phi/core/dense_tensor.h"
namespace
infrt
{
...
...
@@ -63,7 +65,12 @@ inline void ConvFunc(trt::ConvolutionOp& op, // NOLINT
nvinfer1
::
Dims
dims
=
ArrayAttrToNvDims
(
size_attrs
);
auto
kernel_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
kernel_weights
()]);
auto
bias_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
bias_weights
()]);
nvinfer1
::
Weights
bias_weights
;
if
(
op
.
bias_weights
()
==
mlir
::
Value
())
{
bias_weights
=
nvinfer1
::
Weights
{};
}
else
{
bias_weights
=
TensorToWeights
(
value_to_tensor_map
[
op
.
bias_weights
()]);
}
auto
*
layer
=
network
->
addConvolutionNd
(
*
value_to_trt_tensor_map
[
input_tensor_repr
],
...
...
@@ -77,6 +84,51 @@ inline void ConvFunc(trt::ConvolutionOp& op, // NOLINT
value_to_trt_tensor_map
[
out_repr
]
=
out_tensor
;
}
inline
void
PoolFunc
(
trt
::
PoolingOp
&
op
,
// NOLINT
nvinfer1
::
INetworkDefinition
*
network
,
ValueToITensorMap
&
value_to_trt_tensor_map
,
// NOLINT
ValueToTensorMap
&
value_to_tensor_map
)
{
// NOLINT
mlir
::
Value
input_tensor_repr
=
op
.
input_tensor
();
nvinfer1
::
ITensor
*
input_itensor
=
value_to_trt_tensor_map
[
input_tensor_repr
];
// nvinfer1::Dims input_shape = input_itensor->getDimensions();
// int input_dims = input_shape.nbDims;
auto
padding_mode
=
op
.
padding_mode
();
auto
pool_type
=
op
.
pool_type
();
mlir
::
ArrayAttr
paddings
=
op
.
paddings
();
mlir
::
ArrayAttr
strides
=
op
.
strides
();
mlir
::
ArrayAttr
ksize
=
op
.
window_size
();
bool
exclusive
=
op
.
exclusive
();
bool
adaptive
=
op
.
adaptive
();
auto
padding_algorithm
=
op
.
padding_algorithm
().
str
();
if
(
padding_algorithm
==
"SAME"
)
{
// TODO(wilber)
CHECK
(
false
)
<<
"Not supported `same` padding algorithm"
;
}
if
(
adaptive
)
{
// TODO(Inference)
CHECK
(
false
)
<<
"Not supported adaptive pool"
;
}
nvinfer1
::
Dims
window_size
=
ArrayAttrToNvDims
(
ksize
);
auto
*
layer
=
network
->
addPoolingNd
(
*
input_itensor
,
static_cast
<
nvinfer1
::
PoolingType
>
(
pool_type
),
window_size
);
CHECK_NOTNULL
(
layer
);
layer
->
setPaddingMode
(
static_cast
<
nvinfer1
::
PaddingMode
>
(
padding_mode
));
layer
->
setPaddingNd
(
ArrayAttrToNvDims
(
paddings
));
layer
->
setStrideNd
(
ArrayAttrToNvDims
(
strides
));
layer
->
setAverageCountExcludesPadding
(
exclusive
);
mlir
::
Value
out_repr
=
op
.
output_tensor
();
nvinfer1
::
ITensor
*
out_tensor
=
layer
->
getOutput
(
0
);
value_to_trt_tensor_map
[
out_repr
]
=
out_tensor
;
}
inline
void
FcFunc
(
trt
::
FullyConnectedOp
&
op
,
// NOLINT
nvinfer1
::
INetworkDefinition
*
network
,
ValueToITensorMap
&
value_to_trt_tensor_map
,
// NOLINT
...
...
paddle/infrt/tests/dialect/tensorrt/disabled_trt.mlir
已删除
100644 → 0
浏览文件 @
ad0c106c
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @run_trt
func @run_trt(%0 : !infrt.dense_tensor<GPU, FP32, NCHW>, %ctx : !phi.context<GPU>) {
%a = "trt.create_engine"(%0) ({
%1 = "trt.Activation"(%0) {activation_type = 1 : si32, alpha = 1.0 : f32, beta = 6.0 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
"infrt.return"(%1) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
}) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !trt.engine
"trt.inspect_engine"(%a) {} : (!trt.engine) -> ()
%res = "trt.compute"(%a, %ctx) {} : (!trt.engine, !phi.context<GPU>) -> (!infrt.tensor_list)
%size = "dt.tensor_list_get_size"(%res) {} : (!infrt.tensor_list) -> (i32)
"infrt.print.i32"(%size) {} : (i32) -> ()
%ts0 = "dt.tensor_list_get_tensor"(%res) {id = 0 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts0) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
infrt.return
}
// CHECK-LABEL: @main
func @main() {
%ctx = "phi_dt.create_context.gpu" (): () -> !phi.context<GPU>
%t = "phi_dt.create_dense_tensor.gpu" (%ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[1:i64, 3:i64, 1:i64, 1:i64], lod=[1:i64]}: (!phi.context<GPU>) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
"phi_dt.print_tensor" (%t) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
//%res =
infrt.call @run_trt(%t, %ctx) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> ()
//-> (!infrt.dense_tensor<GPU, FP32, NCHW>)
infrt.return
}
paddle/infrt/tests/dialect/tensorrt/disabled_trt_activation.mlir
0 → 100644
浏览文件 @
2012aeb6
module {
func @main_graph(%arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.create_engine"(%1) ( {
%6 = "trt.Activation"(%1) {activation_type = 1 : si32, alpha = 0.000000e+00 : f32, beta = 0.000000e+00 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %6 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !trt.engine
%3 = "trt.compute"(%2, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%4 = "dt.tensor_list_get_tensor"(%3) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%5 = "phi_dt.memcpy.gpu"(%4, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %5 : !infrt.dense_tensor<CPU, FP32, ANY>
}
func @main() {
%0 = "phi_dt.create_context.cpu"() : () -> !phi.context<CPU>
%1 = "phi_dt.create_inited_dense_tensor.cpu.f32"(%0) {dims = [3, 6, 1, 1], layout = #infrt.layout<NCHW>, lod = [0], value = 1.500000e+00 : f32} : (!phi.context<CPU>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%2 = infrt.call @main_graph(%1) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
paddle/infrt/tests/dialect/tensorrt/disabled_trt_fc.mlir
浏览文件 @
2012aeb6
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @main
func @main() {
%ctx = "phi_dt.create_context.gpu" (): () -> !phi.context<GPU>
%cpu_ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_dense_tensor.gpu" (%ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[1:i64, 3:i64, 1:i64, 1:i64], lod=[1:i64]}: (!phi.context<GPU>) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%input_tensor) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
//"phi_dt.print_tensor" (%input_tensor) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%kernel_weight = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[2:i64, 3:i64], lod=[1:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_weight) {value=[1.:f32, 2.:f32, 3.:f32, 4.:f32, 5.:f32, 6.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
//"phi_dt.print_tensor" (%kernel_weight) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%kernel_bias = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[2:i64], lod=[1:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_bias) {value=[1.:f32, 2.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
//"phi_dt.print_tensor" (%kernel_bias) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%engine = "trt.create_engine"(%input_tensor, %kernel_weight, %kernel_bias) ({
%1 = "trt.Activation"(%input_tensor) {activation_type = 1 : si32, alpha = 1.0 : f32, beta = 6.0 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.FullyConnected"(%input_tensor, %kernel_weight, %kernel_bias) {out_channel_num = 2 : si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
"infrt.return"(%1, %2) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
}) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
%res = "trt.compute"(%engine, %ctx) {} : (!trt.engine, !phi.context<GPU>) -> (!infrt.tensor_list)
%size = "dt.tensor_list_get_size"(%res) {} : (!infrt.tensor_list) -> (i32)
"infrt.print.i32"(%size) {} : (i32) -> ()
%ts0 = "dt.tensor_list_get_tensor"(%res) {id = 0 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts0) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%ts1 = "dt.tensor_list_get_tensor"(%res) {id = 1 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts1) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
module {
func @main_graph(%arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%4 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[0], dims=[2, 6]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%3 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[0], dims=[2]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%5 = "trt.create_engine"(%1, %4, %3) ( {
%10 = "trt.FullyConnected"(%1, %4, %3) {out_channel_num = 2 : si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %10 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
%6 = "trt.compute"(%5, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%7 = "dt.tensor_list_get_tensor"(%6) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%8 = "phi_dt.memcpy.gpu"(%7, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %8 : !infrt.dense_tensor<CPU, FP32, ANY>
}
func @main() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[0], dims=[3, 6, 1, 1]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%res = infrt.call @main_graph(%input_tensor) {} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
"phi_dt.print_tensor" (%res) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
infrt.return
}
}
paddle/infrt/tests/dialect/tensorrt/disabled_trt_pool.mlir
0 → 100644
浏览文件 @
2012aeb6
module {
func @main_graph(%arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.create_engine"(%1) ( {
%6 = "trt.Pooling"(%1) {padding_mode = 0 : i32, paddings = [1 : i32, 1 : i32], pool_type = 0 : i32, strides = [2 : i32, 2 : i32], window_size = [3 : i32, 3 : i32], exclusive = false, adaptive = false, padding_algorithm = "EXPLICIT"} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %6 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !trt.engine
%3 = "trt.compute"(%2, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%4 = "dt.tensor_list_get_tensor"(%3) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%5 = "phi_dt.memcpy.gpu"(%4, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %5 : !infrt.dense_tensor<CPU, FP32, ANY>
}
func @main() {
%0 = "phi_dt.create_context.cpu"() : () -> !phi.context<CPU>
%1 = "phi_dt.create_inited_dense_tensor.cpu.f32"(%0) {dims = [1, 3, 10, 10], layout = #infrt.layout<NCHW>, lod = [0], value = 1.500000e+00 : f32} : (!phi.context<CPU>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%2 = infrt.call @main_graph(%1) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录