未验证 提交 c4abe26c 编写于 作者: J jjyaoao 提交者: GitHub

[Test Mv] remove infrt tests (#52063)

上级 66098bff
......@@ -113,7 +113,6 @@ add_subdirectory(tensor)
add_subdirectory(support)
add_subdirectory(external_kernels)
add_subdirectory(paddle)
add_subdirectory(tests)
# MLIR td file generations
set(infrt_mlir_incs basic_kernels_inc test_kernels_inc tensor_shape_inc
......
# Ignore patterns: macOS Finder metadata, JetBrains IDE settings, log files
# and scratch directories.
.DS_Store
.idea
*.log
tmp/
# NOTE(review): presumably the result directory written by lit — confirm.
Output
# Unit test for the abs model. cc_test_tiny is a helper defined elsewhere in
# the project.
cc_test_tiny(test_abs_model SRCS models/test_abs.cc DEPS infrt ${MLIR_IR_LIBS})

# Render the lit configuration into the source tree, because lit is pointed at
# ${CMAKE_SOURCE_DIR}/paddle/infrt/tests below.
configure_file(lit.cfg.py.in
               "${CMAKE_SOURCE_DIR}/paddle/infrt/tests/lit.cfg.py")

# Run every lit test under paddle/infrt/tests, filtering out tests whose names
# match "disabled_*".
#
# add_test() has no DEPENDS keyword: everything after COMMAND is forwarded to
# the command line. The former trailing "DEPENDS infrtopt infrtexec" therefore
# only reached `sh -c` as unused positional parameters and never ordered the
# test after those tools, so it is dropped here. The infrtopt and infrtexec
# tools still have to be built before ctest is invoked.
add_test(
  NAME test_infrt_by_lit
  COMMAND
    sh -c
    "lit -v ${CMAKE_SOURCE_DIR}/paddle/infrt/tests --filter-out \"disabled_*\"")

# Instantiate the templated MLIR tests next to their *.mlir.in sources so that
# the lit run above picks them up.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensor/tensor_map.mlir.in"
               "${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensor/tensor_map.mlir")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/linear_cpu.mlir.in"
               "${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/linear_cpu.mlir")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/resnet50.mlir.in"
               "${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/resnet50.mlir")
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensorrt/disabled_linear.mlir.in"
  "${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensorrt/disabled_linear.mlir")
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @basic_f32
// Adds the f32 constants 1.0 and 2.0 with infrt.add.f32, prints the sum
// (expected printed value: 3) and returns it.
func @basic_f32() -> f32 {
%v0 = infrt.constant.f32 1.0
%v1 = infrt.constant.f32 2.0
%value = "infrt.add.f32"(%v0, %v1) : (f32, f32) -> f32
// CHECK-NEXT: 3
"infrt.print.f32"(%value) : (f32) -> ()
infrt.return %value : f32
}
/// ================================================================
/// @caller calls the other function @callee
// Callee helper: returns (%x + %y) + %y1 using two infrt.add.f32 ops.
func @callee.add.f32(%x : f32, %y : f32, %y1 : f32) -> f32 {
%z = "infrt.add.f32"(%x, %y) : (f32, f32) -> f32
%z1 = "infrt.add.f32"(%z, %y1) : (f32, f32) -> f32
infrt.return %z1 : f32
}
// CHECK-LABEL: @caller.add.f32
// Passes the constants 1.0, 2.0 and 3.0 to @callee.add.f32 through infrt.call,
// prints the result (expected printed value: 6) and returns it.
func @caller.add.f32() -> f32 {
%x = infrt.constant.f32 1.0
%y = infrt.constant.f32 2.0
%y1 = infrt.constant.f32 3.0
%z = infrt.call @callee.add.f32(%x, %y, %y1) : (f32, f32, f32) -> f32
// CHECK-NEXT: 6
"infrt.print.f32"(%z) : (f32) -> ()
infrt.return %z : f32
}
/// <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @benchmark
// Runs the infrt.benchmark op named "add.f32" (duration_secs = 1,
// max_count = 3, num_warmup_runs = 3) over a region that adds 1.0 and 2.0 and
// prints the sum. The label directives inside the function match the
// "BM:add.f32:..." statistics lines expected in the tool output.
func @benchmark() {
// CHECK-LABEL: BM:add.f32:Count: 3
// CHECK-LABEL: BM:add.f32:Duration(ns)
// CHECK-LABEL: BM:add.f32:Time Min(ns)
// CHECK-LABEL: BM:add.f32:Time 50%(ns)
// CHECK-LABEL: BM:add.f32:Time 95%(ns)
// CHECK-LABEL: BM:add.f32:Time 99%(ns)
// CHECK-LABEL: BM:add.f32:CPU Min(ns)
// CHECK-LABEL: BM:add.f32:CPU 50%(ns)
// CHECK-LABEL: BM:add.f32:CPU 95%(ns)
// CHECK-LABEL: BM:add.f32:CPU 99%(ns)
// CHECK-LABEL: BM:add.f32:CPU utilization(percent)
infrt.benchmark "add.f32"() duration_secs = 1, max_count = 3, num_warmup_runs = 3
{
%0 = infrt.constant.f32 1.0
%1 = infrt.constant.f32 2.0
%res = "infrt.add.f32"(%0, %1) : (f32, f32) -> f32
"infrt.print.f32"(%res) : (f32) -> ()
infrt.return %res : f32
}
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: dense_shape0
// Builds the shape [1, 57] and creates an uninitialised 12x23 f32 dense
// tensor; neither value is used again inside this function.
func @dense_shape0() {
%shape = ts.build_shape [1:i64, 57:i64]
%a = dt.create_uninit_tensor.f32 [12:i64, 23:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return
}
// Returns a dt.shallow_copy_tensor of each of the two tensor arguments.
func @predict(%a: !infrt.dense_tensor<CPU, FP32, NCHW>, %b: !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) {
%a0 = dt.shallow_copy_tensor %a : !infrt.dense_tensor<CPU, FP32, NCHW> -> !infrt.dense_tensor<CPU, FP32, NCHW>
%b0 = dt.shallow_copy_tensor %b : !infrt.dense_tensor<CPU, FP32, NCHW> -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %a0, %b0: !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>
}
// Creates an uninitialised 12x23 f32 tensor and passes it as both arguments
// of @predict through infrt.call. The two results and %shape are not used.
func @main() {
%shape = ts.build_shape [1:i64, 57:i64]
%a = dt.create_uninit_tensor.f32 [12:i64, 23:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
%b, %c = infrt.call @predict(%a, %a) : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// CHECK-LABEL: @main
// Builds constant filter, bias, scale, mean and variance tensors (every
// element 1.0), then applies pd.conv2d followed by pd.batch_norm to the input.
// NOTE(review): no lit RUN directive is visible for this function in this
// view, so it may not be executed as shown — confirm.
// NOTE(review): the returned %d has type tensor<?x3x256x256xf32> while the
// signature declares tensor<?xf32> — confirm this mismatch is intended.
func @main(%a:tensor<?x3x256x256xf32>) -> tensor<?xf32> {
%filter = "pd.constant"(){value = dense<1.000000e+00> : tensor<3x64x3x3xf32>} : () -> tensor<3x64x3x3xf32>
%bias = "pd.constant"(){value = dense<1.000000e+00> : tensor<64xf32>} : () -> tensor<64xf32>
%scale = "pd.constant"(){value = dense<1.000000e+00> : tensor<64xf32>} : () -> tensor<64xf32>
%bias2 = "pd.constant"(){value = dense<1.000000e+00> : tensor<64xf32>} : () -> tensor<64xf32>
%mean = "pd.constant"(){value = dense<1.000000e+00> : tensor<64xf32>} : () -> tensor<64xf32>
%var = "pd.constant"(){value = dense<1.000000e+00> : tensor<64xf32>} : () -> tensor<64xf32>
%c = "pd.conv2d"(%a, %filter, %bias) {} : (tensor<?x3x256x256xf32>, tensor<3x64x3x3xf32>, tensor<64xf32>) -> tensor<?x3x256x256xf32>
%d = "pd.batch_norm"(%c, %scale, %bias2, %mean, %var) {} : (tensor<?x3x256x256xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>, tensor<64xf32>) -> tensor<?x3x256x256xf32>
infrt.return %d:tensor<?x3x256x256xf32>
}
// CHECK-LABEL: @predict
// Fully-connected layer expressed with external kernels. It fetches the
// parameters "create_parameter_0.w_0" and "create_parameter_1.w_0" from the
// tensor map, then calls external.matmul, external.elementwise_add
// (axis = -1) and external.sigmoid, each taking the 3x3 tensor %out as its
// last operand. %out is returned.
func @predict(%input:!infrt.dense_tensor<CPU, FP32, NCHW>, %map: !infrt.dense_tensor_map) -> (!infrt.dense_tensor<CPU, FP32, NCHW>) {
%w = dt.get_param(%map, "create_parameter_0.w_0") -> !infrt.dense_tensor<CPU, FP32, NCHW>
%bias = dt.get_param(%map, "create_parameter_1.w_0") -> !infrt.dense_tensor<CPU, FP32, NCHW>
%out = dt.create_uninit_tensor.f32 [3, 3] -> !infrt.dense_tensor<CPU, FP32, NCHW>
// fc
"external.matmul"(%input, %w, %out) {}: (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
"external.elementwise_add"(%out, %bias, %out) {axis = -1}: (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
"external.sigmoid"(%out, %out) {}: (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
//dt.print_tensor (%out : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return %out : !infrt.dense_tensor<CPU, FP32, NCHW>
}
// CHECK-LABEL: @main
// Creates a 3x3 input filled with 1.0, loads the model parameters, calls
// @predict and prints the resulting tensor.
// NOTE(review): the parameter path below is a hard-coded absolute path, and no
// lit RUN directive is visible for this function in this view — confirm how
// (and whether) this test is meant to be executed.
func @main() {
%input = dt.create_uninit_tensor.f32 [3, 3] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%input : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=1.0:f32}
// CHECK-LABEL: loading params
%map = dt.load_params() {path="/Infrt/build/paddle/paddle_1.8_fc_model"}
%out = infrt.call @predict(%input, %map): (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor_map) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
dt.print_tensor (%out : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// RUN: infrtopt --pd-op-fuse %s | FileCheck %s
// CHECK-LABEL: @main
// Input for the --pd-op-fuse pass: three pd.matmul_v2 + pd.elementwise_add
// pairs, each followed by an activation (relu6, relu, relu). The directives
// in the body expect every matmul/add pair to appear as a single "pd.FC" op
// with in_num_col_dims = 1 in the pass output.
func @main(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2:tensor<?xf32>, %arg3:tensor<?xf32>, %arg4:tensor<?xf32>, %arg5:tensor<?xf32>, %arg6:tensor<?xf32>) -> tensor<?xf32> {
// CHECK: %0 = "pd.FC"(%arg0, %arg1, %arg4) {in_num_col_dims = 1 : i32} : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%c = "pd.matmul_v2"(%arg0, %arg1) {transpose_y=false} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%d = "pd.elementwise_add"(%c, %arg4) {axis=1:si32} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%e = "pd.relu6"(%d) {} : (tensor<?xf32>) -> tensor<?xf32>
// CHECK: %2 = "pd.FC"(%1, %arg2, %arg5) {in_num_col_dims = 1 : i32} : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%c1 = "pd.matmul_v2"(%e, %arg2) {transpose_x=false, transpose_y=false} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%d1 = "pd.elementwise_add"(%c1, %arg5) {axis=1:si32} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%e1 = "pd.relu"(%d1) {} : (tensor<?xf32>) -> tensor<?xf32>
// CHECK: %4 = "pd.FC"(%3, %arg3, %arg6) {in_num_col_dims = 1 : i32} : (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%c2 = "pd.matmul_v2"(%e1, %arg3) {transpose_x=true, transpose_y=false} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%d2 = "pd.elementwise_add"(%c2, %arg6) {axis=1:si32} : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32>
%e2 = "pd.relu"(%d2) {} : (tensor<?xf32>) -> tensor<?xf32>
infrt.return %e2:tensor<?xf32>
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @sign_any_float32_execute
// Creates a CPU context and a one-element FP32 dense tensor filled with 3.8,
// runs phi_cpu.sign.float32.any on it and prints the result (expected printed
// value: [1]).
func @sign_any_float32_execute() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%t = "phi_dt.create_dense_tensor.cpu" (%ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[1:i64]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%t) {value=[3.8:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%e = "phi_cpu.sign.float32.any"(%ctx, %t) : (!phi.context<CPU>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
// CHECK: dense_tensor: shape=shape[1], value=[1]
"phi_dt.print_tensor" (%e) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
infrt.return
}
// RUN: infrtexec -i %s
// Execution-only test: the RUN line above does not pipe into FileCheck, so
// only a successful run of the tool is exercised.
module {
// Chains pd ops over the input: abs -> matmul_v2 -> conv2d -> batch_norm ->
// relu -> elementwise_add -> pool2d (avg) -> flatten_contiguous_range, and
// returns the flattened tensor.
func @predict(%arg0: !infrt.dense_tensor<CPU, FP32, NCHW>,%filter: !infrt.dense_tensor<CPU, FP32, NCHW>, %arg1: !infrt.dense_tensor<CPU, FP32, NCHW>, %arg2: !infrt.dense_tensor<CPU, FP32, NCHW>, %arg3: !infrt.dense_tensor<CPU, FP32, NCHW>, %arg4: !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW> {
%2 = "pd.abs"(%arg0) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%3 = "pd.matmul_v2"(%arg0, %2) {trans_x = false, trans_y = false} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%4 = "pd.conv2d"(%3, %filter) {data_format = "NCHW", dilations = [1 : i32, 1 : i32], groups = 1 : si32, padding_algorithm = "EXPLICIT", paddings = [1 : i32, 1 : i32], strides = [2 : i32, 2 : i32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%Y, %MeanOut, %VarianceOut = "pd.batch_norm"(%4, %arg1, %arg2, %arg3, %arg4) {data_layout = "NCHW", epsilon = 9.99999974E-6 : f32, momentum = 0.899999976 : f32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>)
%out = "pd.relu"(%Y) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%5 = "pd.elementwise_add"(%out, %out) {axis = -1:si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%6 = "pd.pool2d"(%5) {adaptive = false, pooling_type = "avg", ceil_mode = false, data_format = "NCHW", exclusive = true, global_pooling = false, ksize = [3 : i32, 3 : i32], padding_algorithm = "EXPLICIT", paddings = [1 : i32, 1 : i32], strides = [2 : i32, 2 : i32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%7 = "pd.flatten_contiguous_range"(%6) {start_axis = 1 : si32, stop_axis = 3 : si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %7 : !infrt.dense_tensor<CPU, FP32, NCHW>
}
// Creates a CPU context, a constant-filled input (dims [1, 3, 8, 8]), a
// filter (dims [3, 3, 8, 8]) and four 3-element tensors, calls @predict with
// them and prints the result.
func @main() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%t = "phi_dt.create_inited_dense_tensor.cpu.f32"(%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[1, 3, 8, 8]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%filter = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3, 3, 8, 8]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%bias = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%mean = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%scale = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%var = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=3.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%2 = infrt.call@predict(%t, %filter, %bias, %mean, %scale, %var) : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>,!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
// RUN: infrtexec -i %s | FileCheck %s
// Executes individual pd ops on constant-filled tensors and compares the
// printed tensors against expected values: conv2d, pool2d,
// flatten_contiguous_range, elementwise_add, matmul_v2 and batch_norm.
module {
func @main() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%0 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value = 2.0 : f32, layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[1, 3, 6, 6]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%1 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value = 2.0 : f32, layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[1, 3, 3, 3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%2 = "pd.conv2d"(%0, %1) {data_format = "NCHW", dilations = [1 : i32, 1 : i32], groups = 1 : si32, padding_algorithm = "EXPLICIT", paddings = [3 : i32, 3 : i32], strides = [2 : i32, 2 : i32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[1, 1, 5, 5], value=[0,0,0,0,0,0,48,72,72,24,0,72,108,108,36,0,72,108,108,36,0,24,36,36,12]
phi_dt.print_tensor (%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
%3 = "pd.relu"(%2) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// NOTE(review): the expectation on the next line carries no FileCheck
// directive prefix, so the printed relu output is not verified — confirm
// whether that is intentional.
// dense_tensor: shape=shape[1, 1, 5, 5], value=[0,0,0,0,0,0,48,72,72,24,0,72,108,108,36,0,72,108,108,36,0,24,36,36,12]
phi_dt.print_tensor (%3 : !infrt.dense_tensor<CPU, FP32, NCHW>)
// The pooling below consumes the conv2d result %2, not the relu result %3.
%4 = "pd.pool2d"(%2) {adaptive = false, ceil_mode = false, data_format = "NCHW", exclusive = true, global_pooling = false, ksize = [2 : i32, 2 : i32], padding_algorithm = "EXPLICIT", paddings = [1 : i32, 1 : i32], pooling_type = "avg", strides = [2 : i32, 2 : i32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[1, 1, 3, 3], value=[0,0,0,0,75,60,0,60,48]
phi_dt.print_tensor (%4 : !infrt.dense_tensor<CPU, FP32, NCHW>)
%5 = "pd.flatten_contiguous_range"(%4) {start_axis = 1 : si32, stop_axis = 3 : si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[1, 9], value=[0,0,0,0,75,60,0,60,48]
phi_dt.print_tensor (%5 : !infrt.dense_tensor<CPU, FP32, NCHW>)
%6 = "pd.elementwise_add"(%5, %5) {axis = 1 : si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[1, 9], value=[0,0,0,0,150,120,0,120,96]
phi_dt.print_tensor (%6 : !infrt.dense_tensor<CPU, FP32, NCHW>)
%7 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value = 4.0 : f32, layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[9, 3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%8 = "pd.matmul_v2"(%5, %7) {trans_x = false, trans_y = false} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[1, 3], value=[972,972,972]
phi_dt.print_tensor (%8 : !infrt.dense_tensor<CPU, FP32, NCHW>)
%scale = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.0:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%bias = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.8:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%mean = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=2.0:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%var = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=0.0:f32, layout=#infrt.layout<NCHW>, lod=[1], dims=[3]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
// batch_norm is applied to %1 (the 1x3x3x3 tensor filled with 2.0).
%Y, %MeanOut, %VarianceOut = "pd.batch_norm"(%1, %scale, %bias, %mean, %var) {data_layout = "NCHW", epsilon = 0.01 : f32, momentum = 0.5 : f32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>)
// CHECK: dense_tensor: shape=shape[1, 3, 3, 3], value=[1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8,1.8]
phi_dt.print_tensor (%Y : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
// RUN: infrtexec -i %s
// Execution-only test (the RUN line does not pipe into FileCheck). The model
// paths in main contain a CMAKE_BINARY_DIR placeholder.
module {
// Fetches "linear_0.w_0" and "linear_0.b_0" from the tensor map, multiplies
// the input by the weight with phi_cpu.matmul.float32.any and adds the bias
// with phi_cpu.add.float32.any.
func @main_graph(%arg0: !phi.dense_tensor_map, %arg1: !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW> {
%0 = phi_dt.tensor_map_get_tensor(%arg0) {name = "linear_0.w_0"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
%1 = phi_dt.tensor_map_get_tensor(%arg0) {name = "linear_0.b_0"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
%2 = "phi_dt.create_context.cpu"() : () -> !phi.context<CPU>
%5 = "phi_cpu.matmul.float32.any"(%2, %arg1, %0) {trans_x = false, trans_y = false} : (!phi.context<CPU>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%7 = "phi_cpu.add.float32.any"(%2, %5, %1): (!phi.context<CPU>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %7 : !infrt.dense_tensor<CPU, FP32, NCHW>
}
// Creates a 16x784 FP32 dense tensor, loads the combined linear model
// parameters, runs main_graph on them and prints the result.
func @main() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%1 = "phi_dt.create_dense_tensor.cpu" (%ctx) {precision=#infrt.precision<FP32>, layout=#infrt.layout<NCHW>, lod=[1:i64], dims=[16:i64, 784:i64]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%map = phi_dt.load_combined_params(){model_path="@CMAKE_BINARY_DIR@/linear/linear.pdmodel",params_path="@CMAKE_BINARY_DIR@/linear/linear.pdiparams"}
%2 = infrt.call@main_graph(%map, %1) : (!phi.dense_tensor_map, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
phi_dt.print_tensor (%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
// RUN: infrtopt -phi-op-convert=valid-targets=CPU-FP32-NCHW -infrt-op-fuse %s
// NOTE(review): the RUN line above does not pipe into FileCheck, so the label
// directives in this file are not evaluated — confirm whether that is
// intentional.
// CHECK-LABEL: @ops
// pd.elementwise_add (axis = 1) followed by pd.abs on two CPU/FP32/NCHW dense
// tensors.
// NOTE(review): the signature declares no result although %h is returned —
// confirm.
func @ops(%a:!infrt.dense_tensor<CPU, FP32, NCHW>, %b:!infrt.dense_tensor<CPU, FP32, NCHW>) {
%g = "pd.elementwise_add"(%a, %b) {axis=1:si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%h = "pd.abs"(%g):(!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %h:!infrt.dense_tensor<CPU, FP32, NCHW>
}
// CHECK-LABEL: @op_execute
// Same elementwise_add + abs sequence as @ops, but with a declared result
// type. The third argument %c is not used in the body.
func @op_execute(%a:!infrt.dense_tensor<CPU, FP32, NCHW>, %b:!infrt.dense_tensor<CPU, FP32, NCHW>, %c:!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW> {
%g = "pd.elementwise_add"(%a, %b) {axis=1:si32} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%h = "pd.abs"(%g):(!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %h:!infrt.dense_tensor<CPU, FP32, NCHW>
}
# Ignore patterns: macOS Finder metadata, JetBrains IDE settings, log files
# and scratch directories.
.DS_Store
.idea
*.log
tmp/
# NOTE(review): presumably the file rendered from tensor_map.mlir.in by
# configure_file — confirm.
tensor_map.mlir
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: dense_shape0
// Creates an uninitialised 12x23 f32 dense tensor and returns without using
// it.
func @dense_shape0() {
%a = dt.create_uninit_tensor.f32 [12:i64, 23:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return
}
// Returns a dt.shallow_copy_tensor of each of the two tensor arguments.
func @predict(%a: !infrt.dense_tensor<CPU, FP32, NCHW>, %b: !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) {
%a0 = dt.shallow_copy_tensor %a : !infrt.dense_tensor<CPU, FP32, NCHW> -> !infrt.dense_tensor<CPU, FP32, NCHW>
%b0 = dt.shallow_copy_tensor %b : !infrt.dense_tensor<CPU, FP32, NCHW> -> !infrt.dense_tensor<CPU, FP32, NCHW>
infrt.return %a0, %b0: !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>
}
// Creates an uninitialised 12x23 f32 tensor and passes it as both arguments
// of @predict through infrt.call. The two results and %shape are not used.
func @main() {
%shape = ts.build_shape [1:i64, 57:i64]
%a = dt.create_uninit_tensor.f32 [12:i64, 23:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
%b, %c = infrt.call @predict(%a, %a) : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: naive_elementwise_add
// Fills two 2x8 f32 tensors with 1.0 and 2.0, adds them with
// dt.naive_elementwise_add.f32 and prints the result (sixteen 3s expected).
func @naive_elementwise_add() {
// create a
%a = dt.create_uninit_tensor.f32 [2:i64, 8:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%a : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=1.0:f32}
// create b
%b = dt.create_uninit_tensor.f32 [2:i64, 8:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%b : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=2.0:f32}
// get c
%c = dt.naive_elementwise_add.f32(%a, %b) {} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: tensor: shape=shape[2,8], values=[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
dt.print_tensor (%c : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: naive_matmul
// Multiplies a 2x8 tensor filled with 1.0 by an 8x4 tensor filled with 2.0
// using dt.naive_matmul.f32 and prints the 2x4 result (eight 16s expected).
func @naive_matmul() {
// create a
%a = dt.create_uninit_tensor.f32 [2:i64, 8:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%a : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=1.0:f32}
// create b
%b = dt.create_uninit_tensor.f32 [8:i64, 4:i64] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%b : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=2.0:f32}
// get c
%c = dt.naive_matmul.f32(%a, %b) {} : (!infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: tensor: shape=shape[2,4], values=[16, 16, 16, 16, 16, 16, 16, 16]
dt.print_tensor (%c : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// Loads the multi_fc_model parameters with dt.load_params, prints the size of
// the resulting tensor map, then fetches and prints the "fc_bias" tensor
// (expected: two zeros). The path contains a CMAKE_BINARY_DIR placeholder.
func @load_tensor_map() {
%map = dt.load_params(){path="@CMAKE_BINARY_DIR@/multi_fc_model"}
%size = dt.tensor_map_get_size(%map) -> i32
infrt.print.i32 %size
%a = dt.tensor_map_get_tensor(%map) {name="fc_bias"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: tensor: shape=shape[2], values=[0, 0]
dt.print_tensor (%a : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// Same scenario as load_tensor_map, but using the phi_dt variants of the
// load, size, get-tensor and print ops.
func @load_phi_tensor_map() {
%map = phi_dt.load_params(){path="@CMAKE_BINARY_DIR@/multi_fc_model"}
%size = phi_dt.tensor_map_get_size(%map) -> i32
infrt.print.i32 %size
%a = phi_dt.tensor_map_get_tensor(%map) {name="fc_bias"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[2], value=[0,0]
phi_dt.print_tensor (%a : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// Loads the parameters from a combined model/params file pair
// (fc.pdmodel + fc.pdiparams) with phi_dt.load_combined_params, prints the
// map size, then fetches and prints the "fc_bias" tensor (expected: two
// zeros).
func @load_combined_phi_tensor_map() {
%map = phi_dt.load_combined_params(){model_path="@CMAKE_BINARY_DIR@/multi_fc_model/fc.pdmodel",
params_path="@CMAKE_BINARY_DIR@/multi_fc_model/fc.pdiparams"}
%size = phi_dt.tensor_map_get_size(%map) -> i32
infrt.print.i32 %size
%a = phi_dt.tensor_map_get_tensor(%map) {name="fc_bias"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
// CHECK: dense_tensor: shape=shape[2], value=[0,0]
phi_dt.print_tensor (%a : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @build_tensor1
// Builds the shape [1, 57, 92] with ts.build_shape and prints it with
// ts.print_shape.
func @build_tensor1() {
%a = ts.build_shape [1:i64, 57:i64, 92:i64]
// CHECK: shape[1,57,92]
ts.print_shape %a
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: test_tensor_type
// Creates a 3x4 f32 dense tensor, fills it with 1.0 and prints it (twelve 1s
// expected).
func @test_tensor_type() {
%a = dt.create_uninit_tensor.f32 [3, 4] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%a : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=1.0:f32}
// CHECK: tensor: shape=shape[3,4], values=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
dt.print_tensor (%a : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @build_tensor1
// Builds the shape [1, 57, 92] with ts.build_shape and prints it with
// ts.print_shape.
func @build_tensor1() {
%a = ts.build_shape [1:i64, 57:i64, 92:i64]
// CHECK: shape[1,57,92]
ts.print_shape %a
infrt.return
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: test_tensor_type
// Creates a 3x4 f32 dense tensor, fills it with 1.0 and prints it (twelve 1s
// expected).
func @test_tensor_type() {
%a = dt.create_uninit_tensor.f32 [3, 4] -> !infrt.dense_tensor<CPU, FP32, NCHW>
dt.fill_tensor_with_constant.f32 (%a : !infrt.dense_tensor<CPU, FP32, NCHW>) {value=1.0:f32}
// CHECK: tensor: shape=shape[3,4], values=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
dt.print_tensor (%a : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
// TensorRT fully-connected example. No lit RUN directive is present in this
// module; the model paths in main contain a CMAKE_BINARY_DIR placeholder.
module {
// Copies the CPU input to a GPU tensor (d2h = false), fetches "linear_0.b_0"
// and "linear_0.w_0" from the tensor map, builds a trt engine whose region
// holds one trt.FullyConnected op (out_channel_num = 10), runs it with
// trt.compute, takes tensor 0 of the resulting list and copies it back to a
// CPU tensor (d2h = true).
func @main_graph(%map: !phi.dense_tensor_map, %arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%3 = phi_dt.tensor_map_get_tensor(%map) {name = "linear_0.b_0"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
%4 = phi_dt.tensor_map_get_tensor(%map) {name = "linear_0.w_0"} -> !infrt.dense_tensor<CPU, FP32, NCHW>
%5 = "trt.create_engine"(%1, %4, %3) ( {
%10 = "trt.FullyConnected"(%1, %4, %3) {out_channel_num = 10 : si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %10 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
%6 = "trt.compute"(%5, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%7 = "dt.tensor_list_get_tensor"(%6) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%8 = "phi_dt.memcpy.gpu"(%7, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %8 : !infrt.dense_tensor<CPU, FP32, ANY>
}
// Loads the combined linear model parameters, creates a CPU tensor with dims
// [3, 784, 1, 1], fills it from the value list [3.8, 2.4, 1.3], runs
// main_graph and prints the result.
func @main() {
%map = phi_dt.load_combined_params(){model_path="@CMAKE_BINARY_DIR@/linear/linear.pdmodel",
params_path="@CMAKE_BINARY_DIR@/linear/linear.pdiparams"}
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_dense_tensor.cpu" (%ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[3:i64, 784:i64, 1:i64, 1:i64], lod=[1:i64]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%input_tensor) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%res = infrt.call @main_graph(%map, %input_tensor) {} : (!phi.dense_tensor_map, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
"phi_dt.print_tensor" (%res) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
infrt.return
}
}
// TensorRT activation example. No lit RUN directive is present in this
// module.
module {
// Copies the CPU input to a GPU tensor (d2h = false), builds a trt engine
// whose region holds one trt.Activation op (activation_type = 1), runs it
// with trt.compute, takes tensor 0 of the resulting list and copies it back
// to a CPU tensor (d2h = true).
func @main_graph(%arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.create_engine"(%1) ( {
%6 = "trt.Activation"(%1) {activation_type = 1 : si32, alpha = 0.000000e+00 : f32, beta = 0.000000e+00 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %6 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !trt.engine
%3 = "trt.compute"(%2, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%4 = "dt.tensor_list_get_tensor"(%3) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%5 = "phi_dt.memcpy.gpu"(%4, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %5 : !infrt.dense_tensor<CPU, FP32, ANY>
}
// Creates a CPU tensor with dims [3, 6, 1, 1] filled with 1.5, runs
// @main_graph on it and prints the result.
func @main() {
%0 = "phi_dt.create_context.cpu"() : () -> !phi.context<CPU>
%1 = "phi_dt.create_inited_dense_tensor.cpu.f32"(%0) {dims = [3, 6, 1, 1], layout = #infrt.layout<NCHW>, lod = [0], value = 1.500000e+00 : f32} : (!phi.context<CPU>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%2 = infrt.call @main_graph(%1) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
// RUN: infrtexec -i %s | FileCheck %s
// CHECK-LABEL: @run_trt
// Builds a trt engine whose region contains a trt.Activation op and a
// trt.Convolution op (out_channel_num = 3, kernel_size = [3, 3]) and returns
// both results. The engine is then inspected and run with trt.compute; the
// size of the resulting tensor list and tensors 0 and 1 are printed.
func @run_trt(%input_tensor : !infrt.dense_tensor<GPU, FP32, NCHW>, %kernel_weight : !infrt.dense_tensor<CPU, FP32, NCHW>, %kernel_bias : !infrt.dense_tensor<CPU, FP32, NCHW>, %gpu_ctx : !phi.context<GPU>) {
%a = "trt.create_engine"(%input_tensor, %kernel_weight, %kernel_bias) ({
%1 = "trt.Activation"(%input_tensor) {activation_type = 1 : si32, alpha = 1.0 : f32, beta = 6.0 : f32} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%2 = "trt.Convolution"(%input_tensor, %kernel_weight, %kernel_bias) {out_channel_num = 3 : si32, kernel_size = [3:i32, 3:i32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
"infrt.return"(%1, %2) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
}) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
"trt.inspect_engine"(%a) {} : (!trt.engine) -> ()
%res = "trt.compute"(%a, %gpu_ctx) {} : (!trt.engine, !phi.context<GPU>) -> (!infrt.tensor_list)
%size = "dt.tensor_list_get_size"(%res) {} : (!infrt.tensor_list) -> (i32)
"infrt.print.i32"(%size) {} : (i32) -> ()
%ts0 = "dt.tensor_list_get_tensor"(%res) {id = 0 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts0) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%ts1 = "dt.tensor_list_get_tensor"(%res) {id = 1 : i32} : (!infrt.tensor_list) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.print_tensor" (%ts1) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
infrt.return
}
// CHECK-LABEL: @main
// Creates GPU and CPU contexts, a GPU input tensor (dims [1, 3, 28, 28]), a
// CPU kernel weight (dims [3, 3, 3, 3]) and a CPU kernel bias (dims [3]),
// fills each from a literal value list, and passes them together with the
// GPU context to @run_trt.
func @main() {
%gpu_ctx = "phi_dt.create_context.gpu" (): () -> !phi.context<GPU>
%cpu_ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_dense_tensor.gpu" (%gpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[1:i64, 3:i64, 28:i64, 28:i64], lod=[0:i64]}: (!phi.context<GPU>) -> (!infrt.dense_tensor<GPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%input_tensor) {value=[3.8:f32, 2.4:f32, 1.3:f32]} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
// "phi_dt.print_tensor" (%input_tensor) : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> ()
%kernel_weight = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[3:i64, 3:i64, 3:i64, 3:i64], lod=[0:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_weight) {value=[1.:f32, 2.:f32, 3.:f32, 4.:f32, 5.:f32, 6.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
// "phi_dt.print_tensor" (%kernel_weight) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
%kernel_bias = "phi_dt.create_dense_tensor.cpu"(%cpu_ctx) {
precision=#infrt.precision<FP32>,
layout=#infrt.layout<NCHW>,
dims=[3:i64], lod=[0:i64]} : (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
"phi_dt.fill_dense_tensor.f32"(%kernel_bias) {value=[1.:f32]} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
// "phi_dt.print_tensor" (%kernel_bias) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
infrt.call @run_trt(%input_tensor, %kernel_weight, %kernel_bias, %gpu_ctx) : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !phi.context<GPU>) -> ()
infrt.return
}
// Module exercising a TensorRT engine that contains a single
// "trt.FullyConnected" op.
module {
// @main_graph: copies the CPU argument to the GPU, runs it through the engine
// together with two CPU tensors initialised to 1.5, and copies output 0 of
// the engine back to the CPU.
func @main_graph(%arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
// d2h = false: the copy goes from the CPU tensor to a GPU tensor.
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
// Second and third engine operands: dims [2, 6] and dims [2], both filled
// with 1.5.
%4 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[0], dims=[2, 6]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%3 = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[0], dims=[2]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
// The region below is the engine body; it is created with run_once = true.
%5 = "trt.create_engine"(%1, %4, %3) ( {
%10 = "trt.FullyConnected"(%1, %4, %3) {out_channel_num = 2 : si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %10 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !trt.engine
// Execute the engine and take element 0 of the returned tensor list.
%6 = "trt.compute"(%5, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%7 = "dt.tensor_list_get_tensor"(%6) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
// d2h = true: the copy goes from the GPU tensor back to a CPU tensor.
%8 = "phi_dt.memcpy.gpu"(%7, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %8 : !infrt.dense_tensor<CPU, FP32, ANY>
}
// @main: creates a dims [3, 6, 1, 1] CPU tensor filled with 1.5, runs
// @main_graph on it and prints the result.
// NOTE(review): the call site types the argument and result with layout NCHW
// while @main_graph declares layout ANY - confirm this is accepted as intended.
func @main() {
%ctx = "phi_dt.create_context.cpu" (): () -> !phi.context<CPU>
%input_tensor = "phi_dt.create_inited_dense_tensor.cpu.f32" (%ctx) {value=1.5:f32, layout=#infrt.layout<NCHW>, lod=[0], dims=[3, 6, 1, 1]}: (!phi.context<CPU>) -> (!infrt.dense_tensor<CPU, FP32, NCHW>)
%res = infrt.call @main_graph(%input_tensor) {} : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
"phi_dt.print_tensor" (%res) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> ()
infrt.return
}
}
// Module exercising a TensorRT engine that contains a single "trt.Pooling" op.
module {
// @main_graph: copies the CPU argument to the GPU, runs it through the
// pooling engine and copies output 0 of the engine back to the CPU.
func @main_graph(%arg0: !infrt.dense_tensor<CPU, FP32, ANY>) -> !infrt.dense_tensor<CPU, FP32, ANY> {
%0 = "phi_dt.create_context.gpu"() : () -> !phi.context<GPU>
// d2h = false: the copy goes from the CPU tensor to a GPU tensor.
%1 = "phi_dt.memcpy.gpu"(%arg0, %0) {d2h = false} : (!infrt.dense_tensor<CPU, FP32, ANY>, !phi.context<GPU>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
// Engine body: one pooling op with window_size [3, 3], strides [2, 2],
// paddings [1, 1] and padding_algorithm "EXPLICIT"; created with
// run_once = true.
%2 = "trt.create_engine"(%1) ( {
%6 = "trt.Pooling"(%1) {padding_mode = 0 : i32, paddings = [1 : i32, 1 : i32], pool_type = 0 : i32, strides = [2 : i32, 2 : i32], window_size = [3 : i32, 3 : i32], exclusive = false, adaptive = false, padding_algorithm = "EXPLICIT"} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %6 : !infrt.dense_tensor<GPU, FP32, NCHW>
}) {run_once = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !trt.engine
// Execute the engine and take element 0 of the returned tensor list.
%3 = "trt.compute"(%2, %0) : (!trt.engine, !phi.context<GPU>) -> !infrt.tensor_list
%4 = "dt.tensor_list_get_tensor"(%3) {id = 0 : i32} : (!infrt.tensor_list) -> !infrt.dense_tensor<GPU, FP32, NCHW>
// d2h = true: the copy goes from the GPU tensor back to a CPU tensor.
%5 = "phi_dt.memcpy.gpu"(%4, %0) {d2h = true} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !phi.context<GPU>) -> !infrt.dense_tensor<CPU, FP32, ANY>
infrt.return %5 : !infrt.dense_tensor<CPU, FP32, ANY>
}
// @main: creates a dims [1, 3, 10, 10] CPU tensor filled with 1.5, runs
// @main_graph on it and prints the result.
func @main() {
%0 = "phi_dt.create_context.cpu"() : () -> !phi.context<CPU>
%1 = "phi_dt.create_inited_dense_tensor.cpu.f32"(%0) {dims = [1, 3, 10, 10], layout = #infrt.layout<NCHW>, lod = [0], value = 1.500000e+00 : f32} : (!phi.context<CPU>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
%2 = infrt.call @main_graph(%1) : (!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW>
phi_dt.print_tensor(%2 : !infrt.dense_tensor<CPU, FP32, NCHW>)
infrt.return
}
}
// RUN: trt-exec %s
// CHECK-LABEL: @main
// Graph of Paddle-dialect ops over GPU / FP32 / NCHW tensors, built from three
// stages: elementwise_add + relu6, then twice matmul + elementwise_add + relu.
// The second matmul sets transpose_x = true; every elementwise_add uses
// axis = -1.
func @main(%bias:!infrt.dense_tensor<GPU, FP32, NCHW>, %c:!infrt.dense_tensor<GPU, FP32, NCHW>, %b1:!infrt.dense_tensor<GPU, FP32, NCHW>, %b2:!infrt.dense_tensor<GPU, FP32, NCHW>, %bias1:!infrt.dense_tensor<GPU, FP32, NCHW>, %bias2:!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW> {
// Stage 1: %c + %bias, then relu6.
%d = "pd.elementwise_add"(%c, %bias) {axis=-1:si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%e = "pd.relu6"(%d) {} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
// Stage 2: matmul with %b1, add %bias1, relu.
%c1 = "pd.matmul"(%e, %b1) {transpose_x=false, transpose_y=false} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%d1 = "pd.elementwise_add"(%c1, %bias1) {axis=-1:si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%e1 = "pd.relu"(%d1) {} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
// Stage 3: matmul with %b2 (transpose_x = true), add %bias2, relu.
%c2 = "pd.matmul"(%e1, %b2) {transpose_x=true, transpose_y=false} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%d2 = "pd.elementwise_add"(%c2, %bias2) {axis=-1:si32} : (!infrt.dense_tensor<GPU, FP32, NCHW>, !infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
%e2 = "pd.relu"(%d2) {} : (!infrt.dense_tensor<GPU, FP32, NCHW>) -> !infrt.dense_tensor<GPU, FP32, NCHW>
infrt.return %e2 : !infrt.dense_tensor<GPU, FP32, NCHW>
}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import lit.formats
import os
# lit configuration template for the infrt MLIR tests. `config` is the object
# lit supplies while loading this file, and @CMAKE_BINARY_DIR@ is replaced by
# CMake's configure_file() with the build directory.
config.name = "MLIR tests"
# The positional True is ShTest's first argument (execute_external in lit's
# API - NOTE(review): confirm against the lit version in use).
config.test_format = lit.formats.ShTest(True)

build_dir = "@CMAKE_BINARY_DIR@"

# LLVM install locations inside the build tree. The library directory is kept
# in its own attribute: the previous code assigned it to llvm_tools_dir as
# well, overwriting the bin path, and its leading "/" made os.path.join()
# drop build_dir entirely. Nothing in this file reads either attribute.
config.llvm_tools_dir = os.path.join(build_dir, "third_party/install/llvm/bin")
config.llvm_libs_dir = os.path.join(build_dir, "third_party/install/llvm/lib")

# Directories prepended to PATH so the tools named in the tests' run lines
# can be found.
infrtopt_bin = os.path.join(build_dir, "paddle/infrt/dialect/")
trtexec_bin = os.path.join(build_dir, "paddle/infrt/dialect/tensorrt/")
infrtexec_bin = os.path.join(build_dir, "paddle/infrt/host_context/")
phi_ir_exec_bin = os.path.join(build_dir, "paddle/infrt/dialect/phi")
llvm_bin = os.path.join(build_dir, "third_party/install/llvm/bin/")
config.environment['PATH'] = os.path.pathsep.join(
    (infrtopt_bin, infrtexec_bin, trtexec_bin, phi_ir_exec_bin, llvm_bin, config.environment['PATH']))

# Only files with this suffix are collected as tests.
config.suffixes = ['.mlir']
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
from paddle.jit import to_static
from paddle.static import InputSpec
class AbsNet(paddle.nn.Layer):
    """Parameter-free layer whose forward pass applies ``paddle.abs``."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``paddle.abs(x)`` for the input ``x``."""
        return paddle.abs(x)
if __name__ == '__main__':
    # Instantiate the network to export.
    abs_net = AbsNet()
    # Convert it with to_static using a single input named 'x' whose first
    # dimension is left unspecified.
    x_spec = InputSpec(shape=[None, 1, 28, 28], name='x')
    static_net = to_static(abs_net, input_spec=[x_spec])
    # Save to the path passed as the first command-line argument.
    paddle.jit.save(static_net, sys.argv[1])
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
# url: https://aistudio.baidu.com/aistudio/projectdetail/3756986?forkThirdPart=1
from net import EfficientNet
import paddle
from paddle.jit import to_static
from paddle.static import InputSpec
# Build the 'efficientnet-b4' variant, convert it with to_static for a single
# input named 'x' (first dimension unspecified), and save it to the path
# passed as the first command-line argument.
effnet = EfficientNet.from_name('efficientnet-b4')
x_spec = InputSpec(shape=[None, 3, 256, 256], name='x')
static_net = to_static(effnet, input_spec=[x_spec])
paddle.jit.save(static_net, sys.argv[1])
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .efficientnet import EfficientNet
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
from paddle.jit import to_static
from paddle.static import InputSpec
from paddle.vision.models import resnet50
# Build resnet50 (called with True as its only argument), convert it with
# to_static for a single input named 'x' (first dimension unspecified), and
# save it to the path passed as the first command-line argument.
backbone = resnet50(True)
x_spec = InputSpec(shape=[None, 3, 256, 256], name='x')
static_net = to_static(backbone, input_spec=[x_spec])
paddle.jit.save(static_net, sys.argv[1])
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <llvm/Support/CommandLine.h>
#include <mlir/Pass/PassManager.h>
#include <iostream>
#include <string>
#include "llvm/Support/DynamicLibrary.h"
#include "paddle/infrt/common/global.h"
#include "paddle/infrt/dialect/mlir_loader.h"
#include "paddle/infrt/host_context/core_runtime.h"
#include "paddle/infrt/host_context/kernel_registry.h"
#include "paddle/infrt/host_context/mlir_to_runtime_translate.h"
#include "paddle/infrt/kernel/basic_kernels.h"
#include "paddle/infrt/kernel/control_flow_kernels.h"
#include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.h"
#include "paddle/infrt/kernel/phi/registry.h"
#include "paddle/infrt/kernel/tensor_kernels.h"
#include "paddle/infrt/kernel/tensor_shape_kernels.h"
#include "paddle/infrt/kernel/test_kernels.h"
#include "paddle/infrt/kernel/phi/infershaped/infershaped_utils.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/infrt/dialect/infrt/ir/basic_kernels.h"
#include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h"
#include "paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.h"
#include "paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h"
#include "paddle/infrt/host_context/paddle_mlir.h"
#include "paddle/infrt/dialect/dense_tensor.h"
#include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h"
#include "paddle/infrt/dialect/phi/ir/phi_base.h"
#include "paddle/infrt/dialect/phi/ir/phi_kernels.h"
// Command-line option "shared_libs": zero or more comma-separated values.
// Each value is passed as a path to
// llvm::sys::DynamicLibrary::getPermanentLibrary() by the test below, which
// then looks up a "RegisterKernels" symbol in the loaded library.
static llvm::cl::list<std::string> cl_shared_libs(  // NOLINT
    "shared_libs",
    llvm::cl::desc("Specify shared library with kernels."),
    llvm::cl::ZeroOrMore,
    llvm::cl::MiscFlags::CommaSeparated);
// Imports the Paddle model ./abs.pdmodel / ./abs.pdiparams (resolved against
// the current working directory) into an MLIR module, runs the phi op
// conversion pass and the infrt op fuse pass over every function in it, and
// executes the resulting module with a freshly populated kernel registry.
TEST(ABS_MODEL, convert_and_execute) {
  std::string model_file_name = "./abs.pdmodel";
  std::string params_file_name = "./abs.pdiparams";

  // convert model
  MLIRModelGenImpl myGen;
  auto module_ = myGen.ImportPaddleModel(model_file_name, params_file_name);
  module_.dump();

  // pick kernel
  mlir::MLIRContext* context = infrt::Global::getMLIRContext();
  context->allowUnregisteredDialects();
  context->getOrLoadDialect<mlir::StandardOpsDialect>();
  context->getOrLoadDialect<infrt::InfrtDialect>();
  context->getOrLoadDialect<infrt::ts::TensorShapeDialect>();
  context->getOrLoadDialect<infrt::dt::DTDialect>();
  context->getOrLoadDialect<infrt::pd::PaddleDialect>();
  context->getOrLoadDialect<infrt::phi::PHIDenseTensorDialect>();
  context->getOrLoadDialect<infrt::phi::PHICPUKernelDialect>();
  context->getOrLoadDialect<infrt::phi::PHIGPUKernelDialect>();
  context->getOrLoadDialect<infrt::phi::PHIDialect>();
  context->loadAllAvailableDialects();

  mlir::PassManager pm(context);
  mlir::OpPassManager& phi_pass_manager = pm.nest<mlir::FuncOp>();
  // The only place offered to the conversion pass is CPU / FLOAT32 / NCHW.
  std::vector<infrt::Place> valid_places = {{infrt::TargetType::CPU,
                                             infrt::PrecisionType::FLOAT32,
                                             infrt::LayoutType::NCHW}};
  phi_pass_manager.addPass(infrt::CreatePhiOpCvtPass(valid_places));
  phi_pass_manager.addPass(infrt::CreateInfrtOpFusePass());
  if (mlir::failed(pm.run(module_))) {
    std::cout << "\npass failed!\n" << std::endl;
    // A failed pipeline must fail the test instead of silently going on to
    // execute a module the passes did not finish processing.
    FAIL() << "phi op conversion / infrt op fuse pass pipeline failed";
  }
  module_.dump();

  // execute
  infrt::host_context::KernelRegistry registry;
  infrt::kernel::RegisterBasicKernels(&registry);
  infrt::kernel::RegisterTestKernels(&registry);
  infrt::kernel::RegisterTensorShapeKernels(&registry);
  infrt::kernel::RegisterTensorKernels(&registry);
  infrt::kernel::RegisterControlFlowKernels(&registry);
  infrt::kernel::RegisterPhiKernels(&registry);
  infrt::kernel::RegisterInferShapeLaunchers(&registry);

  // load extra shared library
  for (const auto& lib_path : cl_shared_libs) {
    std::string err;
    llvm::sys::DynamicLibrary dynLib =
        llvm::sys::DynamicLibrary::getPermanentLibrary(lib_path.c_str(), &err);
    if (!dynLib.isValid()) {
      // The first library that fails to load stops the loop; libraries listed
      // after it are not attempted.
      llvm::errs() << "Load shared library failed. Error: " << err << "\n";
      break;
    }
    if (auto reg_sym = dynLib.SearchForAddressOfSymbol("RegisterKernels")) {
      // The symbol is called as void(KernelRegistry*) so the library can add
      // its own kernels to the registry.
      auto reg_func =
          reinterpret_cast<void (*)(infrt::host_context::KernelRegistry*)>(
              reg_sym);
      reg_func(&registry);
    } else {
      llvm::outs() << "Symbol \"RegisterKernels\" not found in \"" << lib_path
                   << "\". Skip.\n";
    }
  }

  infrt::host_context::TestMlir(module_, &registry);
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <chrono>
#include <ctime>
#include <limits>
#include <sstream>
#include <string>
#include <vector>
namespace infrt {
namespace tests {

// Stopwatch over a std::chrono clock type. TimePoint::min() is used as the
// "not started" marker, both after construction and after Clear().
template <typename ClockT>
class ChronoTimer {
 public:
  using TimePoint = std::chrono::time_point<ClockT>;

  ChronoTimer() : start_{TimePoint::min()} {}

  // Puts the timer back into the "not started" state.
  void Clear() { start_ = TimePoint::min(); }

  // Records the current time of ClockT as the start point.
  void Start() { start_ = ClockT::now(); }

  // Returns the milliseconds elapsed since the last Start(), or 0 when the
  // timer has not been started: subtracting TimePoint::min() from now() would
  // overflow the clock's duration representation.
  double GetMs() {
    if (start_ == TimePoint::min()) {
      return 0.0;
    }
    auto diff = ClockT::now() - start_;
    return static_cast<double>(
               std::chrono::duration_cast<std::chrono::duration<double>>(diff)
                   .count()) *
           1000.0;
  }

 private:
  TimePoint start_;
};

// To learn more about the difference between system_clock and steady_clock,
// please refer to https://www.cnblogs.com/zhongpan/p/7490657.html.
// To learn more about the difference between Wall Time and CPU Time,
// please refer to https://blog.csdn.net/aganlengzi/article/details/21888351
// and https://blog.csdn.net/filyouzicha/article/details/52447887.
using WallClockTimer = ChronoTimer<std::chrono::system_clock>;

// Stopwatch based on std::clock().
class CpuClockTimer {
 public:
  CpuClockTimer() = default;

  // Resets the start point to 0.
  void Clear() { start_ = 0; }

  // Records the current std::clock() value as the start point.
  void Start() { start_ = std::clock(); }

  // Returns the std::clock() ticks since the start point, converted to
  // milliseconds with CLOCKS_PER_SEC.
  double GetMs() {
    std::clock_t diff = std::clock() - start_;
    return static_cast<double>(diff * 1000.0 / CLOCKS_PER_SEC);
  }

 private:
  std::clock_t start_{0};
};

// Collects one wall-time and one CPU-time sample per Start()/Stop() pair and
// reports percentiles over the collected samples.
class BenchmarkStats {
 public:
  // Starts both timers.
  void Start() {
    wall_timer_.Start();
    cpu_timer_.Start();
  }

  // Appends the elapsed milliseconds of both timers to the sample lists.
  void Stop() {
    wall_time_.push_back(wall_timer_.GetMs());
    cpu_time_.push_back(cpu_timer_.GetMs());
  }

  // Formats the requested percentiles of the recorded samples.
  //
  // percents: fractions, e.g. 0.5 for the median. Values <= 0 (or NaN) select
  //   the smallest sample and values >= 1 select the largest one.
  // Returns one header line plus one value line per entry of `percents`, first
  // for wall time and then for CPU time. When no sample has been recorded the
  // value is printed as NaN instead of reading past the end of an empty
  // vector. Sorts the stored samples in place as a side effect.
  std::string Summerize(const std::vector<float>& percents) {
    std::stringstream ss;
    std::sort(wall_time_.begin(), wall_time_.end());
    std::sort(cpu_time_.begin(), cpu_time_.end());

    auto percentile = [](float p, const std::vector<float>& stats) {
      if (stats.empty()) {
        return std::numeric_limits<float>::quiet_NaN();
      }
      // Clamp before converting: casting a negative, NaN or huge float to
      // size_t is undefined behaviour.
      if (!(p > 0.0f)) {
        return stats.front();
      }
      if (p >= 1.0f) {
        return stats.back();
      }
      size_t mark = static_cast<size_t>(stats.size() * p);
      mark = std::min(mark, stats.size() - 1);
      return stats[mark];
    };

    for (auto p : percents) {
      ss << "=== Wall Time (ms): \n";
      ss << "  * percent " << std::to_string(static_cast<int>(p * 100));
      ss << ": " << percentile(p, wall_time_) << '\n';
    }
    for (auto p : percents) {
      ss << "=== CPU Time (ms): \n";
      ss << "  * percent " << std::to_string(static_cast<int>(p * 100));
      ss << ": " << percentile(p, cpu_time_) << '\n';
    }
    return ss.str();
  }

 private:
  WallClockTimer wall_timer_;
  std::vector<float> wall_time_;
  CpuClockTimer cpu_timer_;
  std::vector<float> cpu_time_;
};

}  // namespace tests
}  // namespace infrt
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册