Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
aa67c292
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
aa67c292
编写于
4月 19, 2022
作者:
H
huzhiqiang
提交者:
GitHub
4月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[infrt] support resnet50 on gpu backend (#41473)
上级
9ac6b7ed
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
269 addition
and
35 deletion
+269
-35
paddle/infrt/api/infrt_api.cc
paddle/infrt/api/infrt_api.cc
+18
-5
paddle/infrt/api/infrt_api.h
paddle/infrt/api/infrt_api.h
+4
-0
paddle/infrt/api/infrt_api_test.cc.in
paddle/infrt/api/infrt_api_test.cc.in
+114
-0
paddle/infrt/backends/host/phi_allocator.h
paddle/infrt/backends/host/phi_allocator.h
+3
-6
paddle/infrt/dialect/init_dialects.cc
paddle/infrt/dialect/init_dialects.cc
+1
-2
paddle/infrt/dialect/phi/ir/infrt_phi_tensor.td
paddle/infrt/dialect/phi/ir/infrt_phi_tensor.td
+15
-0
paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc
paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc
+62
-13
paddle/infrt/kernel/phi/dense_tensor_kernels.cc
paddle/infrt/kernel/phi/dense_tensor_kernels.cc
+38
-1
paddle/infrt/kernel/phi/dense_tensor_kernels.h
paddle/infrt/kernel/phi/dense_tensor_kernels.h
+3
-0
paddle/infrt/kernel/phi/registry.cc
paddle/infrt/kernel/phi/registry.cc
+3
-0
paddle/infrt/tests/dialect/phi/phi_pass.mlir
paddle/infrt/tests/dialect/phi/phi_pass.mlir
+8
-8
未找到文件。
paddle/infrt/api/infrt_api.cc
浏览文件 @
aa67c292
...
@@ -270,6 +270,12 @@ int InfRtPredictor::Init(const InfRtConfig& config) {
...
@@ -270,6 +270,12 @@ int InfRtPredictor::Init(const InfRtConfig& config) {
{
::
infrt
::
TargetType
::
CPU
,
{
::
infrt
::
TargetType
::
CPU
,
::
infrt
::
PrecisionType
::
FLOAT32
,
::
infrt
::
PrecisionType
::
FLOAT32
,
::
infrt
::
LayoutType
::
NCHW
}};
::
infrt
::
LayoutType
::
NCHW
}};
if
(
config
.
gpu_enabled
())
{
valid_places
.
insert
(
valid_places
.
begin
(),
::
infrt
::
Place
(
::
infrt
::
TargetType
::
GPU
,
::
infrt
::
PrecisionType
::
FLOAT32
,
::
infrt
::
LayoutType
::
NCHW
));
}
pass_manager
.
addPass
(
CreatePhiOpCvtPass
(
valid_places
));
pass_manager
.
addPass
(
CreatePhiOpCvtPass
(
valid_places
));
pass_manager
.
addPass
(
CreateInfrtOpFusePass
());
pass_manager
.
addPass
(
CreateInfrtOpFusePass
());
}
}
...
@@ -300,12 +306,19 @@ int InfRtPredictor::Init(const InfRtConfig& config) {
...
@@ -300,12 +306,19 @@ int InfRtPredictor::Init(const InfRtConfig& config) {
}
}
// Load params
// Load params
auto
tensor_map
=
::
infrt
::
kernel
::
phi
::
LoadCombinedParameters
(
if
(
config
.
gpu_enabled
()
&&
!
config
.
tensorrt_enabled
())
{
config
.
model_dir
(),
config
.
param_dir
());
auto
tensor_map
=
::
infrt
::
kernel
::
phi
::
LoadCombinedParamsToGpu
(
config
.
model_dir
(),
config
.
param_dir
());
impl_
->
executor
.
reset
(
new
PredictExecutor
(
module_op
,
registry
,
std
::
move
(
tensor_map
)));
}
else
{
auto
tensor_map
=
::
infrt
::
kernel
::
phi
::
LoadCombinedParameters
(
config
.
model_dir
(),
config
.
param_dir
());
impl_
->
executor
.
reset
(
new
PredictExecutor
(
module_op
,
registry
,
std
::
move
(
tensor_map
)));
}
// Create PredictExecutor
impl_
->
executor
.
reset
(
new
PredictExecutor
(
module_op
,
registry
,
std
::
move
(
tensor_map
)));
return
0
;
return
0
;
}
}
...
...
paddle/infrt/api/infrt_api.h
浏览文件 @
aa67c292
...
@@ -27,6 +27,7 @@ class InfRtConfig {
...
@@ -27,6 +27,7 @@ class InfRtConfig {
std
::
vector
<
std
::
string
>
shared_libs_
;
std
::
vector
<
std
::
string
>
shared_libs_
;
// TODO(wilber): Design an easy-to-use interface.
// TODO(wilber): Design an easy-to-use interface.
bool
gpu_enabled_
{
false
};
bool
tensorrt_enabled_
{
false
};
bool
tensorrt_enabled_
{
false
};
public:
public:
...
@@ -42,6 +43,9 @@ class InfRtConfig {
...
@@ -42,6 +43,9 @@ class InfRtConfig {
}
}
const
std
::
vector
<
std
::
string
>&
shared_libs
()
const
{
return
shared_libs_
;
}
const
std
::
vector
<
std
::
string
>&
shared_libs
()
const
{
return
shared_libs_
;
}
void
enable_gpu
()
{
gpu_enabled_
=
true
;
}
bool
gpu_enabled
()
const
{
return
gpu_enabled_
;
}
// TODO(wilber): Design an easy-to-use interface.
// TODO(wilber): Design an easy-to-use interface.
void
enable_tensorrt
()
{
tensorrt_enabled_
=
true
;
}
void
enable_tensorrt
()
{
tensorrt_enabled_
=
true
;
}
void
disable_tensorrt
()
{
tensorrt_enabled_
=
false
;
}
void
disable_tensorrt
()
{
tensorrt_enabled_
=
false
;
}
...
...
paddle/infrt/api/infrt_api_test.cc.in
浏览文件 @
aa67c292
...
@@ -57,6 +57,57 @@ TEST(InfRtPredictor, predictor) {
...
@@ -57,6 +57,57 @@ TEST(InfRtPredictor, predictor) {
ASSERT_EQ(output->dims(), ::phi::DDim({16, 10}));
ASSERT_EQ(output->dims(), ::phi::DDim({16, 10}));
}
}
TEST(InfRtPredictor, cpu_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::CpuPhiAllocator cpu_allocator;
::phi::DenseTensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32);
auto* input_data = reinterpret_cast<float*>(input->data());
for (int i = 0; i < input->numel(); i++) input_data[i] = 1.0;
for(int i = 0; i < 10; i++) {
predictor->Run();
}
auto start = std::chrono::steady_clock::now();
for(int i = 0; i < 10; i++) {
predictor->Run();
}
auto end = std::chrono::steady_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
std::cout <<"One predict period costs " << msec.count()/1000 << "ms.\n";
// get and print output tensor
auto* output = predictor->GetOutput(0);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i+=100) {
CHECK_NEAR(output->data<float>()[i*100], true_vals[i], 1e-5);
}
}
#ifdef INFRT_WITH_TRT
#ifdef INFRT_WITH_TRT
TEST(InfRtPredictor, trt_predictor) {
TEST(InfRtPredictor, trt_predictor) {
std::vector<std::string> shared_libs;
std::vector<std::string> shared_libs;
...
@@ -100,4 +151,67 @@ TEST(InfRtPredictor, trt_predictor) {
...
@@ -100,4 +151,67 @@ TEST(InfRtPredictor, trt_predictor) {
}
}
#endif
#endif
#ifdef INFRT_WITH_GPU
TEST(InfRtPredictor, gpu_predictor) {
std::vector<std::string> shared_libs;
InfRtConfig config;
config.enable_gpu();
config.set_model_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdmodel");
config.set_param_dir("@CMAKE_BINARY_DIR@/models/resnet50/model.pdiparams");
std::unique_ptr<InfRtPredictor> predictor = CreateInfRtPredictor(config);
::infrt::backends::GpuPhiAllocator gpu_allocator;
::phi::DenseTensor* input = predictor->GetInput(0);
input->Resize({2, 3, 256, 256});
input->AllocateFrom(&gpu_allocator, ::phi::DataType::FLOAT32);
auto* data = reinterpret_cast<float*>(input->data());
std::vector<float> input_data(2 * 3 * 256 * 256, 1.0);
cudaMemcpy(data,
input_data.data(),
sizeof(float) * input->numel(),
cudaMemcpyHostToDevice);
for(int i = 0; i < 10; i++) {
predictor->Run();
}
auto start = std::chrono::steady_clock::now();
for(int i = 0; i < 1000; i++) {
predictor->Run();
}
auto end = std::chrono::steady_clock::now();
auto msec = std::chrono::duration_cast<std::chrono::milliseconds>(end-start);
std::cout <<"One predict period costs " << msec.count()/1000 << "ms.\n";
auto* output = predictor->GetOutput(0);
std::vector<float> output_data(output->numel());
cudaMemcpy(output_data.data(),
output->data<float>(),
sizeof(float) * output->numel(),
cudaMemcpyDeviceToHost);
ASSERT_EQ(output->dims(), ::phi::DDim({2, 1000}));
const std::vector<float> true_vals {
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02,
-3.319006264209747314e-01, -1.418896913528442383e+00,
-6.934890151023864746e-01, -1.498023152351379395e+00,
3.078042864799499512e-01, -1.340998053550720215e+00,
3.508620023727416992e+00, 2.274388313293457031e+00,
-1.321727275848388672e+00, -8.888689428567886353e-02
};
for (size_t i = 0; i < true_vals.size(); i+=100) {
CHECK_NEAR(output_data[i*100], true_vals[i], 1e-5);
}
}
#endif
} // namespace infrt
} // namespace infrt
paddle/infrt/backends/host/phi_allocator.h
浏览文件 @
aa67c292
...
@@ -11,6 +11,7 @@ limitations under the License. */
...
@@ -11,6 +11,7 @@ limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/memory/malloc.h"
#include "paddle/phi/core/allocator.h"
#include "paddle/phi/core/allocator.h"
#ifdef INFRT_WITH_GPU
#ifdef INFRT_WITH_GPU
...
@@ -40,12 +41,8 @@ class GpuPhiAllocator : public phi::Allocator {
...
@@ -40,12 +41,8 @@ class GpuPhiAllocator : public phi::Allocator {
static
void
deleter
(
phi
::
Allocation
*
ptr
)
{
cudaFree
(
ptr
->
ptr
());
}
static
void
deleter
(
phi
::
Allocation
*
ptr
)
{
cudaFree
(
ptr
->
ptr
());
}
AllocationPtr
Allocate
(
size_t
bytes_size
)
{
AllocationPtr
Allocate
(
size_t
bytes_size
)
{
void
*
ptr
;
return
paddle
::
memory
::
Alloc
(
phi
::
Place
(
phi
::
AllocationType
::
GPU
),
cudaMalloc
(
&
ptr
,
bytes_size
);
bytes_size
);
return
AllocationPtr
(
new
phi
::
Allocation
(
ptr
,
bytes_size
,
phi
::
Place
(
phi
::
AllocationType
::
GPU
)),
deleter
);
}
}
};
};
#endif
#endif
...
...
paddle/infrt/dialect/init_dialects.cc
浏览文件 @
aa67c292
...
@@ -34,9 +34,8 @@ void registerCinnDialects(mlir::DialectRegistry ®istry) { // NOLINT
...
@@ -34,9 +34,8 @@ void registerCinnDialects(mlir::DialectRegistry ®istry) { // NOLINT
InfrtDialect
,
InfrtDialect
,
dt
::
DTDialect
,
dt
::
DTDialect
,
pd
::
PaddleDialect
,
pd
::
PaddleDialect
,
trt
::
TensorRTDialect
trt
::
TensorRTDialect
,
#ifdef INFRT_WITH_PHI
#ifdef INFRT_WITH_PHI
,
phi
::
PHIDenseTensorDialect
,
phi
::
PHIDenseTensorDialect
,
phi
::
PHICPUKernelDialect
,
phi
::
PHICPUKernelDialect
,
phi
::
PHIGPUKernelDialect
,
phi
::
PHIGPUKernelDialect
,
...
...
paddle/infrt/dialect/phi/ir/infrt_phi_tensor.td
浏览文件 @
aa67c292
...
@@ -40,6 +40,13 @@ def CreateHostInitedDenseTensorOp : PDT_Op<"create_host_inited_dense_tensor.f32"
...
@@ -40,6 +40,13 @@ def CreateHostInitedDenseTensorOp : PDT_Op<"create_host_inited_dense_tensor.f32"
let results = (outs DenseTensor:$output);
let results = (outs DenseTensor:$output);
}
}
def CreateInitedGpuFLOAT32DenseTensorOp
: PDT_Op<"create_inited_dense_tensor.gpu.f32", [NoSideEffect]> {
let arguments = (ins Context:$context, I64ArrayAttr:$dims,
LayoutAttr:$layout, I64ArrayAttr:$lod, F32Attr:$value);
let results = (outs DenseTensor:$output);
}
def CreateInitedCpuFLOAT32DenseTensorOp
def CreateInitedCpuFLOAT32DenseTensorOp
: PDT_Op<"create_inited_dense_tensor.cpu.f32", [NoSideEffect]> {
: PDT_Op<"create_inited_dense_tensor.cpu.f32", [NoSideEffect]> {
let arguments = (ins Context:$context, I64ArrayAttr:$dims,
let arguments = (ins Context:$context, I64ArrayAttr:$dims,
...
@@ -86,6 +93,14 @@ def PDT_LoadCombinedParamsOp : PDT_Op<"load_combined_params", [NoSideEffect]> {
...
@@ -86,6 +93,14 @@ def PDT_LoadCombinedParamsOp : PDT_Op<"load_combined_params", [NoSideEffect]> {
let assemblyFormat = "`(``)`attr-dict";
let assemblyFormat = "`(``)`attr-dict";
}
}
def PDT_LoadCombinedParamsGpuOp : PDT_Op<"load_combined_params_to_gpu", [NoSideEffect]> {
// input path of model params.
let arguments = (ins StrAttr:$model_path, StrAttr:$params_path);
let results = (outs PD_DenseTensorMap:$out);
let assemblyFormat = "`(``)`attr-dict";
}
def PDT_TensorMapGetSizeOp : PDT_Op<"tensor_map_get_size", [NoSideEffect]> {
def PDT_TensorMapGetSizeOp : PDT_Op<"tensor_map_get_size", [NoSideEffect]> {
let arguments = (ins PD_DenseTensorMap:$map);
let arguments = (ins PD_DenseTensorMap:$map);
let results = (outs I32:$size);
let results = (outs I32:$size);
...
...
paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc
浏览文件 @
aa67c292
...
@@ -76,6 +76,7 @@ class PhiOpConvertPass
...
@@ -76,6 +76,7 @@ class PhiOpConvertPass
void
getDependentDialects
(
mlir
::
DialectRegistry
&
registry
)
const
override
;
void
getDependentDialects
(
mlir
::
DialectRegistry
&
registry
)
const
override
;
private:
private:
void
updateInputsAndResults
(
infrt
::
TargetType
target
);
void
convertStage
();
void
convertStage
();
void
dispatchStage
();
void
dispatchStage
();
...
@@ -110,10 +111,50 @@ mlir::LogicalResult PhiOpConvertPass::initialize(mlir::MLIRContext *context) {
...
@@ -110,10 +111,50 @@ mlir::LogicalResult PhiOpConvertPass::initialize(mlir::MLIRContext *context) {
// Implementation of the PhiOpConvertPass.
// Implementation of the PhiOpConvertPass.
void
PhiOpConvertPass
::
runOnFunction
()
{
void
PhiOpConvertPass
::
runOnFunction
()
{
updateInputsAndResults
(
valid_places_
[
0
].
target
);
convertStage
();
convertStage
();
dispatchStage
();
dispatchStage
();
}
}
void
PhiOpConvertPass
::
updateInputsAndResults
(
infrt
::
TargetType
target
)
{
mlir
::
Block
&
body
=
getFunction
().
front
();
auto
loc
=
getFunction
().
getLoc
();
mlir
::
Operation
&
operation
=
body
.
front
();
mlir
::
MLIRContext
*
context
=
operation
.
getContext
();
size_t
num_input
=
body
.
getNumArguments
();
// step1. update input cpu tensors into gpu tensors
for
(
size_t
index
=
0
;
index
<
num_input
;
index
++
)
{
auto
argument
=
body
.
getArgument
(
index
);
if
(
auto
t
=
argument
.
getType
().
dyn_cast
<::
infrt
::
DenseTensorType
>
())
{
mlir
::
Type
replace_type
=
infrt
::
DenseTensorType
::
get
(
context
,
target
,
t
.
getPrecision
(),
infrt
::
LayoutType
::
NCHW
);
getFunction
().
insertArgument
(
index
,
replace_type
,
{},
loc
);
argument
.
replaceAllUsesWith
(
getFunction
().
getArgument
(
index
));
getFunction
().
eraseArgument
(
index
+
1
);
}
}
// update output tensors
unsigned
int
num_result
=
getFunction
().
getNumResults
();
for
(
unsigned
int
index
=
0
;
index
<
num_result
;
index
++
)
{
mlir
::
Type
replace_type
=
infrt
::
DenseTensorType
::
get
(
context
,
target
,
infrt
::
PrecisionType
::
FLOAT32
,
infrt
::
LayoutType
::
NCHW
);
getFunction
().
eraseResult
(
index
);
getFunction
().
insertResult
(
index
,
replace_type
,
{});
}
// update dense_tensor_map
mlir
::
Type
replace_type
=
infrt
::
DenseTensorType
::
get
(
context
,
target
,
infrt
::
PrecisionType
::
FLOAT32
,
infrt
::
LayoutType
::
NCHW
);
for
(
auto
&
op
:
body
.
without_terminator
())
{
if
(
op
.
getName
().
getIdentifier
().
str
()
==
"phi_dt.tensor_map_get_tensor"
)
op
.
getResult
(
0
).
setType
(
replace_type
);
}
}
void
PhiOpConvertPass
::
convertStage
()
{
void
PhiOpConvertPass
::
convertStage
()
{
mlir
::
Block
&
body
=
getFunction
().
front
();
mlir
::
Block
&
body
=
getFunction
().
front
();
std
::
vector
<
mlir
::
Operation
*>
worklist
;
std
::
vector
<
mlir
::
Operation
*>
worklist
;
...
@@ -200,6 +241,7 @@ void PhiOpConvertPass::dispatchStage() {
...
@@ -200,6 +241,7 @@ void PhiOpConvertPass::dispatchStage() {
mlir
::
OpBuilder
builder
(
&
block
,
block
.
begin
());
mlir
::
OpBuilder
builder
(
&
block
,
block
.
begin
());
std
::
map
<
infrt
::
TargetType
,
mlir
::
Value
>
phi_context
;
std
::
map
<
infrt
::
TargetType
,
mlir
::
Value
>
phi_context
;
for
(
infrt
::
KernelOp
kernel_op
:
worklist
)
{
for
(
infrt
::
KernelOp
kernel_op
:
worklist
)
{
std
::
string
kernel_name
=
kernel_op
.
name
().
str
();
std
::
string
kernel_name
=
kernel_op
.
name
().
str
();
std
::
vector
<
infrt
::
PhiKernelDesc
>
candidates
=
std
::
vector
<
infrt
::
PhiKernelDesc
>
candidates
=
...
@@ -257,15 +299,25 @@ void PhiOpConvertPass::dispatchStage() {
...
@@ -257,15 +299,25 @@ void PhiOpConvertPass::dispatchStage() {
for
(
size_t
index
=
0
;
index
<
phi_kernel_desc
.
input_types
.
size
();
for
(
size_t
index
=
0
;
index
<
phi_kernel_desc
.
input_types
.
size
();
++
index
)
{
++
index
)
{
mlir
::
Value
input
=
kernel_op
.
getOperand
(
index
);
mlir
::
Value
input
=
kernel_op
.
getOperand
(
index
);
auto
cvt_tensor_type_op
=
builder
.
create
<
infrt
::
TensorCastOp
>
(
if
(
input
.
getType
().
dyn_cast
<::
infrt
::
DenseTensorType
>
().
getTarget
()
==
kernel_op
.
getLoc
(),
::
infrt
::
TargetType
::
CPU
&&
infrt
::
DenseTensorType
::
get
(
phi_kernel_desc
.
input_types
[
index
].
target
==
kernel_op
.
getContext
(),
::
infrt
::
TargetType
::
GPU
)
{
phi_kernel_desc
.
input_types
[
index
].
target
,
auto
cvt_tensor_type_op
=
builder
.
create
<
infrt
::
phi
::
GpuMemCopyOp
>
(
phi_kernel_desc
.
input_types
[
index
].
precision
,
kernel_op
.
getLoc
(),
phi_kernel_desc
.
input_types
[
index
].
layout
),
infrt
::
DenseTensorType
::
get
(
input
);
kernel_op
.
getContext
(),
operation_state
.
addOperands
(
cvt_tensor_type_op
.
output
());
phi_kernel_desc
.
input_types
[
index
].
target
,
phi_kernel_desc
.
input_types
[
index
].
precision
,
phi_kernel_desc
.
input_types
[
index
].
layout
),
input
,
phi_context
[
infrt
::
TargetType
::
GPU
],
mlir
::
BoolAttr
::
get
(
kernel_op
.
getContext
(),
/*d2h*/
false
));
operation_state
.
addOperands
(
cvt_tensor_type_op
.
output
());
}
else
{
operation_state
.
addOperands
(
input
);
}
}
}
for
(
size_t
index
=
0
;
index
<
phi_kernel_desc
.
output_types
.
size
();
for
(
size_t
index
=
0
;
index
<
phi_kernel_desc
.
output_types
.
size
();
...
@@ -280,11 +332,8 @@ void PhiOpConvertPass::dispatchStage() {
...
@@ -280,11 +332,8 @@ void PhiOpConvertPass::dispatchStage() {
mlir
::
Operation
*
phi_operation
=
builder
.
createOperation
(
operation_state
);
mlir
::
Operation
*
phi_operation
=
builder
.
createOperation
(
operation_state
);
for
(
size_t
index
=
0
;
index
<
phi_kernel_desc
.
output_types
.
size
();
for
(
size_t
index
=
0
;
index
<
phi_kernel_desc
.
output_types
.
size
();
++
index
)
{
++
index
)
{
mlir
::
Value
input
=
phi_operation
->
getResult
(
index
);
auto
cvt_tensor_type_op
=
builder
.
create
<
infrt
::
TensorCastOp
>
(
kernel_op
.
getLoc
(),
kernel_op
.
getResultTypes
()[
index
],
input
);
kernel_op
.
getResult
(
index
).
replaceAllUsesWith
(
kernel_op
.
getResult
(
index
).
replaceAllUsesWith
(
cvt_tensor_type_op
.
output
(
));
phi_operation
->
getResult
(
index
));
}
}
kernel_op
.
erase
();
kernel_op
.
erase
();
}
}
...
...
paddle/infrt/kernel/phi/dense_tensor_kernels.cc
浏览文件 @
aa67c292
...
@@ -62,7 +62,7 @@ namespace phi {
...
@@ -62,7 +62,7 @@ namespace phi {
::
phi
::
make_ddim
(
dims
.
get
()),
::
phi
::
make_ddim
(
dims
.
get
()),
ConvertLayoutToPhi
(
layout
.
get
()),
ConvertLayoutToPhi
(
layout
.
get
()),
{}));
{}));
float
*
a_data
=
dense_tensor
.
mutable_data
<
float
>
(
::
phi
::
CPU
Place
());
float
*
a_data
=
dense_tensor
.
mutable_data
<
float
>
(
context
.
Get
Place
());
for
(
int64_t
i
=
0
;
i
<
dense_tensor
.
numel
();
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
dense_tensor
.
numel
();
++
i
)
{
a_data
[
i
]
=
value
.
get
();
a_data
[
i
]
=
value
.
get
();
}
}
...
@@ -260,6 +260,43 @@ void PrintDenseTensor(::phi::DenseTensor* dense_tensor) {
...
@@ -260,6 +260,43 @@ void PrintDenseTensor(::phi::DenseTensor* dense_tensor) {
return
map
;
return
map
;
}
}
::
infrt
::
phi
::
DenseTensorMap
LoadCombinedParamsToGpu
(
const
std
::
string
&
model_path
,
const
std
::
string
&
params_path
)
{
::
infrt
::
phi
::
DenseTensorMap
map
;
auto
pb_proto_prog
=
paddle
::
LoadProgram
(
model_path
);
auto
main_block
=
pb_proto_prog
->
blocks
(
0
);
std
::
ifstream
param_file
(
params_path
,
std
::
ios
::
binary
);
std
::
set
<
std
::
string
>
tmp
;
for
(
auto
&
var
:
main_block
.
vars
())
{
if
(
var
.
name
()
==
"feed"
||
var
.
name
()
==
"fetch"
||
!
var
.
persistable
())
{
continue
;
}
if
(
var
.
type
().
type
()
==
::
paddle
::
framework
::
proto
::
VarType_Type_LOD_TENSOR
)
{
tmp
.
emplace
(
var
.
name
());
}
else
{
llvm_unreachable
(
"the tensor type is illegal."
);
}
}
#ifdef INFRT_WITH_GPU
::
phi
::
GPUContext
ctx
;
ctx
.
PartialInitWithoutAllocator
();
for
(
auto
&
var
:
tmp
)
{
std
::
unique_ptr
<::
phi
::
DenseTensor
>
tensor
{
std
::
make_unique
<::
phi
::
DenseTensor
>
()};
::
paddle
::
framework
::
DeserializeFromStream
(
param_file
,
tensor
.
get
(),
ctx
);
map
.
SetDenseTensor
(
var
,
std
::
move
(
tensor
));
}
#endif
return
map
;
}
::
infrt
::
phi
::
DenseTensorMap
LoadCombinedParams
(
::
infrt
::
phi
::
DenseTensorMap
LoadCombinedParams
(
host_context
::
Attribute
<
std
::
string
>
model_path
,
host_context
::
Attribute
<
std
::
string
>
model_path
,
host_context
::
Attribute
<
std
::
string
>
params_path
)
{
host_context
::
Attribute
<
std
::
string
>
params_path
)
{
...
...
paddle/infrt/kernel/phi/dense_tensor_kernels.h
浏览文件 @
aa67c292
...
@@ -73,6 +73,9 @@ void PrintDenseTensor(::phi::DenseTensor* dense_tensor);
...
@@ -73,6 +73,9 @@ void PrintDenseTensor(::phi::DenseTensor* dense_tensor);
::
infrt
::
phi
::
DenseTensorMap
LoadCombinedParameters
(
::
infrt
::
phi
::
DenseTensorMap
LoadCombinedParameters
(
const
std
::
string
&
model_path
,
const
std
::
string
&
params_path
);
const
std
::
string
&
model_path
,
const
std
::
string
&
params_path
);
::
infrt
::
phi
::
DenseTensorMap
LoadCombinedParamsToGpu
(
const
std
::
string
&
model_path
,
const
std
::
string
&
params_path
);
int32_t
TensorMapGetSize
(
const
::
infrt
::
phi
::
DenseTensorMap
&
map
);
int32_t
TensorMapGetSize
(
const
::
infrt
::
phi
::
DenseTensorMap
&
map
);
#ifdef INFRT_WITH_GPU
#ifdef INFRT_WITH_GPU
...
...
paddle/infrt/kernel/phi/registry.cc
浏览文件 @
aa67c292
...
@@ -68,6 +68,9 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
...
@@ -68,6 +68,9 @@ void RegisterPhiKernels(host_context::KernelRegistry* registry) {
registry
->
AddKernel
(
"phi_dt.load_params"
,
registry
->
AddKernel
(
"phi_dt.load_params"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
LoadParams
),
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
LoadParams
),
{
"path"
});
{
"path"
});
registry
->
AddKernel
(
"phi_dt.load_combined_params_to_gpu"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
LoadCombinedParamsToGpu
),
{
"model_path"
,
"params_path"
});
registry
->
AddKernel
(
"phi_dt.load_combined_params"
,
registry
->
AddKernel
(
"phi_dt.load_combined_params"
,
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
LoadCombinedParams
),
INFRT_KERNEL
(
infrt
::
kernel
::
phi
::
LoadCombinedParams
),
{
"model_path"
,
"params_path"
});
{
"model_path"
,
"params_path"
});
...
...
paddle/infrt/tests/dialect/phi/phi_pass.mlir
浏览文件 @
aa67c292
// RUN: infrtopt -phi-op-convert=valid-targets=CPU-FP32-NCHW -infrt-op-fuse %s
// RUN: infrtopt -phi-op-convert=valid-targets=CPU-FP32-NCHW -infrt-op-fuse %s
// CHECK-LABEL: @ops
// CHECK-LABEL: @ops
func @ops(%a:!infrt.
lod_tensor<?xf32,0>, %b:!infrt.lod_tensor<?xf32,0
>) {
func @ops(%a:!infrt.
dense_tensor<CPU, FP32, NCHW>, %b:!infrt.dense_tensor<CPU, FP32, NCHW
>) {
%g = "pd.elementwise_add"(%a, %b) {axis=1:si32} : (!infrt.
lod_tensor<?xf32,0>, !infrt.lod_tensor<?xf32>) -> tensor<?xf32
>
%g = "pd.elementwise_add"(%a, %b) {axis=1:si32} : (!infrt.
dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW
>
%h = "pd.abs"(%g):(
tensor<?xf32>) -> tensor<?xf32
>
%h = "pd.abs"(%g):(
!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW
>
infrt.return %h:
tensor<?xf32
>
infrt.return %h:
!infrt.dense_tensor<CPU, FP32, NCHW
>
}
}
// CHECK-LABEL: @op_execute
// CHECK-LABEL: @op_execute
func @op_execute(%a:!infrt.
lod_tensor<?xf32,0>, %b:!infrt.lod_tensor<?xf32,0>, %c:!infrt.lod_tensor<?xf32,0>) -> !infrt.lod_tensor<?xf32,0
> {
func @op_execute(%a:!infrt.
dense_tensor<CPU, FP32, NCHW>, %b:!infrt.dense_tensor<CPU, FP32, NCHW>, %c:!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW
> {
%g = "pd.elementwise_add"(%a, %b) {axis=1:si32} : (!infrt.
lod_tensor<?xf32,0>, !infrt.lod_tensor<?xf32>) -> tensor<?xf32
>
%g = "pd.elementwise_add"(%a, %b) {axis=1:si32} : (!infrt.
dense_tensor<CPU, FP32, NCHW>, !infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW
>
%h = "pd.abs"(%g):(
tensor<?xf32>) -> tensor<?xf32
>
%h = "pd.abs"(%g):(
!infrt.dense_tensor<CPU, FP32, NCHW>) -> !infrt.dense_tensor<CPU, FP32, NCHW
>
infrt.return %h:
tensor<?xf32
>
infrt.return %h:
!infrt.dense_tensor<CPU, FP32, NCHW
>
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录