Commit 1fd1c169 (unverified)
Authored on Feb 22, 2023 by zhupengyang; committed via GitHub on Feb 22, 2023.

[XPU] link out_max to x_max between xpu_fusion_ops (#50690)

Parent: afd8cc42
Showing 10 changed files with 312 additions and 48 deletions (+312, -48):
paddle/fluid/framework/ir/CMakeLists.txt (+1, -0)
paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc (+25, -16)
paddle/fluid/framework/ir/xpu/link_xpu_op_max_pass.cc (+145, -0)
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc (+15, -20)
paddle/fluid/inference/api/paddle_pass_builder.cc (+1, -1)
paddle/phi/api/yaml/static_ops.yaml (+3, -3)
paddle/phi/infermeta/fusion.cc (+6, -1)
paddle/phi/infermeta/fusion.h (+3, -1)
paddle/phi/kernels/fusion/xpu/fc_xpu_kernel.cc (+9, -6)
python/paddle/fluid/tests/unittests/ir/inference/test_xpu_link_xpu_op_max_pass.py (+104, -0)
paddle/fluid/framework/ir/CMakeLists.txt

@@ -226,6 +226,7 @@ if(WITH_XPU)
       ${XPU_PASS_DEPS})
   pass_library(multi_encoder_xpu_slice_fuse_pass inference DIR xpu)
   pass_library(generate_sequence_xpu_fuse_pass inference DIR xpu)
+  pass_library(link_xpu_op_max_pass inference DIR xpu)
 endif()
 cc_library(
paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc

@@ -159,9 +159,9 @@ Fused subgraph:
       \     |     /      |
        \    |    /       |
          fc_xpu-----------
-           |
-           |
-        act_out
+           |    \
+           |     \
+        act_out  out_max
 */
 class FcXPUFusePass : public FusePassBase {
  protected:

@@ -185,6 +185,7 @@ void FcXPUFusePass::ApplyImpl(ir::Graph* graph) const {
   for (auto act_type : {"relu", "gelu", "tanh", ""}) {
     ApplyImpl(graph, mul_type, with_bias, act_type);

@@ -244,6 +245,18 @@ void FcXPUFusePass::ApplyImpl(ir::Graph* graph,
     QuantWeight<int16_t>(mul_w_tensor, mul_w_max_tensor, !transpose_w);
   }
+  std::string fc_out_name;
+  if (act_out) {
+    fc_out_name = act_out->Name();
+  } else if (add_out) {
+    fc_out_name = add_out->Name();
+  } else {
+    fc_out_name = mul_out->Name();
+  }
+  std::string fc_out_max_name = fc_out_name + "_max";
+  VarDesc fc_out_max_desc(fc_out_max_name);
+  Node* fc_out_max = graph->CreateVarNode(&fc_out_max_desc);
   // Generate fc_xpu op
   framework::OpDesc fc_xpu_op_desc(block);
   fc_xpu_op_desc.SetType("fc_xpu");

@@ -282,25 +295,21 @@ void FcXPUFusePass::ApplyImpl(ir::Graph* graph,
           "act_alpha", PADDLE_GET_CONST(float, act->Op()->GetAttr("slope")));
     }
   }
-  if (act_out) {
-    fc_xpu_op_desc.SetOutput("out", {act_out->Name()});
-  } else if (add_out) {
-    fc_xpu_op_desc.SetOutput("out", {add_out->Name()});
-  } else {
-    fc_xpu_op_desc.SetOutput("out", {mul_out->Name()});
-  }
+  fc_xpu_op_desc.SetOutput("out", {fc_out_name});
+  fc_xpu_op_desc.SetOutput("out_max", {fc_out_max_name});
   auto* fc_xpu = graph->CreateOpNode(&fc_xpu_op_desc);
-  SAFE_IR_NODE_LINK_TO(mul_x, fc_xpu);
-  SAFE_IR_NODE_LINK_TO(mul_w, fc_xpu);
-  SAFE_IR_NODE_LINK_TO(mul_w_max, fc_xpu);
+  IR_NODE_LINK_TO(mul_x, fc_xpu);
+  IR_NODE_LINK_TO(mul_w, fc_xpu);
+  IR_NODE_LINK_TO(mul_w_max, fc_xpu);
   SAFE_IR_NODE_LINK_TO(bias, fc_xpu);
   if (act_out) {
-    SAFE_IR_NODE_LINK_TO(fc_xpu, act_out);
+    IR_NODE_LINK_TO(fc_xpu, act_out);
   } else if (add_out) {
-    SAFE_IR_NODE_LINK_TO(fc_xpu, add_out);
+    IR_NODE_LINK_TO(fc_xpu, add_out);
   } else {
-    SAFE_IR_NODE_LINK_TO(fc_xpu, mul_out);
+    IR_NODE_LINK_TO(fc_xpu, mul_out);
   }
+  IR_NODE_LINK_TO(fc_xpu, fc_out_max);
   // delete useless node
   std::unordered_set<const Node*> delete_nodes;
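The added lines above follow the standard recipe for giving a fused op a brand-new output variable: build a VarDesc, materialize it with Graph::CreateVarNode, then wire it in with IR_NODE_LINK_TO. A minimal standalone sketch of just that recipe, using the same framework headers — the helper name AddMaxOutput is hypothetical, for illustration only, not part of this diff:

// Sketch (illustrative, not from the diff): attach a fresh "<out>_max"
// variable as an extra output of an existing op node.
#include <string>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"  // IR_NODE_LINK_TO
#include "paddle/fluid/framework/var_desc.h"

namespace paddle {
namespace framework {
namespace ir {

Node* AddMaxOutput(Graph* graph, Node* op, const std::string& out_name) {
  VarDesc max_desc(out_name + "_max");  // naming convention used by the pass
  Node* max_node = graph->CreateVarNode(&max_desc);
  // Record the new variable in the op's output slot ...
  op->Op()->SetOutput("out_max", {max_node->Name()});
  // ... and add the corresponding op -> var edge in the graph.
  IR_NODE_LINK_TO(op, max_node);
  return max_node;
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle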
paddle/fluid/framework/ir/xpu/link_xpu_op_max_pass.cc (new file, mode 100644)

// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/xpu/pass_utils.h"
#include "paddle/fluid/framework/ir/xpu/quant_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"

namespace phi {
class DenseTensor;
}  // namespace phi

namespace paddle {
namespace framework {
class Scope;
}  // namespace framework
}  // namespace paddle

namespace paddle {
namespace framework {
namespace ir {
namespace patterns {

struct FusionXPUOpPattern : public PatternBase {
  FusionXPUOpPattern(PDPattern* pattern,
                     const std::string& name_scope,
                     const std::string& op_type);

  // declare operator node's name
  PATTERN_DECL_NODE(fusion_op);
  // declare variable node's name
  PATTERN_DECL_NODE(out);
  PATTERN_DECL_NODE(out_max);

 private:
  std::string op_type_;
};

FusionXPUOpPattern::FusionXPUOpPattern(PDPattern* pattern,
                                       const std::string& name_scope,
                                       const std::string& op_type)
    : PatternBase(pattern, name_scope, name_scope), op_type_(op_type) {
  auto* fusion_op = pattern->NewNode(fusion_op_repr())->assert_is_op(op_type_);
  auto* out = pattern->NewNode(out_repr())
                  ->assert_is_op_output(op_type_, "out")
                  ->assert_var_not_persistable();
  auto* out_max = pattern->NewNode(out_max_repr())
                      ->assert_is_op_output(op_type_, "out_max")
                      ->assert_var_not_persistable();
  fusion_op->LinksTo({out, out_max});
}

}  // namespace patterns

class LinkXPUOpMaxPass : public FusePassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;

 private:
  void ApplyImpl(ir::Graph* graph, const std::string& op_type) const;

  const std::string name_scope_{"multi_encoder_xpu_slice_fuse_pass"};
  // ops with x_max/out_max
  std::set<std::string> op_types_{"fc_xpu", "conv2d_xpu"};
};

/*
Origin subgraph:
        fusion_xpu_op0
          /       \
         |         |
       out0     out0_max
         |
          \
      fusion_xpu_op1

Fused subgraph:
        fusion_xpu_op0
          /       \
         |         |
       out0     out0_max
         |         |
          \       /
       fusion_xpu_op1
*/
void LinkXPUOpMaxPass::ApplyImpl(ir::Graph* graph) const {
  Init(name_scope_, graph);
  for (auto op_type : op_types_) {
    ApplyImpl(graph, op_type);
  }
}

void LinkXPUOpMaxPass::ApplyImpl(ir::Graph* graph,
                                 const std::string& op_type) const {
  PADDLE_ENFORCE_NOT_NULL(
      graph,
      platform::errors::PreconditionNotMet("graph should not be null."));
  GraphPatternDetector gpd;
  patterns::FusionXPUOpPattern pattern(
      gpd.mutable_pattern(), name_scope_, op_type);

  int found_subgraph_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* graph) {
    VLOG(4) << "handle LinkXPUOpMaxPass fuse";
    GET_IR_NODE(fusion_op);
    GET_IR_NODE(out);
    GET_IR_NODE(out_max);
    for (auto next_op : out->outputs) {
      auto* next_op_desc = next_op->Op();
      if (op_types_.count(next_op_desc->Type()) == 0) continue;
      next_op_desc->SetInput("x_max", {out_max->Name()});
      IR_NODE_LINK_TO(out_max, next_op);
      found_subgraph_count++;
    }
  };

  gpd(graph, handler);
  AddStatis(found_subgraph_count);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(link_xpu_op_max_pass, paddle::framework::ir::LinkXPUOpMaxPass);

REGISTER_PASS_CAPABILITY(link_xpu_op_max_pass)
    .AddCombination(
        paddle::framework::compatible::OpVersionComparatorCombination().EQ(
            "fc_xpu", 0));
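Since the file registers the pass under the name link_xpu_op_max_pass, it can be fetched from the pass registry and run against a graph directly. A minimal sketch, assuming the standard ir::PassRegistry / Pass::Apply entry points — this is not code from the commit:

// Sketch: build an IR graph from a program and run the registered pass on it.
#include <memory>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/program_desc.h"

void RunLinkXPUOpMaxPass(const paddle::framework::ProgramDesc& program) {
  auto graph = std::make_unique<paddle::framework::ir::Graph>(program);
  // Look up the pass by the name given to REGISTER_PASS above.
  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
      "link_xpu_op_max_pass");
  // Rewrites the graph in place: each fc_xpu/conv2d_xpu consumer of a
  // fusion op's "out" gains that op's "out_max" as its "x_max" input.
  pass->Apply(graph.get());
}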
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc

@@ -224,27 +224,22 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToXpu(Argument *argument) {
   LOG(INFO) << "Sync params from CPU to XPU: "
             << "xpu_device_id - " << argument->xpu_device_id();
-  platform::Place place = platform::XPUPlace(argument->xpu_device_id());
+  platform::CPUPlace cpu_place;
+  platform::Place xpu_place = platform::XPUPlace(argument->xpu_device_id());
   auto *scope = argument->scope_ptr();
-  std::vector<std::string> all_vars = scope->LocalVarNames();
-  for (auto &var_name : all_vars) {
-    auto *var = scope->FindLocalVar(var_name);
-    PADDLE_ENFORCE_NOT_NULL(var,
-                            platform::errors::PreconditionNotMet(
-                                "The var should not be nullptr"));
-    if (var->IsType<phi::DenseTensor>()) {
-      auto *t = var->GetMutable<phi::DenseTensor>();
-      platform::CPUPlace cpu_place;
-      phi::DenseTensor temp_tensor;
-      temp_tensor.Resize(t->dims());
-      paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
-      t->clear();
-      paddle::framework::TensorCopySync(temp_tensor, place, t);
-    }
+  framework::ir::Graph &graph = argument->main_graph();
+  for (auto *node : graph.Nodes()) {
+    if (!node->IsVar() || !node->Var()->Persistable()) continue;
+    auto *var = scope->FindVar(node->Name());
+    if (!var->IsType<phi::DenseTensor>()) continue;
+    auto *tensor = var->GetMutable<phi::DenseTensor>();
+    phi::DenseTensor temp_tensor;
+    temp_tensor.Resize(tensor->dims());
+    paddle::framework::TensorCopySync(*tensor, cpu_place, &temp_tensor);
+    tensor->clear();
+    paddle::framework::TensorCopySync(temp_tensor, xpu_place, tensor);
   }
 }
 #endif
paddle/fluid/inference/api/paddle_pass_builder.cc

@@ -522,7 +522,7 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) {
       "multi_encoder_xpu_slice_fuse_pass",
       // "embedding_with_eltwise_add_xpu_fuse_pass",
       "fc_xpu_fuse_pass",
-      // "link_previous_out_max_xpu_pass",
+      "link_xpu_op_max_pass",
   });
   use_xpu_ = true;
 }
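For context, XpuPassStrategy is the pass list an inference config adopts once XPU is enabled, so fc_xpu_fuse_pass now always runs with link_xpu_op_max_pass right after it. A minimal usage sketch, assuming the usual paddle::AnalysisConfig API — nothing here is introduced by this commit:

// Sketch: an inference config that picks up XpuPassStrategy, including the
// newly enabled link_xpu_op_max_pass.
#include <string>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

paddle::AnalysisConfig MakeXpuConfig(const std::string& model_dir) {
  paddle::AnalysisConfig config;
  config.SetModel(model_dir);
  config.EnableXpu();  // selects XpuPassStrategy; XPU knobs left at defaults
  return config;
}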
paddle/phi/api/yaml/static_ops.yaml

 - op : fc_xpu
-  args : (Tensor x, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
-  output : Tensor
+  args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
+  output : Tensor(out), Tensor(out_max)
   infer_meta :
     func : FcXPUInferMeta
   kernel :
     func : fc_xpu
     data_type : x
-  optional : bias
+  optional : bias, x_max

 - op : generate_sequence_xpu
   args : (Tensor x, DataType dtype)
paddle/phi/infermeta/fusion.cc

@@ -22,6 +22,7 @@ limitations under the License. */
 namespace phi {

 void FcXPUInferMeta(const MetaTensor& x,
+                    const MetaTensor& x_max,
                     const MetaTensor& w,
                     const MetaTensor& w_max,
                     const MetaTensor& bias,

@@ -31,7 +32,8 @@ void FcXPUInferMeta(const MetaTensor& x,
                     float beta,
                     int act_type,
                     float act_alpha,
-                    MetaTensor* out) {
+                    MetaTensor* out,
+                    MetaTensor* out_max) {
   std::vector<int> out_shape(in_num_col_dims + 1);
   for (int i = 0; i < in_num_col_dims; i++) {
     out_shape[i] = x.dims()[i];

@@ -40,6 +42,9 @@ void FcXPUInferMeta(const MetaTensor& x,
   out->set_dims(DDim(out_shape.data(), out_shape.size()));
   out->set_dtype(x.dtype());
   out->set_layout(x.layout());
+  out_max->set_dims(w_max.dims());
+  out_max->set_dtype(x.dtype());
+  out_max->set_layout(x.layout());
 }

 void GenerateSequenceXPUInferMeta(const MetaTensor& x,
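To make the inferred shapes concrete: out keeps the leading in_num_col_dims dimensions of x and appends one trailing dimension for the weight rows, while out_max simply mirrors w_max. A standalone arithmetic sketch of that rule — the example dims are illustrative, not from the diff:

// Sketch: the out-shape rule of FcXPUInferMeta on example dims.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> x_dims = {8, 16, 32};  // x is flattened after dim 2
  int in_num_col_dims = 2;
  int n = 64;  // w.dims()[0], as in the fc_xpu kernel
  std::vector<int> out_shape(in_num_col_dims + 1);
  for (int i = 0; i < in_num_col_dims; i++) out_shape[i] = x_dims[i];
  out_shape[in_num_col_dims] = n;
  for (int d : out_shape) std::printf("%d ", d);  // prints: 8 16 64
  return 0;
}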
paddle/phi/infermeta/fusion.h

@@ -23,6 +23,7 @@ namespace phi {
 // NOTE: The InferMeta Functions in this file are arranged in alphabetic order.

 void FcXPUInferMeta(const MetaTensor& x,
+                    const MetaTensor& x_max,
                     const MetaTensor& w,
                     const MetaTensor& w_max,
                     const MetaTensor& bias,

@@ -32,7 +33,8 @@ void FcXPUInferMeta(const MetaTensor& x,
                     float beta,
                     int act_type,
                     float act_alpha,
-                    MetaTensor* out);
+                    MetaTensor* out,
+                    MetaTensor* out_max);

 void GenerateSequenceXPUInferMeta(const MetaTensor& x,
                                   DataType dtype,
paddle/phi/kernels/fusion/xpu/fc_xpu_kernel.cc

@@ -21,6 +21,7 @@ namespace fusion {
 template <typename T, typename Context>
 void FcXPUKernel(const Context& ctx,
                  const DenseTensor& x,
+                 const paddle::optional<DenseTensor>& x_max,
                  const DenseTensor& w,
                  const DenseTensor& w_max,
                  const paddle::optional<DenseTensor>& bias,

@@ -30,33 +31,35 @@ void FcXPUKernel(const Context& ctx,
                  float beta,
                  int act_type,
                  float act_alpha,
-                 DenseTensor* out) {
+                 DenseTensor* out,
+                 DenseTensor* out_max) {
   auto in_mat_dims = flatten_to_2d(x.dims(), in_num_col_dims);
   int m = in_mat_dims[0];
   int k = in_mat_dims[1];
   int n = w.dims()[0];
+  const float* x_max_data =
+      x_max.get_ptr() == nullptr ? nullptr : x_max.get_ptr()->data<float>();
   const float* bias_data =
-      bias.get_ptr() == nullptr ? nullptr : bias.get_ptr()->data<T>();
+      bias.get_ptr() == nullptr ? nullptr : bias.get_ptr()->data<float>();
   xpu::Activation_t act(static_cast<xpu::Activation_t::act_enum>(act_type));
   if (act_type == 5) {
     act.leaky_alpha = act_alpha;
   } else if (act_type == 15) {
     act.hard_sigmoid_slope = act_alpha;
   }
-  ctx.template Alloc<T>(out);
   int r = xpu::fc_fusion<T, int16_t, T, int16_t>(  // TX, TW, TY, TGEMM
       ctx.x_context(),                             // ctx
       x.data<T>(),                                 // x
       w.data<int16_t>(),                           // w
-      out->data<T>(),                              // y
+      ctx.template Alloc<T>(out),                  // y
       m,                                           // m
       n,                                           // n
       k,                                           // k
       transpose_x,                                 // x_trans
       true,                                        // w_trans
-      nullptr,                                     // x_maxptr
+      x_max_data,                                  // x_maxptr
       w_max.data<float>(),                         // w_maxptr
-      nullptr,                                     // y_maxptr
+      ctx.template Alloc<float>(out_max),          // y_maxptr
       transpose_x ? m : k,                         // ldx
       k,                                           // ldw
       n,                                           // ldy
python/paddle/fluid/tests/unittests/ir/inference/test_xpu_link_xpu_op_max_pass.py (new file, mode 100644)

# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import hypothesis.strategies as st
from auto_scan_test import PassAutoScanTest
from program_config import OpConfig, ProgramConfig, TensorConfig


class TestFcXPUFusePass(PassAutoScanTest):
    def sample_predictor_configs(self, program_config):
        config = self.create_inference_config(use_xpu=True)
        yield config, ["fc_xpu", "fc_xpu"], (1e-3, 1e-3)

    def sample_program_config(self, draw):
        # 1. matmul0
        matmul0_x_shape = draw(
            st.lists(
                st.integers(min_value=1, max_value=4), min_size=2, max_size=4
            )
        )
        matmul0_y_shape = draw(
            st.lists(
                st.integers(min_value=1, max_value=8), min_size=2, max_size=2
            )
        )
        matmul0_y_shape[0] = matmul0_x_shape[-1]
        # 2. add0
        add0_bias_shape = [matmul0_y_shape[1]]
        # 3. matmul1
        matmul1_y_shape = draw(
            st.lists(
                st.integers(min_value=1, max_value=8), min_size=2, max_size=2
            )
        )
        matmul1_y_shape[0] = matmul0_y_shape[-1]
        # 4. add1
        add1_bias_shape = [matmul1_y_shape[1]]

        matmul0_op = OpConfig(
            "matmul_v2",
            inputs={"X": ["matmul0_x"], "Y": ["matmul0_y"]},
            outputs={"Out": ["matmul0_out"]},
            trans_x=False,
            trans_y=False,
        )
        add0_op = OpConfig(
            "elementwise_add",
            inputs={"X": ["matmul0_out"], "Y": ["add0_bias"]},
            outputs={"Out": ["add0_out"]},
            axis=-1,
        )
        matmul1_op = OpConfig(
            "matmul_v2",
            inputs={"X": ["add0_out"], "Y": ["matmul1_y"]},
            outputs={"Out": ["matmul1_out"]},
            trans_x=False,
            trans_y=False,
        )
        add1_op = OpConfig(
            "elementwise_add",
            inputs={"X": ["matmul1_out"], "Y": ["add1_bias"]},
            outputs={"Out": ["add1_out"]},
            axis=-1,
        )
        ops = [matmul0_op, add0_op, matmul1_op, add1_op]

        program_config = ProgramConfig(
            ops=ops,
            weights={
                "matmul0_y": TensorConfig(shape=matmul0_y_shape),
                "add0_bias": TensorConfig(shape=add0_bias_shape),
                "matmul1_y": TensorConfig(shape=matmul1_y_shape),
                "add1_bias": TensorConfig(shape=add1_bias_shape),
            },
            inputs={
                "matmul0_x": TensorConfig(shape=matmul0_x_shape),
            },
            outputs=ops[-1].outputs["Out"],
        )
        return program_config

    def test(self):
        self.run_and_statis(
            quant=False,
            max_examples=25,
            passes=["fc_xpu_fuse_pass", "link_xpu_op_max_pass"],
        )


if __name__ == "__main__":
    unittest.main()