Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
4a267295
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4a267295
编写于
1月 29, 2021
作者:
D
dingsiyu
提交者:
GitHub
1月 29, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Merge ascend_optimizer and ascend_parser. (#30776)
Merge ascend_optimizer and ascend_parser.
上级
636fefd9
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
1602 addition
and
254 deletion
+1602
-254
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py
...tributed/fleet/meta_optimizers/ascend/ascend_optimizer.py
+2
-1
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py
...distributed/fleet/meta_optimizers/ascend/ascend_parser.py
+1600
-253
未找到文件。
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py
浏览文件 @
4a267295
...
@@ -233,6 +233,7 @@ class AscendOptimizer(Optimizer):
...
@@ -233,6 +233,7 @@ class AscendOptimizer(Optimizer):
self
.
parser
=
AscendIRParser
()
self
.
parser
=
AscendIRParser
()
input_varlist
=
self
.
_get_input_varlist
(
main_block
.
program
)
input_varlist
=
self
.
_get_input_varlist
(
main_block
.
program
)
startup_graph
,
main_graph
=
self
.
parser
.
parse_program
(
startup_graph
,
main_graph
=
self
.
parser
.
parse_program
(
startup_program
,
main_block
.
program
,
input_varlist
,
self
.
fetch_list
)
startup_program
,
main_block
.
program
,
input_varlist
,
self
.
fetch_list
)
...
...
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py
浏览文件 @
4a267295
...
@@ -17,7 +17,7 @@ import paddle.fluid.core as core
...
@@ -17,7 +17,7 @@ import paddle.fluid.core as core
import
numpy
as
np
import
numpy
as
np
from
paddle.distributed
import
fleet
from
paddle.distributed
import
fleet
registerd_op
=
{
registerd_op
=
{
## forwards
"elementwise_add"
:
"AddParser"
,
"elementwise_add"
:
"AddParser"
,
"matmul"
:
"MatMulParser"
,
"matmul"
:
"MatMulParser"
,
"mul"
:
"MulParser"
,
"mul"
:
"MulParser"
,
...
@@ -26,23 +26,74 @@ registerd_op = {
...
@@ -26,23 +26,74 @@ registerd_op = {
"shape"
:
"ShapeParser"
,
"shape"
:
"ShapeParser"
,
"fill_constant"
:
"FillConstantParser"
,
"fill_constant"
:
"FillConstantParser"
,
"reduce_sum"
:
"ReduceSumParser"
,
"reduce_sum"
:
"ReduceSumParser"
,
"reduce_sum_grad"
:
"ReduceSumGradParser"
,
"elementwise_mul"
:
"DotMulParser"
,
"matmul_grad"
:
"MatMulGradParser"
,
"elementwise_div"
:
"DotDivParser"
,
"mul_grad"
:
"MulGradParser"
,
"elementwise_pow"
:
"DotPowParser"
,
"elementwise_max"
:
"MaxParser"
,
"elementwise_min"
:
"MinParser"
,
"elementwise_sub"
:
"DotSubParser"
,
"pow"
:
"PowParser"
,
"gelu"
:
"GeluParser"
,
"sqrt"
:
"SqrtParser"
,
"log"
:
"LogParser"
,
"sum"
:
"SumParser"
,
"logical_not"
:
"LogicalNotParser"
,
"gather"
:
"GatherParser"
,
"scatter"
:
"ScatterParser"
,
"cast"
:
"CastParser"
,
"tanh"
:
"TanhParser"
,
"stack"
:
"StackParser"
,
"square"
:
"SquareParser"
,
"unsqueeze2"
:
"UnSqueezeParser"
,
"assign"
:
"AssignParser"
,
"softmax"
:
"SoftMaxParser"
,
"reshape2"
:
"ReshapeParser"
,
"reshape2"
:
"ReshapeParser"
,
"transpose2"
:
"TransposeParser"
,
"layer_norm"
:
"LayerNormParser"
,
"less_than"
:
"LessParser"
,
"mean"
:
"MeanParser"
,
"scale"
:
"ScaleParser"
,
"scale"
:
"ScaleParser"
,
"relu_grad"
:
"ReluGradParser"
,
"slice"
:
"SliceParser"
,
"softmax_with_cross_entropy_grad"
:
"SoftmaxWithCrossEntropyGradParser"
,
"top_k"
:
"TopkParser"
,
"accuracy"
:
"AccuracyParser"
,
#"increment": "IncrementParser",
"lookup_table"
:
"LookupTableParser"
,
"truncated_gaussian_random"
:
"TruncatedNormalParser"
,
"truncated_gaussian_random"
:
"TruncatedNormalParser"
,
"sgd"
:
"SGDParser"
,
"c_allgather"
:
"AllGatherParser"
,
"c_allgather"
:
"AllGatherParser"
,
"c_allreduce_sum"
:
"AllReduceSumParser"
,
"c_allreduce_sum"
:
"AllReduceSumParser"
,
"c_allreduce_max"
:
"AllReduceMaxParser"
,
"c_allreduce_max"
:
"AllReduceMaxParser"
,
"c_broadcast"
:
"BroadcastParser"
,
"c_broadcast"
:
"BroadcastParser"
,
"c_reduce_scatter"
:
"ReduceScatterParser"
,
"c_reduce_scatter"
:
"ReduceScatterParser"
,
"c_send"
:
"SendParser"
,
"c_send"
:
"SendParser"
,
"c_receive"
:
"ReceiveParser"
"c_receive"
:
"ReceiveParser"
,
}
## backwords
"matmul_grad"
:
"MatMulGradParser"
,
"mul_grad"
:
"MulGradParser"
,
"relu_grad"
:
"ReluGradParser"
,
"reduce_sum_grad"
:
"ReduceSumGradParser"
,
"softmax_with_cross_entropy_grad"
:
"SoftmaxWithCrossEntropyGradParser"
,
"tanh_grad"
:
"TanhGradParser"
,
"log_grad"
:
"LogGradParser"
,
"pow_grad"
:
"PowGradParser"
,
"sqrt_grad"
:
"SqrtGradParser"
,
"gelu_grad"
:
"GeluGradParser"
,
"mean_grad"
:
"MeanGradParser"
,
'lookup_table_grad'
:
"LookUpTableGradParser"
,
"elementwise_mul_grad"
:
"DotMulGradParser"
,
"elementwise_add_grad"
:
"DotAddGradParser"
,
"elementwise_div_grad"
:
"DotDivGradParser"
,
"softmax_grad"
:
"SoftmaxGradParser"
,
"slice_grad"
:
"SliceGradParser"
,
"reshape2_grad"
:
"ReshapeGradParser"
,
"gather_grad"
:
"GatherGradParser"
,
"transpose2_grad"
:
"TransposeGradParser"
,
"layer_norm_grad"
:
"LayerNormGradParser"
,
## opt
"sgd"
:
"SGDParser"
,
#"adam": "AdamParser",
}
global_cnt
=
-
1
global_cnt
=
-
1
global_input_cnt
=
-
1
global_input_cnt
=
-
1
...
@@ -67,6 +118,7 @@ class AscendHelper(object):
...
@@ -67,6 +118,7 @@ class AscendHelper(object):
5
:
"float32"
,
5
:
"float32"
,
6
:
"float64"
6
:
"float64"
}
}
self
.
dtype2paddle_inv_map
=
{
"VarType.FP32"
:
0
,
"VarType.FP16"
:
1
}
def
dtype2ge
(
self
,
dtype
):
def
dtype2ge
(
self
,
dtype
):
assert
dtype
in
self
.
dtype2ge_map
,
"dtype[%d] is not supported %d"
%
(
assert
dtype
in
self
.
dtype2ge_map
,
"dtype[%d] is not supported %d"
%
(
...
@@ -159,7 +211,65 @@ class AscendParserBase(object):
...
@@ -159,7 +211,65 @@ class AscendParserBase(object):
tensor
.
set_data
(
data_8
)
tensor
.
set_data
(
data_8
)
return
tensor
return
tensor
def
_get_ge_tensor
(
self
,
shape
,
dtype
,
value_list
):
tensor_desc
=
core
.
GETensorDesc
(
core
.
GEShape
(
shape
),
core
.
GEFormat
.
FORMAT_ND
,
self
.
ascend_helper
.
dtype2ge
(
dtype
))
tensor
=
core
.
GETensor
(
tensor_desc
)
data
=
np
.
array
(
value_list
).
reshape
(
shape
).
astype
(
self
.
ascend_helper
.
dtype2np
(
dtype
))
buf
=
data
.
tobytes
()
data_8
=
np
.
frombuffer
(
buf
,
dtype
=
np
.
uint8
)
tensor
.
set_data
(
data_8
)
tensor_const
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"value"
,
tensor
)
return
tensor_const
def
_get_variable
(
self
,
shape
,
dtype
,
tensor
):
if
dtype
==
"int32"
:
type
=
core
.
GEDataType
.
DT_INT32
elif
dtype
==
"float32"
:
type
=
core
.
GEDataType
.
DT_FLOAT
var
=
core
.
GEOperatorFactory
.
create_operator
(
"variable"
+
self
.
_accumulated_op_id
(),
"Variable"
)
var
.
update_output_desc
(
"y"
,
core
.
GETensorDesc
(
core
.
GEShape
(
shape
),
core
.
GEFormat
.
FORMAT_ND
,
type
))
assign
=
core
.
GEOperatorFactory
.
create_operator
(
"assign"
+
self
.
_accumulated_op_id
(),
"Assign"
).
set_input
(
"value"
,
tensor
).
set_input
(
"ref"
,
var
)
return
assign
def
_create_shape_tensor
(
self
):
tensor_desc
=
core
.
GETensorDesc
(
core
.
GEShape
([
2
]),
core
.
GEFormat
.
FORMAT_ND
,
core
.
GEDataType
.
DT_INT32
)
tensor
=
core
.
GETensor
(
tensor_desc
)
data
=
np
.
ones
((
2
)).
astype
(
"int32"
).
reshape
([
2
])
data
[
0
]
=
64
buf
=
data
.
tobytes
()
data_8
=
np
.
frombuffer
(
buf
,
dtype
=
np
.
uint8
)
tensor
.
set_data
(
data_8
)
return
tensor
def
_get_GEtensor_shape
(
self
,
tensor
):
tensor_shape
=
core
.
GEOperatorFactory
.
create_operator
(
"shape"
+
self
.
_accumulated_op_id
(),
"Shape"
).
set_input
(
"x"
,
tensor
)
tensor_shape
=
core
.
GEOperatorFactory
.
create_operator
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"x"
,
tensor_shape
).
set_attr_int32
(
"dst_type"
,
0
)
return
tensor_shape
### elementwise_op
class
AddParser
(
AscendParserBase
):
class
AddParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
AddParser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
AddParser
,
self
).
__init__
(
graph
,
var2geop
)
...
@@ -169,109 +279,276 @@ class AddParser(AscendParserBase):
...
@@ -169,109 +279,276 @@ class AddParser(AscendParserBase):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
add
=
core
.
GEOperatorFactory
.
create_operator
(
add
=
core
.
GEOperatorFactory
.
create_operator
(
"add"
+
self
.
_accumulated_op_id
(),
"Add"
).
set_input
(
"add"
+
self
.
_accumulated_op_id
(),
"x1"
,
x
).
set_input
(
"x2"
,
y
)
"Add"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
return
[
add
],
[[
0
]]
return
[
add
],
[[
0
]]
class
ReduceSum
Parser
(
AscendParserBase
):
class
DotSub
Parser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
ReduceSum
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
DotSub
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
reduce_sum
"
self
.
parser_name
=
"
elementwise_sub
"
def
_apply
(
self
):
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
axes
=
self
.
op
.
attr
(
"dim"
)
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
keep_dims
=
self
.
op
.
attr
(
"keep_dim"
)
sub
=
core
.
GEOperatorFactory
.
create_operator
(
reduce_sum
=
core
.
GEOperatorFactory
.
create_operator
(
"sub"
+
self
.
_accumulated_op_id
(),
"reduce_sum"
+
self
.
_accumulated_op_id
(),
"ReduceSumD"
).
set_input
(
"Sub"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
"x"
,
x
,
0
).
set_attr_vec_int32
(
"axes"
,
axes
).
set_attr_bool
(
return
[
sub
],
[[
0
]]
"keep_dims"
,
keep_dims
)
return
[
reduce_sum
],
[[
0
]]
class
ReduceSumGrad
Parser
(
AscendParserBase
):
class
DotMul
Parser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
ReduceSumGrad
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
DotMul
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
reduce_sum_grad
"
self
.
parser_name
=
"
elementwise_mul
"
def
_apply
(
self
):
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
input
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
mul
=
core
.
GEOperatorFactory
.
create_operator
(
"dotmul"
+
self
.
_accumulated_op_id
(),
"Mul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
return
[
mul
],
[[
0
]]
shape_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"shape"
+
self
.
_accumulated_op_id
(),
"Shape"
).
set_input
(
"x"
,
input
,
0
)
axis_const
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"value"
,
self
.
_create_ge_tensor
([
1
],
2
,
-
1
))
self
.
_mark_as_input
(
axis_const
)
broadcast
=
core
.
GEOperatorFactory
.
create_operator
(
class
DotDivParser
(
AscendParserBase
):
"broadcast_to_d"
+
self
.
_accumulated_op_id
(),
def
__init__
(
self
,
graph
,
var2geop
):
"BroadcastTo"
).
set_input
(
"x"
,
x
).
set_input
(
"shape"
,
shape_tensor
)
super
(
DotDivParser
,
self
).
__init__
(
graph
,
var2geop
)
# unsqueeze cannot get right result, but ExpandDims seems have the same functionality.
self
.
parser_name
=
"elementwise_div"
reduce_sum_grad
=
core
.
GEOperatorFactory
.
create_operator
(
"expand"
+
self
.
_accumulated_op_id
(),
"ExpandDims"
).
set_input
(
"x"
,
broadcast
).
set_input
(
"axis"
,
axis_const
)
return
[
shape_tensor
,
axis_const
,
broadcast
,
reduce_sum_grad
],
[[
3
]]
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
div
=
core
.
GEOperatorFactory
.
create_operator
(
"dotdiv"
+
self
.
_accumulated_op_id
(),
"Div"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
return
[
div
],
[[
0
]]
class
MatMulParser
(
AscendParserBase
):
class
DotPowParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
MatMul
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
DotPow
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
matmul
"
self
.
parser_name
=
"
elementwise_pow
"
def
_apply
(
self
):
def
_apply
(
self
):
x
1
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x2
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
pow
=
core
.
GEOperatorFactory
.
create_operator
(
"
matmul"
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"
dotpow"
+
self
.
_accumulated_op_id
(),
"x1"
,
x1
).
set_input
(
"x2"
,
x2
)
"Pow"
).
set_input
(
"x1"
,
x1
).
set_input
(
"x2"
,
y
)
return
[
matmul
],
[[
0
]]
return
[
pow
],
[[
0
]]
class
MatMulGrad
Parser
(
AscendParserBase
):
class
Less
Parser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
MatMulGrad
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
Less
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
matmul_grad
"
self
.
parser_name
=
"
less_than
"
def
_apply
(
self
):
def
_apply
(
self
):
out_grad
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
2
])
less_than
=
core
.
GEOperatorFactory
.
create_operator
(
"less_than"
+
self
.
_accumulated_op_id
(),
"Less"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
return
[
less_than
],
[[
0
]]
x_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
class
MaxParser
(
AscendParserBase
):
"x1"
,
out_grad
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
def
__init__
(
self
,
graph
,
var2geop
):
"transpose_x1"
,
False
).
set_attr_bool
(
"transpose_x2"
,
True
)
super
(
MaxParser
,
self
).
__init__
(
graph
,
var2geop
)
y_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
=
"elementwise_max"
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
out_grad
).
set_attr_bool
(
def
_apply
(
self
):
"transpose_x1"
,
True
).
set_attr_bool
(
"transpose_x2"
,
False
)
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
return
[
x_grad
,
y_grad
],
[[
0
],
[
1
]]
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
max_out
=
core
.
GEOperatorFactory
.
create_operator
(
"max"
+
self
.
_accumulated_op_id
(),
"Maximum"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
return
[
max_out
],
[[
0
]]
class
M
ulGrad
Parser
(
AscendParserBase
):
class
M
in
Parser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
M
ulGrad
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
M
in
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
mul_grad
"
self
.
parser_name
=
"
elementwise_min
"
def
_apply
(
self
):
def
_apply
(
self
):
out_grad
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
2
])
min_out
=
core
.
GEOperatorFactory
.
create_operator
(
"min"
+
self
.
_accumulated_op_id
(),
"Minimum"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
return
[
min_out
],
[[
0
]]
x_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
"transpose_x1"
,
False
).
set_attr_bool
(
"transpose_x2"
,
True
)
y_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
out_grad
).
set_attr_bool
(
"transpose_x1"
,
True
).
set_attr_bool
(
"transpose_x2"
,
False
)
return
[
x_grad
,
y_grad
],
[[
0
],
[
1
]]
## cal
class
LogParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
LogParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"log"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
log
=
core
.
GEOperatorFactory
.
create_operator
(
"log"
+
self
.
_accumulated_op_id
(),
"Log"
).
set_input
(
"x"
,
x
)
return
[
log
],
[[
0
]]
class
SqrtParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
SqrtParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"sqrt"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
sqrt
=
core
.
GEOperatorFactory
.
create_operator
(
"sqrt"
+
self
.
_accumulated_op_id
(),
"Sqrt"
).
set_input
(
"x"
,
x
)
return
[
sqrt
],
[[
0
]]
class
PowParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
PowParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"pow"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
factor
=
self
.
op
.
attr
(
"factor"
)
pow_value
=
core
.
GEOperatorFactory
.
create_operator
(
"pow"
+
self
.
_accumulated_op_id
(),
"Power"
).
set_input
(
"x"
,
x
).
set_attr_float
(
"power"
,
factor
).
set_attr_float
(
"scale"
,
1.0
).
set_attr_float
(
"shift"
,
0.0
)
return
[
pow_value
],
[[
0
]]
class
SquareParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
SquareParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"square"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
square
=
core
.
GEOperatorFactory
.
create_operator
(
"square"
+
self
.
_accumulated_op_id
(),
"Square"
).
set_input
(
"x"
,
x
)
return
[
square
],
[[
0
]]
class
SumParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
SumParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"sum"
def
_apply
(
self
):
len_list
=
len
(
self
.
op
.
input_arg_names
)
if
len_list
<
2
:
assert
False
,
"the size of input list must large or equal 2"
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
sum
=
core
.
GEOperatorFactory
.
create_operator
(
"sum"
+
self
.
_accumulated_op_id
(),
"Add"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
for
i
in
range
(
2
,
len_list
):
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
i
])
sum
=
core
.
GEOperatorFactory
.
create_operator
(
"sum"
+
self
.
_accumulated_op_id
(),
"Add"
).
set_input
(
"x1"
,
sum
).
set_input
(
"x2"
,
y
)
return
[
sum
],
[[
0
]]
class
LogicalNotParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
LogicalNotParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"logical_not"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
logical_not
=
core
.
GEOperatorFactory
.
create_operator
(
"logical_not"
+
self
.
_accumulated_op_id
(),
"LogicalNot"
).
set_input
(
"x"
,
x
)
return
[
logical_not
],
[[
0
]]
class
MeanParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
MeanParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"mean"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
mean
=
core
.
GEOperatorFactory
.
create_operator
(
"mean"
+
self
.
_accumulated_op_id
(),
"ReduceMeanD"
).
set_input
(
"x"
,
x
).
set_attr_bool
(
"keep_dims"
,
False
).
set_attr_vec_int32
(
"axes"
,
[])
return
[
mean
],
[[
0
]]
class
ReduceSumParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
ReduceSumParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"reduce_sum"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
axes
=
self
.
op
.
attr
(
"dim"
)
keep_dims
=
self
.
op
.
attr
(
"keep_dim"
)
reduce_all
=
self
.
op
.
attr
(
"reduce_all"
)
x_shape
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
0
]).
shape
if
reduce_all
:
axes
=
list
(
range
(
len
(
x_shape
)))
reduce_sum
=
core
.
GEOperatorFactory
.
create_operator
(
"reduce_sum"
+
self
.
_accumulated_op_id
(),
"ReduceSumD"
).
set_input
(
"x"
,
x
,
0
).
set_attr_vec_int32
(
"axes"
,
axes
).
set_attr_bool
(
"keep_dims"
,
keep_dims
)
return
[
reduce_sum
],
[[
0
]]
#class IncrementParser(AscendParserBase):
# def __init__(self, graph, var2geop):
# super(IncrementParser, self).__init__(graph, var2geop)
# self.parser_name = "increment"
#
# def _apply(self):
# x = self._get_ge_input(self.op.input_arg_names[0])
# step = self.op.attr("step") #self._get_ge_input(self.op.input_arg_names[1])
# print("step: ", step)
#
# increment = core.GEOperatorFactory.create_operator("adds" + self._accumulated_op_id(), "Adds").set_input("x", x).set_attr_float("value", step) #set_input("x2", bias)
#
# return [increment]
## matrix cal
class
MatMulParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
MatMulParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"matmul"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
transpose_x
=
self
.
op
.
attr
(
"transpose_X"
)
transpose_y
=
self
.
op
.
attr
(
"transpose_Y"
)
x1_shape
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
0
]).
shape
x2_shape
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
1
]).
shape
if
len
(
x1_shape
)
>
2
:
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
"matmul"
+
self
.
_accumulated_op_id
(),
"BatchMatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
"adj_x1"
,
transpose_x
).
set_attr_bool
(
"adj_x2"
,
transpose_y
)
elif
len
(
x1_shape
)
==
2
:
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
"matmul"
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
"transpose_x1"
,
transpose_x
).
set_attr_bool
(
"transpose_x2"
,
transpose_y
)
else
:
assert
False
,
"not support"
return
[
matmul
],
[[
0
]]
class
MulParser
(
AscendParserBase
):
class
MulParser
(
AscendParserBase
):
...
@@ -282,13 +559,105 @@ class MulParser(AscendParserBase):
...
@@ -282,13 +559,105 @@ class MulParser(AscendParserBase):
def
_apply
(
self
):
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
x_num_col_dims
=
self
.
op
.
attr
(
"x_num_col_dims"
)
y_num_col_dims
=
self
.
op
.
attr
(
"y_num_col_dims"
)
shape_x1
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
0
]).
shape
shape_x2
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
1
]).
shape
if
x_num_col_dims
==
1
and
y_num_col_dims
==
1
:
if
len
(
shape_x1
)
==
2
and
len
(
shape_x2
)
==
2
:
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
"mul"
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
elif
len
(
shape_x1
)
==
3
and
len
(
shape_x2
)
==
2
:
flatten_x1
=
core
.
GEOperatorFactory
.
create_operator
(
"flatten"
+
self
.
_accumulated_op_id
(),
"Flatten"
).
set_input
(
"x"
,
x
)
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
"mul"
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
flatten_x1
,
0
).
set_input
(
"x2"
,
y
,
0
)
else
:
assert
False
,
"not support"
else
:
if
len
(
shape_x1
)
==
3
and
len
(
shape_x2
)
==
2
:
assert
x_num_col_dims
==
2
,
"only support 2"
flatten_x1
=
core
.
GEOperatorFactory
.
create_operator
(
"flatten"
+
self
.
_accumulated_op_id
(),
"FlattenV2"
).
set_input
(
"x"
,
x
).
set_attr_int32
(
"axis"
,
0
).
set_attr_int32
(
"end_axis"
,
1
)
matmul_m
=
core
.
GEOperatorFactory
.
create_operator
(
"mul"
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"mul"
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
y
)
"x1"
,
flatten_x1
,
0
).
set_input
(
"x2"
,
y
,
0
)
matmul_transpose
=
core
.
GEOperatorFactory
.
create_operator
(
"transpose"
+
self
.
_accumulated_op_id
(),
"TransposeD"
).
set_input
(
"x"
,
matmul_m
).
set_attr_vec_int32
(
"perm"
,
[
1
,
0
])
tensor
=
self
.
_create_ge_tensor
(
[
3
],
2
,
[
shape_x2
[
1
],
shape_x1
[
0
],
shape_x1
[
1
]])
const_shape
=
core
.
GEOperatorFactory
.
create_operator
(
"shape"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"value"
,
tensor
)
reshape_matmul
=
core
.
GEOperatorFactory
.
create_operator
(
"reshape"
+
self
.
_accumulated_op_id
(),
"Reshape"
).
set_input
(
"x"
,
matmul_transpose
).
set_input
(
"shape"
,
const_shape
).
set_attr_int32
(
"axis"
,
0
)
matmul
=
core
.
GEOperatorFactory
.
create_operator
(
"transpose"
+
self
.
_accumulated_op_id
(),
"TransposeD"
).
set_input
(
"x"
,
reshape_matmul
).
set_attr_vec_int32
(
"perm"
,
[
1
,
2
,
0
])
else
:
assert
False
,
"not support"
return
[
matmul
],
[[
0
]]
return
[
matmul
],
[[
0
]]
class
LayerNormParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
LayerNormParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"layer_norm"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
2
])
scale
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
bias
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
epsilon
=
self
.
op
.
attr
(
"epsilon"
)
begin_norm_axis
=
self
.
op
.
attr
(
"begin_norm_axis"
)
x_dtype
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
2
]).
dtype
shape_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"shape"
+
self
.
_accumulated_op_id
(),
"Shape"
).
set_input
(
"x"
,
x
)
scale_expand
=
core
.
GEOperatorFactory
.
create_operator
(
"broadcast_to_d"
+
self
.
_accumulated_op_id
(),
"BroadcastTo"
).
set_input
(
"x"
,
scale
).
set_input
(
"shape"
,
shape_tensor
)
bias_expand
=
core
.
GEOperatorFactory
.
create_operator
(
"broadcast_to_d"
+
self
.
_accumulated_op_id
(),
"BroadcastTo"
).
set_input
(
"x"
,
bias
).
set_input
(
"shape"
,
shape_tensor
)
layer_norm
=
core
.
GEOperatorFactory
.
create_operator
(
"layer_norm"
+
self
.
_accumulated_op_id
(),
"LayerNorm"
).
set_input
(
"x"
,
x
).
set_input
(
"gamma"
,
scale_expand
).
set_input
(
"beta"
,
bias_expand
).
set_attr_int32
(
"begin_norm_axis"
,
begin_norm_axis
).
set_attr_int32
(
"begin_params_axis"
,
begin_norm_axis
).
set_attr_float
(
"epsilon"
,
epsilon
)
cast_dtype
=
0
if
self
.
ascend_helper
.
dtype2paddle_inv_map
[
str
(
x_dtype
)]
==
0
else
1
y
=
core
.
GEOperatorFactory
.
create_operator
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"x"
,
layer_norm
,
0
).
set_attr_int32
(
"dst_type"
,
cast_dtype
)
mean
=
core
.
GEOperatorFactory
.
create_operator
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"x"
,
layer_norm
,
1
).
set_attr_int32
(
"dst_type"
,
cast_dtype
)
variance
=
core
.
GEOperatorFactory
.
create_operator
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"x"
,
layer_norm
,
2
).
set_attr_int32
(
"dst_type"
,
cast_dtype
)
return
[
y
,
mean
,
variance
],
[[
1
],
[
2
],
[
0
]]
## activate function
class
ReluParser
(
AscendParserBase
):
class
ReluParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
ReluParser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
ReluParser
,
self
).
__init__
(
graph
,
var2geop
)
...
@@ -301,20 +670,31 @@ class ReluParser(AscendParserBase):
...
@@ -301,20 +670,31 @@ class ReluParser(AscendParserBase):
return
[
relu
],
[[
0
]]
return
[
relu
],
[[
0
]]
class
ReluGrad
Parser
(
AscendParserBase
):
class
Gelu
Parser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
ReluGrad
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
Gelu
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
relu_grad
"
self
.
parser_name
=
"
gelu
"
def
_apply
(
self
):
def
_apply
(
self
):
out
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
out_grad
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
gelu
=
core
.
GEOperatorFactory
.
create_operator
(
relu_grad
=
core
.
GEOperatorFactory
.
create_operator
(
"gelu"
+
self
.
_accumulated_op_id
(),
"Gelu"
).
set_input
(
"x"
,
x
)
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"ReluGrad"
).
set_input
(
return
[
gelu
],
[[
0
]]
"gradients"
,
out_grad
).
set_input
(
"features"
,
out
)
return
[
relu_grad
],
[[
0
]]
class
TanhParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
TanhParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"tanh"
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
tanh
=
core
.
GEOperatorFactory
.
create_operator
(
"tanh"
+
self
.
_accumulated_op_id
(),
"Tanh"
).
set_input
(
"x"
,
x
)
return
[
tanh
],
[[
0
]]
## loss function
class
SoftmaxWithCrossEntropyParser
(
AscendParserBase
):
class
SoftmaxWithCrossEntropyParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
SoftmaxWithCrossEntropyParser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
SoftmaxWithCrossEntropyParser
,
self
).
__init__
(
graph
,
var2geop
)
...
@@ -323,80 +703,61 @@ class SoftmaxWithCrossEntropyParser(AscendParserBase):
...
@@ -323,80 +703,61 @@ class SoftmaxWithCrossEntropyParser(AscendParserBase):
def
_apply
(
self
):
def
_apply
(
self
):
label
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
label
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
logits
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
logits
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
cls_num
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
1
]).
shape
[
1
]
cls_num
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
1
]).
shape
[
1
]
softmax
=
core
.
GEOperatorFactory
.
create_operator
(
softmax
=
core
.
GEOperatorFactory
.
create_operator
(
"softmax"
+
self
.
_accumulated_op_id
(),
"SoftmaxV2"
).
set_input
(
"softmax"
+
self
.
_accumulated_op_id
(),
"x"
,
logits
)
"SoftmaxV2"
).
set_input
(
"x"
,
logits
)
label
=
core
.
GEOperatorFactory
.
create_operator
(
label
=
core
.
GEOperatorFactory
.
create_operator
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"x"
,
label
).
set_attr_int32
(
"dst_type"
,
3
)
"x"
,
label
).
set_attr_int32
(
"dst_type"
,
3
)
tensoron
=
self
.
_create_ge_tensor
([
1
],
5
,
1
)
tensoron
=
self
.
_create_ge_tensor
([
1
],
5
,
1
)
on_const
=
core
.
GEOperatorFactory
.
create_operator
(
on
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensoron
)
"Const"
).
set_attr_tensor
(
"value"
,
tensoron
)
self
.
_mark_as_input
(
on_const
)
tensoroff
=
self
.
_create_ge_tensor
([
1
],
5
,
0
)
tensoroff
=
self
.
_create_ge_tensor
([
1
],
5
,
0
)
off_const
=
core
.
GEOperatorFactory
.
create_operator
(
off
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensoroff
)
"Const"
).
set_attr_tensor
(
"value"
,
tensoroff
)
self
.
_mark_as_input
(
off_const
)
self
.
_mark_as_input
(
on
)
self
.
_mark_as_input
(
off
)
onehot
=
core
.
GEOperatorFactory
.
create_operator
(
onehot
=
core
.
GEOperatorFactory
.
create_operator
(
"onehot"
+
self
.
_accumulated_op_id
(),
"OneHotD"
).
set_input
(
"onehot"
+
self
.
_accumulated_op_id
(),
"OneHotD"
).
set_input
(
"x"
,
label
).
set_input
(
"on_value"
,
on
_const
).
set_input
(
"x"
,
label
).
set_input
(
"on_value"
,
on
).
set_input
(
"off_value"
,
off
_const
).
set_attr_int32
(
"depth"
,
cls_num
)
"off_value"
,
off
).
set_attr_int32
(
"depth"
,
cls_num
)
squeeze
=
core
.
GEOperatorFactory
.
create_operator
(
squeeze
=
core
.
GEOperatorFactory
.
create_operator
(
"mul"
+
self
.
_accumulated_op_id
(),
"Squeeze"
).
set_input
(
"x"
,
onehot
)
"mul"
+
self
.
_accumulated_op_id
(),
"Squeeze"
).
set_input
(
"x"
,
onehot
)
loss
=
core
.
GEOperatorFactory
.
create_operator
(
loss_all
=
core
.
GEOperatorFactory
.
create_operator
(
"loss"
+
self
.
_accumulated_op_id
(),
"loss"
+
self
.
_accumulated_op_id
(),
"SoftmaxCrossEntropyWithLogits"
).
set_input
(
"SoftmaxCrossEntropyWithLogits"
).
set_input
(
"features"
,
logits
).
set_input
(
"labels"
,
squeeze
)
"features"
,
logits
).
set_input
(
"labels"
,
squeeze
)
loss
=
core
.
GEOperatorFactory
.
create_operator
(
return
[
label
,
softmax
,
on_const
,
off_const
,
onehot
,
squeeze
,
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
loss
],
[[
6
],
[
1
]]
"x"
,
loss_all
,
0
).
set_attr_int32
(
"dst_type"
,
0
)
loss_expand
=
core
.
GEOperatorFactory
.
create_operator
(
"unsqueeze"
+
self
.
_accumulated_op_id
(),
"Unsqueeze"
).
set_input
(
"x"
,
loss
).
set_attr_vec_int32
(
"axes"
,
[
1
])
return
[
label
,
softmax
,
loss_expand
],
[[
2
],
[
1
]]
class
Soft
maxWithCrossEntropyGrad
Parser
(
AscendParserBase
):
class
Soft
Max
Parser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
Soft
maxWithCrossEntropyGrad
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
Soft
Max
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"softmax
_with_cross_entropy_grad
"
self
.
parser_name
=
"softmax"
def
_apply
(
self
):
def
_apply
(
self
):
label
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
logits
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
loss_grad
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
axes
=
self
.
op
.
attr
(
"axis"
)
softmax
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
2
])
cls_num
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
2
]).
shape
[
1
]
tensoron
=
self
.
_create_ge_tensor
([
1
],
5
,
1
)
softmax
=
core
.
GEOperatorFactory
.
create_operator
(
on_const
=
core
.
GEOperatorFactory
.
create_operator
(
"softmax"
+
self
.
_accumulated_op_id
(),
"SoftmaxV2"
).
set_input
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"x"
,
logits
).
set_attr_vec_int32
(
"axes"
,
[
axes
])
"value"
,
tensoron
)
return
[
softmax
],
[[
0
]]
self
.
_mark_as_input
(
on_const
)
tensoroff
=
self
.
_create_ge_tensor
([
1
],
5
,
0
)
off_const
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"value"
,
tensoroff
)
self
.
_mark_as_input
(
off_const
)
label
=
core
.
GEOperatorFactory
.
create_operator
(
"cast"
+
self
.
_accumulated_op_id
(),
"Cast"
).
set_input
(
"x"
,
label
).
set_attr_int32
(
"dst_type"
,
3
)
onehot
=
core
.
GEOperatorFactory
.
create_operator
(
"onehot"
+
self
.
_accumulated_op_id
(),
"OneHotD"
).
set_input
(
"x"
,
label
).
set_input
(
"on_value"
,
on_const
).
set_input
(
"off_value"
,
off_const
).
set_attr_int32
(
"depth"
,
cls_num
)
# OneHotD adds an extra dimension, so Squeeze must be called afterwards
squeeze
=
core
.
GEOperatorFactory
.
create_operator
(
"mul"
+
self
.
_accumulated_op_id
(),
"Squeeze"
).
set_input
(
"x"
,
onehot
)
sub
=
core
.
GEOperatorFactory
.
create_operator
(
"sub"
+
self
.
_accumulated_op_id
(),
"Sub"
).
set_input
(
"x1"
,
softmax
).
set_input
(
"x2"
,
squeeze
)
grad
=
core
.
GEOperatorFactory
.
create_operator
(
"mul"
+
self
.
_accumulated_op_id
(),
"Mul"
).
set_input
(
"x1"
,
loss_grad
).
set_input
(
"x2"
,
sub
)
return
[
on_const
,
off_const
,
label
,
onehot
,
squeeze
,
sub
,
grad
],
[[
-
1
]]
## general
class
ShapeParser
(
AscendParserBase
):
class
ShapeParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
ShapeParser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
ShapeParser
,
self
).
__init__
(
graph
,
var2geop
)
...
@@ -418,16 +779,15 @@ class FillConstantParser(AscendParserBase):
...
@@ -418,16 +779,15 @@ class FillConstantParser(AscendParserBase):
shape
=
self
.
op
.
attr
(
"shape"
)
shape
=
self
.
op
.
attr
(
"shape"
)
dtype
=
self
.
op
.
attr
(
"dtype"
)
dtype
=
self
.
op
.
attr
(
"dtype"
)
value
=
self
.
op
.
attr
(
"value"
)
value
=
self
.
op
.
attr
(
"value"
)
print
(
"shape: "
,
shape
)
print
(
"dtype: "
,
dtype
)
print
(
"value: "
,
value
)
tensor
=
self
.
_create_ge_tensor
(
shape
,
dtype
,
value
)
tensor
=
self
.
_create_ge_tensor
(
shape
,
dtype
,
value
)
const
=
core
.
GEOperatorFactory
.
create_operator
(
const
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensor
)
"Const"
).
set_attr_tensor
(
"value"
,
tensor
)
self
.
_mark_as_input
(
const
)
self
.
_mark_as_input
(
const
)
if
self
.
op
.
block
.
var
(
self
.
op
.
output
(
'Out'
)[
0
]).
persistable
:
if
self
.
op
.
block
.
var
(
self
.
op
.
output
(
'Out'
)[
0
]).
persistable
:
print
(
"%s fill_constant"
%
(
self
.
op
.
output
(
'Out'
)[
0
]))
print
(
"%s is Persistable in fill_constant"
%
(
self
.
op
.
output
(
'Out'
)[
0
]))
var
=
core
.
GEOperatorFactory
.
create_operator
(
var
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
op
.
output
(
'Out'
)[
0
],
"Variable"
)
self
.
op
.
output
(
'Out'
)[
0
],
"Variable"
)
var
.
update_output_desc
(
"y"
,
var
.
update_output_desc
(
"y"
,
...
@@ -441,27 +801,12 @@ class FillConstantParser(AscendParserBase):
...
@@ -441,27 +801,12 @@ class FillConstantParser(AscendParserBase):
return
[
const
],
[[
0
]]
return
[
const
],
[[
0
]]
else
:
else
:
print
(
print
(
"self.op.output('Out')[0] is not persistable in fill_constant"
)
"self.op.output('Out')[0]: %s is not persistable in fill_constant"
%
(
self
.
op
.
output
(
'Out'
)[
0
]))
return
[
const
],
[[
0
]]
return
[
const
],
[[
0
]]
class
SGDParser
(
AscendParserBase
):
class
TruncatedNormalParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
SGDParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"sgd"
def
_apply
(
self
):
grad
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
lr
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
param
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
2
])
sgd
=
core
.
GEOperatorFactory
.
create_operator
(
"momentum"
+
self
.
_accumulated_op_id
(),
"ApplyGradientDescent"
).
set_input
(
"var"
,
param
).
set_input
(
"alpha"
,
lr
).
set_input
(
"delta"
,
grad
)
return
[
sgd
],
[[
0
]]
class
TruncatedNormalParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
TruncatedNormalParser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
TruncatedNormalParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"truncated_gaussian_random"
self
.
parser_name
=
"truncated_gaussian_random"
...
@@ -472,30 +817,27 @@ class TruncatedNormalParser(AscendParserBase):
...
@@ -472,30 +817,27 @@ class TruncatedNormalParser(AscendParserBase):
mean
=
self
.
op
.
attr
(
"mean"
)
mean
=
self
.
op
.
attr
(
"mean"
)
std
=
self
.
op
.
attr
(
"std"
)
std
=
self
.
op
.
attr
(
"std"
)
seed
=
self
.
op
.
attr
(
"seed"
)
seed
=
self
.
op
.
attr
(
"seed"
)
tensor1
=
self
.
_create_ge_tensor
([
len
(
shape
)],
2
,
shape
)
tensor1
=
self
.
_create_ge_tensor
([
len
(
shape
)],
2
,
shape
)
shape_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
shape_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensor1
)
"Const"
).
set_attr_tensor
(
"value"
,
tensor1
)
tensor2
=
self
.
_create_ge_tensor
([
1
],
dtype
,
mean
)
tensor2
=
self
.
_create_ge_tensor
([
1
],
dtype
,
mean
)
mean_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
mean_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensor2
)
"Const"
).
set_attr_tensor
(
"value"
,
tensor2
)
tensor3
=
self
.
_create_ge_tensor
([
1
],
dtype
,
std
)
tensor3
=
self
.
_create_ge_tensor
([
1
],
dtype
,
std
)
std_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
std_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensor3
)
"Const"
).
set_attr_tensor
(
"value"
,
tensor3
)
tensor4
=
self
.
_create_ge_tensor
([
1
],
dtype
,
mean
-
2
*
std
)
tensor4
=
self
.
_create_ge_tensor
([
1
],
dtype
,
mean
-
2
*
std
)
min_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
min_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensor4
)
"Const"
).
set_attr_tensor
(
"value"
,
tensor4
)
tensor5
=
self
.
_create_ge_tensor
([
1
],
dtype
,
mean
+
2
*
std
)
tensor5
=
self
.
_create_ge_tensor
([
1
],
dtype
,
mean
+
2
*
std
)
max_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
max_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
"const"
+
self
.
_accumulated_op_id
(),
"Const"
).
set_attr_tensor
(
"const"
+
self
.
_accumulated_op_id
(),
"value"
,
tensor5
)
"Const"
).
set_attr_tensor
(
"value"
,
tensor5
)
self
.
_mark_as_input
(
shape_tensor
)
self
.
_mark_as_input
(
shape_tensor
)
self
.
_mark_as_input
(
mean_tensor
)
self
.
_mark_as_input
(
mean_tensor
)
...
@@ -516,7 +858,6 @@ class TruncatedNormalParser(AscendParserBase):
...
@@ -516,7 +858,6 @@ class TruncatedNormalParser(AscendParserBase):
if
self
.
op
.
block
.
var
(
self
.
op
.
output
(
'Out'
)[
0
]).
persistable
:
if
self
.
op
.
block
.
var
(
self
.
op
.
output
(
'Out'
)[
0
]).
persistable
:
print
(
"%s is Persistable in truncated_normal"
%
print
(
"%s is Persistable in truncated_normal"
%
(
self
.
op
.
output
(
'Out'
)[
0
]))
(
self
.
op
.
output
(
'Out'
)[
0
]))
#var = core.GEOperatorFactory.create_operator(self.op.output('Out')[0], "Variable").set_input("x", truncated_normal)
var
=
core
.
GEOperatorFactory
.
create_operator
(
var
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
op
.
output
(
'Out'
)[
0
],
"Variable"
)
self
.
op
.
output
(
'Out'
)[
0
],
"Variable"
)
var
.
update_output_desc
(
"y"
,
var
.
update_output_desc
(
"y"
,
...
@@ -535,9 +876,339 @@ class TruncatedNormalParser(AscendParserBase):
...
@@ -535,9 +876,339 @@ class TruncatedNormalParser(AscendParserBase):
print
(
print
(
"self.op.output('Out')[0] is not persistable in truncated_noraml"
"self.op.output('Out')[0] is not persistable in truncated_noraml"
)
)
return
[
truncated_normal
],
[[
0
]]
#[assign]
return
[
truncated_normal
],
[[
0
]]
class GatherParser(AscendParserBase):
    """Parser registered as "gather"; emits one GE "Gather" operator."""

    def __init__(self, graph, var2geop):
        super(GatherParser, self).__init__(graph, var2geop)
        self.parser_name = "gather"

    def _apply(self):
        """Create the GE "Gather" operator.

        The op's first input name is wired to "indices" and its second
        input name to "x"; "validate_indices" is set to True.

        Returns:
            ``([gather], [[0]])`` -- the created operators and the index
            list returned alongside them.
        """
        index = self._get_ge_input(self.op.input_arg_names[0])
        x = self._get_ge_input(self.op.input_arg_names[1])
        # The previous version also read the last dimension of the second
        # input's shape into an unused local; that dead lookup is dropped.

        gather = core.GEOperatorFactory.create_operator(
            "gather" + self._accumulated_op_id(), "Gather").set_input(
                "x", x).set_input("indices", index).set_attr_bool(
                    "validate_indices", True)

        return [gather], [[0]]
class ScatterParser(AscendParserBase):
    """Parser registered as "scatter".

    Emits a GE "TensorScatterAdd" operator when the op's "overwrite"
    attribute is false and a "TensorScatterUpdate" operator otherwise.
    """

    def __init__(self, graph, var2geop):
        super(ScatterParser, self).__init__(graph, var2geop)
        self.parser_name = "scatter"

    def _apply(self):
        """Create the GE scatter operator for the current op.

        Input names are read positionally: [0] -> indices, [1] -> x,
        [2] -> updates. A rank-1 index is expanded with "Unsqueeze" on
        axis 1 before being passed to the scatter operator.

        Returns:
            ``([x, index, updates, scatter_value], [[-1]])`` -- the
            operators involved and the index list returned alongside them.
        """
        index = self._get_ge_input(self.op.input_arg_names[0])
        x = self._get_ge_input(self.op.input_arg_names[1])
        updates = self._get_ge_input(self.op.input_arg_names[2])
        overwrite = self.op.attr("overwrite")
        index_shape = self.op.block.var(self.op.input_arg_names[0]).shape

        if len(index_shape) == 1:
            # Use the same id generator as every other operator in this
            # file (the previous code called `self.getid()`).
            index = core.GEOperatorFactory.create_operator(
                "unsqueeze" + self._accumulated_op_id(),
                "Unsqueeze").set_input("x", index).set_attr_vec_int32(
                    "axes", [1])

        # The two branches differ only in the GE operator type.
        ge_type = "TensorScatterUpdate" if overwrite else "TensorScatterAdd"
        # The previous code referenced undefined x_var / index_var /
        # updates_var / updatesi_var here; wire the operators fetched above.
        scatter_value = core.GEOperatorFactory.create_operator(
            "scatter" + self._accumulated_op_id(), ge_type).set_input(
                "x", x).set_input("indices", index).set_input(
                    "updates", updates)

        return [x, index, updates, scatter_value], [[-1]]
class CastParser(AscendParserBase):
    """Parser registered as "cast"; emits one GE "Cast" operator."""

    def __init__(self, graph, var2geop):
        super(CastParser, self).__init__(graph, var2geop)
        self.parser_name = "cast"

    def _apply(self):
        """Create the GE "Cast" operator for the op's first input.

        The op's "out_dtype" attribute is forwarded unchanged as the GE
        "dst_type" attribute.
        NOTE(review): confirm that the framework dtype codes and the GE
        dtype codes agree; no translation is applied here.

        Returns:
            ``([cast], [[0]])`` -- the same (operators, index list) pair
            every other parser in this file returns. The previous version
            returned only ``[cast]``.
        """
        x = self._get_ge_input(self.op.input_arg_names[0])
        dtype = self.op.attr("out_dtype")
        cast = core.GEOperatorFactory.create_operator(
            "cast" + self._accumulated_op_id(), "Cast").set_input(
                "x", x).set_attr_int32("dst_type", dtype)
        return [cast], [[0]]
class AssignParser(AscendParserBase):
    """Parser registered as "assign"; emits one GE "Assign" operator."""

    def __init__(self, graph, var2geop):
        super(AssignParser, self).__init__(graph, var2geop)
        self.parser_name = "assign"

    def _apply(self):
        """Create the GE "Assign" operator.

        The op's first input name feeds "value" and its second input name
        feeds "ref".

        Returns:
            ``([assign_op], [[0]])``.
        """
        input_names = self.op.input_arg_names
        value_op = self._get_ge_input(input_names[0])
        ref_op = self._get_ge_input(input_names[1])

        assign_op = core.GEOperatorFactory.create_operator(
            "assign" + self._accumulated_op_id(), "Assign")
        assign_op = assign_op.set_input("value", value_op)
        assign_op = assign_op.set_input("ref", ref_op)
        return [assign_op], [[0]]
class ScaleParser(AscendParserBase):
    """Parser registered as "scale"; built on the GE "Power" operator."""

    def __init__(self, graph, var2geop):
        super(ScaleParser, self).__init__(graph, var2geop)
        self.parser_name = "scale"

    def _apply(self):
        """Create the GE operators implementing the scale op.

        When "bias_after_scale" is true a single "Power" operator is
        created with power=1.0, scale=<scale attr>, shift=<bias attr>.
        Otherwise an "Adds" operator first adds the bias to the input and
        the "Power" operator is applied to that result with shift=0.0.

        Returns:
            ``([power_op], [[0]])``.
        """
        source = self._get_ge_input(self.op.input_arg_names[0])
        factor = self.op.attr("scale")
        offset = self.op.attr("bias")

        if self.op.attr("bias_after_scale"):
            shift = offset
        else:
            # Bias is applied before scaling, so fold it into the input
            # and leave the Power operator's own shift at zero.
            source = core.GEOperatorFactory.create_operator(
                "adds" + self._accumulated_op_id(), "Adds").set_input(
                    "x", source).set_attr_float("value", offset)
            shift = 0.0

        power_op = core.GEOperatorFactory.create_operator(
            "scale" + self._accumulated_op_id(), "Power")
        power_op = power_op.set_input("x", source)
        power_op = power_op.set_attr_float("power", 1.0)
        power_op = power_op.set_attr_float("scale", factor)
        power_op = power_op.set_attr_float("shift", shift)
        return [power_op], [[0]]
class SliceParser(AscendParserBase):
    """Parser registered as "slice"; emits one GE "SliceD" operator."""

    def __init__(self, graph, var2geop):
        super(SliceParser, self).__init__(graph, var2geop)
        self.parser_name = "slice"

    def _apply(self):
        """Create the GE "SliceD" operator for the current op.

        The op's "axes"/"starts"/"ends" attributes describe only the
        sliced axes; SliceD is given a full-rank "offsets" and "size", so
        every axis that is not listed is taken whole (offset 0, end equal
        to the static dimension). An end larger than the static dimension
        is clamped to that dimension.

        Returns:
            ``([slice_value], [[0]])``.
        """
        x = self._get_ge_input(self.op.input_arg_names[0])
        axes = self.op.attr("axes")
        starts = self.op.attr("starts")
        ends = self.op.attr("ends")

        x_shape = self.op.block.var(self.op.input_arg_names[0]).shape
        len_shape = len(x_shape)
        axes_cor = list(range(len_shape))

        # Look each axis up by its position in `axes` so that starts/ends
        # are paired with the right axis even when `axes` is not sorted.
        # The previous running counter was only correct for ascending axes.
        position = {}
        for pos, axis in enumerate(axes):
            position.setdefault(axis, pos)

        starts_cor, ends_cor = [], []
        for i in axes_cor:
            pos = position.get(i)
            if pos is None:
                starts_cor.append(0)
                ends_cor.append(x_shape[i])
                continue
            starts_cor.append(starts[pos])
            ends_cor.append(ends[pos] if ends[pos] <= x_shape[i] else
                            x_shape[i])

        assert len(axes_cor) == len(starts_cor) == len(
            ends_cor), "the three fields must have same size"
        size = [end - start for start, end in zip(starts_cor, ends_cor)]

        slice_value = core.GEOperatorFactory.create_operator(
            "slice" + self._accumulated_op_id(), "SliceD").set_input(
                "x", x).set_attr_vec_int32(
                    "offsets", starts_cor).set_attr_vec_int32("size", size)

        return [slice_value], [[0]]
class ReshapeParser(AscendParserBase):
    """Parser registered as "reshape2"; emits GE "Reshape" and "Shape"."""

    def __init__(self, graph, var2geop):
        super(ReshapeParser, self).__init__(graph, var2geop)
        self.parser_name = "reshape2"

    def _apply(self):
        """Create the GE operators for the reshape2 op.

        The target shape comes from the op's "shape" attribute and is
        resolved statically: a 0 entry copies the input dimension at the
        same position, and a single -1 entry is inferred from the total
        element count of the input. The resolved shape is passed to
        "Reshape" through a "Const" operator; a "Shape" operator is also
        created on the input.

        Returns:
            ``([x_shape, reshape], [[1], [0]])``.

        Raises:
            AssertionError: if the input's static shape contains -1, or
                the target shape contains more than one -1.
        """
        # Imported locally: `reduce` is not a builtin on Python 3 and this
        # block must not rely on a module-level import it cannot see.
        from functools import reduce
        import operator

        org_shape = self.op.block.var(self.op.input_arg_names[0]).shape
        assert org_shape.count(-1) == 0, "do not allow the dim is -1"

        # Work on a copy so the list returned by attr() is never mutated.
        shape = list(self.op.attr("shape"))
        for cnt, dim in enumerate(shape):
            if dim == 0:
                shape[cnt] = org_shape[cnt]

        if -1 in shape:
            assert shape.count(-1) == 1, "only allow one dim is -1"
            mul_res_org = reduce(operator.mul, org_shape, 1)
            # The product includes the single -1, so negate it to get the
            # product of the known dimensions.
            mul_res_refine = reduce(operator.mul, shape, 1) * -1
            shape[shape.index(-1)] = mul_res_org // mul_res_refine

        x = self._get_ge_input(self.op.input_arg_names[0])
        tensor = self._create_ge_tensor([len(shape)], 2, shape)
        const_shape = core.GEOperatorFactory.create_operator(
            "shape" + self._accumulated_op_id(), "Const").set_attr_tensor(
                "value", tensor)
        reshape = core.GEOperatorFactory.create_operator(
            "reshape" + self._accumulated_op_id(), "Reshape").set_input(
                "x", x).set_input("shape", const_shape).set_attr_int32(
                    "axis", 0)
        x_shape = core.GEOperatorFactory.create_operator(
            "shape" + self._accumulated_op_id(), "Shape").set_input("x", x)

        return [x_shape, reshape], [[1], [0]]
class TransposeParser(AscendParserBase):
    """Parser registered as "transpose2"; emits "TransposeD" and "Shape"."""

    def __init__(self, graph, var2geop):
        super(TransposeParser, self).__init__(graph, var2geop)
        self.parser_name = "transpose2"

    def _apply(self):
        """Create the GE operators for the transpose2 op.

        The op's "axis" attribute is passed to "TransposeD" as "perm". A
        "Shape" operator is also created on the (untransposed) input.

        Returns:
            ``([shape_op, transpose_op], [[1], [0]])``.
        """
        source = self._get_ge_input(self.op.input_arg_names[0])
        permutation = self.op.attr("axis")

        transpose_op = core.GEOperatorFactory.create_operator(
            "transpose" + self._accumulated_op_id(), "TransposeD")
        transpose_op = transpose_op.set_input("x", source)
        transpose_op = transpose_op.set_attr_vec_int32("perm", permutation)

        shape_op = core.GEOperatorFactory.create_operator(
            "shape" + self._accumulated_op_id(), "Shape")
        shape_op = shape_op.set_input("x", source)

        return [shape_op, transpose_op], [[1], [0]]
class AccuracyParser(AscendParserBase):
    """Parser registered as "accuracy".

    Builds accuracy, correct-count and total-count outputs from GE
    "Cast", "Equal", "ReduceMeanD", "ReduceSumD" and "OnesLike" operators.
    """

    def __init__(self, graph, var2geop):
        super(AccuracyParser, self).__init__(graph, var2geop)
        self.parser_name = "accuracy"

    def _apply(self):
        """Create the GE operators for the accuracy op.

        The first two inputs are cast (dst_type 3), compared with "Equal",
        and the comparison is cast (dst_type 0) and reduced with a mean
        (accuracy) and a sum (correct). The total is the sum of a
        ones-like tensor built from the cast second input.
        NOTE(review): dst_type 3 / 0 are presumably the GE int32 / float32
        codes -- confirm against the GE dtype enumeration.

        Returns:
            ``([acc, correct, total], [[0], [1], [2]])``.
        """

        def cast_to(src, dst_type):
            # One GE "Cast" operator with a freshly accumulated name.
            return core.GEOperatorFactory.create_operator(
                "cast" + self._accumulated_op_id(), "Cast").set_input(
                    "x", src).set_attr_int32("dst_type", dst_type)

        def reduce_all(prefix, ge_type, src):
            # Reduction with an empty "axes" list and keep_dims disabled.
            return core.GEOperatorFactory.create_operator(
                prefix + self._accumulated_op_id(), ge_type).set_input(
                    "x", src).set_attr_bool(
                        "keep_dims", False).set_attr_vec_int32("axes", [])

        prediction = self._get_ge_input(self.op.input_arg_names[0])
        target = self._get_ge_input(self.op.input_arg_names[1])
        # The third input is fetched but never used below; the call is
        # kept because _get_ge_input is opaque from here.
        _unused_logits = self._get_ge_input(self.op.input_arg_names[2])

        prediction = cast_to(prediction, 3)
        target = cast_to(target, 3)
        matches = core.GEOperatorFactory.create_operator(
            "equal" + self._accumulated_op_id(), "Equal").set_input(
                "x1", prediction).set_input("x2", target)
        matches = cast_to(matches, 0)

        acc = reduce_all("mean", "ReduceMeanD", matches)
        correct = reduce_all("sum", "ReduceSumD", matches)

        ones = core.GEOperatorFactory.create_operator(
            "oneslike" + self._accumulated_op_id(), "OnesLike").set_input(
                "x", target)
        ones = cast_to(ones, 0)
        total = reduce_all("sum", "ReduceSumD", ones)

        return [acc, correct, total], [[0], [1], [2]]
class TopkParser(AscendParserBase):
    """Parser registered as "top_k"; built on the GE "TopK" operator."""

    def __init__(self, graph, var2geop):
        super(TopkParser, self).__init__(graph, var2geop)
        self.parser_name = "top_k"

    def _apply(self):
        """Create the GE operators for the top_k op.

        The op's "k" attribute is wrapped in a "Const" operator, the input
        is cast (dst_type 1) before "TopK", and outputs 0 and 1 of "TopK"
        are each cast (dst_type 0).
        NOTE(review): dst_type 1 / 0 are presumably the GE float16 /
        float32 codes -- confirm against the GE dtype enumeration.

        Returns:
            ``([value, index], [[1], [0]])``.
        """

        def cast_to(src_args, dst_type):
            # src_args are the positional arguments following "x" in
            # set_input, i.e. (operator,) or (operator, output_index).
            return core.GEOperatorFactory.create_operator(
                "cast" + self._accumulated_op_id(), "Cast").set_input(
                    "x", *src_args).set_attr_int32("dst_type", dst_type)

        source = self._get_ge_input(self.op.input_arg_names[0])
        top_count = self.op.attr("k")

        k_tensor = self._create_ge_tensor([1], 2, top_count)
        k_const = core.GEOperatorFactory.create_operator(
            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
                "value", k_tensor)
        source_cast = cast_to((source, ), 1)
        topk_op = core.GEOperatorFactory.create_operator(
            "topk" + self._accumulated_op_id(), "TopK").set_input(
                "x", source_cast).set_input("k", k_const)

        value = cast_to((topk_op, 0), 0)
        index = cast_to((topk_op, 1), 0)
        return [value, index], [[1], [0]]
class LookupTableParser(AscendParserBase):
    """Parser registered as "lookup_table"; built on GE "Gather"."""

    def __init__(self, graph, var2geop):
        super(LookupTableParser, self).__init__(graph, var2geop)
        self.parser_name = "lookup_table"

    def _apply(self):
        """Create the GE operators for the lookup_table op.

        The first input (ids) is squeezed on axis -1 and used as the
        "indices" of a "Gather" over the second input (the table).

        Returns:
            ``([gather_op], [[0]])``.
        """
        input_names = self.op.input_arg_names
        id_op = self._get_ge_input(input_names[0])
        table_op = self._get_ge_input(input_names[1])

        squeezed_ids = core.GEOperatorFactory.create_operator(
            "squeeze" + self._accumulated_op_id(), "Squeeze")
        squeezed_ids = squeezed_ids.set_input("x", id_op)
        squeezed_ids = squeezed_ids.set_attr_vec_int32("axes", [-1])

        gather_op = core.GEOperatorFactory.create_operator(
            "lookup" + self._accumulated_op_id(), "Gather")
        gather_op = gather_op.set_input("x", table_op)
        gather_op = gather_op.set_input("indices", squeezed_ids)
        return [gather_op], [[0]]
class StackParser(AscendParserBase):
    """Parser registered as "stack"; built on "ExpandDims"/"TileWithAxis"."""

    def __init__(self, graph, var2geop):
        super(StackParser, self).__init__(graph, var2geop)
        self.parser_name = "stack"

    def _apply(self):
        """Create the GE operators for the stack op.

        The first input is expanded along the op's "axis" attribute with
        "ExpandDims" and then repeated with "TileWithAxis", using the
        number of inputs as "tiles".
        NOTE(review): every input is fetched, but only the first one
        reaches the graph, so the result equals a true stack only if all
        inputs are identical -- confirm this is intended.

        Returns:
            ``([stack_op], [[0]])``.
        """
        input_names = self.op.input_arg_names
        tiles = len(input_names)
        ge_inputs = [self._get_ge_input(name) for name in input_names]
        axis = self.op.attr("axis")

        first_input = ge_inputs[0]
        axis_tensor = self._create_ge_tensor([1], 2, axis)
        axis_const = core.GEOperatorFactory.create_operator(
            "axis" + self._accumulated_op_id(), "Const").set_attr_tensor(
                "value", axis_tensor)
        expanded = core.GEOperatorFactory.create_operator(
            "expand" + self._accumulated_op_id(), "ExpandDims").set_input(
                "x", first_input).set_input("axis", axis_const)

        stack_op = core.GEOperatorFactory.create_operator(
            "stack" + self._accumulated_op_id(), "TileWithAxis")
        stack_op = stack_op.set_input("x", expanded)
        stack_op = stack_op.set_attr_int32("axis", axis)
        stack_op = stack_op.set_attr_int32("tiles", tiles)
        return [stack_op], [[0]]
class UnSqueezeParser(AscendParserBase):
    """Parser registered as "unsqueeze2"; emits "Unsqueeze" and "Shape"."""

    def __init__(self, graph, var2geop):
        super(UnSqueezeParser, self).__init__(graph, var2geop)
        self.parser_name = "unsqueeze2"

    def _apply(self):
        """Create the GE operators for the unsqueeze2 op.

        The op's "axes" attribute is forwarded to "Unsqueeze"; a "Shape"
        operator is then created on the unsqueezed result.

        Returns:
            ``([shape_op, unsqueezed], [[1], [0]])``.
        """
        source = self._get_ge_input(self.op.input_arg_names[0])
        new_axes = self.op.attr('axes')

        unsqueezed = core.GEOperatorFactory.create_operator(
            "unsqueeze" + self._accumulated_op_id(), "Unsqueeze")
        unsqueezed = unsqueezed.set_input("x", source)
        unsqueezed = unsqueezed.set_attr_vec_int32("axes", new_axes)

        shape_op = core.GEOperatorFactory.create_operator(
            "shape" + self._accumulated_op_id(), "Shape")
        shape_op = shape_op.set_input("x", unsqueezed)

        return [shape_op, unsqueezed], [[1], [0]]
## parallel
class
AllGatherParser
(
AscendParserBase
):
class
AllGatherParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
AllGatherParser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
AllGatherParser
,
self
).
__init__
(
graph
,
var2geop
)
...
@@ -667,59 +1338,735 @@ class ReceiveParser(AscendParserBase):
...
@@ -667,59 +1338,735 @@ class ReceiveParser(AscendParserBase):
return
[
receive
],
[[
0
]]
return
[
receive
],
[[
0
]]
class
ScaleParser
(
AscendParserBase
):
#****************************************************************#
#*************************** *************************#
#*************************** *************************#
#*************************** GradParser *************************#
#*************************** *************************#
#*************************** *************************#
#****************************************************************#
## grad
class
ReduceSumGradParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
Scale
Parser
,
self
).
__init__
(
graph
,
var2geop
)
super
(
ReduceSumGrad
Parser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"
scale
"
self
.
parser_name
=
"
reduce_sum_grad
"
def
_apply
(
self
):
def
_apply
(
self
):
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
scale
=
self
.
op
.
attr
(
input
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
"scale"
)
#self.get_ge_input(self.op.input_arg_names[1])
bias
=
self
.
op
.
attr
(
"bias"
)
shape_tensor
=
core
.
GEOperatorFactory
.
create_operator
(
bias_after_scale
=
self
.
op
.
attr
(
"bias_after_scale"
)
"shape"
+
self
.
_accumulated_op_id
(),
if
bias_after_scale
:
"Shape"
).
set_input
(
"x"
,
input
,
0
)
scale_value
=
core
.
GEOperatorFactory
.
create_operator
(
tensoron
=
self
.
_create_ge_tensor
([
1
],
2
,
-
1
)
"scale"
+
self
.
_accumulated_op_id
(),
"Power"
).
set_input
(
const
=
core
.
GEOperatorFactory
.
create_operator
(
"x"
,
x
).
set_attr_float
(
"power"
,
1.0
).
set_attr_float
(
"const"
+
self
.
_accumulated_op_id
(),
"scale"
,
scale
).
set_attr_float
(
"shift"
,
bias
)
"Const"
).
set_attr_tensor
(
"value"
,
tensoron
)
self
.
_mark_as_input
(
const
)
reduce_sum
=
core
.
GEOperatorFactory
.
create_operator
(
"broadcast_to_d"
+
self
.
_accumulated_op_id
(),
"BroadcastTo"
).
set_input
(
"x"
,
x
).
set_input
(
"shape"
,
shape_tensor
)
#reduce_sum = core.GEOperatorFactory.create_operator("expand" + self._accumulated_op_id(), "ExpandDims").set_input("x", reduce_sum).set_input("axis", const)
return
[
reduce_sum
],
[[
0
]]
class
MatMulGradParser
(
AscendParserBase
):
def
__init__
(
self
,
graph
,
var2geop
):
super
(
MatMulGradParser
,
self
).
__init__
(
graph
,
var2geop
)
self
.
parser_name
=
"matmul_grad"
def
_apply
(
self
):
out_grad
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
0
])
x
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
1
])
y
=
self
.
_get_ge_input
(
self
.
op
.
input_arg_names
[
2
])
transpose_x
=
self
.
op
.
attr
(
"transpose_X"
)
transpose_y
=
self
.
op
.
attr
(
"transpose_Y"
)
out_grad_shape
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
0
]).
shape
x_shape
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
1
]).
shape
y_shape
=
self
.
op
.
block
.
var
(
self
.
op
.
input_arg_names
[
2
]).
shape
if
len
(
x_shape
)
>
2
:
if
transpose_y
:
x_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"BatchMatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
"adj_x1"
,
False
).
set_attr_bool
(
"adj_x2"
,
False
)
y_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"BatchMatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
"x2"
,
x
).
set_attr_bool
(
"adj_x1"
,
True
).
set_attr_bool
(
"adj_x2"
,
False
)
else
:
else
:
x_add_bias
=
core
.
GEOperatorFactory
.
create_operator
(
x_grad
=
core
.
GEOperatorFactory
.
create_operator
(
"adds"
+
self
.
_accumulated_op_id
(),
"Adds"
).
set_input
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"x"
,
x
).
set_attr_float
(
"value"
,
"BatchMatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
bias
)
#set_input("x2", bias)
"x2"
,
y
).
set_attr_bool
(
scale_value
=
core
.
GEOperatorFactory
.
create_operator
(
"adj_x1"
,
False
).
set_attr_bool
(
"adj_x2"
,
True
)
"scale"
+
self
.
_accumulated_op_id
(),
"Power"
).
set_input
(
y_grad
=
core
.
GEOperatorFactory
.
create_operator
(
"x"
,
x_add_bias
).
set_attr_float
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"power"
,
1.0
).
set_attr_float
(
"BatchMatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"scale"
,
scale
).
set_attr_float
(
"shift"
,
0.0
)
"x2"
,
out_grad
).
set_attr_bool
(
#tensor_zeros = core.GEOperatorFactory.create_operator("zeroslike" + self.getid(), "ZerosLike").set_input("x", x)
"adj_x1"
,
True
).
set_attr_bool
(
"adj_x2"
,
False
)
#bias_ = self.create_ge_tensor([1], 5, bias)
else
:
#const_bias = core.GEOperatorFactory.create_operator("const" + self.getid(), "Const").set_attr_tensor("value", tensor_bias)
if
transpose_y
:
return
[
scale_value
],
[[
0
]]
x_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
"transpose_x1"
,
False
).
set_attr_bool
(
"transpose_x2"
,
False
)
y_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
"x2"
,
x
).
set_attr_bool
(
"transpose_x1"
,
True
).
set_attr_bool
(
"transpose_x2"
,
False
)
else
:
x_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
out_grad
).
set_input
(
"x2"
,
y
).
set_attr_bool
(
"transpose_x1"
,
False
).
set_attr_bool
(
"transpose_x2"
,
True
)
y_grad
=
core
.
GEOperatorFactory
.
create_operator
(
self
.
parser_name
+
self
.
_accumulated_op_id
(),
"MatMul"
).
set_input
(
"x1"
,
x
).
set_input
(
"x2"
,
out_grad
).
set_attr_bool
(
"transpose_x1"
,
True
).
set_attr_bool
(
"transpose_x2"
,
False
)
return
[
x_grad
,
y_grad
],
[[
0
],
[
1
]]
class MulGradParser(AscendParserBase):
    """Translate a Paddle ``mul_grad`` op into GE MatMul/BatchMatMul nodes.

    NOTE(review): this block was reconstructed from a side-by-side diff in
    which tokens of a removed ``ReshapeParser`` were interleaved with this
    class. The removed-side tokens (duplicate headers, debug prints and the
    ``shape``/``axis``/``tensor`` statements) are dropped here.
    """

    def __init__(self, graph, var2geop):
        super(MulGradParser, self).__init__(graph, var2geop)
        self.parser_name = "mul_grad"

    def _apply(self):
        """Build the GE nodes producing the gradients of both operands.

        Reads inputs 0/1/2 as ``out_grad``, ``x`` and ``y`` and the
        ``x_num_col_dims`` / ``y_num_col_dims`` attributes.

        Returns:
            ``([x_grad, y_grad], [[0], [1]])``.

        NOTE(review): only the (2-D x, 2-D y) and (3-D x, 2-D y) rank
        combinations are handled; any other combination leaves ``x_grad`` /
        ``y_grad`` unbound, exactly as in the original code.
        """
        out_grad = self._get_ge_input(self.op.input_arg_names[0])
        x = self._get_ge_input(self.op.input_arg_names[1])
        y = self._get_ge_input(self.op.input_arg_names[2])
        x_num_col_dims = self.op.attr("x_num_col_dims")
        y_num_col_dims = self.op.attr("y_num_col_dims")

        shape_out_grad = self.op.block.var(self.op.input_arg_names[0]).shape
        shape_x = self.op.block.var(self.op.input_arg_names[1]).shape
        shape_y = self.op.block.var(self.op.input_arg_names[2]).shape

        if x_num_col_dims == 1 and y_num_col_dims == 1:
            if len(shape_x) == 2 and len(shape_y) == 2:
                # x_grad = out_grad * y^T ; y_grad = x^T * out_grad
                x_grad = core.GEOperatorFactory.create_operator(
                    self.parser_name + self._accumulated_op_id(),
                    "MatMul").set_input("x1", out_grad).set_input(
                        "x2", y).set_attr_bool(
                            "transpose_x1", False).set_attr_bool(
                                "transpose_x2", True)
                y_grad = core.GEOperatorFactory.create_operator(
                    self.parser_name + self._accumulated_op_id(),
                    "MatMul").set_input("x1", x).set_input(
                        "x2", out_grad).set_attr_bool(
                            "transpose_x1", True).set_attr_bool(
                                "transpose_x2", False)
            elif len(shape_x) == 3 and len(shape_y) == 2:
                flatten_x = core.GEOperatorFactory.create_operator(
                    "flatten" + self._accumulated_op_id(),
                    "Flatten").set_input("x", x)
                x_grad = core.GEOperatorFactory.create_operator(
                    self.parser_name + self._accumulated_op_id(),
                    "MatMul").set_input("x1", out_grad).set_input(
                        "x2", y).set_attr_bool(
                            "transpose_x1", False).set_attr_bool(
                                "transpose_x2", True)
                if len(shape_out_grad) == 2:
                    # Insert an axis at position 1 when out_grad is 2-D.
                    x_grad = core.GEOperatorFactory.create_operator(
                        "unsqueeze" + self._accumulated_op_id(),
                        "Unsqueeze").set_input(
                            "x", x_grad).set_attr_vec_int32("axes", [1])
                y_grad = core.GEOperatorFactory.create_operator(
                    self.parser_name + self._accumulated_op_id(),
                    "MatMul").set_input("x1", flatten_x).set_input(
                        "x2", out_grad).set_attr_bool(
                            "transpose_x1", True).set_attr_bool(
                                "transpose_x2", False)
        else:
            if len(shape_x) == 3 and len(shape_y) == 2:
                assert x_num_col_dims == 2, "only support 2"
                flatten_x = core.GEOperatorFactory.create_operator(
                    "flatten" + self._accumulated_op_id(),
                    "FlattenV2").set_input("x", x).set_attr_int32(
                        "axis", 0).set_attr_int32("end_axis", 1)
                flatten_out_grad = core.GEOperatorFactory.create_operator(
                    "flatten" + self._accumulated_op_id(),
                    "FlattenV2").set_input("x", out_grad).set_attr_int32(
                        "axis", 0).set_attr_int32("end_axis", 1)
                y_unsqueeze = core.GEOperatorFactory.create_operator(
                    "unsqueeze" + self._accumulated_op_id(),
                    "Unsqueeze").set_input("x", y).set_attr_vec_int32(
                        "axes", [0])
                x_grad = core.GEOperatorFactory.create_operator(
                    self.parser_name + self._accumulated_op_id(),
                    "BatchMatMul").set_input("x1", out_grad).set_input(
                        "x2", y_unsqueeze).set_attr_bool(
                            "adj_x1", False).set_attr_bool("adj_x2", True)
                y_grad = core.GEOperatorFactory.create_operator(
                    self.parser_name + self._accumulated_op_id(),
                    "MatMul").set_input("x1", flatten_x).set_input(
                        "x2", flatten_out_grad).set_attr_bool(
                            "transpose_x1", True).set_attr_bool(
                                "transpose_x2", False)

        return [x_grad, y_grad], [[0], [1]]
class ReluGradParser(AscendParserBase):
    """Translate a Paddle ``relu_grad`` op into a GE ``ReluGrad`` operator."""

    def __init__(self, graph, var2geop):
        super(ReluGradParser, self).__init__(graph, var2geop)
        self.parser_name = "relu_grad"

    def _apply(self):
        """Create the ``ReluGrad`` node from the op's first two inputs.

        Input 0 is wired to ``features`` and input 1 to ``gradients``.

        Returns:
            ``([relu_grad], [[0]])`` -- the created GE operators and an index
            list (presumably mapping them to the Paddle op's outputs; confirm
            against AscendParserBase).
        """
        arg_names = self.op.input_arg_names
        forward_out = self._get_ge_input(arg_names[0])
        upstream_grad = self._get_ge_input(arg_names[1])

        factory = core.GEOperatorFactory
        node = factory.create_operator(
            self.parser_name + self._accumulated_op_id(), "ReluGrad")
        node = node.set_input("gradients", upstream_grad)
        node = node.set_input("features", forward_out)
        return [node], [[0]]
class SoftmaxWithCrossEntropyGradParser(AscendParserBase):
    """Translate ``softmax_with_cross_entropy_grad`` into GE operators.

    The gradient is assembled as ``loss_grad * (softmax - one_hot(label))``.
    """

    def __init__(self, graph, var2geop):
        super(SoftmaxWithCrossEntropyGradParser, self).__init__(graph,
                                                                var2geop)
        self.parser_name = "softmax_with_cross_entropy_grad"

    def _apply(self):
        """Build the one-hot / subtract / multiply sub-graph.

        Inputs 0/1/2 are read as label, loss gradient and softmax output.

        Returns:
            ``([on, off, label, onehot, grad], [[-1]])``.
        """
        arg_names = self.op.input_arg_names
        label = self._get_ge_input(arg_names[0])
        loss_grad = self._get_ge_input(arg_names[1])
        softmax = self._get_ge_input(arg_names[2])

        # Depth of the one-hot encoding: second dimension of the softmax var.
        cls_num = self.op.block.var(self.op.input_arg_names[2]).shape[1]

        # Shape lookups retained from the original; their values are unused.
        for position in (0, 1, 2):
            self.op.block.var(self.op.input_arg_names[position]).shape

        def new_op(prefix, op_type):
            # Every GE node gets a unique name from the accumulated op id.
            return core.GEOperatorFactory.create_operator(
                prefix + self._accumulated_op_id(), op_type)

        on_tensor = self._create_ge_tensor([1], 5, 1)
        on = new_op("const", "Const")
        on = on.set_attr_tensor("value", on_tensor)

        off_tensor = self._create_ge_tensor([1], 5, 0)
        off = new_op("const", "Const")
        off = off.set_attr_tensor("value", off_tensor)

        self._mark_as_input(on)
        self._mark_as_input(off)

        cast = new_op("cast", "Cast")
        cast = cast.set_input("x", label)
        label = cast.set_attr_int32("dst_type", 3)

        onehot = new_op("onehot", "OneHotD")
        onehot = onehot.set_input("x", label)
        onehot = onehot.set_input("on_value", on)
        onehot = onehot.set_input("off_value", off)
        onehot = onehot.set_attr_int32("depth", cls_num)

        squeezed = new_op("suqeeze", "Squeeze")
        squeezed = squeezed.set_input("x", onehot)

        diff = new_op("sub", "Sub")
        diff = diff.set_input("x1", softmax)
        diff = diff.set_input("x2", squeezed)

        grad = new_op("mul", "Mul")
        grad = grad.set_input("x1", loss_grad)
        grad = grad.set_input("x2", diff)

        return [on, off, label, onehot, grad], [[-1]]
class DotMulGradParser(AscendParserBase):
    """Translate ``elementwise_mul_grad`` into two GE ``Mul`` operators."""

    def __init__(self, graph, var2geop):
        super(DotMulGradParser, self).__init__(graph, var2geop)
        self.parser_name = "elementwise_mul_grad"

    def _apply(self):
        """Create the gradient nodes for both multiplication operands.

        ``x_grad`` multiplies input 0 by input 2; ``y_grad`` multiplies
        input 1 by input 0.

        Returns:
            ``([x_grad, y_grad], [[0], [1]])``.
        """
        arg_names = self.op.input_arg_names
        upstream = self._get_ge_input(arg_names[0])
        lhs = self._get_ge_input(arg_names[1])
        rhs = self._get_ge_input(arg_names[2])

        factory = core.GEOperatorFactory

        x_grad = factory.create_operator(
            self.parser_name + self._accumulated_op_id(), "Mul")
        x_grad = x_grad.set_input("x1", upstream)
        x_grad = x_grad.set_input("x2", rhs)

        y_grad = factory.create_operator(
            self.parser_name + self._accumulated_op_id(), "Mul")
        y_grad = y_grad.set_input("x1", lhs)
        y_grad = y_grad.set_input("x2", upstream)

        return [x_grad, y_grad], [[0], [1]]
class DotAddGradParser(AscendParserBase):
    """Translate ``elementwise_add_grad`` into GE ``ReduceSumD`` operators.

    The incoming gradient is summed over the axes that differ between the
    gradient shape and each operand shape.
    """

    def __init__(self, graph, var2geop):
        super(DotAddGradParser, self).__init__(graph, var2geop)
        self.parser_name = "elementwise_add_grad"

    def _apply(self):
        """Reduce the output gradient down to each operand's shape.

        Returns:
            ``([x_grad, y_grad], [[0], [1]])``.
        """
        arg_names = self.op.input_arg_names
        out_grad = self._get_ge_input(arg_names[0])
        # Operands are fetched as in the original; only their shapes are used.
        self._get_ge_input(arg_names[1])
        self._get_ge_input(arg_names[2])

        grad_shape = self.op.block.var(self.op.input_arg_names[0]).shape
        lhs_shape = self.op.block.var(self.op.input_arg_names[1]).shape
        rhs_shape = self.op.block.var(self.op.input_arg_names[2]).shape

        def reduce_sum(source, axis, keep_dims):
            node = core.GEOperatorFactory.create_operator(
                self.parser_name + self._accumulated_op_id(), "ReduceSumD")
            node = node.set_input("x", source)
            node = node.set_attr_vec_int32("axes", [axis])
            return node.set_attr_bool("keep_dims", keep_dims)

        def sum_to_shape(target_shape):
            reduced = out_grad
            # Drop one leading axis per rank the gradient has in excess.
            for _ in range(len(grad_shape) - len(target_shape)):
                reduced = reduce_sum(reduced, 0, False)
            # Sum (keeping the axis) over every size-1 dimension of the target.
            for axis, size in enumerate(target_shape):
                if size == 1:
                    reduced = reduce_sum(reduced, axis, True)
            return reduced

        x_grad = sum_to_shape(lhs_shape)
        y_grad = sum_to_shape(rhs_shape)

        return [x_grad, y_grad], [[0], [1]]
class DotDivGradParser(AscendParserBase):
    """Translate ``elementwise_div_grad`` into GE operators.

    For ``out = x / y`` the gradients are ``d_x = out_grad / y`` and
    ``d_y = -out_grad * out / y``.
    """

    def __init__(self, graph, var2geop):
        super(DotDivGradParser, self).__init__(graph, var2geop)
        self.parser_name = "elementwise_div_grad"

    def _apply(self):
        """Build the gradient sub-graphs for numerator and denominator.

        Inputs 0/1/2/3 are read as ``out``, ``out_grad``, ``x`` and ``y``.

        Returns:
            ``([x_grad, y_grad], [[0], [1]])``.
        """
        out = self._get_ge_input(self.op.input_arg_names[0])
        out_grad = self._get_ge_input(self.op.input_arg_names[1])
        x = self._get_ge_input(self.op.input_arg_names[2])
        y = self._get_ge_input(self.op.input_arg_names[3])

        # 1 / y
        y_power = core.GEOperatorFactory.create_operator(
            "power" + self._accumulated_op_id(), "Power").set_input(
                "x", y).set_attr_float("power", -1)

        # Mask that is 1.0 where x != 0 and 0.0 elsewhere.
        # NOTE(review): the analytic gradient w.r.t. x is 1/y regardless of
        # x; this mask is kept from the original -- confirm it is intended.
        tensor_zeros = core.GEOperatorFactory.create_operator(
            "zeroslike" + self._accumulated_op_id(),
            "ZerosLike").set_input("x", x)
        x_zero = core.GEOperatorFactory.create_operator(
            "equal" + self._accumulated_op_id(), "Equal").set_input(
                "x1", x).set_input("x2", tensor_zeros)
        x_nozero = core.GEOperatorFactory.create_operator(
            "logical_not" + self._accumulated_op_id(),
            "LogicalNot").set_input("x", x_zero)
        x_nozero_f = core.GEOperatorFactory.create_operator(
            "cast" + self._accumulated_op_id(), "Cast").set_input(
                "x", x_nozero).set_attr_int32("dst_type", 0)

        x_grad_w = core.GEOperatorFactory.create_operator(
            "mul" + self._accumulated_op_id(), "Mul").set_input(
                "x1", x_nozero_f).set_input("x2", y_power)
        x_grad = core.GEOperatorFactory.create_operator(
            self.parser_name + self._accumulated_op_id(), "Mul").set_input(
                "x1", x_grad_w).set_input("x2", out_grad)

        # out / y
        y_grad_w = core.GEOperatorFactory.create_operator(
            "mul" + self._accumulated_op_id(), "Mul").set_input(
                "x1", out).set_input("x2", y_power)
        # Fix: d(x / y)/dy = -out / y. The original omitted the sign, so
        # negate via Power with scale=-1 (y = (scale * x + shift) ** power).
        neg_y_grad_w = core.GEOperatorFactory.create_operator(
            "scale" + self._accumulated_op_id(), "Power").set_input(
                "x", y_grad_w).set_attr_float("power", 1.0).set_attr_float(
                    "scale", -1.0).set_attr_float("shift", 0.0)
        y_grad = core.GEOperatorFactory.create_operator(
            "mul" + self._accumulated_op_id(), "Mul").set_input(
                "x1", neg_y_grad_w).set_input("x2", out_grad)

        return [x_grad, y_grad], [[0], [1]]
class SoftmaxGradParser(AscendParserBase):
    """Translate a Paddle ``softmax_grad`` op into a GE ``SoftmaxGrad``."""

    def __init__(self, graph, var2geop):
        super(SoftmaxGradParser, self).__init__(graph, var2geop)
        self.parser_name = "softmax_grad"

    def _apply(self):
        """Create the ``SoftmaxGrad`` node.

        Input 0 feeds ``softmax`` and input 1 feeds ``grad_softmax``.

        Returns:
            ``([x_grad], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        softmax_out = self._get_ge_input(arg_names[0])
        upstream = self._get_ge_input(arg_names[1])

        node = core.GEOperatorFactory.create_operator(
            self.parser_name + self._accumulated_op_id(), "SoftmaxGrad")
        node = node.set_input("softmax", softmax_out)
        node = node.set_input("grad_softmax", upstream)
        return [node], [[0]]
class ReshapeGradParser(AscendParserBase):
    """Translate a Paddle ``reshape2_grad`` op into a GE ``Reshape``.

    NOTE(review): this block was reconstructed from a side-by-side diff in
    which tokens of a removed ``ReshapeParser`` (``data_x1_shape``, the
    ``axis`` attribute, duplicated ``create_operator(`` openers) were
    interleaved with this class; those tokens are dropped here.
    """

    def __init__(self, graph, var2geop):
        super(ReshapeGradParser, self).__init__(graph, var2geop)
        self.parser_name = "reshape2_grad"

    def _apply(self):
        """Reshape the output gradient back to the forward input's shape.

        Input 0 is the output gradient; input 1 is the variable whose static
        shape encodes the target shape.

        Returns:
            ``([x_grad], [[0]])``.
        """
        out_grad = self._get_ge_input(self.op.input_arg_names[0])
        # Fetched as in the original; only the static shape is used below.
        self._get_ge_input(self.op.input_arg_names[1])
        x_shape_list = self.op.block.var(self.op.input_arg_names[1]).shape

        # A leading 0 is stripped before building the target shape
        # (presumably an XShape placeholder dimension -- TODO confirm).
        # Fix: the original only bound the target shape inside this branch
        # and raised NameError otherwise; fall back to the full shape.
        if x_shape_list[0] == 0:
            x_shape_delzero = x_shape_list[1:]
        else:
            x_shape_delzero = x_shape_list

        tensor = self._create_ge_tensor([len(x_shape_delzero)], 2,
                                        x_shape_delzero)
        const_shape = core.GEOperatorFactory.create_operator(
            "shape" + self._accumulated_op_id(), "Const").set_attr_tensor(
                "value", tensor)
        x_grad = core.GEOperatorFactory.create_operator(
            "reshape" + self._accumulated_op_id(), "Reshape").set_input(
                "x", out_grad).set_input("shape", const_shape)

        return [x_grad], [[0]]
class GatherGradParser(AscendParserBase):
    """Translate a Paddle ``gather_grad`` op into GE scatter operators.

    NOTE(review): the scraped source carried a stray
    ``return [reshape, reshape], [[0], [1]]`` in the middle of ``_apply``
    (left over from a removed class in the diff). It referenced an undefined
    name and made the rest of the method unreachable, so it is removed here.
    """

    def __init__(self, graph, var2geop):
        super(GatherGradParser, self).__init__(graph, var2geop)
        self.parser_name = "gather_grad"

    def _apply(self):
        """Scatter the output gradient into a zero tensor shaped like ``x``.

        Inputs 0/1/2 are read as index, output gradient and ``x``.

        Returns:
            ``([tensor_zeros, x_grad], [[-1]])``.
        """
        index = self._get_ge_input(self.op.input_arg_names[0])
        out_grad = self._get_ge_input(self.op.input_arg_names[1])
        x = self._get_ge_input(self.op.input_arg_names[2])

        index_shape = self.op.block.var(self.op.input_arg_names[0]).shape

        if len(index_shape) == 1:
            # A 1-D index gets a trailing axis before being used as
            # scatter indices.
            index = core.GEOperatorFactory.create_operator(
                "unsqueeze" + self._accumulated_op_id(),
                "Unsqueeze").set_input("x", index).set_attr_vec_int32(
                    "axes", [1])

        tensor_zeros = core.GEOperatorFactory.create_operator(
            "zeroslike" + self._accumulated_op_id(),
            "ZerosLike").set_input("x", x)
        x_grad = core.GEOperatorFactory.create_operator(
            "scatter" + self._accumulated_op_id(),
            "TensorScatterUpdate").set_input("x", tensor_zeros).set_input(
                "indices", index).set_input("updates", out_grad)

        return [tensor_zeros, x_grad], [[-1]]
class TransposeGradParser(AscendParserBase):
    """Translate a Paddle ``transpose2_grad`` op into a GE ``TransposeD``."""

    def __init__(self, graph, var2geop):
        super(TransposeGradParser, self).__init__(graph, var2geop)
        self.parser_name = "transpose2_grad"

    def _apply(self):
        """Transpose the output gradient with the op's ``axis`` permutation.

        Returns:
            ``([x_grad], [[0]])``.

        Raises:
            AssertionError: if permuting the gradient's shape by ``axis`` does
                not reproduce the shape of input 1 with its first entry
                dropped.
        """
        arg_names = self.op.input_arg_names
        upstream = self._get_ge_input(arg_names[0])
        # Fetched as in the original; the GE handle itself is not used.
        self._get_ge_input(arg_names[1])
        perm = self.op.attr("axis")

        # The first entry of input 1's shape is skipped
        # (presumably an XShape placeholder -- TODO confirm).
        expected_shape = self.op.block.var(
            self.op.input_arg_names[1]).shape[1:]
        grad_shape = self.op.block.var(self.op.input_arg_names[0]).shape
        permuted = [grad_shape[axis] for axis in perm]
        assert permuted == list(expected_shape)

        node = core.GEOperatorFactory.create_operator(
            "transpose" + self._accumulated_op_id(), "TransposeD")
        node = node.set_input("x", upstream)
        node = node.set_attr_vec_int32("perm", perm)
        return [node], [[0]]
class LayerNormGradParser(AscendParserBase):
    """Translate ``layer_norm_grad`` into a GE ``LayerNormGrad`` plus casts."""

    def __init__(self, graph, var2geop):
        super(LayerNormGradParser, self).__init__(graph, var2geop)
        self.parser_name = "layer_norm_grad"

    def _apply(self):
        """Create the ``LayerNormGrad`` node and cast its three outputs.

        Inputs 0..5 are read as bias, mean, scale, variance, x and the output
        gradient. Outputs 0, 1 and 2 of the GE node are each passed through a
        ``Cast`` whose ``dst_type`` is 0 when the helper's dtype map yields 0
        for x's dtype, and 1 otherwise.

        Returns:
            ``([out_x_grad, out_scale_grad, out_bias_grad],
            [[2], [1], [0]])``.
        """
        arg_names = self.op.input_arg_names
        # bias is fetched as in the original but not wired into the GE node.
        self._get_ge_input(arg_names[0])
        mean = self._get_ge_input(arg_names[1])
        scale = self._get_ge_input(arg_names[2])
        variance = self._get_ge_input(arg_names[3])
        x = self._get_ge_input(arg_names[4])
        upstream = self._get_ge_input(arg_names[5])
        x_dtype = self.op.block.var(self.op.input_arg_names[4]).dtype

        factory = core.GEOperatorFactory
        ln_grad = factory.create_operator(
            self.parser_name + self._accumulated_op_id(), "LayerNormGrad")
        ln_grad = ln_grad.set_input("dy", upstream)
        ln_grad = ln_grad.set_input("x", x)
        ln_grad = ln_grad.set_input("variance", variance)
        ln_grad = ln_grad.set_input("mean", mean)
        ln_grad = ln_grad.set_input("gamma", scale)

        if self.ascend_helper.dtype2paddle_inv_map[str(x_dtype)] == 0:
            cast_dtype = 0
        else:
            cast_dtype = 1

        casted = []
        for output_index in (0, 1, 2):
            cast = factory.create_operator(
                "cast" + self._accumulated_op_id(), "Cast")
            cast = cast.set_input("x", ln_grad, output_index)
            cast = cast.set_attr_int32("dst_type", cast_dtype)
            casted.append(cast)

        return casted, [[2], [1], [0]]
class TanhGradParser(AscendParserBase):
    """Translate a Paddle ``tanh_grad`` op into a GE ``TanhGrad`` operator."""

    def __init__(self, graph, var2geop):
        super(TanhGradParser, self).__init__(graph, var2geop)
        self.parser_name = 'tanh_grad'

    def _apply(self):
        """Create the ``TanhGrad`` node.

        Input 0 feeds ``y`` and input 1 feeds ``dy``.

        Returns:
            ``([tanh_grad], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        forward_out = self._get_ge_input(arg_names[0])
        upstream = self._get_ge_input(arg_names[1])

        node = core.GEOperatorFactory.create_operator(
            "tanh_grad" + self._accumulated_op_id(), "TanhGrad")
        node = node.set_input("y", forward_out)
        node = node.set_input("dy", upstream)
        return [node], [[0]]
class LogGradParser(AscendParserBase):
    """Translate a Paddle ``log_grad`` op into a GE ``DivNoNan`` operator."""

    def __init__(self, graph, var2geop):
        super(LogGradParser, self).__init__(graph, var2geop)
        self.parser_name = 'log_grad'

    def _apply(self):
        """Divide the incoming gradient (input 0) by the forward input (input 1).

        Returns:
            ``([log_grad], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        upstream = self._get_ge_input(arg_names[0])
        forward_in = self._get_ge_input(arg_names[1])

        node = core.GEOperatorFactory.create_operator(
            "log_grad" + self._accumulated_op_id(), "DivNoNan")
        node = node.set_input("x1", upstream)
        node = node.set_input("x2", forward_in)
        return [node], [[0]]
class SqrtGradParser(AscendParserBase):
    """Translate a Paddle ``sqrt_grad`` op into a GE ``SqrtGrad`` operator."""

    def __init__(self, graph, var2geop):
        super(SqrtGradParser, self).__init__(graph, var2geop)
        self.parser_name = "sqrt_grad"

    def _apply(self):
        """Create the ``SqrtGrad`` node.

        Input 0 feeds ``y`` and input 1 feeds ``dy``.

        Returns:
            ``([sqrt_grad], [[0]])``. The original returned only
            ``[sqrt_grad]``; every other parser here returns an
            ``(operators, index_list)`` pair, so the index list is added for
            consistency.
        """
        y = self._get_ge_input(self.op.input_arg_names[0])
        out_grad = self._get_ge_input(self.op.input_arg_names[1])
        sqrt_grad = core.GEOperatorFactory.create_operator(
            "sqrt_grad" + self._accumulated_op_id(), "SqrtGrad").set_input(
                "y", y).set_input("dy", out_grad)
        return [sqrt_grad], [[0]]
class PowGradParser(AscendParserBase):
    """Translate a Paddle ``pow_grad`` op into GE operators.

    For ``out = x ** factor`` the gradient is
    ``grad * factor * x ** (factor - 1)``.
    """

    def __init__(self, graph, var2geop):
        super(PowGradParser, self).__init__(graph, var2geop)
        self.parser_name = "pow_grad"

    def _apply(self):
        """Build the power / multiply sub-graph for the gradient.

        Input 0 is the incoming gradient and input 1 is ``x``; the exponent
        comes from the ``factor`` attribute.

        Returns:
            ``([x_power_mul_factor_grad], [[0]])``.
        """
        grad = self._get_ge_input(self.op.input_arg_names[0])
        x = self._get_ge_input(self.op.input_arg_names[1])
        factor = self.op.attr("factor")

        # NOTE(review): this result is immediately overwritten below; the
        # call is kept only in case the helper has side effects -- confirm
        # and remove.
        shape_tensor = self._create_shape_tensor()
        shape_tensor = core.GEOperatorFactory.create_operator(
            "shape" + self._accumulated_op_id(), "Shape").set_input("x", x)

        # Broadcast the scalar factor to the runtime shape of x.
        factor_scale = self._create_ge_tensor([1], 5, factor)
        factor_scale = core.GEOperatorFactory.create_operator(
            "const" + self._accumulated_op_id(), "Const").set_attr_tensor(
                "value", factor_scale)
        factor_tensor = core.GEOperatorFactory.create_operator(
            "broadcast_to_d" + self._accumulated_op_id(),
            "BroadcastTo").set_input("x", factor_scale).set_input(
                "shape", shape_tensor)

        # x ** (factor - 1)
        x_power = core.GEOperatorFactory.create_operator(
            "x_power" + self._accumulated_op_id(), "Power").set_input(
                "x", x).set_attr_float("power", factor - 1)
        # Fix: multiply the factor by x ** (factor - 1). The original fed
        # plain ``x`` here and left ``x_power`` unused.
        x_power_mul_factor = core.GEOperatorFactory.create_operator(
            "x_power_mul_factor" + self._accumulated_op_id(),
            "Mul").set_input("x1", x_power).set_input("x2", factor_tensor)
        x_power_mul_factor_grad = core.GEOperatorFactory.create_operator(
            "x_power_mul_factor_grad" + self._accumulated_op_id(),
            "Mul").set_input("x1", x_power_mul_factor).set_input("x2", grad)

        return [x_power_mul_factor_grad], [[0]]
class GeluGradParser(AscendParserBase):
    """Translate a Paddle ``gelu_grad`` op into GE ``Gelu`` + ``GeluGrad``."""

    def __init__(self, graph, var2geop):
        super(GeluGradParser, self).__init__(graph, var2geop)
        self.parser_name = "gelu_grad"

    def _apply(self):
        """Recompute the forward GELU and feed it to ``GeluGrad``.

        Input 0 is the incoming gradient and input 1 is ``x``.

        Returns:
            ``([gelu_grad], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        upstream = self._get_ge_input(arg_names[0])
        x = self._get_ge_input(arg_names[1])

        factory = core.GEOperatorFactory

        # GeluGrad takes the forward output as its ``y`` input.
        forward = factory.create_operator(
            "gelu" + self._accumulated_op_id(), "Gelu")
        forward = forward.set_input("x", x)

        node = factory.create_operator(
            "gelu_grad" + self._accumulated_op_id(), "GeluGrad")
        node = node.set_input("x", x)
        node = node.set_input("dy", upstream)
        node = node.set_input("y", forward)
        return [node], [[0]]
class MeanGradParser(AscendParserBase):
    """Translate a Paddle ``mean_grad`` op into GE operators.

    The graph sums a ones-like tensor of ``x``, raises that sum to the power
    -1 and multiplies the result by the incoming gradient.
    """

    def __init__(self, graph, var2geop):
        super(MeanGradParser, self).__init__(graph, var2geop)
        self.parser_name = "mean_grad"

    def _apply(self):
        """Build the ones / reduce-sum / reciprocal / multiply chain.

        Input 0 is the incoming gradient and input 1 is ``x``.

        Returns:
            ``([mean_grad], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        upstream = self._get_ge_input(arg_names[0])
        x = self._get_ge_input(arg_names[1])

        def new_op(prefix, op_type):
            return core.GEOperatorFactory.create_operator(
                prefix + self._accumulated_op_id(), op_type)

        ones = new_op("one_tensor", "OnesLike")
        ones = ones.set_input("x", x)

        total = new_op("mean", "ReduceSumD")
        total = total.set_input("x", ones)
        total = total.set_attr_bool("keep_dims", False)
        total = total.set_attr_vec_int32("axes", [])

        reciprocal = new_op("x_power", "Power")
        reciprocal = reciprocal.set_input("x", total)
        reciprocal = reciprocal.set_attr_float("power", -1)

        node = new_op("mean_grad", "Mul")
        node = node.set_input("x1", reciprocal)
        node = node.set_input("x2", upstream)
        return [node], [[0]]
class SliceGradParser(AscendParserBase):
    """Translate a Paddle ``slice_grad`` op into a GE ``PadD`` operator.

    The incoming gradient is padded so that the sliced region sits at its
    original offsets.
    """

    def __init__(self, graph, var2geop):
        super(SliceGradParser, self).__init__(graph, var2geop)
        self.parser_name = "slice_grad"

    def _apply(self):
        """Compute per-dimension paddings and create the ``PadD`` node.

        Input 0 is ``x`` and input 1 is the incoming gradient; ``axes``,
        ``starts`` and ``ends`` come from the op attributes.

        Returns:
            ``([slice_value], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        # x is fetched as in the original; only its static shape is used.
        self._get_ge_input(arg_names[0])
        upstream = self._get_ge_input(arg_names[1])
        axes = self.op.attr("axes")
        starts = self.op.attr("starts")
        ends = self.op.attr("ends")

        x_shape = self.op.block.var(self.op.input_arg_names[0]).shape
        # Lookup retained from the original; its value is unused.
        self.op.block.var(self.op.input_arg_names[1]).shape

        pad_before, pad_after = [], []
        cursor = 0  # position in starts/ends of the next sliced axis
        for dim in range(len(x_shape)):
            if dim in axes:
                pad_before.append(starts[cursor])
                if ends[cursor] <= x_shape[dim]:
                    pad_after.append(x_shape[dim] - ends[cursor])
                else:
                    pad_after.append(0)
                cursor += 1
            else:
                pad_before.append(0)
                pad_after.append(0)

        # The first dimension is never padded.
        pad_before[0] = 0
        pad_after[0] = 0

        paddings = [list(pair) for pair in zip(pad_before, pad_after)]

        node = core.GEOperatorFactory.create_operator(
            "slice_grad" + self._accumulated_op_id(), "PadD")
        node = node.set_input("x", upstream)
        node = node.set_attr_vec_vec_int64("paddings", paddings)
        return [node], [[0]]
class LookUpTableGradParser(AscendParserBase):
    """Translate ``lookup_table_grad`` into a GE ``TensorScatterAdd``."""

    def __init__(self, graph, var2geop):
        super(LookUpTableGradParser, self).__init__(graph, var2geop)
        self.parser_name = "lookup_table_grad"

    def _apply(self):
        """Scatter-add the flattened gradient into zeros shaped like the table.

        Inputs 0/1/2 are read as ids, incoming gradient and embedding table.

        Returns:
            ``([embedding_grad], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        ids = self._get_ge_input(arg_names[0])
        upstream = self._get_ge_input(arg_names[1])
        table = self._get_ge_input(arg_names[2])

        # Shape lookups retained from the original; their values are unused.
        for position in (0, 1, 2):
            self.op.block.var(self.op.input_arg_names[position]).shape

        def new_op(prefix, op_type):
            return core.GEOperatorFactory.create_operator(
                prefix + self._accumulated_op_id(), op_type)

        def flatten_leading(source):
            # FlattenV2 over axes 0..1.
            node = new_op("flatten", "FlattenV2")
            node = node.set_input("x", source)
            node = node.set_attr_int32("axis", 0)
            return node.set_attr_int32("end_axis", 1)

        flat_ids = flatten_leading(ids)
        flat_grad = flatten_leading(upstream)

        zeros = new_op("zeroslike", "ZerosLike")
        zeros = zeros.set_input("x", table)

        table_grad = new_op("scatteradd", "TensorScatterAdd")
        table_grad = table_grad.set_input("x", zeros)
        table_grad = table_grad.set_input("indices", flat_ids)
        table_grad = table_grad.set_input("updates", flat_grad)

        return [table_grad], [[0]]
class SGDParser(AscendParserBase):
    """Translate a Paddle ``sgd`` op into a GE ``ApplyGradientDescent``."""

    def __init__(self, graph, var2geop):
        super(SGDParser, self).__init__(graph, var2geop)
        self.parser_name = "sgd"

    def _apply(self):
        """Create the gradient-descent update node.

        Inputs 0/1/2 are read as gradient, learning rate and parameter.

        Returns:
            ``([sgd], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        grad = self._get_ge_input(arg_names[0])
        learning_rate = self._get_ge_input(arg_names[1])
        param = self._get_ge_input(arg_names[2])

        # The node name prefix is "momentum" in the original and is kept.
        node = core.GEOperatorFactory.create_operator(
            "momentum" + self._accumulated_op_id(), "ApplyGradientDescent")
        node = node.set_input("var", param)
        node = node.set_input("alpha", learning_rate)
        node = node.set_input("delta", grad)
        return [node], [[0]]
class AdamParser(AscendParserBase):
    """Translate a Paddle ``adam`` op into a GE ``ApplyAdam`` operator."""

    def __init__(self, graph, var2geop):
        super(AdamParser, self).__init__(graph, var2geop)
        self.parser_name = "adam"

    def _apply(self):
        """Create constant nodes for the hyper-parameters and the update node.

        Inputs 0..6 are read as beta1_power, beta2_power, grad, learning
        rate, moment1, moment2 and param; ``beta1``, ``beta2`` and
        ``epsilon`` come from the op attributes.

        Returns:
            ``([adam], [[0]])``.
        """
        arg_names = self.op.input_arg_names
        beta1_power = self._get_ge_input(arg_names[0])
        beta2_power = self._get_ge_input(arg_names[1])
        grad = self._get_ge_input(arg_names[2])
        learning_rate = self._get_ge_input(arg_names[3])
        moment1 = self._get_ge_input(arg_names[4])
        moment2 = self._get_ge_input(arg_names[5])
        param = self._get_ge_input(arg_names[6])

        hyper_values = [
            self.op.attr('beta1'),
            self.op.attr('beta2'),
            self.op.attr('epsilon'),
        ]

        factory = core.GEOperatorFactory

        # Wrap each scalar hyper-parameter in a one-element Const node.
        hyper_consts = []
        for value in hyper_values:
            const = factory.create_operator(
                "const" + self._accumulated_op_id(), "Const")
            const = const.set_attr_tensor(
                "value", self._create_ge_tensor([1], 5, value))
            hyper_consts.append(const)
        beta1, beta2, epsilon = hyper_consts

        node = factory.create_operator(
            "adam" + self._accumulated_op_id(), "ApplyAdam")
        wiring = [
            ("var", param),
            ("m", moment1),
            ("v", moment2),
            ("beta1_power", beta1_power),
            ("beta2_power", beta2_power),
            ("lr", learning_rate),
            ("beta1", beta1),
            ("beta2", beta2),
            ("epsilon", epsilon),
            ("grad", grad),
        ]
        for slot, source in wiring:
            node = node.set_input(slot, source)

        return [node], [[0]]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录