Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
aeffccb7
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
aeffccb7
编写于
5月 25, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
5月 25, 2020
浏览文件
操作
浏览文件
下载
差异文件
!1402 for second order codes
Merge pull request !1402 from zongha/master
上级
0e665616
fcd4de6f
变更
18
显示空白变更内容
内联
并排
Showing
18 changed file
with
1331 addition
and
345 deletion
+1331
-345
example/resnet50_imagenet2012_THOR/config.py
example/resnet50_imagenet2012_THOR/config.py
+0
-0
example/resnet50_imagenet2012_THOR/lr_generator.py
example/resnet50_imagenet2012_THOR/lr_generator.py
+0
-126
example/resnet50_imagenet2012_THOR/model/dataset_helper.py
example/resnet50_imagenet2012_THOR/model/dataset_helper.py
+17
-90
example/resnet50_imagenet2012_THOR/model/model_thor.py
example/resnet50_imagenet2012_THOR/model/model_thor.py
+162
-66
example/resnet50_imagenet2012_THOR/model/resnet.py
example/resnet50_imagenet2012_THOR/model/resnet.py
+24
-24
example/resnet50_imagenet2012_THOR/run_distribute_train.sh
example/resnet50_imagenet2012_THOR/run_distribute_train.sh
+1
-1
example/resnet50_imagenet2012_THOR/train.py
example/resnet50_imagenet2012_THOR/train.py
+27
-38
mindspore/ops/_op_impl/custom_op/batch_matmul_impl.py
mindspore/ops/_op_impl/custom_op/batch_matmul_impl.py
+76
-0
mindspore/ops/_op_impl/custom_op/cholesky_trsm.py
mindspore/ops/_op_impl/custom_op/cholesky_trsm.py
+64
-0
mindspore/ops/_op_impl/custom_op/fused_abs_max1.py
mindspore/ops/_op_impl/custom_op/fused_abs_max1.py
+69
-0
mindspore/ops/_op_impl/custom_op/img2col_impl.py
mindspore/ops/_op_impl/custom_op/img2col_impl.py
+87
-0
mindspore/ops/_op_impl/custom_op/matmul_cube_dense_left.py
mindspore/ops/_op_impl/custom_op/matmul_cube_dense_left.py
+101
-0
mindspore/ops/_op_impl/custom_op/matmul_cube_fracz_left_cast_impl.py
...ps/_op_impl/custom_op/matmul_cube_fracz_left_cast_impl.py
+102
-0
mindspore/ops/_op_impl/custom_op/matmul_cube_fracz_right_mul_impl.py
...ps/_op_impl/custom_op/matmul_cube_fracz_right_mul_impl.py
+113
-0
mindspore/ops/_op_impl/custom_op/matmul_cube_impl.py
mindspore/ops/_op_impl/custom_op/matmul_cube_impl.py
+114
-0
mindspore/ops/_op_impl/custom_op/matrix_combine_impl.py
mindspore/ops/_op_impl/custom_op/matrix_combine_impl.py
+63
-0
mindspore/ops/_op_impl/custom_op/transpose02314_impl.py
mindspore/ops/_op_impl/custom_op/transpose02314_impl.py
+63
-0
mindspore/ops/operations/thor_ops.py
mindspore/ops/operations/thor_ops.py
+248
-0
未找到文件。
example/resnet50_imagenet2012_THOR/config
_imagenet
.py
→
example/resnet50_imagenet2012_THOR/config.py
浏览文件 @
aeffccb7
文件已移动
example/resnet50_imagenet2012_THOR/lr_generator.py
已删除
100644 → 0
浏览文件 @
0e665616
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import
math
import
numpy
as
np
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Return the linearly warmed-up learning rate for ``current_step``.

    The rate starts at ``init_lr`` (for ``current_step == 0``) and grows by a
    constant increment per step, reaching ``base_lr`` when
    ``current_step == warmup_steps``.

    Args:
        current_step: step index the rate is computed for.
        warmup_steps: number of steps over which the rate ramps up; a value
            of 0 raises ``ZeroDivisionError``.
        base_lr: rate reached at the end of the warm-up.
        init_lr: rate at step 0.

    Returns:
        float, the learning rate for ``current_step``.
    """
    start = float(init_lr)
    per_step_gain = (float(base_lr) - start) / float(warmup_steps)
    return start + per_step_gain * current_step
def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
    """Build a per-step schedule: linear warm-up followed by cosine decay.

    During the first ``warmup_epochs * steps_per_epoch`` steps the rate ramps
    linearly from 0 towards ``lr`` via ``linear_warmup_lr``. Afterwards the
    rate is ``lr * 0.5 * (1 + cos(pi * step / decay_steps))``, where ``step``
    is the absolute step index and ``decay_steps`` is the number of steps
    remaining after the warm-up.

    Args:
        lr: peak learning rate.
        steps_per_epoch: number of steps in one epoch.
        warmup_epochs: number of warm-up epochs.
        max_epoch: total number of epochs.
        T_max: accepted for interface compatibility; not used here.
        eta_min: accepted for interface compatibility; not used here.
        num_periods: accepted for interface compatibility; not used here.

    Returns:
        np.ndarray of dtype float32 with one learning rate per step.
    """
    peak_lr = lr
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    decay_steps = total_steps - warmup_steps

    def rate_at(step):
        # Warm-up uses the 1-based step so the final warm-up step hits peak_lr.
        if step < warmup_steps:
            return linear_warmup_lr(step + 1, warmup_steps, peak_lr, 0)
        return peak_lr * (0.5 * (1 + math.cos(math.pi * step / decay_steps)))

    schedule = [rate_at(step) for step in range(total_steps)]
    return np.array(schedule).astype(np.float32)
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
    """Build a per-step schedule: linear warm-up, then linear-times-cosine decay.

    The schedule covers ``int(max_epoch * steps_per_epoch * 0.99)`` steps.
    Warm-up steps ramp linearly from 0 towards ``lr`` via
    ``linear_warmup_lr``. Every later step uses
    ``lr * linear * cosine + 0.000005`` with
    ``linear = (total_steps - step) / decay_steps`` and
    ``cosine = 0.5 * (1 + cos(2 * pi * num_periods * step / decay_steps))``,
    where ``step`` is the absolute step index.

    Args:
        lr: peak learning rate.
        steps_per_epoch: number of steps in one epoch.
        warmup_epochs: number of warm-up epochs.
        max_epoch: total number of epochs.
        T_max: accepted for interface compatibility; not used here.
        eta_min: accepted for interface compatibility; not used here.
        num_periods: number of cosine periods spanned by the decay phase.

    Returns:
        np.ndarray of dtype float32 with one learning rate per step.
    """
    peak_lr = lr
    total_steps = int(max_epoch * steps_per_epoch * 0.99)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    decay_steps = total_steps - warmup_steps

    schedule = []
    for step in range(total_steps):
        if step < warmup_steps:
            schedule.append(linear_warmup_lr(step + 1, warmup_steps, peak_lr, 0))
            continue
        linear_part = (total_steps - step) / decay_steps
        cosine_part = 0.5 * (1 + math.cos(math.pi * 2 * num_periods * step / decay_steps))
        # The small constant keeps the rate strictly positive at the end of decay.
        schedule.append(peak_lr * (linear_part * cosine_part) + 0.000005)
    return np.array(schedule).astype(np.float32)
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """
    Generate the learning rate array for the remaining training steps.

    Three schedules are available, selected by ``lr_decay_mode``:

    - ``'steps'``: ``lr_max`` for the first 30% of the steps, then multiplied
      by 0.1, 0.01 and 0.001 after 30%, 60% and 80% of the steps. Warm-up,
      ``lr_init`` and ``lr_end`` are not used in this mode.
    - ``'poly'``: linear warm-up from ``lr_init`` to ``lr_max``, then a
      quadratic decay towards 0 (clamped at 0.0).
    - anything else: linear warm-up from ``lr_init`` to ``lr_max``, then a
      linear decay from ``lr_max`` towards ``lr_end``.

    Args:
        global_step(int): index of the first step to return rates for
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch
        lr_decay_mode(string): learning rate decay mode, including steps, poly or default

    Returns:
        np.array, float32 learning rates from ``global_step`` to the last step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    schedule = []

    if lr_decay_mode == 'steps':
        first_drop, second_drop, third_drop = 0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps
        for step in range(total_steps):
            if step < first_drop:
                schedule.append(lr_max)
            elif step < second_drop:
                schedule.append(lr_max * 0.1)
            elif step < third_drop:
                schedule.append(lr_max * 0.01)
            else:
                schedule.append(lr_max * 0.001)
    elif lr_decay_mode == 'poly':
        # Without warm-up steps there is nothing to ramp, so the increment is 0.
        warmup_gain = (float(lr_max) - float(lr_init)) / float(warmup_steps) if warmup_steps != 0 else 0
        for step in range(total_steps):
            if step < warmup_steps:
                rate = float(lr_init) + warmup_gain * float(step)
            else:
                remaining = 1.0 - (float(step) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps))
                rate = float(lr_max) * remaining * remaining
                if rate < 0.0:
                    rate = 0.0
            schedule.append(rate)
    else:
        for step in range(total_steps):
            if step < warmup_steps:
                rate = lr_init + (lr_max - lr_init) * step / warmup_steps
            else:
                rate = lr_max - (lr_max - lr_end) * (step - warmup_steps) / (total_steps - warmup_steps)
            schedule.append(rate)

    # Drop the rates of the steps that were already executed.
    return np.array(schedule).astype(np.float32)[global_step:]
example/resnet50_imagenet2012_THOR/model/dataset_helper.py
浏览文件 @
aeffccb7
...
...
@@ -13,12 +13,10 @@
# limitations under the License.
# ============================================================================
"""Dataset help for minddata dataset"""
from
mindspore
import
context
from
mindspore._checkparam
import
check_bool
from
mindspore.nn.wrap
import
GetNextSingleOp
from
mindspore.parallel._utils
import
_get_device_num
,
_get_global_rank
,
_get_parallel_mode
from
mindspore.train._utils
import
_exec_datagraph
,
_get_types_and_shapes
,
_to_tensor
,
\
_construct_tensor_list
,
_to_full_shapes
,
_to_full_tensor
from
mindspore.parallel._utils
import
_get_device_num
,
_get_parallel_mode
from
mindspore.train._utils
import
_exec_datagraph
,
_get_types_and_shapes
,
\
_to_full_shapes
from
mindspore.train.parallel_utils
import
ParallelMode
...
...
@@ -42,19 +40,9 @@ class DatasetHelper:
>>> outputs = network(*inputs)
"""
def
__init__
(
self
,
dataset
,
first_order_iter
=
0
,
dataset_sink_mode
=
True
):
def
__init__
(
self
,
dataset
,
dataset_sink_mode
=
True
,
iter_first_order
=
0
):
check_bool
(
dataset_sink_mode
)
iterclass
=
_DatasetIterGE
if
not
dataset_sink_mode
:
iterclass
=
_DatasetIterFeed
elif
not
context
.
get_context
(
"enable_ge"
):
if
context
.
get_context
(
"enable_loop_sink"
):
iterclass
=
_DatasetIterMSLoopSink
else
:
iterclass
=
_DatasetIterMS
self
.
iter
=
iterclass
(
dataset
,
first_order_iter
)
self
.
iter
=
_DatasetIterMSLoopSink
(
dataset
,
iter_first_order
)
def
__iter__
(
self
):
return
self
.
iter
.
__iter__
()
...
...
@@ -85,12 +73,6 @@ class _DatasetIter:
self
.
dataset
=
dataset
dataset_types
,
dataset_shapes
=
_get_types_and_shapes
(
dataset
)
self
.
dataset_types
,
self
.
dataset_shapes
=
dataset_types
,
dataset_shapes
# for self._parallel_mode equal to semi_auto_parallel or auto_parallel, use a complete tensor to
# compile, and slice tensor to run. The batch dimension of tensors for compile is device_number
# times the batch dimension of tensors for run
if
_get_parallel_mode
()
in
(
ParallelMode
.
SEMI_AUTO_PARALLEL
,
ParallelMode
.
AUTO_PARALLEL
):
device_num
=
_get_device_num
()
self
.
dataset_shapes
=
_to_full_shapes
(
dataset_shapes
,
device_num
)
def
__iter__
(
self
):
self
.
ind
=
0
...
...
@@ -109,83 +91,28 @@ class _DatasetIter:
loop_count
=
1
if
hasattr
(
dataset
,
'__loop_size__'
):
loop_size
=
dataset
.
__loop_size__
if
dataset
.
get_dataset_size
()
%
loop_size
!=
0
:
raise
ValueError
(
f
'Dataset size
{
dataset
.
get_dataset_size
()
}
and '
f
'loop_size
{
loop_size
}
are not matched.'
)
loop_count
=
int
(
dataset
.
get_dataset_size
()
/
loop_size
)
return
loop_count
class
_DatasetIterMSLoopSink
(
_DatasetIter
):
"""Iter for context (
enable_loop_sink=True
)"""
"""Iter for context (
device_target=Ascend
)"""
def
__init__
(
self
,
dataset
,
first_order_it
er
):
def
__init__
(
self
,
dataset
,
iter_first_ord
er
):
super
(
_DatasetIterMSLoopSink
,
self
).
__init__
(
dataset
)
# self.loop_count = self.get_loop_count(dataset)
loop_size
=
dataset
.
__loop_size__
+
first_order_iter
loop_size
=
dataset
.
__loop_size__
+
iter_first_order
self
.
loop_count
=
int
(
dataset
.
get_dataset_size
()
/
loop_size
)
*
2
# for self._parallel_mode equal to semi_auto_parallel or auto_parallel, use a complete tensor to
# compile, and slice tensor to run. The batch dimension of tensors for compile is device_number
# times the batch dimension of tensors for run. Now only support LoopSink.
if
_get_parallel_mode
()
in
(
ParallelMode
.
SEMI_AUTO_PARALLEL
,
ParallelMode
.
AUTO_PARALLEL
):
device_num
=
_get_device_num
()
self
.
dataset_shapes
=
_to_full_shapes
(
self
.
dataset_shapes
,
device_num
)
def
op
():
return
tuple
()
self
.
op
=
op
class _DatasetIterMS(_DatasetIter):
    """Iter for context (enable_loop_sink=False)"""

    def __init__(self, dataset, first_order_order):
        # `first_order_order` is accepted so every iterator class can be
        # constructed with the same two arguments; it is not used here.
        super().__init__(dataset)
        self.loop_count = dataset.get_dataset_size()
        self.loop_size = 1
        # The get-next op is keyed by the dataset's `__ME_INITED__` value.
        self.op = GetNextSingleOp(self.dataset_types, self.dataset_shapes, dataset.__ME_INITED__)
class _DatasetIterGE(_DatasetIter):
    """Iter for ge"""

    def __init__(self, dataset, first_order_iter=0):
        """Prepare the placeholder tensors that are fed to the GE backend.

        Args:
            dataset: dataset to iterate over.
            first_order_iter: accepted and ignored. ``DatasetHelper`` builds
                every iterator class as ``iterclass(dataset, first_order_iter)``,
                so this class must tolerate the second argument like its
                siblings do instead of raising ``TypeError``.
        """
        super(_DatasetIterGE, self).__init__(dataset)
        self.loop_count = self.get_loop_count(dataset)
        parallel_mode = _get_parallel_mode()
        self.need_to_full = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
        # In (semi) auto parallel mode the batch dimension is expanded by the device number.
        batch_expand_num = _get_device_num() if self.need_to_full else 1
        tensor_list_run = _construct_tensor_list(self.dataset_types, self.dataset_shapes, batch_expand_num)

        def op():
            return tensor_list_run

        self.op = op
class _DatasetIterFeed:
    """Iter for feed data"""

    def __init__(self, dataset, first_order_order):
        # `first_order_order` is accepted so every iterator class can be
        # constructed with the same two arguments; it is not used here.
        self.dataset = dataset
        self.device_num = _get_device_num()
        self.global_rank = _get_global_rank()
        self.repeat_count = dataset.get_repeat_count()
        self.repeat_ind = 0
        self.loop_count = dataset.get_dataset_size()
        self.ind = 0

        auto_parallel_modes = (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
        self.need_to_full = context.get_auto_parallel_context("parallel_mode") in auto_parallel_modes

    def __iter__(self):
        # A fresh dataset iterator is created only once every `repeat_count` passes.
        start_new_pass = self.repeat_ind % self.repeat_count == 0
        if start_new_pass:
            self.iter = self.dataset.__iter__()

        self.repeat_ind += 1
        self.ind = 0
        return self

    def __next__(self):
        # Stop after `loop_count` elements, even if the underlying iterator has more.
        if self.ind >= self.loop_count:
            raise StopIteration()
        self.ind += 1
        element = self.iter.__next__()
        if not self.need_to_full:
            return _to_tensor(element)
        return _to_full_tensor(element, self.device_num, self.global_rank)
example/resnet50_imagenet2012_THOR/model/model_thor.py
浏览文件 @
aeffccb7
...
...
@@ -13,8 +13,11 @@
# limitations under the License.
# ============================================================================
"""Model."""
import
numpy
as
np
from
mindspore
import
context
from
mindspore
import
log
as
logger
from
mindspore
import
nn
from
mindspore._c_expression
import
init_exec_dataset
from
mindspore._checkparam
import
check_input_data
,
check_output_data
,
check_int_positive
,
check_bool
from
mindspore.common
import
dtype
as
mstype
...
...
@@ -28,9 +31,9 @@ from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_
from
mindspore.train
import
amp
from
mindspore.train.callback
import
_InternalCallbackParam
,
RunContext
,
_build_callbacks
from
mindspore.train.parallel_utils
import
ParallelMode
import
mindspore.nn
as
nn
from
second_order
.dataset_helper
import
DatasetHelper
import
numpy
as
np
from
model
.dataset_helper
import
DatasetHelper
def
_convert_type
(
types
):
"""
...
...
@@ -69,7 +72,8 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
dataset_types
,
dataset_shapes
,
input_indexs
,
phase
=
phase
)
phase
=
phase
,
need_run
=
False
)
class
Model
:
...
...
@@ -123,7 +127,7 @@ class Model:
>>> return out
>>>
>>> net = Net()
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> loss = nn.SoftmaxCrossEntropyWithLogits(
is_grad=False, sparse=True
)
>>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
>>> dataset = get_dataset()
...
...
@@ -131,30 +135,36 @@ class Model:
"""
def
__init__
(
self
,
network
,
loss_fn
=
None
,
optimizer
=
None
,
metrics
=
None
,
eval_network
=
None
,
eval_indexes
=
None
,
amp_level
=
"O0"
,
frequency
=
278
,
**
kwargs
):
eval_indexes
=
None
,
amp_level
=
"O0"
,
frequency
=
278
,
stop_epoch
=
100
,
**
kwargs
):
self
.
_network
=
network
self
.
_loss_fn
=
loss_fn
self
.
_optimizer
=
optimizer
self
.
_loss_scale_manager
=
None
self
.
_loss_scale_manager_set
=
False
self
.
_keep_bn_fp32
=
True
self
.
_frequency
=
frequency
self
.
_check_kwargs
(
kwargs
)
if
'keep_batchnorm_fp32'
in
kwargs
:
self
.
_keep_bn_fp32
=
kwargs
[
'keep_batchnorm_fp32'
]
if
'loss_scale_manager'
in
kwargs
:
self
.
_loss_scale_manager
=
kwargs
[
'loss_scale_manager'
]
self
.
_loss_scale_manager_set
=
True
self
.
_amp_level
=
amp_level
self
.
_process_amp_args
(
kwargs
)
self
.
_parallel_mode
=
_get_parallel_mode
()
self
.
_device_number
=
_get_device_num
()
self
.
_global_rank
=
_get_global_rank
()
self
.
_parameter_broadcast
=
_get_parameter_broadcast
()
self
.
_frequency
=
frequency
self
.
_stop_epoch
=
stop_epoch
self
.
_train_network
=
self
.
_build_train_network
()
self
.
_build_eval_network
(
metrics
,
eval_network
,
eval_indexes
)
self
.
_build_predict_network
()
def _process_amp_args(self, kwargs):
    """Derive the mixed-precision settings from ``amp_level`` and ``kwargs``.

    With amp level "O0", fp32 batch norm is switched off first. An explicit
    ``keep_batchnorm_fp32`` entry in ``kwargs`` then overrides that value, and
    a ``loss_scale_manager`` entry is stored and flagged as user-provided.
    """
    keep_bn_key, manager_key = 'keep_batchnorm_fp32', 'loss_scale_manager'
    if self._amp_level == "O0":
        self._keep_bn_fp32 = False
    if keep_bn_key in kwargs:
        self._keep_bn_fp32 = kwargs[keep_bn_key]
    if manager_key in kwargs:
        self._loss_scale_manager, self._loss_scale_manager_set = kwargs[manager_key], True
def
_check_kwargs
(
self
,
kwargs
):
for
arg
in
kwargs
:
if
arg
not
in
[
'loss_scale_manager'
,
'keep_batchnorm_fp32'
]:
...
...
@@ -180,6 +190,9 @@ class Model:
elif
self
.
_loss_fn
:
network
=
nn
.
WithLossCell
(
network
,
self
.
_loss_fn
)
# If need to check if loss_fn is not None, but optimizer is None
if
self
.
_parallel_mode
in
(
ParallelMode
.
SEMI_AUTO_PARALLEL
,
ParallelMode
.
AUTO_PARALLEL
):
network
.
set_auto_parallel
()
return
network
def
_build_eval_network
(
self
,
metrics
,
eval_network
,
eval_indexes
):
...
...
@@ -198,14 +211,18 @@ class Model:
else
:
if
self
.
_loss_fn
is
None
:
raise
ValueError
(
"loss_fn can not be None."
)
self
.
_eval_network
=
nn
.
WithEvalCell
(
self
.
_network
,
self
.
_loss_fn
)
self
.
_eval_network
=
nn
.
WithEvalCell
(
self
.
_network
,
self
.
_loss_fn
,
self
.
_amp_level
==
"O2"
)
self
.
_eval_indexes
=
[
0
,
1
,
2
]
if
self
.
_parallel_mode
in
(
ParallelMode
.
SEMI_AUTO_PARALLEL
,
ParallelMode
.
AUTO_PARALLEL
):
self
.
_eval_network
.
set_auto_parallel
()
def _build_predict_network(self):
    """Build the network for prediction."""
    # Default: predict with the bare network.
    self._predict_network = self._network
    auto_parallel_modes = (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL)
    if self._parallel_mode not in auto_parallel_modes:
        return
    # In (semi) auto parallel mode the network is wrapped and marked for auto parallel.
    self._predict_network = _VirtualDatasetCell(self._network)
    self._predict_network.set_auto_parallel()
def
_clear_metrics
(
self
):
"""Clear metrics local values."""
...
...
@@ -246,6 +263,94 @@ class Model:
scaling_sens
/=
self
.
_device_number
return
scaling_sens
def _exec_preprocess(self, network, is_train, phase, dataset, dataset_sink_mode, iter_first_order=0):
    """Create the dataset helper and, if needed, wrap the network for data sinking.

    Args:
        network: network that will consume the dataset.
        is_train (bool): whether the network is prepared for training.
        phase (str): phase name assigned to the wrapped network.
        dataset: dataset to build the helper for.
        dataset_sink_mode (bool): whether data is sunk to the device.
        iter_first_order (int): forwarded to ``DatasetHelper``. Defaults to 0,
            the same default ``DatasetHelper`` uses, so callers that do not
            deal with first-order iterations (pre-init, evaluation, non-sink
            training) can omit it instead of failing with ``TypeError``.

    Returns:
        tuple, the dataset helper and the (possibly wrapped) network.
    """
    need_wrap = False
    if dataset_sink_mode:
        # remove later to deal with loop sink
        if not hasattr(dataset, '__ME_INITED__') and context.get_context("device_target") == "Ascend" \
                and not context.get_context("enable_ge"):
            need_wrap = True

        if not is_train:
            dataset.__loop_size__ = 1

    dataset_helper = DatasetHelper(dataset, dataset_sink_mode, iter_first_order)

    # remove later to deal with loop sink
    if need_wrap:
        # `__ME_INITED__` was absent before the helper was built, so it is
        # presumably set while constructing the helper -- TODO confirm.
        network = nn.DataWrapper(network, *(dataset_helper.types_shapes()), dataset.__ME_INITED__)
        network.set_train(is_train)
        network.phase = phase

    return dataset_helper, network
def init(self, train_dataset=None, valid_dataset=None):
    """
    Initializes compute graphs and data graphs with sink mode.

    For every dataset that is given, the matching network is prepared through
    `_exec_preprocess` in sink mode and compiled on the first batch produced
    by the dataset helper.

    Note:
        Pre-init process only supports `GRAPH_MODE` and `Ascend` target currently.

    Args:
        train_dataset (Dataset): A training dataset iterator. If define `train_dataset`, training graphs will be
                                 initialized. Default: None.
        valid_dataset (Dataset): A evaluating dataset iterator. If define `valid_dataset`, evaluation graphs will
                                 be initialized, and `metrics` in `Model` can not be None. Default: None.

    Raises:
        RuntimeError: If the context is not `GRAPH_MODE` on `Ascend`, or if `valid_dataset` is given
                      without metric functions.
        ValueError: If both datasets are None or empty.

    Examples:
        >>> train_dataset = get_train_dataset()
        >>> valid_dataset = get_valid_dataset()
        >>> net = Net()
        >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
        >>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
        >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics={'acc'})
        >>> model.init(train_dataset, valid_dataset)
        >>> model.train(2, train_dataset)
        >>> model.eval(valid_dataset)
    """
    pre_init_supported = (context.get_context("mode") == context.GRAPH_MODE
                          and context.get_context("device_target") == "Ascend")
    if not pre_init_supported:
        raise RuntimeError('Pre-init process only supports GRAPH MODE and Ascend target currently.')

    if not (train_dataset or valid_dataset):
        raise ValueError('Both train_dataset and valid_dataset can not be None or empty.')

    _device_number_check(self._parallel_mode, self._device_number)

    if train_dataset:
        _parameter_broadcast_check(self._parallel_mode, self._parameter_broadcast)
        self._train_network.set_train()
        self._train_network.phase = 'train'

        if self._parameter_broadcast:
            self._train_network.set_broadcast_flag()

        # NOTE(review): `iter_first_order` is not passed here -- confirm that
        # `_exec_preprocess` provides a default for it.
        train_helper, prepared_train_net = self._exec_preprocess(self._train_network,
                                                                 is_train=True,
                                                                 phase='train',
                                                                 dataset=train_dataset,
                                                                 dataset_sink_mode=True)
        self._train_network = prepared_train_net
        # Only the first batch is needed to trigger graph compilation.
        for inputs in train_helper:
            self._train_network.compile(*inputs)
            break

    if valid_dataset:
        if not self._metric_fns:
            raise RuntimeError('If define `valid_dataset`, metric fn can not be None or empty.')

        self._eval_network.set_train(False)
        self._eval_network.phase = 'eval'
        # NOTE(review): `iter_first_order` is not passed here either -- see above.
        valid_helper, prepared_eval_net = self._exec_preprocess(self._eval_network,
                                                                is_train=False,
                                                                phase='eval',
                                                                dataset=valid_dataset,
                                                                dataset_sink_mode=True)
        self._eval_network = prepared_eval_net
        # Only the first batch is needed to trigger graph compilation.
        for inputs in valid_helper:
            self._eval_network.compile(*inputs)
            break
def
_train
(
self
,
epoch
,
train_dataset
,
callbacks
=
None
,
dataset_sink_mode
=
True
):
"""
Training.
...
...
@@ -306,32 +411,27 @@ class Model:
list_callback (_ListCallback): Executor of callback list. Default: None.
cb_params (_InternalCallbackParam): Callback parameters. Default: None.
"""
# remove later to deal with loop sink
iter_first_order
=
277
iter_first_order
=
self
.
_frequency
-
1
iter_second_order
=
1
train_dataset
.
__loop_size__
=
iter_second_order
need_wrap
=
False
if
not
hasattr
(
train_dataset
,
'__ME_INITED__'
)
and
context
.
get_context
(
"enable_loop_sink"
)
\
and
not
context
.
get_context
(
"enable_ge"
):
need_wrap
=
True
dataset_helper
=
DatasetHelper
(
train_dataset
,
iter_first_order
)
# remove later to deal with loop sink
if
need_wrap
:
self
.
_train_network
=
nn
.
DataWrapper
(
self
.
_train_network
,
*
(
dataset_helper
.
types_shapes
()),
train_dataset
.
__ME_INITED__
)
dataset_helper
,
train_network
=
self
.
_exec_preprocess
(
self
.
_train_network
,
is_train
=
True
,
phase
=
'train'
,
dataset
=
train_dataset
,
dataset_sink_mode
=
True
,
iter_first_order
=
iter_first_order
)
self
.
_train_network
=
train_network
cb_params
.
train_network
=
self
.
_train_network
self
.
_train_network
.
set_train
()
cb_params
.
cur_step_num
=
0
loop_size
=
dataset_helper
.
loop_size
()
run_context
=
RunContext
(
cb_params
)
list_callback
.
begin
(
run_context
)
# flag used to stop training early, such as stopAtTime or stopAtStep
should_stop
=
False
has_do_
train1_datase
t
=
False
checkpoint
_branch_one
=
True
has_do_
dataset_ini
t
=
False
switch
_branch_one
=
True
for
i
in
range
(
epoch
):
cb_params
.
cur_epoch_num
=
i
+
1
list_callback
.
epoch_begin
(
run_context
)
...
...
@@ -339,18 +439,18 @@ class Model:
# for data sink, dataset_helper only iterates once; otherwise it iterates epoch_size times.
for
inputs
in
dataset_helper
:
list_callback
.
step_begin
(
run_context
)
if
checkpoint
_branch_one
:
if
switch
_branch_one
:
cb_params
.
cur_step_num
+=
loop_size
self
.
_train_network
.
set_second_order
(
True
)
self
.
_train_network
.
add_flags_recursive
(
thor
=
True
)
self
.
_train_network
.
phase
=
'train0'
else
:
cb_params
.
cur_step_num
+=
iter_first_order
self
.
_train_network
.
set_second_order
(
False
)
self
.
_train_network
.
add_flags_recursive
(
thor
=
False
)
self
.
_train_network
.
phase
=
'train1'
if
not
has_do_
train1_datase
t
:
if
not
has_do_
dataset_ini
t
:
_exec_datagraph
(
train_dataset
,
iter_first_order
,
phase
=
'train1_dataset'
)
has_do_
train1_datase
t
=
True
checkpoint_branch_one
=
not
checkpoint
_branch_one
has_do_
dataset_ini
t
=
True
switch_branch_one
=
not
switch
_branch_one
outputs
=
self
.
_train_network
(
*
inputs
)
cb_params
.
net_outputs
=
outputs
list_callback
.
step_end
(
run_context
)
...
...
@@ -376,17 +476,21 @@ class Model:
list_callback (_ListCallback): Executor of callback list. Default: None.
cb_params (_InternalCallbackParam): Callback parameters. Default: None.
"""
dataset_helper
=
DatasetHelper
(
train_dataset
,
dataset_sink_mode
=
False
)
dataset_helper
,
_
=
self
.
_exec_preprocess
(
self
.
_train_network
,
is_train
=
True
,
phase
=
'train'
,
dataset
=
train_dataset
,
dataset_sink_mode
=
False
)
cb_params
.
cur_step_num
=
0
run_context
=
RunContext
(
cb_params
)
_callback_wrapper
(
list_callback
,
run_context
,
"begin"
)
list_callback
.
begin
(
run_context
)
# flag used to stop training early, such as stopAtTime or stopAtStep
should_stop
=
False
for
i
in
range
(
epoch
):
cb_params
.
cur_epoch_num
=
i
+
1
_callback_wrapper
(
list_callback
,
run_context
,
"epoch_begin"
)
list_callback
.
epoch_begin
(
run_context
)
for
next_element
in
dataset_helper
:
len_element
=
len
(
next_element
)
...
...
@@ -394,7 +498,7 @@ class Model:
raise
ValueError
(
"when loss_fn is not None, train_dataset should"
"return two elements, but got {}"
.
format
(
len_element
))
cb_params
.
cur_step_num
+=
1
_callback_wrapper
(
list_callback
,
run_context
,
"step_begin"
)
list_callback
.
step_begin
(
run_context
)
overflow
=
False
if
self
.
_loss_scale_manager
and
self
.
_loss_scale_manager
.
get_drop_overflow_update
():
...
...
@@ -408,19 +512,19 @@ class Model:
overflow
=
np
.
all
(
overflow
.
asnumpy
())
self
.
_loss_scale_manager
.
update_loss_scale
(
overflow
)
_callback_wrapper
(
list_callback
,
run_context
,
"step_end"
)
list_callback
.
step_end
(
run_context
)
should_stop
=
should_stop
or
run_context
.
get_stop_requested
()
if
should_stop
:
break
train_dataset
.
reset
()
_callback_wrapper
(
list_callback
,
run_context
,
"epoch_end"
)
list_callback
.
epoch_end
(
run_context
)
should_stop
=
should_stop
or
run_context
.
get_stop_requested
()
if
should_stop
:
break
_callback_wrapper
(
list_callback
,
run_context
,
"end"
)
list_callback
.
end
(
run_context
)
def
train
(
self
,
epoch
,
train_dataset
,
callbacks
=
None
,
dataset_sink_mode
=
True
):
"""
...
...
@@ -452,7 +556,7 @@ class Model:
Examples:
>>> dataset = get_dataset()
>>> net = Net()
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> loss = nn.SoftmaxCrossEntropyWithLogits(
is_grad=False, sparse=True
)
>>> loss_scale_manager = FixedLossScaleManager()
>>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None, loss_scale_manager=loss_scale_manager)
...
...
@@ -465,9 +569,6 @@ class Model:
_device_number_check
(
self
.
_parallel_mode
,
self
.
_device_number
)
_parameter_broadcast_check
(
self
.
_parallel_mode
,
self
.
_parameter_broadcast
)
if
context
.
get_context
(
"device_target"
)
in
[
"CPU"
,
"GPU"
]
and
context
.
get_context
(
"enable_loop_sink"
):
raise
ValueError
(
"CPU and GPU can't support loop sink, please set enable_loop_sink=False."
)
self
.
_train
(
epoch
,
train_dataset
,
callbacks
=
callbacks
,
...
...
@@ -485,25 +586,15 @@ class Model:
Returns:
Dict, returns the loss value & metrics values for the model in test mode.
"""
_device_number_check
(
self
.
_parallel_mode
,
self
.
_device_number
)
run_context
=
RunContext
(
cb_params
)
# remove later to deal with loop sink
need_wrap
=
False
if
not
hasattr
(
valid_dataset
,
'__ME_INITED__'
)
and
context
.
get_context
(
"enable_loop_sink"
)
\
and
not
context
.
get_context
(
"enable_ge"
):
need_wrap
=
True
valid_dataset
.
__loop_size__
=
1
dataset_helper
=
DatasetHelper
(
valid_dataset
)
# remove later to deal with loop sink
if
need_wrap
:
self
.
_eval_network
=
nn
.
DataWrapper
(
self
.
_eval_network
,
*
(
dataset_helper
.
types_shapes
()),
valid_dataset
.
__ME_INITED__
)
self
.
_eval_network
.
set_train
(
mode
=
False
)
self
.
_eval_network
.
phase
=
'eval'
dataset_helper
,
eval_network
=
self
.
_exec_preprocess
(
self
.
_eval_network
,
is_train
=
False
,
phase
=
'eval'
,
dataset
=
valid_dataset
,
dataset_sink_mode
=
True
)
self
.
_eval_network
=
eval_network
cb_params
.
eval_network
=
self
.
_eval_network
list_callback
.
begin
(
run_context
)
for
inputs
in
dataset_helper
:
...
...
@@ -537,7 +628,11 @@ class Model:
run_context
=
RunContext
(
cb_params
)
list_callback
.
begin
(
run_context
)
dataset_helper
=
DatasetHelper
(
valid_dataset
,
dataset_sink_mode
=
False
)
dataset_helper
,
_
=
self
.
_exec_preprocess
(
self
.
_eval_network
,
is_train
=
False
,
phase
=
'eval'
,
dataset
=
valid_dataset
,
dataset_sink_mode
=
False
)
for
next_element
in
dataset_helper
:
cb_params
.
cur_step_num
+=
1
list_callback
.
step_begin
(
run_context
)
...
...
@@ -574,11 +669,12 @@ class Model:
Examples:
>>> dataset = get_dataset()
>>> net = Net()
>>> loss = nn.SoftmaxCrossEntropyWithLogits()
>>> loss = nn.SoftmaxCrossEntropyWithLogits(
is_grad=False, sparse=True
)
>>> model = Model(net, loss_fn=loss, optimizer=None, metrics={'acc'})
>>> model.eval(dataset)
"""
check_bool
(
dataset_sink_mode
)
_device_number_check
(
self
.
_parallel_mode
,
self
.
_device_number
)
if
not
self
.
_metric_fns
:
raise
ValueError
(
"metric fn can not be None or empty."
)
...
...
example/resnet50_imagenet2012_THOR/model/resnet.py
浏览文件 @
aeffccb7
...
...
@@ -14,22 +14,24 @@
# ============================================================================
"""ResNet."""
import
math
import
mindspore.nn
as
nn
import
numpy
as
np
import
mindspore.nn
as
nn
from
mindspore.common.tensor
import
Tensor
from
mindspore.ops
import
operations
as
P
from
second_order.thor_layer
import
Conv2d_Thor
,
Dense_Thor
from
model.thor_layer
import
Conv2d_Thor
,
Dense_Thor
def
calculate_gain
(
nonlinearity
,
param
=
None
):
"""calculate_gain"""
linear_fns
=
[
'linear'
,
'conv1d'
,
'conv2d'
,
'conv3d'
,
'conv_transpose1d'
,
'conv_transpose2d'
,
'conv_transpose3d'
]
res
=
0
if
nonlinearity
in
linear_fns
or
nonlinearity
==
'sigmoid'
:
re
turn
1
re
s
=
1
elif
nonlinearity
==
'tanh'
:
re
turn
5.0
/
3
re
s
=
5.0
/
3
elif
nonlinearity
==
'relu'
:
re
turn
math
.
sqrt
(
2.0
)
re
s
=
math
.
sqrt
(
2.0
)
elif
nonlinearity
==
'leaky_relu'
:
if
param
is
None
:
negative_slope
=
0.01
...
...
@@ -38,16 +40,17 @@ def calculate_gain(nonlinearity, param=None):
negative_slope
=
param
else
:
raise
ValueError
(
"negative_slope {} not a valid number"
.
format
(
param
))
re
turn
math
.
sqrt
(
2.0
/
(
1
+
negative_slope
**
2
))
re
s
=
math
.
sqrt
(
2.0
/
(
1
+
negative_slope
**
2
))
else
:
raise
ValueError
(
"Unsupported nonlinearity {}"
.
format
(
nonlinearity
))
return
res
def
_calculate_fan_in_and_fan_out
(
tensor
):
"""_calculate_fan_in_and_fan_out"""
dimensions
=
len
(
tensor
)
if
dimensions
<
2
:
raise
ValueError
(
"Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
)
if
dimensions
==
2
:
# Linear
fan_in
=
tensor
[
1
]
fan_out
=
tensor
[
0
]
...
...
@@ -67,7 +70,6 @@ def _calculate_correct_fan(tensor, mode):
valid_modes
=
[
'fan_in'
,
'fan_out'
]
if
mode
not
in
valid_modes
:
raise
ValueError
(
"Mode {} not supported, please use one of {}"
.
format
(
mode
,
valid_modes
))
fan_in
,
fan_out
=
_calculate_fan_in_and_fan_out
(
tensor
)
return
fan_in
if
mode
==
'fan_in'
else
fan_out
...
...
@@ -93,8 +95,6 @@ def _conv3x3(in_channel, out_channel, stride=1, damping=0.03, loss_scale=1, freq
return
Conv2d_Thor
(
in_channel
,
out_channel
,
kernel_size
=
3
,
stride
=
stride
,
padding
=
0
,
pad_mode
=
'same'
,
weight_init
=
weight
,
damping
=
damping
,
loss_scale
=
loss_scale
,
frequency
=
frequency
)
# return nn.Conv2d(in_channel, out_channel,
# kernel_size=3, stride=stride, padding=0, pad_mode='same', weight_init=weight)
def
_conv1x1
(
in_channel
,
out_channel
,
stride
=
1
,
damping
=
0.03
,
loss_scale
=
1
,
frequency
=
278
):
...
...
@@ -125,7 +125,7 @@ def _bn_last(channel):
def
_fc
(
in_channel
,
out_channel
,
damping
,
loss_scale
,
frequency
):
weight_shape
=
(
out_channel
,
in_channel
)
weight
=
Tensor
(
kaiming_uniform
(
weight_shape
,
a
=
math
.
sqrt
(
5
))
weight
=
Tensor
(
kaiming_uniform
(
weight_shape
,
a
=
math
.
sqrt
(
5
))
)
return
Dense_Thor
(
in_channel
,
out_channel
,
has_bias
=
False
,
weight_init
=
weight
,
bias_init
=
0
,
damping
=
damping
,
loss_scale
=
loss_scale
,
frequency
=
frequency
)
...
...
@@ -321,7 +321,7 @@ class ResNet(nn.Cell):
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
self
.
relu
(
x
)
c1
,
argmax
=
self
.
maxpool
(
x
)
c1
,
_
=
self
.
maxpool
(
x
)
c2
=
self
.
layer1
(
c1
)
c3
=
self
.
layer2
(
c2
)
...
...
example/resnet50_imagenet2012_THOR/run_distribute_train
_new
.sh
→
example/resnet50_imagenet2012_THOR/run_distribute_train.sh
浏览文件 @
aeffccb7
...
...
@@ -51,6 +51,6 @@ do
echo
"start training for rank
$RANK_ID
, device
$DEVICE_ID
"
env
>
env.log
python train
_0517_1
.py
--do_train
=
True
--run_distribute
=
True
--device_num
=
$DEVICE_NUM
--dataset_path
=
$2
>
log 2>&1 &
python train.py
--do_train
=
True
--run_distribute
=
True
--device_num
=
$DEVICE_NUM
--dataset_path
=
$2
>
log 2>&1 &
cd
..
done
example/resnet50_imagenet2012_THOR/train.py
浏览文件 @
aeffccb7
...
...
@@ -17,7 +17,6 @@ import argparse
import
os
import
random
import
mindspore.dataset.engine
as
de
from
mindspore
import
Tensor
from
mindspore
import
context
from
mindspore.communication.management
import
init
...
...
@@ -25,19 +24,17 @@ from mindspore.parallel._auto_parallel_context import auto_parallel_context
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
,
LossMonitor
,
TimeMonitor
from
mindspore.train.loss_scale_manager
import
FixedLossScaleManager
from
mindspore.train.model
import
ParallelMode
from
second_order.model_second_orde
r
import
Model
from
second_order
.resnet
import
resnet50
from
second_order
.thor
import
THOR
from
model.model_tho
r
import
Model
from
model
.resnet
import
resnet50
from
model
.thor
import
THOR
import
numpy
as
np
from
config
_imagenet
import
config
from
config
import
config
from
crossentropy
import
CrossEntropy
from
dataset_imagenet
import
create_dataset
from
lr_generator
import
warmup_cosine_annealing_lr
random
.
seed
(
1
)
np
.
random
.
seed
(
1
)
de
.
config
.
set_seed
(
1
)
parser
=
argparse
.
ArgumentParser
(
description
=
'Image classification'
)
parser
.
add_argument
(
'--run_distribute'
,
type
=
bool
,
default
=
False
,
help
=
'Run distribute'
)
...
...
@@ -50,29 +47,29 @@ args_opt = parser.parse_args()
device_id
=
int
(
os
.
getenv
(
'DEVICE_ID'
))
context
.
set_context
(
mode
=
context
.
GRAPH_MODE
,
device_target
=
"Ascend"
,
save_graphs
=
True
,
device_id
=
device_id
)
context
.
set_context
(
enable_task_sink
=
True
)
context
.
set_context
(
enable_loop_sink
=
True
)
context
.
set_context
(
enable_mem_reuse
=
True
)
def
get_
second_order
_lr
(
global_step
,
lr_init
,
decay
,
total_epochs
,
steps_per_epoch
):
"""get_
second_order
_lr"""
def get_model_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
    """Build the per-step learning-rate schedule and return its tail.

    For every step the (fractional) epoch is ``(step_index + 1) / steps_per_epoch``
    and the rate is ``lr_init * (1 - epoch / total_epochs) ** decay``. The rate
    is halved once the epoch reaches 39 and halved again once it reaches 40.

    Args:
        global_step: Index of the first step to keep in the returned schedule.
        lr_init: Initial learning rate.
        decay: Exponent of the polynomial decay.
        total_epochs: Number of epochs the schedule spans.
        steps_per_epoch: Number of steps in one epoch.

    Returns:
        A float32 numpy array holding the learning rate of every step from
        ``global_step`` onwards.
    """
    schedule = []
    for step in range(1, steps_per_epoch * total_epochs + 1):
        epoch = step / steps_per_epoch
        rate = lr_init * (1.0 - float(epoch) / total_epochs) ** decay
        # Two independent halvings: x0.5 from epoch 39, x0.25 from epoch 40.
        if epoch >= 39:
            rate = rate * 0.5
        if epoch >= 40:
            rate = rate * 0.5
        schedule.append(rate)
    schedule = np.array(schedule).astype(np.float32)
    print("learning_rate_is=====", schedule)
    return schedule[global_step:]
def
get_
second_order
_damping
(
global_step
,
damping_init
,
decay_rate
,
total_epochs
,
steps_per_epoch
):
"""get_
second_order
_damping"""
def
get_
model
_damping
(
global_step
,
damping_init
,
decay_rate
,
total_epochs
,
steps_per_epoch
):
"""get_
model
_damping"""
damping_each_step
=
[]
total_steps
=
steps_per_epoch
*
total_epochs
for
step
in
range
(
total_steps
):
...
...
@@ -83,26 +80,23 @@ def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs
current_step
=
global_step
damping_each_step
=
np
.
array
(
damping_each_step
).
astype
(
np
.
float32
)
damping_now
=
damping_each_step
[
current_step
:]
print
(
"damping_is========="
,
damping_now
)
return
damping_now
if
__name__
==
'__main__'
:
if
args_opt
.
do_eval
:
print
(
"eval"
)
else
:
if
args_opt
.
run_distribute
:
if
not
args_opt
.
do_eval
and
args_opt
.
run_distribute
:
context
.
set_auto_parallel_context
(
device_num
=
args_opt
.
device_num
,
parallel_mode
=
ParallelMode
.
DATA_PARALLEL
,
mirror_mean
=
True
,
parameter_broadcast
=
True
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
80
],
"hccl_world_groupsum1"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
107
],
"hccl_world_groupsum1"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum2"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum3"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum4"
)
auto_parallel_context
().
set_all_reduce_fusion_split_indices
([
27
],
"hccl_world_groupsum5"
)
init
()
else
:
print
(
" "
)
epoch_size
=
config
.
epoch_size
damping
=
get_
second_order
_damping
(
0
,
0.03
,
0.87
,
50
,
5004
)
damping
=
get_
model
_damping
(
0
,
0.03
,
0.87
,
50
,
5004
)
net
=
resnet50
(
class_num
=
config
.
class_num
,
damping
=
damping
,
loss_scale
=
config
.
loss_scale
,
frequency
=
config
.
frequency
)
...
...
@@ -115,17 +109,12 @@ if __name__ == '__main__':
step_size
=
dataset
.
get_dataset_size
()
loss_scale
=
FixedLossScaleManager
(
config
.
loss_scale
,
drop_overflow_update
=
False
)
lr
=
Tensor
(
warmup_cosine_annealing_lr
(
0.035
,
step_size
,
config
.
warmup_epochs
,
50
,
config
.
T_max
,
config
.
eta_min
))
opt
=
THOR
(
filter
(
lambda
x
:
x
.
requires_grad
,
net
.
get_parameters
()),
lr
,
config
.
momentum
,
damping
,
config
.
frequency
,
lr
=
Tensor
(
get_model_lr
(
0
,
0.05
,
6
,
70
,
5004
))
opt
=
THOR
(
filter
(
lambda
x
:
x
.
requires_grad
,
net
.
get_parameters
()),
lr
,
config
.
momentum
,
filter
(
lambda
x
:
'matrix_A'
in
x
.
name
,
net
.
get_parameters
()),
filter
(
lambda
x
:
'matrix_G'
in
x
.
name
,
net
.
get_parameters
()),
filter
(
lambda
x
:
'spatial_norm'
in
x
.
name
,
net
.
get_parameters
()),
filter
(
lambda
x
:
'A_inv_max'
in
x
.
name
,
net
.
get_parameters
()),
filter
(
lambda
x
:
'G_inv_max'
in
x
.
name
,
net
.
get_parameters
()),
config
.
weight_decay
,
config
.
loss_scale
)
model
=
Model
(
net
,
loss_fn
=
loss
,
optimizer
=
opt
,
amp_level
=
'O2'
,
loss_scale_manager
=
loss_scale
,
...
...
mindspore/ops/_op_impl/custom_op/batch_matmul_impl.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""batch_matmul_impl"""
from
mindspore.ops.op_info_register
import
op_info_register
@
op_info_register
(
"""{
"op_name": "CusBatchMatMul",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "batchmatmul.so",
"compute_cost": 10,
"kernel_name": "CusBatchMatMul",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 1,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "x2",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=True,
                   kernel_name="batchmatmul"):
    """Registration stub for the ``CusBatchMatMul`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x1: First input descriptor (unused here).
        input_x2: Second input descriptor (unused here).
        output: Output descriptor (unused here).
        transpose_a: Unused here. Defaults to False.
        transpose_b: Unused here. Defaults to True.
        kernel_name: Unused here. Defaults to "batchmatmul".

    Returns:
        None.
    """
    # NOTE(review): the registered op info lists no attributes, although the
    # signature accepts transpose_a/transpose_b -- confirm this is intended.
    return None
mindspore/ops/_op_impl/custom_op/cholesky_trsm.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""CusCholeskyTrsm"""
from
mindspore.ops.op_info_register
import
op_info_register
@
op_info_register
(
"""{
"op_name": "CusCholeskyTrsm",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "choleskytrsm.so",
"compute_cost": 10,
"kernel_name": "CusCholeskyTrsm",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusCholeskyTrsm(input_x, output, kernel_name):
    """Registration stub for the ``CusCholeskyTrsm`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x: Input descriptor (unused here).
        output: Output descriptor (unused here).
        kernel_name: Kernel name (unused here; required, no default).

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/fused_abs_max1.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""CusFusedAbsMax1"""
from
mindspore.ops.op_info_register
import
op_info_register
@
op_info_register
(
"""{
"op_name": "CusFusedAbsMax1",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "fusedabsmax1.so",
"compute_cost": 10,
"kernel_name": "CusFusedAbsMax1",
"partial_flag": true,
"attr": [
{
"name": "origin_shape",
"param_type": "required",
"type": "listInt",
"value": "all"
}
],
"inputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusFusedAbsMax1(input_x, output, origin_shape=None, kernel_name="fused_abs_max1"):
    """Registration stub for the ``CusFusedAbsMax1`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x: Input descriptor (unused here).
        output: Output descriptor (unused here).
        origin_shape: Unused here. Defaults to None.
        kernel_name: Unused here. Defaults to "fused_abs_max1".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/img2col_impl.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""CusImg2ColNC1HWC0"""
from
mindspore.ops.op_info_register
import
op_info_register
@
op_info_register
(
"""{
"op_name": "CusImg2ColNC1HWC0",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "img2colnc1hwc0.so",
"compute_cost": 10,
"kernel_name": "CusImg2ColNC1HWC0",
"partial_flag": true,
"attr": [
{
"name": "ksizes",
"param_type": "required",
"type": "listInt",
"value": "all"
},
{
"name": "strides",
"param_type": "required",
"type": "listInt",
"value": "all"
},
{
"name": "dilates",
"param_type": "required",
"type": "listInt",
"value": "all"
},
{
"name": "padding",
"param_type": "required",
"type": "str",
"value": "all"
}
],
"inputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"NC1HWC0"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"FRACTAL_NZ"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusImg2ColNC1HWC0(input_x, output, ksizes, strides, dilates, padding,
                      kernel_name="img2col"):
    """Registration stub for the ``CusImg2ColNC1HWC0`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x: Input descriptor (unused here).
        output: Output descriptor (unused here).
        ksizes: Unused here.
        strides: Unused here.
        dilates: Unused here.
        padding: Unused here.
        kernel_name: Unused here. Defaults to "img2col".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/matmul_cube_dense_left.py
0 → 100644
浏览文件 @
aeffccb7
# -*- coding:utf-8 -*-
"""
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
matmul
"""
from
__future__
import
absolute_import
from
mindspore.ops.op_info_register
import
op_info_register
from
topi.cce
import
util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT
=
2147483648
NoneType
=
type
(
None
)
@
op_info_register
(
"""{
"op_name": "CusMatMulCubeDenseLeft",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "matmulcubedenseleft.so",
"compute_cost": 10,
"kernel_name": "CusMatMulCubeDenseLeft",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 1,
"dtype": [
"float16"
],
"format": [
"FRACTAL_NZ"
],
"name": "x2",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 2,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x3",
"need_compile": false,
"param_type": "optional",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"FRACTAL_NZ"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
@
util
.
check_input_type
(
dict
,
dict
,
(
dict
,
NoneType
),
dict
,
bool
,
bool
,
str
)
def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False,
                           trans_b=False, kernel_name="matmulcube"):
    """Registration stub for the ``CusMatMulCubeDenseLeft`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x1: First input descriptor (unused here).
        input_x2: Second input descriptor (unused here).
        bias: Unused here. Defaults to None.
        output_y: Unused here. Defaults to an empty dict; it is never mutated
            in this function, so the shared default is harmless.
        trans_a: Unused here. Defaults to False.
        trans_b: Unused here. Defaults to False.
        kernel_name: Unused here. Defaults to "matmulcube".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/matmul_cube_fracz_left_cast_impl.py
0 → 100644
浏览文件 @
aeffccb7
# -*- coding:utf-8 -*-
"""
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
matmul
"""
from
__future__
import
absolute_import
from
mindspore.ops.op_info_register
import
op_info_register
from
topi.cce
import
util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT
=
2147483648
NoneType
=
type
(
None
)
@
op_info_register
(
"""{
"op_name": "CusMatMulCubeFraczLeftCast",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "matmulcubefraczleftcast.so",
"compute_cost": 10,
"kernel_name": "CusMatMulCubeFraczLeftCast",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 1,
"dtype": [
"float32"
],
"format": [
"FracZ"
],
"name": "x2",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 2,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x3",
"need_compile": false,
"param_type": "optional",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"FracZ"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
# pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
@
util
.
check_input_type
(
dict
,
dict
,
(
dict
,
NoneType
),
dict
,
bool
,
bool
,
str
)
def CusMatMulCubeFraczLeftCast(input_x1, input_x2, bias=None, output_y={}, trans_a=False,
                               trans_b=False, kernel_name="CusMatMulCubeFraczLeftCast"):
    """Registration stub for the ``CusMatMulCubeFraczLeftCast`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x1: First input descriptor (unused here).
        input_x2: Second input descriptor (unused here).
        bias: Unused here. Defaults to None.
        output_y: Unused here. Defaults to an empty dict; it is never mutated
            in this function, so the shared default is harmless.
        trans_a: Unused here. Defaults to False.
        trans_b: Unused here. Defaults to False.
        kernel_name: Unused here. Defaults to "CusMatMulCubeFraczLeftCast".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/matmul_cube_fracz_right_mul_impl.py
0 → 100644
浏览文件 @
aeffccb7
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
matmul
"""
from
__future__
import
absolute_import
from
mindspore.ops.op_info_register
import
op_info_register
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT
=
2147483648
NoneType
=
type
(
None
)
@
op_info_register
(
"""{
"op_name": "CusMatMulCubeFraczRightMul",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "matmulcubefraczrightmul.so",
"compute_cost": 10,
"kernel_name": "CusMatMulCubeFraczRightMul",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"FracZ"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 1,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x2",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 2,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "x3",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 3,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x4",
"need_compile": false,
"param_type": "optional",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"FracZ"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusMatMulCubeFraczRightMul(input_x1, input_x2, input_x3, bias=None, output_y={},
                               trans_a=False, trans_b=False, kernel_name="matmulcube"):
    """Registration stub for the ``CusMatMulCubeFraczRightMul`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x1: First input descriptor (unused here).
        input_x2: Second input descriptor (unused here).
        input_x3: Third input descriptor (unused here).
        bias: Unused here. Defaults to None.
        output_y: Unused here. Defaults to an empty dict; it is never mutated
            in this function, so the shared default is harmless.
        trans_a: Unused here. Defaults to False.
        trans_b: Unused here. Defaults to False.
        kernel_name: Unused here. Defaults to "matmulcube".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/matmul_cube_impl.py
0 → 100644
浏览文件 @
aeffccb7
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
copyright 2020 Huawei Technologies Co., Ltd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
matmul
"""
from
__future__
import
absolute_import
from
mindspore.ops.op_info_register
import
op_info_register
from
topi.cce
import
util
# General limitation of the size for input shape: 2**31
SHAPE_SIZE_LIMIT
=
2147483648
NoneType
=
type
(
None
)
@
op_info_register
(
"""{
"op_name": "CusMatMulCube",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "matmulcube.so",
"compute_cost": 10,
"kernel_name": "CusMatMulCube",
"partial_flag": true,
"attr": [
{
"name": "transpose_a",
"param_type": "required",
"type": "bool",
"value": "all"
},
{
"name": "transpose_b",
"param_type": "required",
"type": "bool",
"value": "all"
}
],
"inputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"FRACTAL_NZ"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 1,
"dtype": [
"float16"
],
"format": [
"FRACTAL_NZ"
],
"name": "x2",
"need_compile": false,
"param_type": "required",
"shape": "all"
},
{
"index": 2,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "x3",
"need_compile": false,
"param_type": "optional",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"FRACTAL_NZ"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
# pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
@
util
.
check_input_type
(
dict
,
dict
,
(
dict
,
NoneType
),
dict
,
bool
,
bool
,
str
)
def CusMatMulCube(input_x1, input_x2, bias=None, output_y={}, trans_a=False,
                  trans_b=False, kernel_name="matmulcube"):
    """Registration stub for the ``CusMatMulCube`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x1: First input descriptor (unused here).
        input_x2: Second input descriptor (unused here).
        bias: Unused here. Defaults to None.
        output_y: Unused here. Defaults to an empty dict; it is never mutated
            in this function, so the shared default is harmless.
        trans_a: Unused here. Defaults to False.
        trans_b: Unused here. Defaults to False.
        kernel_name: Unused here. Defaults to "matmulcube".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/matrix_combine_impl.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""CusMatrixCombine"""
from
mindspore.ops.op_info_register
import
op_info_register
@
op_info_register
(
"""{
"op_name": "CusMatrixCombine",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "matrixcombine.so",
"compute_cost": 10,
"kernel_name": "CusMatrixCombine",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float32"
],
"format": [
"DefaultFormat"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusMatrixCombine(input_x, output, kernel_name="matrix_combine"):
    """Registration stub for the ``CusMatrixCombine`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x: Input descriptor (unused here).
        output: Output descriptor (unused here).
        kernel_name: Unused here. Defaults to "matrix_combine".

    Returns:
        None.
    """
    return None
mindspore/ops/_op_impl/custom_op/transpose02314_impl.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""CusTranspose02314"""
from
mindspore.ops.op_info_register
import
op_info_register
@
op_info_register
(
"""{
"op_name": "CusTranspose02314",
"imply_type": "TBE",
"fusion_type": "OPAQUE",
"async_flag": false,
"binfile_name": "transpose02314.so",
"compute_cost": 10,
"kernel_name": "CusTranspose02314",
"partial_flag": true,
"attr": [
],
"inputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"NC1HWC0"
],
"name": "x1",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
],
"outputs": [
{
"index": 0,
"dtype": [
"float16"
],
"format": [
"DefaultFormat"
],
"name": "y",
"need_compile": false,
"param_type": "required",
"shape": "all"
}
]
}"""
)
def CusTranspose02314(input_x, output, kernel_name="transpose021354"):
    """Registration stub for the ``CusTranspose02314`` TBE operator.

    The function body performs no computation; the operator description is
    supplied by the ``op_info_register`` decorator applied to it.

    Args:
        input_x: Input descriptor (unused here).
        output: Output descriptor (unused here).
        kernel_name: Unused here. Defaults to "transpose021354".

    Returns:
        None.
    """
    # NOTE(review): the default kernel name "transpose021354" does not match the
    # operator name "02314"; kept unchanged because it is part of the interface.
    return None
mindspore/ops/operations/thor_ops.py
0 → 100644
浏览文件 @
aeffccb7
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""thor_ops"""
import
mindspore
as
ms
from
mindspore.ops
import
prim_attr_register
,
PrimitiveWithInfer
from
mindspore.ops.composite
import
multitype_ops
as
C
class CusBatchMatMul(PrimitiveWithInfer):
    """CusBatchMatMul primitive definition.

    Declares two inputs (``x1``, ``x2``) and one output (``y``). The inferred
    output shape and dtype are those of the first input.
    """

    @prim_attr_register
    def __init__(self):
        """Register the input and output names of CusBatchMatMul."""
        self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])

    def get_bprop(self):
        """Return the backward function, which yields zero gradients for both inputs."""

        def bprop(x1, x2, out, dout):
            return C.zeros_like(x1), C.zeros_like(x2)

        return bprop

    def infer_shape(self, data1_shape, data2_shape):
        """Return the shape of the first input as the output shape."""
        return data1_shape

    def infer_dtype(self, data1_dtype, data2_dtype):
        """Return the dtype of the first input as the output dtype."""
        return data1_dtype
class CusCholeskyTrsm(PrimitiveWithInfer):
    """CusCholeskyTrsm primitive definition.

    Declares one input (``x1``) and one output (``y``). The output dtype is the
    input dtype; the output shape is derived from the first input dimension.
    """

    @prim_attr_register
    def __init__(self):
        """Register the input and output names of CusCholeskyTrsm."""
        self.init_prim_io_names(inputs=['x1'], outputs=['y'])

    def infer_shape(self, data1_shape):
        """Infer the output shape from a two-element input shape.

        Returns ``[m // 128, 128, 128]`` when the first dimension ``m`` is at
        least 128, otherwise ``[1, 64, 64]``.
        """
        rows, _ = data1_shape
        if rows >= 128:
            return [rows // 128, 128, 128]
        return [1, 64, 64]

    def infer_dtype(self, data1_dtype):
        """Return the input dtype as the output dtype."""
        return data1_dtype
class CusFusedAbsMax1(PrimitiveWithInfer):
    """CusFusedAbsMax1 primitive definition.

    Declares one input (``x1``) and one output (``y``). The output dtype is the
    input dtype; the output shape depends only on the rank of the input.
    """

    # NOTE(review): the list default is shared between calls; it is only stored,
    # never mutated here, and is kept unchanged because it is part of the interface.
    @prim_attr_register
    def __init__(self, origin_shape=[-1, -1]):
        """Register the io names of CusFusedAbsMax1 and store ``origin_shape``."""
        self.init_prim_io_names(inputs=['x1'], outputs=['y'])
        self.origin_shape = origin_shape

    def get_bprop(self):
        """Return the backward function, which yields a zero gradient for the input."""

        def bprop(x, out, dout):
            return (C.zeros_like(x),)

        return bprop

    def infer_shape(self, data1_shape):
        """Return ``[1]`` for a rank-2 input shape and ``[32, 64]`` otherwise."""
        return [1] if len(data1_shape) == 2 else [32, 64]

    def infer_dtype(self, data1_dtype):
        """Return the input dtype as the output dtype."""
        return data1_dtype
class CusImg2Col(PrimitiveWithInfer):
    """CusImg2Col primitive definition.

    Declares one input (``x1``) and one output (``y``). The output dtype is the
    input dtype; the output is a two-dimensional shape computed from the input
    shape, the strides and the kernel sizes.
    """

    @prim_attr_register
    def __init__(self, ksizes, strides, dilates=(1, 1, 1, 1), mode="NC1HWC0"):
        """Register the io names of CusImg2Col and store its attributes."""
        self.init_prim_io_names(inputs=['x1'], outputs=['y'])
        self.ksizes = ksizes
        self.strides = strides
        self.dilates = dilates
        self.mode = mode

    def get_bprop(self):
        """Return the backward function, which yields a zero gradient for the input."""

        def bprop(x, out, dout):
            return (C.zeros_like(x),)

        return bprop

    def infer_shape(self, data1_shape):
        """Infer the two-dimensional output shape.

        The input shape is unpacked as four values ``(bs, c, h, w)``; only the
        two middle entries of ``self.strides`` and ``self.ksizes`` are used.

        Returns:
            ``[bs * (h // stride_h) * (w // stride_w), k_w * k_h * c1 * 16]``
            where ``c1`` is ``c // 16``, replaced by 1 when that quotient is 0.
        """
        batch, channels, height, width = data1_shape
        _, stride_h, stride_w, _ = self.strides
        _, k_w, k_h, _ = self.ksizes
        c0 = 16
        # Fewer than 16 channels still occupy one block of c0 channels.
        c1 = channels // 16 or 1
        out_rows = batch * int(height // stride_h) * int(width // stride_w)
        out_cols = k_w * k_h * c1 * c0
        return [out_rows, out_cols]

    def infer_dtype(self, data1_dtype):
        """Return the input dtype as the output dtype."""
        return data1_dtype
class CusMatMulCubeDenseLeft(PrimitiveWithInfer):
    """CusMatMulCubeDenseLeft primitive definition.

    Declares two inputs (``x1``, ``x2``) and one output (``y``). The output
    takes the shape of the second input and is always a float16 tensor.
    """

    @prim_attr_register
    def __init__(self):
        """Register the input and output names of CusMatMulCubeDenseLeft."""
        self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])

    def get_bprop(self):
        """Return the backward function, which yields zero gradients for both inputs."""

        def bprop(x1, x2, out, dout):
            return C.zeros_like(x1), C.zeros_like(x2)

        return bprop

    def infer_shape(self, data1_shape, data2_shape):
        """Return the shape of the second input as the output shape."""
        return data2_shape

    def infer_dtype(self, data1_dtype, data2_dtype):
        """Return the float16 tensor type regardless of the input dtypes."""
        return ms.common.dtype.tensor_type(ms.float16)
class CusMatMulCubeFraczRightMul(PrimitiveWithInfer):
    """CusMatMulCubeFraczRightMul primitive definition.

    Declares three inputs (``x1``, ``x2``, ``x3``) and one output (``y``). The
    output takes the shape of the first input and is always a float32 tensor.
    """

    @prim_attr_register
    def __init__(self):
        """Register the input and output names of CusMatMulCubeFraczRightMul."""
        self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y'])

    def get_bprop(self):
        """Return the backward function, which yields zero gradients for all inputs."""

        def bprop(x1, x2, x3, out, dout):
            return C.zeros_like(x1), C.zeros_like(x2), C.zeros_like(x3)

        return bprop

    def infer_shape(self, data1_shape, data2_shape, data3_shape):
        """Return the shape of the first input as the output shape."""
        return data1_shape

    def infer_dtype(self, data1_dtype, data2_dtype, data3_dtype):
        """Return the float32 tensor type regardless of the input dtypes."""
        return ms.common.dtype.tensor_type(ms.float32)
class CusMatMulCube(PrimitiveWithInfer):
    """CusMatMulCube primitive definition.

    Declares two inputs (``x1``, ``x2``) and one output (``y``). The output
    shape is the matrix-product shape ``[m, n]`` of the two rank-2 inputs
    (honouring the transpose flags) and the output is always a float32 tensor.
    """

    @prim_attr_register
    def __init__(self, transpose_a=False, transpose_b=False):
        """Register the io names of CusMatMulCube and store the transpose flags."""
        self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
        self.transpose_a = transpose_a
        self.transpose_b = transpose_b

    def get_bprop(self):
        """Return the backward function, which yields zero gradients for both inputs."""

        def bprop(x1, x2, out, dout):
            return (C.zeros_like(x1), C.zeros_like(x2))

        return bprop

    def infer_shape(self, data1_shape, data2_shape):
        """Infer the ``[m, n]`` output shape of the matrix product.

        Args:
            data1_shape: Two-element shape of ``x1``; read as ``(k, m)`` when
                ``transpose_a`` is set, otherwise as ``(m, k)``.
            data2_shape: Two-element shape of ``x2``; read as ``(n, k)`` when
                ``transpose_b`` is set, otherwise as ``(k, n)``.

        Returns:
            The list ``[m, n]``.

        Raises:
            ValueError: If the inner dimensions of the two inputs differ.
        """
        if self.transpose_a:
            k1, m = data1_shape
        else:
            m, k1 = data1_shape
        if self.transpose_b:
            n, k2 = data2_shape
        else:
            k2, n = data2_shape
        # Explicit check instead of `assert`, which is removed under `python -O`.
        if k1 != k2:
            raise ValueError(
                "CusMatMulCube inner dimensions must match, but got {} and {}".format(k1, k2))
        return [m, n]

    def infer_dtype(self, data1_dtype, data2_dtype):
        """Return the float32 tensor type regardless of the input dtypes."""
        return ms.common.dtype.tensor_type(getattr(ms, "float32"))
class CusMatrixCombine(PrimitiveWithInfer):
    """CusMatrixCombine primitive definition.

    Declares one input (``x``) and one output (``y``). A three-dimensional
    input shape ``(a, b, c)`` is mapped to the output shape ``[a * b, a * c]``;
    the output dtype is the input dtype.
    """

    @prim_attr_register
    def __init__(self):
        """Register the input and output names of CusMatrixCombine."""
        self.init_prim_io_names(inputs=['x'], outputs=['y'])

    def get_bprop(self):
        """Return the backward function, which yields a zero gradient for the input."""

        def bprop(x, out, dout):
            return (C.zeros_like(x),)

        return bprop

    def infer_shape(self, data_shape):
        """Return ``[a * b, a * c]`` for an input shape ``(a, b, c)``."""
        count, rows, cols = data_shape
        return [count * rows, count * cols]

    def infer_dtype(self, data_dtype):
        """Return the input dtype as the output dtype."""
        return data_dtype
class CusTranspose02314(PrimitiveWithInfer):
    """CusTranspose02314 primitive definition.

    Declares one input (``x1``) and one output (``y``). A four-dimensional
    input shape is mapped to a two-dimensional output shape; the output dtype
    is the input dtype.
    """

    @prim_attr_register
    def __init__(self):
        """Register the input and output names of CusTranspose02314."""
        self.init_prim_io_names(inputs=['x1'], outputs=['y'])

    def get_bprop(self):
        """Return the backward function, which yields a zero gradient for the input."""

        def bprop(x, out, dout):
            return (C.zeros_like(x),)

        return bprop

    def infer_shape(self, data1_shape):
        """Infer the output shape from a four-element ``(n, c, h, w)`` input shape.

        Returns:
            The tuple ``(n * h * w, (c // 16) * 16)``.
        """
        assert len(data1_shape) == 4
        batch, channels, height, width = data1_shape
        c0 = 16
        c1 = channels // 16
        return (batch * height * width, c1 * c0)

    def infer_dtype(self, data1_dtype):
        """Return the input dtype as the output dtype."""
        return data1_dtype
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录