Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
25969b5d
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
25969b5d
编写于
7月 14, 2020
作者:
C
chenzomi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add loss monitor to lenet
上级
f65586ce
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
108 addition
and
45 deletion
+108
-45
mindspore/train/callback/_loss_monitor.py
mindspore/train/callback/_loss_monitor.py
+12
-43
model_zoo/lenet_quant/src/loss_monitor.py
model_zoo/lenet_quant/src/loss_monitor.py
+92
-0
model_zoo/lenet_quant/train.py
model_zoo/lenet_quant/train.py
+2
-1
model_zoo/lenet_quant/train_quant.py
model_zoo/lenet_quant/train_quant.py
+2
-1
未找到文件。
mindspore/train/callback/_loss_monitor.py
浏览文件 @
25969b5d
...
...
@@ -14,7 +14,6 @@
# ============================================================================
"""LossMonitor Callback class."""
import
time
import
numpy
as
np
from
mindspore.common.tensor
import
Tensor
...
...
@@ -32,62 +31,32 @@ class LossMonitor(Callback):
Args:
per_print_times (int): Print loss every times. Default: 1.
lr_init (numpy array): train learning rate. Default: None.
Raises:
ValueError: If print_step is not int or less than zero.
Examples:
>>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy())
"""
def
__init__
(
self
,
per_print_times
=
1
,
lr_init
=
None
):
def
__init__
(
self
,
per_print_times
=
1
):
super
(
LossMonitor
,
self
).
__init__
()
if
not
isinstance
(
per_print_times
,
int
)
or
per_print_times
<
0
:
raise
ValueError
(
"print_step must be int and >= 0."
)
self
.
_per_print_times
=
per_print_times
self
.
lr_init
=
lr_init
def
epoch_begin
(
self
,
run_context
):
self
.
losses
=
[]
self
.
epoch_time
=
time
.
time
()
def
epoch_end
(
self
,
run_context
):
cb_params
=
run_context
.
original_args
()
epoch_mseconds
=
(
time
.
time
()
-
self
.
epoch_time
)
*
1000
per_step_mseconds
=
epoch_mseconds
/
cb_params
.
batch_num
print
(
"Epoch time: {:5.3f}, per step time: {:5.3f}, "
"avg loss: {:5.3f}"
.
format
(
epoch_mseconds
,
per_step_mseconds
,
np
.
mean
(
self
.
losses
)))
print
(
"*"
*
60
)
def
step_begin
(
self
,
run_context
):
self
.
step_time
=
time
.
time
()
def
step_end
(
self
,
run_context
):
cb_params
=
run_context
.
original_args
()
step_mseconds
=
(
time
.
time
()
-
self
.
step_time
)
*
1000
step_loss
=
cb_params
.
net_outputs
loss
=
cb_params
.
net_outputs
if
isinstance
(
step_loss
,
(
tuple
,
list
))
and
isinstance
(
step_loss
[
0
],
Tensor
):
step_loss
=
step_loss
[
0
]
if
isinstance
(
step_loss
,
Tensor
):
step_loss
=
np
.
mean
(
step_loss
.
asnumpy
())
if
isinstance
(
loss
,
(
tuple
,
list
)):
if
isinstance
(
loss
[
0
],
Tensor
)
and
isinstance
(
loss
[
0
].
asnumpy
(),
np
.
ndarray
):
loss
=
loss
[
0
]
self
.
losses
.
append
(
step_loss
)
cur_step_in_epoch
=
int
((
cb_params
.
cur_step_num
-
1
)
%
cb_params
.
batch_num
)
+
1
if
isinstance
(
loss
,
Tensor
)
and
isinstance
(
loss
.
asnumpy
(),
np
.
ndarray
):
loss
=
np
.
mean
(
loss
.
asnumpy
())
if
isinstance
(
step_loss
,
float
)
and
(
np
.
isnan
(
step_loss
)
or
np
.
isinf
(
step_loss
)):
raise
ValueError
(
"Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. "
"Invalid loss, terminating training."
.
format
(
cb_params
.
cur_epoch_num
-
1
,
cb_params
.
epoch_num
,
cur_step_in_epoch
,
cb_params
.
batch_num
))
cur_step_in_epoch
=
(
cb_params
.
cur_step_num
-
1
)
%
cb_params
.
batch_num
+
1
if
isinstance
(
loss
,
float
)
and
(
np
.
isnan
(
loss
)
or
np
.
isinf
(
loss
)):
raise
ValueError
(
"epoch: {} step: {}. Invalid loss, terminating training."
.
format
(
cb_params
.
cur_epoch_num
,
cur_step_in_epoch
))
if
self
.
_per_print_times
!=
0
and
cb_params
.
cur_step_num
%
self
.
_per_print_times
==
0
:
print
(
"Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], "
"loss: [{:5.4f}], avg los: [{:5.4f}], time: [{:5.4f}ms]"
.
format
(
cb_params
.
cur_epoch_num
,
cb_params
.
epoch_num
,
cur_step_in_epoch
,
int
(
cb_params
.
batch_num
),
step_loss
,
np
.
mean
(
self
.
losses
),
step_mseconds
),
flush
=
True
)
print
(
"epoch: %s step: %s, loss is %s"
%
(
cb_params
.
cur_epoch_num
,
cur_step_in_epoch
,
loss
),
flush
=
True
)
model_zoo/lenet_quant/src/loss_monitor.py
0 → 100644
浏览文件 @
25969b5d
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""LossMonitor Callback class."""
import
time
import
numpy
as
np
from
mindspore.common.tensor
import
Tensor
from
mindspore.train.callback
import
Callback
class LossMonitor(Callback):
    """
    Monitor the loss in training.

    If the loss is NAN or INF, it will terminate training by raising
    ValueError from `step_end`.

    Note:
        If per_print_times is 0 do not print loss.

    Args:
        per_print_times (int): Print the loss every `per_print_times` steps.
            Default: 1.
        lr_init (numpy array): train learning rate. It is stored on the
            instance as `lr_init`; no method of this class reads it.
            Default: None.

    Raises:
        ValueError: If per_print_times is not int or less than zero.

    Examples:
        >>> LossMonitor(100, lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, per_print_times=1, lr_init=None):
        super(LossMonitor, self).__init__()
        if not isinstance(per_print_times, int) or per_print_times < 0:
            raise ValueError("print_step must be int and >= 0.")
        self._per_print_times = per_print_times
        self.lr_init = lr_init
        # Initialise the bookkeeping state here as well, so that step_end /
        # epoch_end do not fail with AttributeError when epoch_begin or
        # step_begin was not invoked before them.
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def epoch_begin(self, run_context):
        """Reset the per-epoch loss history and start the epoch timer."""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """Print the epoch wall time, the mean step time and the mean loss."""
        cb_params = run_context.original_args()
        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        # np.mean([]) is nan but also emits a RuntimeWarning; produce the same
        # value quietly when no step loss was recorded during the epoch.
        avg_loss = np.mean(self.losses) if self.losses else float("nan")
        print("Epoch time: {:5.3f}, per step time: {:5.3f}, "
              "avg loss: {:5.3f}".format(epoch_mseconds,
                                         per_step_mseconds,
                                         avg_loss))
        print("*" * 60)

    def step_begin(self, run_context):
        """Start the step timer."""
        self.step_time = time.time()

    def step_end(self, run_context):
        """
        Record the step loss, validate it and optionally print progress.

        Raises:
            ValueError: If the step loss is NAN or INF.
        """
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        # When the network returns several outputs, the first one is taken
        # as the loss. The emptiness check avoids IndexError on () / [].
        if isinstance(step_loss, (tuple, list)) and step_loss \
                and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        batch_num = int(cb_params.batch_num)
        cur_step_in_epoch = int((cb_params.cur_step_num - 1) % cb_params.batch_num) + 1

        # np.mean over a float32 array yields np.float32, which is NOT a
        # subclass of the builtin float; np.floating has to be accepted too,
        # otherwise the NAN/INF guard never triggers for float32 losses.
        if isinstance(step_loss, (float, np.floating)) \
                and (np.isnan(step_loss) or np.isinf(step_loss)):
            # Report the same epoch number and the same int-cast batch count
            # as the progress line below.
            raise ValueError("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}]. "
                             "Invalid loss, terminating training.".format(
                                 cb_params.cur_epoch_num, cb_params.epoch_num,
                                 cur_step_in_epoch, batch_num))

        if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0:
            print("Epoch: [{:3d}/{:3d}], step: [{:5d}/{:5d}], "
                  "loss: [{:5.4f}], avg loss: [{:5.4f}], time: [{:5.4f}ms]".format(
                      cb_params.cur_epoch_num, cb_params.epoch_num,
                      cur_step_in_epoch, batch_num,
                      step_loss, np.mean(self.losses),
                      step_mseconds), flush=True)
model_zoo/lenet_quant/train.py
浏览文件 @
25969b5d
...
...
@@ -22,12 +22,13 @@ import os
import
argparse
import
mindspore.nn
as
nn
from
mindspore
import
context
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
,
LossMonitor
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
from
mindspore.train
import
Model
from
mindspore.nn.metrics
import
Accuracy
from
src.dataset
import
create_dataset
from
src.config
import
mnist_cfg
as
cfg
from
src.lenet_fusion
import
LeNet5
as
LeNet5Fusion
from
src.loss_monitor
import
LossMonitor
parser
=
argparse
.
ArgumentParser
(
description
=
'MindSpore MNIST Example'
)
parser
.
add_argument
(
'--device_target'
,
type
=
str
,
default
=
"Ascend"
,
...
...
model_zoo/lenet_quant/train_quant.py
浏览文件 @
25969b5d
...
...
@@ -23,13 +23,14 @@ import argparse
import
mindspore.nn
as
nn
from
mindspore
import
context
from
mindspore.train.serialization
import
load_checkpoint
,
load_param_into_net
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
,
LossMonitor
from
mindspore.train.callback
import
ModelCheckpoint
,
CheckpointConfig
from
mindspore.train
import
Model
from
mindspore.nn.metrics
import
Accuracy
from
mindspore.train.quant
import
quant
from
src.dataset
import
create_dataset
from
src.config
import
mnist_cfg
as
cfg
from
src.lenet_fusion
import
LeNet5
as
LeNet5Fusion
from
src.loss_monitor
import
LossMonitor
parser
=
argparse
.
ArgumentParser
(
description
=
'MindSpore MNIST Example'
)
parser
.
add_argument
(
'--device_target'
,
type
=
str
,
default
=
"Ascend"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录