MegEngine commit fc0f4546
Authored on Feb 24, 2022 by Megvii Engine Team
fix(dnn/check_non_finite): adjust some details of CheckNonFinite
GitOrigin-RevId: 52ddd805b433fec80dfa23352918b3d3ec9b8947
Parent: 3bd40887
Showing 6 changed files with 62 additions and 34 deletions (+62 -34).
dnn/src/common/reduce_helper_device.h  (+3 -3)
dnn/src/cuda/check_non_finite/kern.cu  (+4 -4)
dnn/src/cuda/check_non_finite/opr_impl.cpp  (+4 -4)
imperative/python/megengine/amp/grad_scaler.py  (+4 -2)
imperative/python/megengine/functional/math.py  (+3 -1)
imperative/python/test/unit/amp/test_grad_scaler.py  (+44 -20)
dnn/src/common/reduce_helper_device.h
@@ -175,13 +175,13 @@ struct MaxOp<src_ctype, dst_ctype, dt_float32> {
             : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
 };
 
-template <typename src_ctype, typename index_ctype, typename dst_ctype, typename wtype_>
+template <typename src_ctype, typename dst_ctype, typename wtype_>
 struct CheckNonFiniteOp {
     typedef wtype_ wtype;
     const wtype INIT;
 
     src_ctype** srcs;
-    index_ctype* srcs_total_nr_elems;
+    size_t* srcs_total_nr_elems;
     dst_ctype* dst;
     const size_t B;
     const src_ctype scale;
@@ -206,7 +206,7 @@ struct CheckNonFiniteOp {
         return lhs | rhs;
     }
     MEGDNN_HOST MEGDNN_DEVICE CheckNonFiniteOp(
-            src_ctype** srcs, index_ctype* srcs_total_nr_elems, dst_ctype* dst,
+            src_ctype** srcs, size_t* srcs_total_nr_elems, dst_ctype* dst,
             size_t B, src_ctype scale)
             : INIT(wtype(0)),
               srcs(srcs),
dnn/src/cuda/check_non_finite/kern.cu
@@ -8,10 +8,10 @@ namespace cuda {
 #define COMMA ,
 
-#define cb(_dtype)                                                         \
-    INST_REDUCE(                                                           \
-            device_reduce::CheckNonFiniteOp<                               \
-                    _dtype COMMA size_t COMMA dt_int32 COMMA dt_int32>,    \
+#define cb(_dtype)                                                             \
+    INST_REDUCE(                                                               \
+            device_reduce::CheckNonFiniteOp<                                   \
+                    _dtype COMMA dt_float32 COMMA dt_int32 COMMA dt_int32>,    \
             false);
 
 cb(dt_float32);
dnn/src/cuda/check_non_finite/opr_impl.cpp
@@ -10,11 +10,11 @@ namespace megdnn {
 namespace cuda {
 using device_reduce::CheckNonFiniteOp;
 
-#define total_nr_elems_max 2048
+#define total_nr_elems_max 8192
 template <typename T>
 size_t CheckNonFiniteImpl::_get_workspace_in_bytes() {
     // Call the _get_workspace_in_bytes to reduce the loop fetch workspace bytes
-    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, dt_float32, dt_int32, dt_int32> Op;
     megdnn_assert(m_size > 0);
     WorkspaceBundle bundle(
             nullptr, {
@@ -59,7 +59,7 @@ void CheckNonFiniteImpl::_exec(
         _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
         _megdnn_workspace workspace) {
     check_exec(srcs, dst, workspace.size);
-    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, dt_float32, dt_int32, dt_int32> Op;
     auto stream = cuda_stream(this->handle());
     SmallVector<size_t> workspace_sizes{
             sizeof(T*) * m_size,
@@ -102,7 +102,7 @@ void CheckNonFiniteImpl::_exec(
     cuda_check(cudaStreamAddCallback(
             stream, callback_free, static_cast<void*>(workspace_cpu_raw), 0));
-    return run_reduce<Op, false>(
+    run_reduce<Op, false>(
             static_cast<dt_int32*>(
                     (void*)((char*)workspace_gpu_raw +
                             workspace_gpu.total_size_in_bytes())),
imperative/python/megengine/amp/grad_scaler.py
@@ -141,8 +141,10 @@ class GradScaler:
             tensor.grad = None
         return self
 
-    def _check_gradients(self, grad, scale):
-        return _check_non_finite(grad, scale)
+    def _check_gradients(self, grads, scale):
+        if len(grads) == 0:
+            return False
+        return _check_non_finite(grads, scale)
 
     def update(self, new_scale: float = None):
         r"""Update the scale factor according to whether encountered overflow grad.
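A minimal usage sketch of the scaler loop this guard supports, assembled only from the GradScaler and GradManager calls visible elsewhere in this commit (scaler.backward, scaler.unscale, scaler.update, gm.attach, gm.attached_tensors); treat it as a sketch under those assumptions, not the library's documented example:

    import numpy as np
    import megengine as mge
    from megengine.amp import GradScaler
    from megengine.autodiff import GradManager

    gm = GradManager()
    scaler = GradScaler()

    w = mge.tensor(np.ones((2, 3), dtype="float32"))
    gm.attach([w])

    with gm:
        loss = (w + 1) * 2  # d(loss)/dw == 2 everywhere
        # The loss is backpropagated scaled by scale_factor; grads stay scaled here.
        scaler.backward(gm, loss, unscale_grad=False)

    # Divide grads by scale_factor, skipping tensors whose grad is None;
    # the new len(grads) == 0 check above covers the fully-empty case.
    scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
    scaler.update()  # adjust scale_factor according to whether non-finite grads appeared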
imperative/python/megengine/functional/math.py
@@ -691,11 +691,13 @@ def _check_non_finite(inps: Iterable[Tensor], scale=1.0) -> Tensor:
     r"""Check whether input contains infinite or nan value.
 
     Args:
-        inp: a tensor to be checked.
+        inps: tensors to be checked.
 
     Returns:
         a int32 scalar tensor, 0 for False and 1 for True.
     """
+    if isinstance(inps, Tensor):
+        inps = [inps]
     op = builtin.CheckNonFinite(scale=scale)
     oups = apply(op, *inps)
     out = oups[-1]
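Since _check_non_finite now accepts either a single tensor or an iterable, here is a short, hedged example of the behaviour the updated docstring describes; the helper is private, so importing it from megengine.functional.math is an assumption based on this file's path, not a public API:

    import numpy as np
    import megengine as mge
    from megengine.functional.math import _check_non_finite  # private helper, import path assumed

    finite = mge.tensor(np.ones((2, 3), dtype="float32"))
    with_inf = mge.tensor(np.array([1.0, float("inf")], dtype="float32"))

    # A single tensor is wrapped into a list by the new isinstance check.
    print(_check_non_finite(finite).numpy())              # 0: all values finite
    # An iterable of tensors is checked as a whole.
    print(_check_non_finite([finite, with_inf]).numpy())  # 1: inf or nan found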
imperative/python/test/unit/amp/test_grad_scaler.py
 import numpy as np
+import pytest
 
 import megengine as mge
 from megengine.amp import GradScaler
@@ -6,23 +7,46 @@ from megengine.autodiff import GradManager
 from megengine.jit import trace
 
 
-def test_grad_scaler():
-    def f():
-        gm = GradManager()
-        scaler = GradScaler()
-
-        x = mge.tensor(1.0)
-        for _ in range(3):
-            with gm:
-                y = x + 1
-                gm.attach(y)
-                loss = y + 1
-                scaler.backward(gm, loss, unscale_grad=False)
-            np.testing.assert_equal(y.grad.numpy(), scaler.scale_factor)
-            scaler.unscale(gm.attached_tensors())
-            np.testing.assert_equal(y.grad.numpy(), 1)
-        # test handle None elements
-        scaler.unscale(gm.attached_tensors())
-
-    f()
-    trace(f)()
+@pytest.mark.parametrize(
+    "is_trace", [False, True],
+)
+def test_grad_scaler(is_trace):
+    gm = GradManager()
+    scaler = GradScaler()
+
+    def f(idx, data, calc):
+        x = mge.tensor(data, no_cache=True)
+        y = mge.tensor(data, no_cache=True)
+
+        if is_trace:
+            calc = trace(calc)
+
+        gm.attach([x, y])
+        with gm:
+            loss = calc(x, y)
+            scaler.backward(gm, loss, unscale_grad=False)
+        np.testing.assert_equal(x.grad.numpy(), 2 * scaler.scale_factor)
+        scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
+        # scaler.unscale(gm.attached_tensors())
+        np.testing.assert_equal(x.grad.numpy(), 2)
+
+    def double_variables(x, y):
+        z = x + 2 * y
+        loss = 2 * z + 1
+        return loss
+
+    def single_variable(x, y):
+        z = x + 1
+        loss = 2 * z + 1
+        return loss
+
+    # need grad being unique storage or not inplace modifying grad
+    def double_variables_with_same_grad(x, y):
+        z = x + y
+        loss = 2 * z + 1
+        return loss
+
+    for data in [np.random.random((1, 2, 3, 4)), 1.0]:
+        for calc in [double_variables, single_variable, double_variables_with_same_grad]:
+            for idx in range(3):
+                f(idx, data, calc)
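A note on the values the new assertions expect: every calc variant has d(loss)/dx = 2 (double_variables: loss = 2*(x + 2*y) + 1; single_variable: loss = 2*(x + 1) + 1; double_variables_with_same_grad: loss = 2*(x + y) + 1), and the removed test's assertion that y.grad equals scaler.scale_factor already implied that scaler.backward backpropagates the loss multiplied by scale_factor. Hence x.grad is first 2 * scaler.scale_factor and becomes 2 after unscale. In the single_variable case y never enters the loss, so y.grad stays None; that is why the tensors are filtered with t.grad is not None before unscaling, matching the empty/None-gradient handling added in grad_scaler.py.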