Commit fc0f4546 authored by Megvii Engine Team

fix(dnn/check_non_finite): adjust some details of CheckNonFinite

GitOrigin-RevId: 52ddd805b433fec80dfa23352918b3d3ec9b8947
Parent 3bd40887
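In short, this commit: drops the index_ctype template parameter from the MegDNN CheckNonFiniteOp reducer and stores the per-tensor element counts as a plain size_t*; switches the CUDA instantiations of the op to the <T, dt_float32, dt_int32, dt_int32> form and raises total_nr_elems_max from 2048 to 8192; removes the redundant return in front of the run_reduce call in the void CheckNonFiniteImpl::_exec; makes GradScaler._check_gradients return False for an empty gradient list; lets the Python helper _check_non_finite accept a bare Tensor as well as an iterable of tensors; and rewrites the grad-scaler test, parametrizing it over tracing.

The sketch below shows the AMP flow that the updated test exercises. It is illustrative only and assumes nothing beyond the APIs visible in this diff (GradManager, GradScaler.backward/unscale/update, scale_factor, mge.tensor); the loss expression is arbitrary.

import megengine as mge
from megengine.amp import GradScaler
from megengine.autodiff import GradManager

gm = GradManager()
scaler = GradScaler()

x = mge.tensor(1.0, no_cache=True)
gm.attach([x])
with gm:
    loss = 2 * (x + 1)  # any differentiable computation
    # unscale_grad=False leaves the gradients multiplied by scale_factor
    scaler.backward(gm, loss, unscale_grad=False)

# gradients are still scaled here: x.grad == 2 * scaler.scale_factor
scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
# overflow detection relies on the _check_non_finite helper changed below
scaler.update()  # adjusts scale_factor according to whether overflow grads were seen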
@@ -175,13 +175,13 @@ struct MaxOp<src_ctype, dst_ctype, dt_float32> {
             : INIT(wtype(DTypeTrait<wtype>::min())), src(src), dst(dst), B(B) {}
 };
 
-template <typename src_ctype, typename index_ctype, typename dst_ctype, typename wtype_>
+template <typename src_ctype, typename dst_ctype, typename wtype_>
 struct CheckNonFiniteOp {
     typedef wtype_ wtype;
     const wtype INIT;
 
     src_ctype** srcs;
-    index_ctype* srcs_total_nr_elems;
+    size_t* srcs_total_nr_elems;
     dst_ctype* dst;
     const size_t B;
     const src_ctype scale;
@@ -206,7 +206,7 @@ struct CheckNonFiniteOp {
         return lhs | rhs;
     }
     MEGDNN_HOST MEGDNN_DEVICE CheckNonFiniteOp(
-            src_ctype** srcs, index_ctype* srcs_total_nr_elems, dst_ctype* dst,
+            src_ctype** srcs, size_t* srcs_total_nr_elems, dst_ctype* dst,
             size_t B, src_ctype scale)
             : INIT(wtype(0)),
               srcs(srcs),
...
@@ -8,10 +8,10 @@ namespace cuda {
 #define COMMA ,
 
 #define cb(_dtype)                                                          \
     INST_REDUCE(                                                            \
             device_reduce::CheckNonFiniteOp<                                \
-                    _dtype COMMA size_t COMMA dt_int32 COMMA dt_int32>,     \
+                    _dtype COMMA dt_float32 COMMA dt_int32 COMMA dt_int32>, \
             false);
 
 cb(dt_float32);
...
@@ -10,11 +10,11 @@ namespace megdnn {
 namespace cuda {
 
 using device_reduce::CheckNonFiniteOp;
-#define total_nr_elems_max 2048
+#define total_nr_elems_max 8192
 template <typename T>
 size_t CheckNonFiniteImpl::_get_workspace_in_bytes() {
     // Call the _get_workspace_in_bytes to reduce the loop fetch workspace bytes
-    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, dt_float32, dt_int32, dt_int32> Op;
     megdnn_assert(m_size > 0);
     WorkspaceBundle bundle(
             nullptr, {
@@ -59,7 +59,7 @@ void CheckNonFiniteImpl::_exec(
         _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
         _megdnn_workspace workspace) {
     check_exec(srcs, dst, workspace.size);
-    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, dt_float32, dt_int32, dt_int32> Op;
     auto stream = cuda_stream(this->handle());
     SmallVector<size_t> workspace_sizes{
             sizeof(T*) * m_size,
@@ -102,7 +102,7 @@ void CheckNonFiniteImpl::_exec(
     cuda_check(cudaStreamAddCallback(
             stream, callback_free, static_cast<void*>(workspace_cpu_raw), 0));
-    return run_reduce<Op, false>(
+    run_reduce<Op, false>(
            static_cast<dt_int32*>(
                    (void*)((char*)workspace_gpu_raw +
                            workspace_gpu.total_size_in_bytes())),
...
@@ -141,8 +141,10 @@ class GradScaler:
             tensor.grad = None
         return self
 
-    def _check_gradients(self, grad, scale):
-        return _check_non_finite(grad, scale)
+    def _check_gradients(self, grads, scale):
+        if len(grads) == 0:
+            return False
+        return _check_non_finite(grads, scale)
 
     def update(self, new_scale: float = None):
         r"""Update the scale factor according to whether encountered overflow grad.
...
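The new early return matters when there are no gradients to inspect, for example when every attached tensor ends up with grad set to None and the caller filters them out, as the rewritten test below does. A minimal illustration of the guard; it calls the private _check_gradients directly purely for demonstration, and the scale argument is irrelevant for the empty case:

from megengine.amp import GradScaler

scaler = GradScaler()
# With no gradients, the guard returns False instead of forwarding an empty
# list to _check_non_finite (the previous behaviour).
assert scaler._check_gradients([], scaler.scale_factor) is False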
@@ -691,11 +691,13 @@ def _check_non_finite(inps: Iterable[Tensor], scale=1.0) -> Tensor:
     r"""Check whether input contains infinite or nan value.
 
     Args:
-        inp: a tensor to be checked.
+        inps: tensors to be checked.
 
     Returns:
         a int32 scalar tensor, 0 for False and 1 for True.
     """
+    if isinstance(inps, Tensor):
+        inps = [inps]
     op = builtin.CheckNonFinite(scale=scale)
     oups = apply(op, *inps)
     out = oups[-1]
...
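With the new isinstance check, _check_non_finite accepts either a single Tensor or an iterable of tensors. A minimal sketch of the behaviour; the file is not named in this view, so the import path below is an assumption (the helper is presumed to live in megengine.functional.math and is private API), and the default scale=1.0 is kept:

import megengine as mge
from megengine.functional.math import _check_non_finite  # assumed path

a = mge.tensor([1.0, 2.0])
b = mge.tensor([float("inf"), 0.0])

# a bare Tensor is now wrapped into a one-element list automatically
print(_check_non_finite(a).numpy())       # 0: all values are finite
# an iterable of tensors is checked as a whole
print(_check_non_finite([a, b]).numpy())  # 1: an inf/nan value was found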
 import numpy as np
+import pytest
 
 import megengine as mge
 from megengine.amp import GradScaler
@@ -6,23 +7,46 @@ from megengine.autodiff import GradManager
 from megengine.jit import trace
 
 
-def test_grad_scaler():
-    def f():
-        gm = GradManager()
-        scaler = GradScaler()
-
-        x = mge.tensor(1.0)
-        for _ in range(3):
-            with gm:
-                y = x + 1
-                gm.attach(y)
-                loss = y + 1
-                scaler.backward(gm, loss, unscale_grad=False)
-            np.testing.assert_equal(y.grad.numpy(), scaler.scale_factor)
-            scaler.unscale(gm.attached_tensors())
-            np.testing.assert_equal(y.grad.numpy(), 1)
-        # test handle None elements
-        scaler.unscale(gm.attached_tensors())
-
-    f()
-    trace(f)()
+@pytest.mark.parametrize(
+    "is_trace", [False, True],
+)
+def test_grad_scaler(is_trace):
+    gm = GradManager()
+    scaler = GradScaler()
+
+    def f(idx, data, calc):
+        x = mge.tensor(data, no_cache=True)
+        y = mge.tensor(data, no_cache=True)
+
+        if is_trace:
+            calc = trace(calc)
+
+        gm.attach([x, y])
+        with gm:
+            loss = calc(x, y)
+            scaler.backward(gm, loss, unscale_grad=False)
+        np.testing.assert_equal(x.grad.numpy(), 2 * scaler.scale_factor)
+        scaler.unscale(filter(lambda t: t.grad is not None, gm.attached_tensors()))
+        # scaler.unscale(gm.attached_tensors())
+        np.testing.assert_equal(x.grad.numpy(), 2)
+
+    def double_variables(x, y):
+        z = x + 2 * y
+        loss = 2 * z + 1
+        return loss
+
+    def single_variable(x, y):
+        z = x + 1
+        loss = 2 * z + 1
+        return loss
+
+    # need grad being unique storage or not inplace modifying grad
+    def double_variables_with_same_grad(x, y):
+        z = x + y
+        loss = 2 * z + 1
+        return loss
+
+    for data in [np.random.random((1, 2, 3, 4)), 1.0]:
+        for calc in [double_variables, single_variable, double_variables_with_same_grad]:
+            for idx in range(3):
+                f(idx, data, calc)