MegEngine 天元 / MegEngine

Commit 73112558
Authored on Mar 04, 2022 by Megvii Engine Team

feat(mge/dnn): support checknonfinite for fp16
GitOrigin-RevId: 83fa139ac06ed6851537764b3cdaba812f219773
Parent: f7e10ea8

Showing 7 changed files with 49 additions and 19 deletions (+49, -19)

dnn/include/megdnn/oprs/general.h           +0  -1
dnn/src/common/check_non_finite.cpp         +1  -2
dnn/src/common/reduce_helper_device.h       +1  -1
dnn/src/cuda/check_non_finite/kern.cu       +8  -4
dnn/src/cuda/check_non_finite/opr_impl.cpp  +31 -9
dnn/src/cuda/check_non_finite/opr_impl.h    +7  -1
dnn/src/naive/check_non_finite/opr_impl.h   +1  -1

dnn/include/megdnn/oprs/general.h

@@ -1383,7 +1383,6 @@ public:
 protected:
     void check_exec(
             const TensorNDArray& srcs, const TensorND& dst, size_t workspace_in_bytes);
-    virtual size_t _get_workspace_in_bytes() = 0;
 };
 
 /*!

dnn/src/common/check_non_finite.cpp

@@ -18,8 +18,7 @@ void CheckNonFinite::check_exec(
         const TensorNDArray& srcs, const TensorND& dst, size_t workspace_in_bytes) {
     megdnn_assert_contiguous(dst.layout);
     megdnn_assert(srcs.size() > 0);
-    megdnn_assert(srcs.begin()->layout.dtype == dtype::Float32());
-    auto required_workspace_in_bytes = _get_workspace_in_bytes();
+    auto required_workspace_in_bytes = get_workspace_in_bytes(srcs, dst.layout);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }

dnn/src/common/reduce_helper_device.h

@@ -236,4 +236,4 @@ void get_ABC(const TensorShape& shape, size_t& A, size_t& B, size_t& C, size_t a
 }  // namespace megdnn
 
-// vim: syntax=cpp.doxygen
\ No newline at end of file
+// vim: syntax=cpp.doxygen

dnn/src/cuda/check_non_finite/kern.cu

@@ -18,11 +18,15 @@ namespace cuda {
 #define COMMA ,
 
-INST_REDUCE(
-        device_reduce::CheckNonFiniteOp<
-                dt_float32 COMMA size_t COMMA dt_int32 COMMA dt_int32>,
-        false);
+#define cb(_dtype)                                                     \
+    INST_REDUCE(                                                       \
+            device_reduce::CheckNonFiniteOp<                           \
+                    _dtype COMMA size_t COMMA dt_int32 COMMA dt_int32>, \
+            false);
+cb(dt_float32);
+cb(dt_float16);
+#undef cb
 
 #undef COMMA
 
 }  // namespace cuda
 }  // namespace megdnn
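
The hunk above turns the single fp32 kernel instantiation into a cb(_dtype) helper macro that is invoked once per supported element type. The sketch below illustrates the same instantiate-per-dtype macro pattern in a self-contained form; the function and type names are hypothetical stand-ins rather than MegDNN's, and double merely takes the place of a half-precision type.

// inst_per_dtype.cpp — illustrative sketch only; names are made up for this example.
#include <cmath>
#include <cstddef>
#include <cstdio>

// A toy "op" templated on the element type, loosely in the role of CheckNonFiniteOp<T, ...>.
template <typename T>
bool has_non_finite(const T* data, std::size_t n) {
    for (std::size_t i = 0; i < n; ++i)
        if (!std::isfinite(data[i])) return true;
    return false;
}

// Stamp out one explicit instantiation per supported dtype, the same way the
// patched kern.cu wraps INST_REDUCE in cb(_dtype) and calls it for dt_float32
// and dt_float16.
#define cb(_dtype) template bool has_non_finite<_dtype>(const _dtype*, std::size_t);
cb(float)   // stands in for cb(dt_float32);
cb(double)  // stands in for cb(dt_float16); the real code uses a half type
#undef cb

int main() {
    float ok[] = {1.f, 2.f};
    float bad[] = {1.f, INFINITY};
    std::printf("%d %d\n", has_non_finite(ok, 2), has_non_finite(bad, 2));
    return 0;
}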

dnn/src/cuda/check_non_finite/opr_impl.cpp

@@ -22,13 +22,14 @@ namespace cuda {
 using device_reduce::CheckNonFiniteOp;
 
 #define total_nr_elems_max 2048
+template <typename T>
 size_t CheckNonFiniteImpl::_get_workspace_in_bytes() {
     // Call the _get_workspace_in_bytes to reduce the loop fetch workspace bytes
-    typedef CheckNonFiniteOp<dt_float32, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
     megdnn_assert(m_size > 0);
     WorkspaceBundle bundle(
             nullptr, {
-                             sizeof(dt_float32*) * m_size,
+                             sizeof(T*) * m_size,
                              sizeof(size_t) * m_size,
                      });
     return get_reduce_workspace_in_bytes<Op>(1, m_size * total_nr_elems_max, 1) +
@@ -41,17 +42,38 @@ size_t CheckNonFiniteImpl::get_workspace_in_bytes(
     for (const auto& src : srcs) {
         m_size += DIVUP(src.layout.total_nr_elems(), total_nr_elems_max);
     }
-    return _get_workspace_in_bytes();
+    if (srcs.begin()->layout.dtype == dtype::Float32()) {
+        return _get_workspace_in_bytes<dt_float32>();
+    } else if (srcs.begin()->layout.dtype == dtype::Float16()) {
+        return _get_workspace_in_bytes<dt_float16>();
+    } else {
+        megdnn_log_warn("only support fp16 and fp32, fallback to fp32");
+        return _get_workspace_in_bytes<dt_float32>();
+    }
 }
 
 void CheckNonFiniteImpl::exec(
+        _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
+        _megdnn_workspace workspace) {
+    if (srcs.begin()->layout.dtype == dtype::Float32()) {
+        _exec<dt_float32>(srcs, dst, workspace);
+    }
+#ifdef DNN_INC_FLOAT16
+    else if (srcs.begin()->layout.dtype == dtype::Float16()) {
+        _exec<dt_float16>(srcs, dst, workspace);
+    }
+#endif
+}
+
+template <typename T>
+void CheckNonFiniteImpl::_exec(
         _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
         _megdnn_workspace workspace) {
     check_exec(srcs, dst, workspace.size);
-    typedef CheckNonFiniteOp<dt_float32, size_t, dt_int32, dt_int32> Op;
+    typedef CheckNonFiniteOp<T, size_t, dt_int32, dt_int32> Op;
     auto stream = cuda_stream(this->handle());
     SmallVector<size_t> workspace_sizes{
-            sizeof(dt_float32*) * m_size,
+            sizeof(T*) * m_size,
             sizeof(size_t) * m_size,
     };
     WorkspaceBundle workspace_cpu(nullptr, workspace_sizes),
@@ -63,8 +85,8 @@ void CheckNonFiniteImpl::exec(
     workspace_cpu = WorkspaceBundle(workspace_cpu_raw, workspace_sizes);
     workspace_gpu = WorkspaceBundle(workspace_gpu_raw, workspace_sizes);
-    auto srcs_cpu = static_cast<dt_float32**>(workspace_cpu.get(0));
-    auto srcs_gpu = static_cast<dt_float32**>(workspace_gpu.get(0));
+    auto srcs_cpu = static_cast<T**>(workspace_cpu.get(0));
+    auto srcs_gpu = static_cast<T**>(workspace_gpu.get(0));
     auto srcs_total_nr_elems_cpu = static_cast<size_t*>(workspace_cpu.get(1));
     auto srcs_total_nr_elems_gpu = static_cast<size_t*>(workspace_gpu.get(1));
@@ -75,7 +97,7 @@ void CheckNonFiniteImpl::exec(
         size_t src_nr_elems = src.layout.total_nr_elems();
         size_t nr_elems = DIVUP(src_nr_elems, total_nr_elems_max);
         for (size_t j = 0; j < nr_elems; ++j, ++i) {
-            srcs_cpu[i] = src.ptr<dt_float32>() + j * total_nr_elems_max;
+            srcs_cpu[i] = src.ptr<T>() + j * total_nr_elems_max;
             if (j + 1 == nr_elems && src_nr_elems % total_nr_elems_max) {
                 srcs_total_nr_elems_cpu[i] = src_nr_elems % total_nr_elems_max;
             } else {
@@ -97,7 +119,7 @@ void CheckNonFiniteImpl::exec(
                     workspace_gpu.total_size_in_bytes())),
             1, m_size * total_nr_elems_max, 1, stream,
             Op(srcs_gpu, srcs_total_nr_elems_gpu, dst.ptr<dt_int32>(),
-               total_nr_elems_max, param().scale));
+               total_nr_elems_max, static_cast<T>(param().scale)));
 }
 
 }  // namespace cuda
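
Most of this file's change is mechanical: the former fp32-only body becomes _exec<T>(), and the public exec() simply switches on the runtime dtype and forwards to the matching instantiation. A minimal, self-contained sketch of that dispatch shape follows; DType, Half, and exec_impl are hypothetical stand-ins, not MegDNN types.

// dtype_dispatch.cpp — illustrative sketch only; not MegDNN code.
#include <cstddef>
#include <cstdio>
#include <stdexcept>

enum class DType { Float32, Float16 };

struct Half { unsigned short bits; };  // stand-in for a half-precision storage type

// Templated worker, playing the role of CheckNonFiniteImpl::_exec<T>().
template <typename T>
void exec_impl(const T* /*src*/, std::size_t n) {
    std::printf("running with %zu-byte elements over %zu values\n", sizeof(T), n);
}

// Runtime dispatch on the input dtype, mirroring the Float32/Float16 branches
// added to CheckNonFiniteImpl::exec(); anything else is rejected here.
void exec(DType dtype, const void* src, std::size_t n) {
    switch (dtype) {
        case DType::Float32:
            exec_impl(static_cast<const float*>(src), n);
            break;
        case DType::Float16:
            exec_impl(static_cast<const Half*>(src), n);
            break;
        default:
            throw std::runtime_error("only fp16 and fp32 are supported");
    }
}

int main() {
    float data[4] = {1.f, 2.f, 3.f, 4.f};
    exec(DType::Float32, data, 4);
    return 0;
}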

dnn/src/cuda/check_non_finite/opr_impl.h

@@ -18,7 +18,13 @@ namespace megdnn {
 namespace cuda {
 
 class CheckNonFiniteImpl final : public CheckNonFinite {
-    size_t _get_workspace_in_bytes() override;
+    template <typename T>
+    size_t _get_workspace_in_bytes();
+
+    template <typename T>
+    void _exec(
+            _megdnn_in const TensorNDArray& srcs, _megdnn_tensor_out dst,
+            _megdnn_workspace workspace);
 
 public:
     using CheckNonFinite::CheckNonFinite;
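
Note that the replacement members are plain (non-virtual) function templates: the old size_t _get_workspace_in_bytes() override; hook cannot simply be templated, because C++ does not allow a member function template to be virtual. A small sketch of that constraint, using hypothetical class names:

// virtual_vs_template.cpp — illustrative sketch only; class names are hypothetical.
#include <cstddef>
#include <cstdio>

struct OprBase {
    // A virtual hook is fine for one fixed signature...
    virtual std::size_t workspace_bytes() const { return 0; }
    virtual ~OprBase() = default;
};

struct OprImpl : OprBase {
    // ...but a per-dtype variant has to be a non-virtual member template;
    // writing "template <typename T> virtual std::size_t ..." does not compile.
    template <typename T>
    std::size_t workspace_bytes() const { return sizeof(T) * 2048; }

    // The fixed-signature override can still forward to one instantiation.
    std::size_t workspace_bytes() const override { return workspace_bytes<float>(); }
};

int main() {
    OprImpl opr;
    std::printf("%zu %zu\n", opr.workspace_bytes<float>(), opr.workspace_bytes());
    return 0;
}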

dnn/src/naive/check_non_finite/opr_impl.h

@@ -17,7 +17,7 @@ namespace megdnn {
 namespace naive {
 
 class CheckNonFiniteImpl final : public CheckNonFinite {
-    size_t _get_workspace_in_bytes() override { return 0; }
+    size_t _get_workspace_in_bytes() { return 0; }
 
 public:
     using CheckNonFinite::CheckNonFinite;