Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
23a3d133
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
23a3d133
编写于
8月 06, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(dnn/softmax): create reduce and elemwise opr when get workspace size
GitOrigin-RevId: 476a39bdd3a9fd419a3cbd646b7b2c8d6ae5f06a
上级
2797fcfa
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
96 addition
and
66 deletion
+96
-66
dnn/src/naive/softmax/opr_impl.cpp
dnn/src/naive/softmax/opr_impl.cpp
+82
-58
dnn/src/naive/softmax/opr_impl.h
dnn/src/naive/softmax/opr_impl.h
+14
-8
未找到文件。
dnn/src/naive/softmax/opr_impl.cpp
浏览文件 @
23a3d133
...
...
@@ -10,96 +10,120 @@
#include "src/naive/elemwise/opr_impl.h"
#include "src/naive/handle.h"
#include "src/naive/lowbit_utils.h"
using
namespace
megdnn
;
namespace
{
/*!
 * \brief Run \p opr on \p src, writing the result into caller-provided scratch
 *      memory.
 *
 * The scratch buffer starting at \p workspace_ptr is laid out as
 * [ output tensor | workspace of \p opr ]: the output layout is deduced from
 * \p src, and the operator's own workspace begins immediately after the
 * output's byte span.
 *
 * \param src input tensor handed to the operator.
 * \param workspace_ptr start of the scratch buffer. It is taken by value, so
 *      the caller's pointer is NOT advanced by this call; a caller that issues
 *      several calls must offset the pointer itself to avoid overlapping
 *      regions.
 * \param opr pointer-like handle to the operator; it must provide
 *      deduce_layout(), get_workspace_in_bytes() and exec().
 * \return the output tensor, whose storage is the head of the scratch buffer.
 */
template <typename T>
TensorND op_exec(
        _megdnn_tensor_in src, megdnn::dt_byte* workspace_ptr, const T& opr) {
    TensorLayout dst_layout;
    opr->deduce_layout(src.layout, dst_layout);

    // The output occupies the head of the scratch buffer.
    TensorND dst{workspace_ptr, dst_layout};

    // The operator's private workspace follows the output; query its size once.
    megdnn::dt_byte* opr_workspace_ptr =
            workspace_ptr + dst_layout.span().dist_byte();
    const auto opr_workspace_size =
            opr->get_workspace_in_bytes(src.layout, dst_layout);
    auto opr_workspace = Workspace{opr_workspace_ptr, opr_workspace_size};

    opr->exec(src, dst, opr_workspace);
    return dst;
}
}
// namespace
namespace
megdnn
{
namespace
naive
{
//===============================Softmax Forward============================
/*!
 * \brief Compute the workspace size needed by the softmax forward pass.
 *
 * Side effects: (re)creates the member operators reduce_opr and elemwise_opr,
 * configures reduce_opr for the normalized axis, and stores the larger of the
 * MAX-reduce and SUM-reduce workspace sizes in reduce_worksize. On return
 * reduce_opr is left in SUM mode.
 *
 * \param src layout of the input tensor.
 * \param dst layout of the output tensor; forwarded to the reduce operator's
 *      workspace query.
 * \return total bytes of a WorkspaceBundle holding one buffer of src's byte
 *      span plus the reduce workspace.
 */
size_t SoftmaxForwardImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& dst) {
    // Map a negative axis onto [0, ndim) and reject anything still out of range.
    int32_t axis = param().axis;
    const int32_t ndim = src.ndim;
    if (axis < 0)
        axis += ndim;
    megdnn_assert(
            axis >= 0 && axis < ndim, "is not a vaild axis=%d for dim=%d", axis,
            ndim);

    reduce_opr = handle()->create_operator<Reduce>();
    elemwise_opr = handle()->create_operator<Elemwise>();

    reduce_opr->param().axis = axis;
    reduce_opr->param().data_type = param::Reduce::DataType::DEFAULT;

    // The same scratch region is sized for both reductions, so take the larger
    // of the two requirements.
    reduce_opr->param().mode = Reduce::Mode::MAX;
    const size_t max_workspace = reduce_opr->get_workspace_in_bytes(src, dst);
    reduce_opr->param().mode = Reduce::Mode::SUM;
    const size_t sum_workspace = reduce_opr->get_workspace_in_bytes(src, dst);
    reduce_worksize = max_workspace > sum_workspace ? max_workspace : sum_workspace;

    return WorkspaceBundle(nullptr, {src.span().dist_byte(), reduce_worksize})
            .total_size_in_bytes();
}
void
SoftmaxForwardImpl
::
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
{
auto
axis
=
param
().
axis
;
if
(
axis
<
0
)
axis
+=
src
.
layout
.
ndim
;
check_exec
(
src
.
layout
,
dst
.
layout
,
workspace
.
size
);
auto
workspace_ptr
=
workspace
.
raw_ptr
;
auto
reduce_opr
=
handle
()
->
create_operator
<
ReduceForward
>
();
reduce_opr
->
param
().
axis
=
axis
;
WorkspaceBundle
workspace_bundle
{
workspace
.
raw_ptr
,
{
src
.
layout
.
span
().
dist_byte
(),
reduce_worksize
}};
TensorLayout
tmp_layout
;
reduce_opr
->
param
().
mode
=
Reduce
::
Mode
::
MAX
;
reduce_opr
->
param
().
data_type
=
param
::
Reduce
::
DataType
::
DEFAULT
;
TensorND
max_tensor
=
op_exec
(
src
,
workspace_ptr
,
reduce_opr
);
reduce_opr
->
deduce_layout
(
src
.
layout
,
tmp_layout
);
TensorND
max_tensor
{
workspace_bundle
.
get_workspace
(
0
).
raw_ptr
,
tmp_layout
};
reduce_opr
->
exec
(
src
,
max_tensor
,
workspace_bundle
.
get_workspace
(
1
));
auto
elemwise_opr
=
handle
()
->
create_operator
<
Elemwise
>
();
elemwise_opr
->
param
().
mode
=
Elemwise
::
Mode
::
SUB
;
elemwise_opr
->
exec
({
src
,
max_tensor
},
dst
);
// no broadcast
elemwise_opr
->
param
().
mode
=
Elemwise
::
Mode
::
EXP
;
TensorLayout
exp_layout
;
elemwise_opr
->
deduce_layout
({
src
.
layout
},
exp_layout
);
TensorND
exp_tensor
{
workspace_ptr
,
exp_layout
};
workspace_ptr
+=
exp_layout
.
span
().
dist_byte
();
elemwise_opr
->
exec
({
dst
},
exp_tensor
);
elemwise_opr
->
exec
({
dst
},
dst
);
reduce_opr
->
param
().
mode
=
Reduce
::
Mode
::
SUM
;
TensorND
down_tensor
=
op_exec
(
exp_tensor
,
workspace_ptr
,
reduce_opr
);
reduce_opr
->
deduce_layout
(
src
.
layout
,
tmp_layout
);
TensorND
deno_tensor
{
workspace_bundle
.
get_workspace
(
0
).
raw_ptr
,
tmp_layout
};
reduce_opr
->
exec
(
dst
,
deno_tensor
,
workspace_bundle
.
get_workspace
(
1
));
elemwise_opr
->
param
().
mode
=
Elemwise
::
Mode
::
TRUE_DIV
;
elemwise_opr
->
exec
({
exp_tensor
,
down
_tensor
},
dst
);
elemwise_opr
->
exec
({
dst
,
deno
_tensor
},
dst
);
}
//=============================Softmax backward ============================
/*!
 * \brief Compute the workspace size needed by the softmax backward pass.
 *
 * Side effects: (re)creates the member operators reduce_opr and elemwise_opr,
 * configures reduce_opr as a SUM reduction over the normalized axis, and
 * stores its workspace requirement in reduce_worksize.
 *
 * \param src layout of the first input tensor; also used to size the two
 *      intermediate buffers.
 * \param diff layout of the incoming gradient; forwarded to the reduce
 *      operator's workspace query.
 * \return total bytes of a WorkspaceBundle holding two buffers of src's byte
 *      span plus the reduce workspace.
 */
size_t SoftmaxBackwardImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& diff, const TensorLayout&) {
    // Map a negative axis onto [0, ndim) and reject anything still out of range.
    int32_t axis = param().axis;
    const int32_t ndim = src.ndim;
    if (axis < 0)
        axis += ndim;
    megdnn_assert(
            axis >= 0 && axis < ndim, "is not a vaild axis=%d for dim=%d", axis,
            ndim);

    reduce_opr = handle()->create_operator<Reduce>();
    elemwise_opr = handle()->create_operator<Elemwise>();

    reduce_opr->param().axis = axis;
    reduce_opr->param().data_type = param::Reduce::DataType::DEFAULT;
    reduce_opr->param().mode = Reduce::Mode::SUM;
    reduce_worksize = reduce_opr->get_workspace_in_bytes(src, diff);

    // Bundle layout: [ buffer of src's span | buffer of src's span | reduce ws ].
    const size_t elem_buffer_bytes = src.span().dist_byte();
    return WorkspaceBundle(
                   nullptr, {elem_buffer_bytes, elem_buffer_bytes, reduce_worksize})
            .total_size_in_bytes();
}
void
SoftmaxBackwardImpl
::
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
{
auto
axis
=
param
().
axis
;
if
(
axis
<
0
)
axis
+=
src
.
layout
.
ndim
;
check_exec
(
src
.
layout
,
diff
.
layout
,
grad
.
layout
,
workspace
.
size
);
auto
workspace_ptr
=
workspace
.
raw_ptr
;
TensorLayout
mulres
=
src
.
layout
;
mulres
.
dtype
=
src
.
layout
.
dtype
;
mulres
.
format
=
src
.
layout
.
format
;
mulres
.
init_contiguous_stride
();
TensorND
mul_tensor
{
workspace_ptr
,
mulres
};
workspace_ptr
+=
mulres
.
span
().
dist_byte
();
TensorND
mul_tensor2
{
workspace_ptr
,
mulres
};
workspace_ptr
+=
mulres
.
span
().
dist_byte
();
auto
elemwise_opr
=
handle
()
->
create_operator
<
Elemwise
>
();
WorkspaceBundle
workspace_bundle
{
workspace
.
raw_ptr
,
{
src
.
layout
.
span
().
dist_byte
(),
src
.
layout
.
span
().
dist_byte
(),
reduce_worksize
}};
TensorLayout
mul_layout
=
src
.
layout
;
mul_layout
.
dtype
=
src
.
layout
.
dtype
;
mul_layout
.
format
=
src
.
layout
.
format
;
mul_layout
.
init_contiguous_stride
();
TensorND
mul_lhs_tensor
{
workspace_bundle
.
get_workspace
(
0
).
raw_ptr
,
mul_layout
};
TensorND
mul_rhs_tensor
{
workspace_bundle
.
get_workspace
(
1
).
raw_ptr
,
mul_layout
};
elemwise_opr
->
param
().
mode
=
Elemwise
::
Mode
::
MUL
;
elemwise_opr
->
exec
({
src
,
diff
},
mul_tensor
);
elemwise_opr
->
exec
({
src
,
diff
},
mul_
lhs_
tensor
);
auto
reduce_opr
=
handle
()
->
create_operator
<
ReduceForward
>
();
reduce_opr
->
param
().
axis
=
axis
;
reduce_opr
->
param
().
mode
=
Reduce
::
Mode
::
SUM
;
reduce_opr
->
param
().
data_type
=
param
::
Reduce
::
DataType
::
DEFAULT
;
TensorND
sum_tensor
=
op_exec
(
mul_tensor
,
workspace_ptr
,
reduce_opr
);
TensorLayout
sum_layout
;
reduce_opr
->
deduce_layout
(
mul_lhs_tensor
.
layout
,
sum_layout
);
TensorND
sum_tensor
{
grad
.
raw_ptr
(),
sum_layout
};
reduce_opr
->
exec
(
mul_lhs_tensor
,
sum_tensor
,
workspace_bundle
.
get_workspace
(
2
));
elemwise_opr
->
exec
({
sum_tensor
,
src
},
mul_tensor2
);
// broadcasting occurs in this elemwise mul
elemwise_opr
->
exec
({
sum_tensor
,
src
},
mul_rhs_tensor
);
elemwise_opr
->
param
().
mode
=
Elemwise
::
Mode
::
SUB
;
elemwise_opr
->
exec
({
mul_
tensor
,
mul_tensor2
},
grad
);
elemwise_opr
->
exec
({
mul_
lhs_tensor
,
mul_rhs_tensor
},
grad
);
}
}
// namespace naive
}
// namespace megdnn
\ No newline at end of file
dnn/src/naive/softmax/opr_impl.h
浏览文件 @
23a3d133
#pragma once
#include "megdnn/oprs.h"
#include "src/common/utils.h"
namespace
megdnn
{
namespace
naive
{
...
...
@@ -11,9 +11,12 @@ public:
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
)
override
{
return
src
.
span
().
dist_byte
()
*
2
;
}
const
TensorLayout
&
src
,
const
TensorLayout
&
dst
)
override
;
private:
size_t
reduce_worksize
=
0
;
std
::
unique_ptr
<
megdnn
::
Reduce
>
reduce_opr
;
std
::
unique_ptr
<
megdnn
::
Elemwise
>
elemwise_opr
;
};
class
SoftmaxBackwardImpl
:
public
SoftmaxBackward
{
...
...
@@ -23,10 +26,13 @@ public:
_megdnn_tensor_in
src
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad_x
,
_megdnn_workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
src
.
span
().
dist_byte
()
*
3
;
}
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
)
override
;
private:
size_t
reduce_worksize
=
0
;
std
::
unique_ptr
<
megdnn
::
Reduce
>
reduce_opr
;
std
::
unique_ptr
<
megdnn
::
Elemwise
>
elemwise_opr
;
};
}
// namespace naive
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录