Commit 9c0a17d0 (MegEngine 天元 / MegEngine)
Authored on July 28, 2021 by Megvii Engine Team
feat(mgb/gopt): add auto aligned reformat impls
GitOrigin-RevId: fd0814fdb3e9f3418df81f6e9295d3cb44f3a67d
Parent: 2ed76b16

Showing 5 changed files with 555 additions and 1 deletion (+555, -1):
src/gopt/impl/reformat_emitter.cpp (+60, -0)
src/gopt/impl/reformat_manager.cpp (+179, -0)
src/gopt/include/megbrain/gopt/reformat_emitter.h (+20, -0)
src/gopt/include/megbrain/gopt/reformat_manager.h (+10, -1)
src/gopt/test/reformat_manager.cpp (+286, -0)
src/gopt/impl/reformat_emitter.cpp
```cpp
@@ -13,6 +13,7 @@
#include "megbrain/gopt/reformat_emitter.h"
#include <numeric>
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/io.h"

using namespace mgb;
using namespace gopt;

@@ -243,4 +244,63 @@ ReformatEmitter::UnderlyingBuilders ReformatEmitter::analyze() const {
    }
    return builders;
}

/* ============== PaddingEmitter ================= */
PaddingEmitter::EmitResult PaddingEmitter::emit() const {
    auto&& const_extent = m_const_extent;
    auto&& axis = m_axis;
    auto builder = [const_extent, axis](const VarNodeArray& vars) {
        auto i = vars[0];
        auto padding_shp_var = vars[1];
        TensorShape shape;
        shape.ndim = i->shape().ndim;
        for (size_t ax = 0; ax < shape.ndim; ++ax)
            shape[ax] = 1;
        shape[axis] = const_extent;
        auto host_val =
                std::make_shared<HostTensorND>(i->comp_node(), i->dtype());
        host_val->resize(shape);
        auto ptr = host_val->raw_ptr();
        size_t size_bytes = TensorLayout{shape, i->dtype()}.span().dist_byte();
        std::memset(ptr, 0, size_bytes);
        auto padding = opr::ImmutableTensor::make(*i->owner_graph(), *host_val);
        padding = opr::Broadcast::make(padding, padding_shp_var);
        auto o = opr::Concat::make({i, padding}, axis);
        return o.node();
    };
    auto checker = [axis](const VarNodeArray& vars) {
        mgb_assert(vars.size() == 2);
        return vars[0]->shape().ndim > axis;
    };
    return std::make_tuple(builder, checker);
}

/* ============== SubtensorEmitter ================= */
SubtensorEmitter::EmitResult SubtensorEmitter::emit() const {
    auto&& const_extent = m_const_extent;
    auto&& axis = m_axis;
    auto builder = [const_extent, axis](const VarNodeArray& vars) {
        auto i = vars[0];
        auto x = SymbolVar(i);
        auto cv = [&x](int v) { return x.make_scalar(v); };
        using AIdx = opr::Subtensor::AxisIndexer;
        std::vector<AIdx> index(i->shape().ndim);
        for (size_t ax = 0; ax < index.size(); ++ax) {
            if (ax == axis)
                index[ax] =
                        AIdx::make_interval(ax, None, cv(const_extent), None);
            else
                index[ax] = AIdx::make_interval(ax, None, None, cv(1));
        }
        auto o = opr::Subtensor::make(x, index);
        return o.node();
    };
    auto checker = [axis](const VarNodeArray& vars) {
        mgb_assert(vars.size() == 2);
        return vars[0]->shape().ndim > axis;
    };
    return std::make_tuple(builder, checker);
}
// vim: syntax=cpp.doxygen
```
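For orientation, here is a minimal sketch (not part of the diff) of how an emitter's `EmitResult` is consumed; it mirrors the call pattern in `reformat_manager.cpp` below. The names `x` and `padding_shp_var` stand for VarNodes assumed to already exist in a MegEngine graph, e.g. the feature var and a target-shape var produced by a `MakeShapeEmitter`:

```cpp
// Sketch under assumptions: `x` and `padding_shp_var` are pre-existing
// VarNode* handles; the const_extent/axis values are illustrative only.
auto emit_result = PaddingEmitter{/*const_extent=*/8, /*axis=*/1}.emit();
auto builder = std::get<0>(emit_result);  // builds the padded VarNode
auto checker = std::get<1>(emit_result);  // validates the inputs first
VarNodeArray inputs{x, padding_shp_var};
mgb_assert(checker(inputs));        // requires axis 1 to exist in x's shape
VarNode* padded = builder(inputs);  // Concat(x, broadcast zeros) along axis 1
```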
src/gopt/impl/reformat_manager.cpp
```cpp
@@ -12,12 +12,27 @@
#include "megbrain/gopt/reformat_manager.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/utils/arith_helper.h"

using namespace mgb;
using namespace gopt;
using NamedTensorShape = megdnn::NamedTensorShape;
using Dimension = megdnn::Dimension;

namespace {
int gcd(const int& p, const int& q) {
    int x = p, y = q;
    while (y != 0) {
        if (x < y) {
            y = (y % x);
        } else {
            x = (x % y);
            std::swap(x, y);
        }
    }
    return x;
}

NamedTensorShape tensor_formats_to_named_tensor_shape(TensorFormats format) {
    switch (format) {
        case TensorFormats::NCHW:

@@ -371,6 +386,170 @@ ReformatManager::ReformatImpl ReformatManager::get(
    })
}

ReformatManager::ReformatImpl ReformatManager::auto_aligned_reformat_featrue(
        const VarNode* orig_var, TensorFormats orig_format,
        const ReformatKey& key) const {
    NamedTensorShape input_shape =
            tensor_formats_to_named_tensor_shape(key.input_format);
    NamedTensorShape output_shape =
            tensor_formats_to_named_tensor_shape(key.output_format);
    size_t input_alignment, output_alignment;
    size_t input_channel_idx, output_channel_idx;
    for (size_t i = 0; i < input_shape.ndim; ++i) {
        if (input_shape[i].name() == Dimension::Name::C &&
            input_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            input_channel_idx = i;
            input_alignment = input_shape[i].stride();
            break;
        }
    }
    for (size_t i = 0; i < output_shape.ndim; ++i) {
        if (output_shape[i].name() == Dimension::Name::C &&
            output_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            output_channel_idx = i;
            output_alignment = output_shape[i].stride();
            break;
        }
    }
    NamedTensorShape orig_shape =
            tensor_formats_to_named_tensor_shape(orig_format);
    size_t orig_channel = 0;
    for (size_t i = 0; i < orig_shape.ndim; ++i) {
        if (orig_shape[i].name() == Dimension::Name::C &&
            orig_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            orig_channel = orig_var->shape()[i] * orig_shape[i].stride();
            break;
        }
    }
    mgb_assert(orig_channel > 0,
               "incompatible NamedTensorShape for feature(got:%s)",
               orig_shape.to_string().c_str());
    size_t aligned_in_channel =
            divup(orig_channel, input_alignment) * input_alignment;
    size_t aligned_out_channel =
            divup(orig_channel, output_alignment) * output_alignment;
    size_t common_alignment = input_alignment * output_alignment /
                              gcd(input_alignment, output_alignment);
    size_t aligned_channel =
            divup(orig_channel, common_alignment) * common_alignment;
    auto builder = [key, aligned_channel, aligned_in_channel,
                    aligned_out_channel, input_shape, input_channel_idx,
                    output_shape,
                    output_channel_idx](const VarNodeArray& vars) {
        VarNode *x, *cur;
        x = cur = vars[0];
        if (aligned_channel > aligned_in_channel) {
            auto padding_shape = input_shape;
            auto&& dim = padding_shape[input_channel_idx];
            size_t const_extent =
                    (aligned_channel - aligned_in_channel) / dim.stride();
            padding_shape[input_channel_idx] =
                    Dimension(dim.name(), dim.stride(), const_extent);
            auto make_shape = std::get<0>(
                    MakeShapeEmitter{input_shape, padding_shape}.emit());
            auto padding_shp_var = make_shape({x});
            auto padding = std::get<0>(
                    PaddingEmitter{const_extent, input_channel_idx}.emit());
            cur = padding({cur, padding_shp_var});
        }
        cur = ReformatManager::instance().get(key)({cur});
        if (aligned_channel > aligned_out_channel) {
            auto&& dim = output_shape[output_channel_idx];
            size_t const_extent = aligned_out_channel / dim.stride();
            auto sub = std::get<0>(
                    SubtensorEmitter{const_extent, output_channel_idx}.emit());
            cur = sub({cur});
        }
        return cur;
    };
    return builder;
}

ReformatManager::ReformatImpl ReformatManager::auto_aligned_reformat_weight(
        const VarNode* orig_var, const ReformatKey& key,
        const AlignmentDesc& extra_alignment) const {
    size_t in_channels = 0, out_channels = 0;
    size_t input_channel_idx, output_channel_idx;
    Dimension::Name out_channel_name;
    auto input_shape = tensor_formats_to_named_tensor_shape(key.input_format);
    for (size_t i = 0; i < input_shape.ndim; ++i) {
        if (input_shape[i].name() == Dimension::Name::C &&
            input_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            in_channels = orig_var->shape()[i];
            input_channel_idx = i;
            mgb_assert(input_shape[i].stride() == 1,
                       "unsupport weight format(got:%s)",
                       input_shape.to_string().c_str());
        } else if ((input_shape[i].name() == Dimension::Name::K ||
                    input_shape[i].name() == Dimension::Name::N) &&
                   input_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            out_channels = orig_var->shape()[i];
            out_channel_name = input_shape[i].name();
            output_channel_idx = i;
            mgb_assert(input_shape[i].stride() == 1,
                       "unsupport weight format(got:%s)",
                       input_shape.to_string().c_str());
        }
    }
    size_t in_channel_alignment, out_channel_alignment = 1;
    auto output_shape = tensor_formats_to_named_tensor_shape(key.output_format);
    for (size_t i = 0; i < output_shape.ndim; ++i) {
        if (output_shape[i].name() == Dimension::Name::C &&
            output_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            in_channel_alignment = output_shape[i].stride();
        } else if (output_shape[i].name() == out_channel_name &&
                   output_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
            out_channel_alignment = output_shape[i].stride();
        }
    }
    size_t aligned_in_channel =
            divup(in_channels, in_channel_alignment) * in_channel_alignment;
    if (extra_alignment.name == out_channel_name) {
        out_channel_alignment =
                extra_alignment.alignment * out_channel_alignment /
                gcd(extra_alignment.alignment, out_channel_alignment);
    }
    size_t aligned_out_channel =
            divup(out_channels, out_channel_alignment) * out_channel_alignment;
    auto builder = [key, input_shape, in_channels, input_channel_idx,
                    aligned_in_channel, out_channels, output_channel_idx,
                    aligned_out_channel](const VarNodeArray& vars) {
        VarNode *x, *cur;
        x = cur = vars[0];
        if (aligned_in_channel > in_channels) {
            auto padding_shape = input_shape;
            auto&& dim = padding_shape[input_channel_idx];
            size_t const_extent =
                    (aligned_in_channel - in_channels) / dim.stride();
            padding_shape[input_channel_idx] =
                    Dimension(dim.name(), dim.stride(), const_extent);
            auto make_shape = std::get<0>(
                    MakeShapeEmitter{input_shape, padding_shape}.emit());
            auto padding_shp_var = make_shape({x});
            auto padding = std::get<0>(
                    PaddingEmitter{const_extent, input_channel_idx}.emit());
            cur = padding({cur, padding_shp_var});
        }
        if (aligned_out_channel > out_channels) {
            auto padding_shape = input_shape;
            auto&& dim = padding_shape[output_channel_idx];
            size_t const_extent =
                    (aligned_out_channel - out_channels) / dim.stride();
            padding_shape[output_channel_idx] =
                    Dimension(dim.name(), dim.stride(), const_extent);
            auto make_shape = std::get<0>(
                    MakeShapeEmitter{input_shape, padding_shape}.emit());
            auto padding_shp_var = make_shape({cur});
            auto padding = std::get<0>(
                    PaddingEmitter{const_extent, output_channel_idx}.emit());
            cur = padding({cur, padding_shp_var});
        }
        cur = ReformatManager::instance().get(key)({cur});
        return cur;
    };
    return builder;
}

const ReformatManager& ReformatManager::instance() {
    static ReformatManager inst;
    return inst;
}
```
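The alignment arithmetic in `auto_aligned_reformat_featrue` boils down to rounding the channel count up to the least common multiple of the input and output channel-block sizes. Below is a standalone illustration using the numbers from the `AutoAlignedFeature` test further down (NCHWc4 to NCHWc32 with 22 channels); `std::gcd` stands in for the local `gcd` helper:

```cpp
#include <cstdio>
#include <numeric>  // std::gcd

int main() {
    size_t input_alignment = 4;    // channel block of NCHWc4
    size_t output_alignment = 32;  // channel block of NCHWc32
    size_t orig_channel = 22;      // C in the AutoAlignedFeature test
    auto divup = [](size_t a, size_t b) { return (a + b - 1) / b; };
    size_t common_alignment =
            input_alignment * output_alignment /
            std::gcd(input_alignment, output_alignment);  // lcm(4, 32) = 32
    size_t aligned_channel =
            divup(orig_channel, common_alignment) * common_alignment;  // 32
    // 22 channels are zero-padded up to 32 before the reformat, so the output
    // shape becomes {N, (22 + 31) / 32, H, W, 32}, matching the test below.
    std::printf("common=%zu aligned=%zu\n", common_alignment, aligned_channel);
    return 0;
}
```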
src/gopt/include/megbrain/gopt/reformat_emitter.h
```cpp
@@ -77,6 +77,26 @@ private:
    };
    UnderlyingBuilders analyze() const;
};

class PaddingEmitter final : public Emitter {
public:
    PaddingEmitter(size_t const_extent, size_t axis)
            : m_const_extent{const_extent}, m_axis{axis} {}
    EmitResult emit() const override;

private:
    size_t m_const_extent, m_axis;
};

class SubtensorEmitter final : public Emitter {
public:
    SubtensorEmitter(size_t const_extent, size_t axis)
            : m_const_extent{const_extent}, m_axis{axis} {}
    EmitResult emit() const override;

private:
    size_t m_const_extent, m_axis;
};
}  // namespace gopt
}  // namespace mgb
```
src/gopt/include/megbrain/gopt/reformat_manager.h
```cpp
@@ -101,12 +101,21 @@ public:
            ReformatKey::Equal>;
    ReformatImpl get(const ReformatKey& key) const;
    ReformatImpl get(ReformatKey&& key) const { return get(key); }
    ReformatImpl auto_aligned_reformat_featrue(
            const VarNode* orig_var, TensorFormats orig_format,
            const ReformatKey& key) const;
    struct AlignmentDesc {
        megdnn::Dimension::Name name;
        size_t alignment;
    };
    ReformatImpl auto_aligned_reformat_weight(
            const VarNode* orig_var, const ReformatKey& key,
            const AlignmentDesc& extra_alignment = {}) const;
    static const ReformatManager& instance();

private:
    ReformatCache m_cache;
};
}  // namespace gopt
}  // namespace mgb
```
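A minimal call sketch for the new entry points (it mirrors `TEST(TestReformatManager, AutoAlignedWeight)` in the test file below; `w` is assumed to be a `SymbolVar` holding an NCHW weight already inserted into a graph):

```cpp
// Sketch: reformat an NCHW weight to NCHWc64, additionally aligning the
// output channel (Dimension::Name::N) to 64 via AlignmentDesc.
using ReformatKey = ReformatManager::ReformatKey;
auto builder = ReformatManager::instance().auto_aligned_reformat_weight(
        w.node(), ReformatKey{TensorFormats::NCHW, TensorFormats::NCHWc64},
        ReformatManager::AlignmentDesc{megdnn::Dimension::Name::N, 64});
VarNode* aligned_w = builder({w.node()});  // pads channels, then reformats
```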
src/gopt/test/reformat_manager.cpp
```cpp
@@ -13,7 +13,10 @@
#include "./helper.h"
#include "megbrain/gopt/reformat_manager.h"
#include "megbrain/graph/event.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/plugin/base.h"
#include "megbrain/plugin/profiler.h"

using namespace mgb;
using namespace gopt;

@@ -168,4 +171,287 @@ TEST(TestReformatManager, InputChannelSmall) {
    MGB_ASSERT_TENSOR_EQ(t1, t2);
}

TEST(TestReformatManager, AutoAlignedFeature) {
    constexpr size_t N = 16, C = 22, H = 55, W = 55;
    HostTensorGenerator<> gen;
    using ReformatKey = ReformatManager::ReformatKey;
    auto src_format = TensorFormats::NCHWc4,
         dst_format = TensorFormats::NCHWc32;
    ReformatKey key{src_format, dst_format};
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    std::shared_ptr<HostTensorND> host_orig_x = gen({N, C, H, W});
    std::shared_ptr<HostTensorND> host_x = gen({N, (C + 3) / 4, H, W, 4});
    auto mkvar = [&](const char* name,
                     const std::shared_ptr<HostTensorND>& host_val) {
        return opr::Host2DeviceCopy::make(*graph, host_val).rename(name);
    };
    auto orig_x = mkvar("orig_x", host_orig_x);
    auto x = mkvar("x", host_x);
    auto builder = ReformatManager::instance().auto_aligned_reformat_featrue(
            orig_x.node(), TensorFormats::NCHW, key);
    auto y = builder({x.node()});
    HostTensorND t;
    auto func = graph->compile({make_callback_copy(y, t)});
    func->execute();
    *host_x = *gen({(N + 5), (C + 3) / 4, H, W, 4});
    func->execute();
    *host_x = *gen({(N - 5), (C + 3) / 4, H, W, 4});
    func->execute();
    auto shp = TensorShape{(N - 5), (C + 31) / 32, H, W, 32};
    ASSERT_TRUE(shp.eq_shape(t.shape()));
}

TEST(TestReformatManager, AutoAlignedFeatureB4) {
    constexpr size_t N = 16, C = 94, H = 55, W = 55;
    HostTensorGenerator<> gen;
    using ReformatKey = ReformatManager::ReformatKey;
    auto src_format = TensorFormats::NCHWc4,
         dst_format = TensorFormats::NCHWc64;
    ReformatKey key{src_format, dst_format};
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    std::shared_ptr<HostTensorND> host_orig_x = gen({N, C, H, W});
    std::shared_ptr<HostTensorND> host_x = gen({N, (C + 3) / 4, H, W, 4});
    auto mkvar = [&](const char* name,
                     const std::shared_ptr<HostTensorND>& host_val,
                     const DType& dtype) {
        return opr::TypeCvt::make(
                opr::Host2DeviceCopy::make(*graph, host_val).rename(name),
                dtype);
    };
    auto orig_x = mkvar("orig_x", host_orig_x,
                        dtype::Quantized4Asymm(20.f, static_cast<uint8_t>(8)));
    auto x = mkvar("x", host_x,
                   dtype::Quantized4Asymm(25.f, static_cast<uint8_t>(4)));
    auto builder = ReformatManager::instance().auto_aligned_reformat_featrue(
            orig_x.node(), TensorFormats::NCHW, key);
    auto y = builder({x.node()});
    HostTensorND t;
    auto func = graph->compile({make_callback_copy(y, t)});
    func->execute();
}

TEST(TestReformatManager, AutoAlignedWeight) {
    constexpr size_t K = 32, C = 32, R = 3, S = 3;
    HostTensorGenerator<> gen;
    using ReformatKey = ReformatManager::ReformatKey;
    auto src_format = TensorFormats::NCHW, dst_format = TensorFormats::NCHWc64;
    ReformatKey key{src_format, dst_format};
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name);
    };
    auto w = mkvar("w", {K, C, R, S});
    auto builder = ReformatManager::instance().auto_aligned_reformat_weight(
            w.node(), key,
            ReformatManager::AlignmentDesc{megdnn::Dimension::Name::N, 64});
    auto y = builder({w.node()});
    HostTensorND t;
    auto func = graph->compile({make_callback_copy(y, t)});
    func->execute();
}

#if MGB_CUDA
#include "megbrain/comp_node_env.h"
namespace {
class ReformatProfiler : public PluginBase {
    using CompNodeEventPtr = std::unique_ptr<CompNode::Event>;

public:
    class MarkInputContiguous;
    ReformatProfiler(cg::ComputingGraph* graph, cg::OperatorNodeBase* opr_start,
                     cg::OperatorNodeBase* opr_end);
    ~ReformatProfiler() noexcept;
    double duration() const;

private:
    CompNodeEventPtr m_start, m_end;
    cg::OperatorNodeBase *m_opr_start, *m_opr_end;
};

ReformatProfiler::ReformatProfiler(cg::ComputingGraph* graph,
                                   cg::OperatorNodeBase* opr_start,
                                   cg::OperatorNodeBase* opr_end)
        : PluginBase(graph), m_opr_start(opr_start), m_opr_end(opr_end) {
    using namespace cg::event;
    auto on_reformat_start = [this](BeforeKernel const& event) {
        auto opr = event.opr;
        if (opr != m_opr_start)
            return;
        if (m_start == nullptr) {
            m_start = event.comp_node.create_event(CompNode::Event::NEED_TIMER);
        }
        m_start->record();
    };
    auto on_reformat_end = [this](AfterKernel const& event) {
        auto opr = event.opr;
        if (opr != m_opr_end)
            return;
        if (m_end == nullptr) {
            m_end = event.comp_node.create_event(CompNode::Event::NEED_TIMER);
        }
        m_end->record();
    };
    auto&& ev = graph->event();
    add_event_handler(ev.register_receiver<BeforeKernel>(on_reformat_start));
    add_event_handler(ev.register_receiver<AfterKernel>(on_reformat_end));
}

ReformatProfiler::~ReformatProfiler() noexcept {
    if (m_start)
        m_start->host_wait();
    if (m_end)
        m_end->host_wait();
}

double ReformatProfiler::duration() const {
    mgb_assert(m_end);
    m_end->host_wait();
    return m_start->elapsed_time_until(*m_end) -
           m_start->elapsed_time_until(*m_start);
}

MGB_DEFINE_OPR_CLASS(ReformatProfiler::MarkInputContiguous,
                     cg::SingleCNOperatorNodeBase) // {
    void scn_do_execute() override {}
    void init_output_static_infer_desc() override;
    void add_input_layout_constraint() override;

public:
    MarkInputContiguous(VarNode* node, const OperatorNodeConfig& config);
    static SymbolVar make(SymbolVar node,
                          const OperatorNodeConfig& config = {});
};

MGB_DYN_TYPE_OBJ_FINAL_IMPL(ReformatProfiler::MarkInputContiguous);

ReformatProfiler::MarkInputContiguous::MarkInputContiguous(
        VarNode* node, const OperatorNodeConfig& config)
        : Super(node->owner_graph(), config, "mark_contiguous", {node}) {
    add_input({node});
    add_output(None);
}

SymbolVar ReformatProfiler::MarkInputContiguous::make(
        SymbolVar node, const OperatorNodeConfig& config) {
    return node.insert_single_output_opr<MarkInputContiguous>(node.node(),
                                                              config);
}

void ReformatProfiler::MarkInputContiguous::init_output_static_infer_desc() {
    using namespace cg::static_infer;
    auto&& mgr = owner_graph()->static_infer_manager();
    mgr.register_shape_infer(output(0),
                             ShapeInferDesc::make_identity(input(0)));
}

void ReformatProfiler::MarkInputContiguous::add_input_layout_constraint() {
    input(0)->add_layout_constraint_contiguous();
}

class CUTimer {
public:
    CUTimer(cudaStream_t& stream, cudaEvent_t& evt0, cudaEvent_t& evt1)
            : m_stream{stream}, m_evt0{evt0}, m_evt1{evt1} {
        reset();
    }
    void reset() {
        m_started = false;
        m_stopped = false;
    }
    void start() {
        mgb_assert(!m_started);
        mgb_assert(!m_stopped);
        m_started = true;
        cudaEventRecord(m_evt0, m_stream);
    }
    void stop() {
        mgb_assert(m_started);
        mgb_assert(!m_stopped);
        m_stopped = true;
        cudaEventRecord(m_evt1, m_stream);
    }
    size_t get_time_in_us() const {
        cudaStreamSynchronize(m_stream);
        float t = -1;
        cudaEventElapsedTime(&t, m_evt0, m_evt1);
        return static_cast<size_t>(t * 1e3);
    }

private:
    bool m_started, m_stopped;
    size_t m_start_point, m_stop_point;
    cudaStream_t& m_stream;
    cudaEvent_t &m_evt0, &m_evt1;
};
}  // namespace

TEST(TestReformatManager, AutoAlignedFeatureProfiling) {
    REQUIRE_GPU(1);
    auto cn = CompNode::load("gpux");
    using ReformatKey = ReformatManager::ReformatKey;
    auto dtype = dtype::Quantized4Asymm(20.f, static_cast<uint8_t>(4));
    HostTensorND hval(cn, dtype);
    constexpr size_t N = 16, C = 18, H = 55, W = 55;
    hval.resize({N, (C + 63) / 64, H, W, 64});
    std::shared_ptr<DeviceTensorND> dval =
            std::make_shared<DeviceTensorND>(cn, dtype);
    dval->copy_from(hval).sync();
    std::shared_ptr<DeviceTensorND> dprime =
            std::make_shared<DeviceTensorND>(cn, dtype);
    dprime->resize({N, C, H, W});
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;
    graph->options().var_sanity_check_first_run = false;
    auto x = opr::VolatileSharedDeviceTensor::make(*graph, dval);
    auto xprime = opr::VolatileSharedDeviceTensor::make(*graph, dprime);
    ReformatKey key{TensorFormats::NCHWc64, TensorFormats::NCHW};
    auto builder = ReformatManager::instance().auto_aligned_reformat_featrue(
            xprime.node(), TensorFormats::NCHW, key);
    auto y = builder({x.node()});
    auto mark = ReformatProfiler::MarkInputContiguous::make(SymbolVar(y));
    auto cb = [](DeviceTensorND& d) { MGB_MARK_USED_VAR(d); };
    auto output_spec = std::make_pair(mark, cb);
    auto func = graph->compile({output_spec});
    static constexpr size_t RUNS = 100;
    cn.activate();
    auto stream = CompNodeEnv::from_comp_node(cn).cuda_env().stream;
    cudaEvent_t evt0;
    cudaEvent_t evt1;
    MGB_CUDA_CHECK(cudaEventCreate(&evt0));
    MGB_CUDA_CHECK(cudaEventCreate(&evt1));
    CUTimer timer(stream, evt0, evt1);
    timer.start();
    for (size_t i = 0; i < RUNS; ++i)
        func->execute();
    timer.stop();
    double time_cuda_evt = timer.get_time_in_us() / static_cast<double>(RUNS);
    OperatorNodeBase* start = x.node()->owner_opr();
    OperatorNodeBase* end = y->owner_opr();
    std::unique_ptr<ReformatProfiler> profiler =
            std::make_unique<ReformatProfiler>(graph.get(), start, end);
    ASSERT_TRUE(y->shape().eq_shape(TensorShape{N, C, H, W}));
    for (size_t i = 0; i < RUNS; ++i)
        func->execute();
    double time_profiler = profiler->duration() * 1e6;
    printf("%f, %f\n", time_profiler, time_cuda_evt);
    ASSERT_EQ(time_cuda_evt, time_profiler);
    MGB_CUDA_CHECK(cudaEventDestroy(evt0));
    MGB_CUDA_CHECK(cudaEventDestroy(evt1));
}
#endif

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
```
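The `CUTimer` in the profiling test follows the standard CUDA event-timing pattern, against which the plugin-based `ReformatProfiler` is checked. A standalone distillation of that pattern (plain CUDA runtime API, no MegEngine):

```cpp
#include <cstdio>
#include <cuda_runtime.h>

// Standalone distillation of the event-timing pattern used by CUTimer.
int main() {
    cudaEvent_t evt0, evt1;
    cudaEventCreate(&evt0);
    cudaEventCreate(&evt1);
    cudaEventRecord(evt0, /*stream=*/0);    // mark start on the stream
    // ... enqueue the kernels to be timed here ...
    cudaEventRecord(evt1, /*stream=*/0);    // mark stop
    cudaEventSynchronize(evt1);             // wait until the stop mark passes
    float ms = -1;
    cudaEventElapsedTime(&ms, evt0, evt1);  // elapsed time in milliseconds
    std::printf("%.3f us\n", ms * 1e3f);
    cudaEventDestroy(evt0);
    cudaEventDestroy(evt1);
    return 0;
}
```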