Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
d9c4ef59
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
d9c4ef59
编写于
3月 24, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(imperative): using simple hash key in heuristic cache
GitOrigin-RevId: 6fddd612e7cc193a140a401fd2a62a98a5056b1d
上级
26ea33c6
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
91 additions
and
68 deletions
+91
-68
dnn/include/megdnn/heuristic_cache.h
dnn/include/megdnn/heuristic_cache.h
+10
-10
dnn/src/common/heuristic_cache.cpp
dnn/src/common/heuristic_cache.cpp
+65
-42
imperative/src/impl/algo_chooser.h
imperative/src/impl/algo_chooser.h
+3
-2
imperative/src/impl/ops/convolution.cpp
imperative/src/impl/ops/convolution.cpp
+10
-13
src/opr/impl/search_policy/algo_chooser.cpp
src/opr/impl/search_policy/algo_chooser.cpp
+3
-1
未找到文件。
dnn/include/megdnn/heuristic_cache.h
浏览文件 @
d9c4ef59
...
...
@@ -29,15 +29,12 @@ public:
MGE_WIN_DECLSPEC_FUC
static
HeuristicCache
&
instance
();
struct
KeyStorage
{
std
::
string
category
;
std
::
string
input
;
size_t
k1
,
k2
;
bool
operator
==
(
const
KeyStorage
&
k
)
const
{
return
category
==
k
.
category
&&
input
==
k
.
input
;
}
bool
operator
==
(
const
KeyStorage
&
k
)
const
{
return
k1
==
k
.
k1
&&
k2
==
k
.
k2
;
}
};
class
Key
{
struct
Key
{
Handle
*
m_handle
;
uint32_t
m_opr_type
;
const
TensorLayout
*
m_inp_layouts_ptr
;
...
...
@@ -45,8 +42,7 @@ public:
const
void
*
m_param_ptr
;
size_t
m_param_size
;
mutable
std
::
string
m_category
;
mutable
std
::
string
m_input
;
mutable
SmallVector
<
size_t
>
m_buf
;
public:
Key
(
Handle
*
opr_handle
,
Algorithm
::
OprType
opr_type
,
...
...
@@ -65,6 +61,10 @@ public:
struct
Result
{
ExecutionPolicy
policy
;
size_t
workspace
;
// for cache collision
SmallVector
<
size_t
>
m_buf
;
SmallVector
<
char
>
m_param_buf
;
};
MGE_WIN_DECLSPEC_FUC
void
put
(
const
Key
&
key
,
Result
&
result
);
...
...
@@ -76,8 +76,8 @@ public:
private:
struct
Hash
{
size_t
operator
()(
const
KeyStorage
&
k
)
const
{
size_t
h1
=
std
::
hash
<
std
::
string
>
{}(
k
.
category
)
;
size_t
h2
=
std
::
hash
<
std
::
string
>
{}(
k
.
input
)
;
size_t
h1
=
k
.
k1
;
size_t
h2
=
k
.
k2
;
h1
^=
h2
+
0x9e3779b9
+
(
h1
<<
6
)
+
(
h1
>>
2
);
return
h1
;
}
...
...
dnn/src/common/heuristic_cache.cpp
浏览文件 @
d9c4ef59
...
...
@@ -11,6 +11,8 @@
*/
#include "megdnn/heuristic_cache.h"
#include "megdnn/tensor_format.h"
#include "src/common/hash_ct.h"
#include "src/common/utils.h"
#include "src/naive/handle.h"
...
...
@@ -32,38 +34,27 @@ HeuristicCache& HeuristicCache::instance() {
}
HeuristicCache
::
KeyStorage
HeuristicCache
::
Key
::
build_key_storage
()
const
{
auto
&&
ctg
=
m_category
;
auto
&&
inp
=
m_input
;
size_t
buf_size
=
16
*
m_inp_layouts_size
+
6
;
size_t
buf
[
buf_size
]
;
if
(
!
m_category
.
empty
()
&&
!
m_input
.
empty
())
return
{
ctg
,
inp
};
inp
.
reserve
(
sizeof
(
TensorLayout
)
*
3
*
m_inp_layouts_size
+
m_param_size
);
size_t
pos
=
0
;
for
(
size_t
i
=
0
;
i
<
m_inp_layouts_size
;
i
++
)
{
auto
&&
l
y
=
m_inp_layouts_ptr
[
i
];
for
(
size_t
j
=
0
;
j
<
ly
.
ndim
;
j
++
)
{
if
(
j
)
inp
.
push_back
(
','
);
inp
.
append
(
std
::
to_string
(
ly
.
shape
[
j
])
);
auto
&&
l
ayout
=
m_inp_layouts_ptr
[
i
];
if
(
layout
.
dtype
.
valid
()
)
{
buf
[
pos
++
]
=
static_cast
<
size_t
>
(
layout
.
dtype
.
enumv
());
}
else
{
buf
[
pos
++
]
=
static_cast
<
size_t
>
(
SIZE_MAX
);
}
inp
.
push_back
(
';'
);
for
(
size_t
j
=
0
;
j
<
ly
.
ndim
;
j
++
)
{
if
(
j
)
inp
.
push_back
(
','
);
inp
.
append
(
std
::
to_string
(
ly
.
stride
[
j
]));
buf
[
pos
++
]
=
static_cast
<
size_t
>
(
layout
.
format
.
type
());
for
(
size_t
j
=
0
;
j
<
layout
.
ndim
;
j
++
)
{
buf
[
pos
++
]
=
layout
.
shape
[
j
];
buf
[
pos
++
]
=
layout
.
stride
[
j
];
}
inp
.
push_back
(
';'
);
inp
.
append
(
ly
.
dtype
.
name
());
inp
.
push_back
(
';'
);
inp
.
append
(
ly
.
format
.
to_string
().
c_str
());
inp
.
push_back
(
'|'
);
}
if
(
m_param_size
)
{
inp
.
append
(
reinterpret_cast
<
const
char
*>
(
m_param_ptr
),
m_param_size
);
}
ctg
=
"plat:"
;
ctg
.
append
(
std
::
to_string
(
static_cast
<
uint32_t
>
(
m_handle
->
type
())));
buf
[
pos
++
]
=
m_opr_type
;
buf
[
pos
++
]
=
static_cast
<
size_t
>
(
m_handle
->
type
());
switch
(
m_handle
->
type
())
{
#if MEGDNN_WITH_CUDA
case
Handle
::
HandleType
::
CUDA
:
{
...
...
@@ -72,9 +63,9 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
cuda_rt
/=
1000
;
auto
&&
handle
=
static_cast
<
megdnn
::
cuda
::
HandleImpl
*>
(
m_handle
);
auto
&&
prop
=
handle
->
device_prop
();
ctg
.
append
(
ssprintf
(
";dev=%s;cap=%d.%d;runtime=%d;"
,
prop
.
name
,
prop
.
major
,
prop
.
minor
,
cuda_rt
))
;
buf
[
pos
++
]
=
prop
.
major
;
buf
[
pos
++
]
=
prop
.
minor
;
buf
[
pos
++
]
=
cuda_rt
;
break
;
}
#endif
...
...
@@ -85,9 +76,10 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
int
drv
=
-
1
,
hip_rt
=
-
1
;
hip_check
(
hipDriverGetVersion
(
&
drv
));
hip_check
(
hipRuntimeGetVersion
(
&
hip_rt
));
ctg
.
append
(
ssprintf
(
";dev=%s;cap=%d.%d,drv=%d;runtime=%d;"
,
prop
.
name
,
prop
.
major
,
prop
.
minor
,
drv
,
hip_rt
));
buf
[
pos
++
]
=
prop
.
major
;
buf
[
pos
++
]
=
prop
.
minor
;
buf
[
pos
++
]
=
drv
;
buf
[
pos
++
]
=
hip_rt
;
break
;
}
#endif
...
...
@@ -108,16 +100,21 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
size_t
nr_threads
=
static_cast
<
megdnn
::
naive
::
HandleImpl
*>
(
m_handle
)
->
megcore_dispatcher
()
->
nr_threads
();
ctg
.
append
(
";"
);
ctg
.
append
(
std
::
to_string
(
nr_threads
));
ctg
.
append
(
";"
);
buf
[
pos
++
]
=
nr_threads
;
break
;
}
default:
ctg
.
append
(
";"
)
;
break
;
}
ctg
.
append
(
std
::
to_string
(
m_opr_type
));
return
{
ctg
,
inp
};
m_buf
.
resize
(
pos
);
SmallVector
<
size_t
>
tmp
(
buf
,
buf
+
pos
);
m_buf
=
std
::
move
(
tmp
);
size_t
k1
=
XXHash64CT
::
hash
((
const
char
*
)
buf
,
pos
*
sizeof
(
size_t
),
20220328
);
size_t
k2
=
XXHash64CT
::
hash
((
const
char
*
)
m_param_ptr
,
m_param_size
,
20220328
);
return
{
k1
,
k2
};
}
void
HeuristicCache
::
put
(
const
Key
&
key
,
Result
&
result
)
{
...
...
@@ -126,15 +123,41 @@ void HeuristicCache::put(const Key& key, Result& result) {
m_heuristic_cache
[
key
.
build_key_storage
()]
=
result
;
}
/*!
 * \brief Element-wise comparison of two raw buffers.
 *
 * \param hash_buf   first buffer
 * \param buf_size   number of elements in \p hash_buf
 * \param hash_buf_  second buffer
 * \param buf_size_  number of elements in \p hash_buf_
 * \return true iff both buffers have the same length and no pair of
 *         corresponding elements compares unequal
 */
template <typename T>
bool is_same_buf(
        const T hash_buf[], const size_t buf_size, const T hash_buf_[],
        const size_t buf_size_) {
    // Buffers of different lengths can never match; in that case the loop
    // below is skipped entirely and no element is touched.
    bool identical = !(buf_size != buf_size_);
    for (size_t idx = 0; identical && idx < buf_size; ++idx) {
        // Stop at the first mismatching pair.
        identical = !(hash_buf[idx] != hash_buf_[idx]);
    }
    return identical;
}
HeuristicCache
::
Result
HeuristicCache
::
get
(
const
Key
&
key
)
{
MEGDNN_LOCK_GUARD
(
m_mtx
);
KeyStorage
ks
=
key
.
build_key_storage
();
auto
iter
=
m_heuristic_cache
.
find
(
ks
);
if
(
iter
==
m_heuristic_cache
.
end
())
{
return
{};
}
else
{
return
iter
->
second
;
if
(
iter
!=
m_heuristic_cache
.
end
())
{
if
(
is_same_buf
(
key
.
m_buf
.
data
(),
key
.
m_buf
.
size
(),
iter
->
second
.
m_buf
.
data
(),
iter
->
second
.
m_buf
.
size
())
&&
is_same_buf
(
(
char
*
)(
key
.
m_param_ptr
),
key
.
m_param_size
,
iter
->
second
.
m_param_buf
.
data
(),
iter
->
second
.
m_param_buf
.
size
()))
{
return
iter
->
second
;
}
megdnn_log_warn
(
"hash collision occurs in heuristic cache with key: (%zu, %zu)"
,
ks
.
k1
,
ks
.
k2
);
}
SmallVector
<
char
>
param_buf
(
(
char
*
)
key
.
m_param_ptr
,
(
char
*
)
key
.
m_param_ptr
+
key
.
m_param_size
);
return
Result
{{},
0
,
key
.
m_buf
,
param_buf
};
}
void
HeuristicCache
::
clear
()
{
...
...
imperative/src/impl/algo_chooser.h
浏览文件 @
d9c4ef59
...
...
@@ -18,6 +18,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo(
megdnn_opr
->
execution_policy
()
=
rst
.
policy
;
return
rst
.
workspace
;
}
SmallVector
<
size_t
>
buf
=
rst
.
m_buf
;
SmallVector
<
char
>
param_buf
=
rst
.
m_param_buf
;
std
::
string
param_str
;
megdnn
::
Algorithm
::
serialize_write_pod
(
megdnn_opr
->
param
(),
param_str
);
...
...
@@ -40,11 +42,10 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo(
megdnn
::
ExecutionPolicy
policy
;
policy
=
mgb
::
rdnn
::
AlgoChooser
<
Opr
>::
get_policy
(
helper
);
size_t
workspace
=
helper
.
get_workspace_size_bytes
(
policy
,
layouts
);
megdnn_opr
->
execution_policy
()
=
policy
;
if
(
execution_policy
.
strategy
&
rdnn
::
ExecutionStrategy
::
HEURISTIC
)
{
megdnn
::
HeuristicCache
::
Result
cache_result
{
policy
,
workspace
};
megdnn
::
HeuristicCache
::
Result
cache_result
{
policy
,
workspace
,
buf
,
param_buf
};
megdnn
::
HeuristicCache
::
instance
().
put
(
cache_key
,
cache_result
);
}
return
workspace
;
...
...
imperative/src/impl/ops/convolution.cpp
浏览文件 @
d9c4ef59
...
...
@@ -123,8 +123,6 @@ TensorLayout do_shape_infer(
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_output_attrs_fallible
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
)
{
using
Param
=
::
megdnn
::
param
::
Convolution
;
SmallVector
<
LogicalTensorDesc
>
dests
(
1
);
auto
&&
desc
=
dests
[
0
];
desc
.
comp_node
=
inputs
[
0
].
comp_node
;
...
...
@@ -166,15 +164,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
}
oup_shapes
[
0
]
=
out_layout
;
DnnOprCaller
<
megdnn
::
ConvBiasForward
>
dnn_opr
(
cn
);
dnn_opr
.
op
->
param
().
pad_h
=
conv
.
pad_h
;
dnn_opr
.
op
->
param
().
pad_w
=
conv
.
pad_w
;
dnn_opr
.
op
->
param
().
stride_h
=
conv
.
stride_h
;
dnn_opr
.
op
->
param
().
stride_w
=
conv
.
stride_w
;
dnn_opr
.
op
->
param
().
dilate_h
=
conv
.
dilate_h
;
dnn_opr
.
op
->
param
().
dilate_w
=
conv
.
dilate_w
;
dnn_opr
.
op
->
param
().
sparse
=
conv
.
sparse
;
dnn_opr
.
op
->
param
().
compute_mode
=
conv
.
compute_mode
;
dnn_opr
.
op
->
param
().
format
=
conv
.
format
;
auto
&&
param
=
dnn_opr
.
op
->
param
();
param
.
pad_h
=
conv
.
pad_h
;
param
.
pad_w
=
conv
.
pad_w
;
param
.
stride_h
=
conv
.
stride_h
;
param
.
stride_w
=
conv
.
stride_w
;
param
.
dilate_h
=
conv
.
dilate_h
;
param
.
dilate_w
=
conv
.
dilate_w
;
param
.
sparse
=
conv
.
sparse
;
param
.
compute_mode
=
conv
.
compute_mode
;
param
.
format
=
conv
.
format
;
// shape infer
TensorLayout
shp
({
0
},
inputs
[
0
]
->
dtype
());
...
...
@@ -513,8 +512,6 @@ TensorLayout do_shape_infer(
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_output_attrs_fallible
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
)
{
using
Param
=
::
megdnn
::
param
::
Convolution3D
;
SmallVector
<
LogicalTensorDesc
>
dests
(
1
);
auto
&&
desc
=
dests
[
0
];
desc
.
comp_node
=
inputs
[
0
].
comp_node
;
...
...
src/opr/impl/search_policy/algo_chooser.cpp
浏览文件 @
d9c4ef59
...
...
@@ -42,6 +42,8 @@ size_t AlgoChooser<Opr>::setup_algo(
megdnn_opr
->
execution_policy
()
=
rst
.
policy
;
return
rst
.
workspace
;
}
SmallVector
<
size_t
>
buf
=
rst
.
m_buf
;
SmallVector
<
char
>
param_buf
=
rst
.
m_param_buf
;
if
(
WorkspaceLimitGetter
::
is_prealloc_run
(
mgb_opr
->
owner_graph
()))
{
return
0
;
...
...
@@ -92,7 +94,7 @@ size_t AlgoChooser<Opr>::setup_algo(
megdnn_opr
->
execution_policy
()
=
policy
;
if
(
mgb_opr
->
execution_policy
().
strategy
&
rdnn
::
ExecutionStrategy
::
HEURISTIC
)
{
HeuristicCache
::
Result
cache_result
{
policy
,
workspace
};
HeuristicCache
::
Result
cache_result
{
policy
,
workspace
,
buf
,
param_buf
};
HeuristicCache
::
instance
().
put
(
cache_key
,
cache_result
);
}
return
workspace
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录