Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7dfd3846
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7dfd3846
编写于
3月 31, 2022
作者:
Z
Zhang Ting
提交者:
GitHub
3月 31, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Implement AutotuneCache class for Kernel AutoTune (#41169)
上级
6744754f
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
81 additions
and
33 deletions
+81
-33
paddle/phi/kernels/autotune/cache.h
paddle/phi/kernels/autotune/cache.h
+66
-26
paddle/phi/kernels/autotune/cache_test.cc
paddle/phi/kernels/autotune/cache_test.cc
+15
-7
未找到文件。
paddle/phi/kernels/autotune/cache.h
浏览文件 @
7dfd3846
...
...
@@ -51,20 +51,35 @@ struct hash<std::vector<T>> {
namespace
phi
{
namespace
autotune
{
// Builds one hash key out of an arbitrary argument list.
//
// A zero-initialized size_t is handed (by pointer) to HashCombine together
// with every argument, perfectly forwarded; whatever HashCombine leaves in
// that value is returned as the key. HashCombine itself is defined outside
// this excerpt.
template <typename... Args>
size_t GetKey(Args&&... args) {
  size_t key = 0;
  HashCombine(&key, std::forward<Args>(args)...);
  return key;
}
// Defines the cache key of a convolution operator.
//
// The key is the value GetKey produces for the argument sequence
// (x_dims, w_dims, strides, paddings, dilations, dtype), where dtype is
// first converted to int64_t so it is hashed as an integer.
//
// Parameters:
//   x_dims    - dimensions of the input tensor.
//   w_dims    - dimensions of the filter/weight tensor.
//   strides   - convolution strides.
//   paddings  - convolution paddings.
//   dilations - convolution dilations.
//   dtype     - element data type of the operation.
// Returns: the combined hash of all of the above.
//
// Declared `inline` because this is a non-template function defined in a
// header: without it every translation unit that includes the header emits
// its own external definition, which violates the one-definition rule and
// fails at link time.
inline size_t ConvKey(const std::vector<int64_t>& x_dims,
                      const std::vector<int64_t>& w_dims,
                      const std::vector<int>& strides,
                      const std::vector<int>& paddings,
                      const std::vector<int>& dilations,
                      phi::DataType dtype) {
  return GetKey(x_dims,
                w_dims,
                strides,
                paddings,
                dilations,
                static_cast<int64_t>(dtype));
}
template
<
typename
AlgorithmT
>
class
AlgorithmsCache
{
public:
AlgorithmsCache
()
{
hash_
.
clear
();
}
template
<
typename
...
Args
>
size_t
GetKey
(
Args
&&
...
args
)
{
size_t
seed
=
0
;
HashCombine
(
&
seed
,
std
::
forward
<
Args
>
(
args
)...);
return
seed
;
}
AlgorithmsCache
()
:
cache_mutex_
(
new
std
::
mutex
())
{
hash_
.
clear
();
}
AlgorithmT
Get
(
size_t
key
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
cache_mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
*
cache_mutex_
);
PADDLE_ENFORCE_NE
(
hash_
.
find
(
key
),
hash_
.
end
(),
...
...
@@ -74,7 +89,7 @@ class AlgorithmsCache {
bool
Find
(
size_t
key
)
{
bool
ret
=
false
;
std
::
lock_guard
<
std
::
mutex
>
lock
(
cache_mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
*
cache_mutex_
);
if
(
hash_
.
find
(
key
)
!=
hash_
.
end
())
{
cache_hits_
++
;
ret
=
true
;
...
...
@@ -85,7 +100,7 @@ class AlgorithmsCache {
}
void
Set
(
size_t
key
,
AlgorithmT
algo
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
cache_mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
*
cache_mutex_
);
hash_
[
key
]
=
algo
;
}
...
...
@@ -96,27 +111,52 @@ class AlgorithmsCache {
return
cache_hit_rate
;
}
// Define the cache key of operator
size_t
ConvKey
(
const
std
::
vector
<
int64_t
>&
x_dims
,
const
std
::
vector
<
int64_t
>&
w_dims
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
dilations
,
phi
::
DataType
dtype
)
{
return
GetKey
(
x_dims
,
w_dims
,
strides
,
paddings
,
dilations
,
static_cast
<
int64_t
>
(
dtype
));
}
int64_t
Size
()
{
return
hash_
.
size
();
}
private:
std
::
unordered_map
<
size_t
,
AlgorithmT
>
hash_
;
std
::
mutex
cache_mutex_
;
std
::
shared_ptr
<
std
::
mutex
>
cache_mutex_
;
int64_t
cache_hits_
=
0
;
int64_t
cache_misses_
=
0
;
};
// Per-algorithm-type cache: maps a configuration key (size_t hash) to the
// identifier of the selected algorithm, stored as int64_t.
using AlgorithmsConfigKeyMap = AlgorithmsCache<int64_t>;

// Top-level table: maps an algorithm type name to the cache holding the
// configurations recorded for that type.
using AlgorithmsTypeMap =
    std::unordered_map<std::string, AlgorithmsConfigKeyMap>;
// Registry that groups autotune caches by algorithm type name.
//
// One AlgorithmsConfigKeyMap is kept per algorithm type string. Access to
// the type -> cache table is serialized by autotune_cache_mutex_.
//
// NOTE(review): the class is still copyable (copy operations are not
// deleted), so `auto c = AutoTuneCache::Instance();` creates an independent
// copy of the table. Callers that want the shared instance should bind the
// result of Instance() to a reference.
class AutoTuneCache {
 public:
  // Returns the single function-local static instance.
  static AutoTuneCache& Instance() {
    static AutoTuneCache autotune_cache;
    return autotune_cache;
  }

  // Returns the cache registered under `algo_type`, creating an empty one
  // on first use. The returned reference stays valid while the entry
  // remains in the table (unordered_map references survive rehashing).
  AlgorithmsConfigKeyMap& RegisterOrGet(const std::string& algo_type) {
    std::lock_guard<std::mutex> lock(*autotune_cache_mutex_);
    // operator[] default-constructs the cache when the key is absent, so a
    // single lookup covers both the "register" and the "get" path.
    return auto_tune_map_[algo_type];
  }

  // The number of total config cached, summed over every algorithm type.
  int64_t Size() {
    // Hold the table lock while iterating: RegisterOrGet may insert into
    // auto_tune_map_ concurrently, and iterating during an insert is a
    // data race.
    std::lock_guard<std::mutex> lock(*autotune_cache_mutex_);
    int64_t total = 0;
    for (auto& v : auto_tune_map_) {
      const int64_t cached = v.second.Size();
      VLOG(3) << v.first << " " << cached;
      total += cached;
    }
    return total;
  }

 private:
  // Private so that instances are obtained through Instance().
  AutoTuneCache() : autotune_cache_mutex_(std::make_shared<std::mutex>()) {}

  // Algorithm type name -> cache of configurations for that type.
  AlgorithmsTypeMap auto_tune_map_;
  // Guards auto_tune_map_. Held through a shared_ptr, so copies of this
  // object share the same mutex.
  std::shared_ptr<std::mutex> autotune_cache_mutex_;
};
}
// namespace autotune
}
// namespace phi
paddle/phi/kernels/autotune/cache_test.cc
浏览文件 @
7dfd3846
...
...
@@ -18,10 +18,12 @@
#include <functional>
#include "glog/logging.h"
// Placeholder algorithm used by the cache test: its only effect is to emit
// the message "algo test" at VLOG verbosity level 3.
void Algo() {
  VLOG(3) << "algo test";
}
// Identifiers of the convolution algorithms stored in the cache by the test.
// Kept as an unscoped enum so that the enumerators convert implicitly to the
// integer value type of the cache.
enum ConvAlgos {
  GEMMKernel = 0,     // first candidate, value 0
  CuDNNKernel_1 = 1,  // second candidate, value 1
  CuDNNKernel_2 = 2   // third candidate, value 2
};
TEST
(
AlgosCache
,
AlgosCache
)
{
phi
::
autotune
::
AlgorithmsCache
<
std
::
function
<
void
()
>>
cache
;
auto
autotune_cache
=
phi
::
autotune
::
AutoTuneCache
::
Instance
();
auto
&
cache
=
autotune_cache
.
RegisterOrGet
(
"conv_fw"
);
std
::
vector
<
int64_t
>
x_shape
=
{
4
,
224
,
224
,
3
};
std
::
vector
<
int64_t
>
w_shape
=
{
32
,
3
,
3
,
3
};
std
::
vector
<
int
>
paddings
=
{
0
,
0
};
...
...
@@ -29,17 +31,23 @@ TEST(AlgosCache, AlgosCache) {
std
::
vector
<
int
>
dilations
=
{
1
,
1
};
phi
::
DataType
dtype
=
paddle
::
experimental
::
CppTypeToDataType
<
float
>::
Type
();
auto
key
=
cache
.
ConvKey
(
x_shape
,
w_shape
,
paddings
,
strides
,
dilations
,
dtype
);
auto
key
=
phi
::
autotune
::
ConvKey
(
x_shape
,
w_shape
,
paddings
,
strides
,
dilations
,
dtype
);
EXPECT_EQ
(
cache
.
Find
(
key
),
false
);
cache
.
Set
(
key
,
Algo
);
cache
.
Set
(
key
,
ConvAlgos
::
GEMMKernel
);
EXPECT_EQ
(
cache
.
Size
(),
1
);
EXPECT_EQ
(
cache
.
Find
(
key
),
true
);
auto
algo
=
cache
.
Get
(
key
);
algo
(
);
EXPECT_EQ
(
algo
,
ConvAlgos
::
GEMMKernel
);
x_shape
=
{
4
,
128
,
128
,
3
};
key
=
cache
.
ConvKey
(
x_shape
,
w_shape
,
paddings
,
strides
,
dilations
,
dtype
);
key
=
phi
::
autotune
::
ConvKey
(
x_shape
,
w_shape
,
paddings
,
strides
,
dilations
,
dtype
);
EXPECT_EQ
(
cache
.
Find
(
key
),
false
);
cache
.
Set
(
key
,
ConvAlgos
::
CuDNNKernel_1
);
EXPECT_EQ
(
cache
.
Size
(),
2
);
EXPECT_EQ
(
autotune_cache
.
Size
(),
2
);
float
cache_hit_rate
=
static_cast
<
float
>
(
1
)
/
static_cast
<
float
>
(
3
);
EXPECT_LT
(
std
::
abs
(
cache_hit_rate
-
cache
.
CacheHitRate
()),
1e-5
);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录