提交 d9c4ef59 编写于 作者: M Megvii Engine Team

perf(imperative): using simple hash key in heuristic cache

GitOrigin-RevId: 6fddd612e7cc193a140a401fd2a62a98a5056b1d
上级 26ea33c6
...@@ -29,15 +29,12 @@ public: ...@@ -29,15 +29,12 @@ public:
MGE_WIN_DECLSPEC_FUC static HeuristicCache& instance(); MGE_WIN_DECLSPEC_FUC static HeuristicCache& instance();
struct KeyStorage { struct KeyStorage {
std::string category; size_t k1, k2;
std::string input;
bool operator==(const KeyStorage& k) const { bool operator==(const KeyStorage& k) const { return k1 == k.k1 && k2 == k.k2; }
return category == k.category && input == k.input;
}
}; };
class Key { struct Key {
Handle* m_handle; Handle* m_handle;
uint32_t m_opr_type; uint32_t m_opr_type;
const TensorLayout* m_inp_layouts_ptr; const TensorLayout* m_inp_layouts_ptr;
...@@ -45,8 +42,7 @@ public: ...@@ -45,8 +42,7 @@ public:
const void* m_param_ptr; const void* m_param_ptr;
size_t m_param_size; size_t m_param_size;
mutable std::string m_category; mutable SmallVector<size_t> m_buf;
mutable std::string m_input;
public: public:
Key(Handle* opr_handle, Algorithm::OprType opr_type, Key(Handle* opr_handle, Algorithm::OprType opr_type,
...@@ -65,6 +61,10 @@ public: ...@@ -65,6 +61,10 @@ public:
struct Result { struct Result {
ExecutionPolicy policy; ExecutionPolicy policy;
size_t workspace; size_t workspace;
// for cache collision
SmallVector<size_t> m_buf;
SmallVector<char> m_param_buf;
}; };
MGE_WIN_DECLSPEC_FUC void put(const Key& key, Result& result); MGE_WIN_DECLSPEC_FUC void put(const Key& key, Result& result);
...@@ -76,8 +76,8 @@ public: ...@@ -76,8 +76,8 @@ public:
private: private:
struct Hash { struct Hash {
size_t operator()(const KeyStorage& k) const { size_t operator()(const KeyStorage& k) const {
size_t h1 = std::hash<std::string>{}(k.category); size_t h1 = k.k1;
size_t h2 = std::hash<std::string>{}(k.input); size_t h2 = k.k2;
h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2); h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2);
return h1; return h1;
} }
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
*/ */
#include "megdnn/heuristic_cache.h" #include "megdnn/heuristic_cache.h"
#include "megdnn/tensor_format.h"
#include "src/common/hash_ct.h"
#include "src/common/utils.h" #include "src/common/utils.h"
#include "src/naive/handle.h" #include "src/naive/handle.h"
...@@ -32,38 +34,27 @@ HeuristicCache& HeuristicCache::instance() { ...@@ -32,38 +34,27 @@ HeuristicCache& HeuristicCache::instance() {
} }
HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
auto&& ctg = m_category; size_t buf_size = 16 * m_inp_layouts_size + 6;
auto&& inp = m_input; size_t buf[buf_size];
if (!m_category.empty() && !m_input.empty()) size_t pos = 0;
return {ctg, inp};
inp.reserve(sizeof(TensorLayout) * 3 * m_inp_layouts_size + m_param_size);
for (size_t i = 0; i < m_inp_layouts_size; i++) { for (size_t i = 0; i < m_inp_layouts_size; i++) {
auto&& ly = m_inp_layouts_ptr[i]; auto&& layout = m_inp_layouts_ptr[i];
for (size_t j = 0; j < ly.ndim; j++) { if (layout.dtype.valid()) {
if (j) buf[pos++] = static_cast<size_t>(layout.dtype.enumv());
inp.push_back(','); } else {
inp.append(std::to_string(ly.shape[j])); buf[pos++] = static_cast<size_t>(SIZE_MAX);
} }
inp.push_back(';'); buf[pos++] = static_cast<size_t>(layout.format.type());
for (size_t j = 0; j < ly.ndim; j++) { for (size_t j = 0; j < layout.ndim; j++) {
if (j) buf[pos++] = layout.shape[j];
inp.push_back(','); buf[pos++] = layout.stride[j];
inp.append(std::to_string(ly.stride[j]));
} }
inp.push_back(';');
inp.append(ly.dtype.name());
inp.push_back(';');
inp.append(ly.format.to_string().c_str());
inp.push_back('|');
}
if (m_param_size) {
inp.append(reinterpret_cast<const char*>(m_param_ptr), m_param_size);
} }
ctg = "plat:"; buf[pos++] = m_opr_type;
ctg.append(std::to_string(static_cast<uint32_t>(m_handle->type()))); buf[pos++] = static_cast<size_t>(m_handle->type());
switch (m_handle->type()) { switch (m_handle->type()) {
#if MEGDNN_WITH_CUDA #if MEGDNN_WITH_CUDA
case Handle::HandleType::CUDA: { case Handle::HandleType::CUDA: {
...@@ -72,9 +63,9 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { ...@@ -72,9 +63,9 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
cuda_rt /= 1000; cuda_rt /= 1000;
auto&& handle = static_cast<megdnn::cuda::HandleImpl*>(m_handle); auto&& handle = static_cast<megdnn::cuda::HandleImpl*>(m_handle);
auto&& prop = handle->device_prop(); auto&& prop = handle->device_prop();
ctg.append(ssprintf( buf[pos++] = prop.major;
";dev=%s;cap=%d.%d;runtime=%d;", prop.name, prop.major, prop.minor, buf[pos++] = prop.minor;
cuda_rt)); buf[pos++] = cuda_rt;
break; break;
} }
#endif #endif
...@@ -85,9 +76,10 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { ...@@ -85,9 +76,10 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
int drv = -1, hip_rt = -1; int drv = -1, hip_rt = -1;
hip_check(hipDriverGetVersion(&drv)); hip_check(hipDriverGetVersion(&drv));
hip_check(hipRuntimeGetVersion(&hip_rt)); hip_check(hipRuntimeGetVersion(&hip_rt));
ctg.append(ssprintf( buf[pos++] = prop.major;
";dev=%s;cap=%d.%d,drv=%d;runtime=%d;", prop.name, prop.major, buf[pos++] = prop.minor;
prop.minor, drv, hip_rt)); buf[pos++] = drv;
buf[pos++] = hip_rt;
break; break;
} }
#endif #endif
...@@ -108,16 +100,21 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const { ...@@ -108,16 +100,21 @@ HeuristicCache::KeyStorage HeuristicCache::Key::build_key_storage() const {
size_t nr_threads = static_cast<megdnn::naive::HandleImpl*>(m_handle) size_t nr_threads = static_cast<megdnn::naive::HandleImpl*>(m_handle)
->megcore_dispatcher() ->megcore_dispatcher()
->nr_threads(); ->nr_threads();
ctg.append(";"); buf[pos++] = nr_threads;
ctg.append(std::to_string(nr_threads));
ctg.append(";");
break; break;
} }
default: default:
ctg.append(";"); break;
} }
ctg.append(std::to_string(m_opr_type));
return {ctg, inp}; m_buf.resize(pos);
SmallVector<size_t> tmp(buf, buf + pos);
m_buf = std::move(tmp);
size_t k1 = XXHash64CT::hash((const char*)buf, pos * sizeof(size_t), 20220328);
size_t k2 = XXHash64CT::hash((const char*)m_param_ptr, m_param_size, 20220328);
return {k1, k2};
} }
void HeuristicCache::put(const Key& key, Result& result) { void HeuristicCache::put(const Key& key, Result& result) {
...@@ -126,15 +123,41 @@ void HeuristicCache::put(const Key& key, Result& result) { ...@@ -126,15 +123,41 @@ void HeuristicCache::put(const Key& key, Result& result) {
m_heuristic_cache[key.build_key_storage()] = result; m_heuristic_cache[key.build_key_storage()] = result;
} }
// Reports whether two buffers hold the same sequence of elements.
//
// hash_buf  / buf_size  : first buffer and its element count.
// hash_buf_ / buf_size_ : second buffer and its element count.
//
// Returns true only when both element counts agree and no pair of elements at
// the same index compares unequal via operator!=. Two zero-length buffers are
// considered equal and their pointers are never dereferenced.
template <typename T>
bool is_same_buf(
        const T hash_buf[], const size_t buf_size, const T hash_buf_[],
        const size_t buf_size_) {
    // A length mismatch settles the answer before any element is inspected.
    bool identical = !(buf_size != buf_size_);
    size_t idx = 0;
    // Walk both buffers in lock-step and stop at the first differing element.
    while (identical && idx < buf_size) {
        identical = !(hash_buf[idx] != hash_buf_[idx]);
        ++idx;
    }
    return identical;
}
HeuristicCache::Result HeuristicCache::get(const Key& key) { HeuristicCache::Result HeuristicCache::get(const Key& key) {
MEGDNN_LOCK_GUARD(m_mtx); MEGDNN_LOCK_GUARD(m_mtx);
KeyStorage ks = key.build_key_storage(); KeyStorage ks = key.build_key_storage();
auto iter = m_heuristic_cache.find(ks); auto iter = m_heuristic_cache.find(ks);
if (iter == m_heuristic_cache.end()) { if (iter != m_heuristic_cache.end()) {
return {}; if (is_same_buf(
} else { key.m_buf.data(), key.m_buf.size(), iter->second.m_buf.data(),
return iter->second; iter->second.m_buf.size()) &&
is_same_buf(
(char*)(key.m_param_ptr), key.m_param_size,
iter->second.m_param_buf.data(), iter->second.m_param_buf.size())) {
return iter->second;
}
megdnn_log_warn(
"hash collision occurs in heuristic cache with key: (%zu, %zu)", ks.k1,
ks.k2);
} }
SmallVector<char> param_buf(
(char*)key.m_param_ptr, (char*)key.m_param_ptr + key.m_param_size);
return Result{{}, 0, key.m_buf, param_buf};
} }
void HeuristicCache::clear() { void HeuristicCache::clear() {
......
...@@ -18,6 +18,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( ...@@ -18,6 +18,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo(
megdnn_opr->execution_policy() = rst.policy; megdnn_opr->execution_policy() = rst.policy;
return rst.workspace; return rst.workspace;
} }
SmallVector<size_t> buf = rst.m_buf;
SmallVector<char> param_buf = rst.m_param_buf;
std::string param_str; std::string param_str;
megdnn::Algorithm::serialize_write_pod(megdnn_opr->param(), param_str); megdnn::Algorithm::serialize_write_pod(megdnn_opr->param(), param_str);
...@@ -40,11 +42,10 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo( ...@@ -40,11 +42,10 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo(
megdnn::ExecutionPolicy policy; megdnn::ExecutionPolicy policy;
policy = mgb::rdnn::AlgoChooser<Opr>::get_policy(helper); policy = mgb::rdnn::AlgoChooser<Opr>::get_policy(helper);
size_t workspace = helper.get_workspace_size_bytes(policy, layouts); size_t workspace = helper.get_workspace_size_bytes(policy, layouts);
megdnn_opr->execution_policy() = policy; megdnn_opr->execution_policy() = policy;
if (execution_policy.strategy & rdnn::ExecutionStrategy::HEURISTIC) { if (execution_policy.strategy & rdnn::ExecutionStrategy::HEURISTIC) {
megdnn::HeuristicCache::Result cache_result{policy, workspace}; megdnn::HeuristicCache::Result cache_result{policy, workspace, buf, param_buf};
megdnn::HeuristicCache::instance().put(cache_key, cache_result); megdnn::HeuristicCache::instance().put(cache_key, cache_result);
} }
return workspace; return workspace;
......
...@@ -123,8 +123,6 @@ TensorLayout do_shape_infer( ...@@ -123,8 +123,6 @@ TensorLayout do_shape_infer(
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
using Param = ::megdnn::param::Convolution;
SmallVector<LogicalTensorDesc> dests(1); SmallVector<LogicalTensorDesc> dests(1);
auto&& desc = dests[0]; auto&& desc = dests[0];
desc.comp_node = inputs[0].comp_node; desc.comp_node = inputs[0].comp_node;
...@@ -166,15 +164,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor( ...@@ -166,15 +164,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
} }
oup_shapes[0] = out_layout; oup_shapes[0] = out_layout;
DnnOprCaller<megdnn::ConvBiasForward> dnn_opr(cn); DnnOprCaller<megdnn::ConvBiasForward> dnn_opr(cn);
dnn_opr.op->param().pad_h = conv.pad_h; auto&& param = dnn_opr.op->param();
dnn_opr.op->param().pad_w = conv.pad_w; param.pad_h = conv.pad_h;
dnn_opr.op->param().stride_h = conv.stride_h; param.pad_w = conv.pad_w;
dnn_opr.op->param().stride_w = conv.stride_w; param.stride_h = conv.stride_h;
dnn_opr.op->param().dilate_h = conv.dilate_h; param.stride_w = conv.stride_w;
dnn_opr.op->param().dilate_w = conv.dilate_w; param.dilate_h = conv.dilate_h;
dnn_opr.op->param().sparse = conv.sparse; param.dilate_w = conv.dilate_w;
dnn_opr.op->param().compute_mode = conv.compute_mode; param.sparse = conv.sparse;
dnn_opr.op->param().format = conv.format; param.compute_mode = conv.compute_mode;
param.format = conv.format;
// shape infer // shape infer
TensorLayout shp({0}, inputs[0]->dtype()); TensorLayout shp({0}, inputs[0]->dtype());
...@@ -513,8 +512,6 @@ TensorLayout do_shape_infer( ...@@ -513,8 +512,6 @@ TensorLayout do_shape_infer(
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
using Param = ::megdnn::param::Convolution3D;
SmallVector<LogicalTensorDesc> dests(1); SmallVector<LogicalTensorDesc> dests(1);
auto&& desc = dests[0]; auto&& desc = dests[0];
desc.comp_node = inputs[0].comp_node; desc.comp_node = inputs[0].comp_node;
......
...@@ -42,6 +42,8 @@ size_t AlgoChooser<Opr>::setup_algo( ...@@ -42,6 +42,8 @@ size_t AlgoChooser<Opr>::setup_algo(
megdnn_opr->execution_policy() = rst.policy; megdnn_opr->execution_policy() = rst.policy;
return rst.workspace; return rst.workspace;
} }
SmallVector<size_t> buf = rst.m_buf;
SmallVector<char> param_buf = rst.m_param_buf;
if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) { if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) {
return 0; return 0;
...@@ -92,7 +94,7 @@ size_t AlgoChooser<Opr>::setup_algo( ...@@ -92,7 +94,7 @@ size_t AlgoChooser<Opr>::setup_algo(
megdnn_opr->execution_policy() = policy; megdnn_opr->execution_policy() = policy;
if (mgb_opr->execution_policy().strategy & rdnn::ExecutionStrategy::HEURISTIC) { if (mgb_opr->execution_policy().strategy & rdnn::ExecutionStrategy::HEURISTIC) {
HeuristicCache::Result cache_result{policy, workspace}; HeuristicCache::Result cache_result{policy, workspace, buf, param_buf};
HeuristicCache::instance().put(cache_key, cache_result); HeuristicCache::instance().put(cache_key, cache_result);
} }
return workspace; return workspace;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册