Unverified · Commit 2567dfa4, authored by jiangcheng, committed by GitHub

Optimize CINN cache key (#37786)

* optimize cache key

* add cinn cache key by graph address

* improve the cache key test script

* rename GraphHashProto to GraphHashStrategy

* replace graph_serialize_str_ with graph_hash_val_, plus other changes from review advice
Parent 151c5d74
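In short: the old `CinnCacheKey` serialized the graph to a Dot string and hashed that, and because `graph.Nodes()` is an unordered container the same graph could hash differently across runs (the removed code's own comment raises this question). This commit makes the graph hash pluggable via a `GraphHashStrategy` and derives two key types: `CinnCacheKeyByAddress` (cheap pointer identity) and `CinnCacheKeyByStructure` (deterministic structural hash). A minimal standalone sketch of the idea, with `MiniGraph` as a hypothetical stand-in for `ir::Graph` (not the real Paddle types, which appear in the diff below):

```cpp
#include <cstddef>
#include <functional>
#include <set>
#include <string>

// MiniGraph is a hypothetical stand-in for paddle::framework::ir::Graph.
struct MiniGraph {
  std::multiset<std::string> node_names;  // ordered, so iteration is stable
};

using GraphHashStrategy = std::function<size_t(const MiniGraph&)>;

// Fast path: keys match only when they refer to the very same graph object.
size_t HashByAddress(const MiniGraph& graph) {
  return reinterpret_cast<size_t>(&graph);
}

// Slow path: structurally identical graphs hash to the same value.
size_t HashByStructure(const MiniGraph& graph) {
  std::string serialized;
  for (const auto& name : graph.node_names) serialized += name;
  return std::hash<std::string>()(serialized);
}
```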
--- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc
@@ -29,55 +29,32 @@ namespace paddle {
 namespace framework {
 namespace paddle2cinn {
 
+using GraphHashStrategy = CinnCacheKey::GraphHashStrategy;
+
+CinnCacheKey::CinnCacheKey(GraphHashStrategy graph_hash)
+    : graph_hash_(graph_hash) {}
+
 CinnCacheKey::CinnCacheKey(
     const ir::Graph& graph,
     const std::map<std::string, const LoDTensor*>& input_tensors,
-    const std::string& arch_str) {
+    const std::string& arch_str, GraphHashStrategy graph_hash)
+    : graph_hash_(graph_hash) {
   this->SetKey(graph, input_tensors, arch_str);
 }
 
 CinnCacheKey::CinnCacheKey(const ir::Graph& graph,
                            const std::map<std::string, DDim>& input_shapes,
-                           const std::string& arch_str) {
+                           const std::string& arch_str,
+                           GraphHashStrategy graph_hash)
+    : graph_hash_(graph_hash) {
   this->SetKey(graph, input_shapes, arch_str);
 }
 
-size_t CinnCacheKey::HashGraph(const ir::Graph& graph) {
-  // using Dot to unqiue graph
-  inference::analysis::Dot dot;
-  std::unordered_map<const ir::Node*, std::string> node2dot;
-  int id = 0;
-  // Create nodes
-  // graph.Nodes() return unordered_set, the same graph may
-  // return different result?
-  for (const ir::Node* n : graph.Nodes()) {
-    std::string node_id = std::to_string(id++);
-    dot.AddNode(node_id, {}, n->Name(), true);
-    node2dot[n] = node_id;
-  }
-
-  // Create edges
-  for (const ir::Node* n : graph.Nodes()) {
-    const auto& src_id = node2dot.at(n);
-    for (auto* out : n->outputs) {
-      const auto& dest_id = node2dot.at(out);
-      dot.AddEdge(src_id, dest_id, {});
-    }
-  }
-
-  const std::string& viz_graph = dot.Build();
-  VLOG(1) << "The hash graph:\n" << viz_graph;
-
-  size_t hash_val = std::hash<std::string>()(viz_graph);
-  VLOG(4) << "The graph's hash value is: " << hash_val;
-  return hash_val;
-}
-
 void CinnCacheKey::SetKey(
     const ir::Graph& graph,
     const std::map<std::string, const LoDTensor*>& input_tensors,
     const std::string& arch_str) {
-  graph_serialize_str_ = std::to_string(HashGraph(graph));
+  graph_hash_val_ = graph_hash_(graph);
   for (const auto& name_tensor : input_tensors) {
     input_shapes_[name_tensor.first] = name_tensor.second->dims();
   }
@@ -87,7 +64,7 @@ void CinnCacheKey::SetKey(
 void CinnCacheKey::SetKey(const ir::Graph& graph,
                           const std::map<std::string, DDim>& input_shapes,
                           const std::string& arch_str) {
-  graph_serialize_str_ = std::to_string(HashGraph(graph));
+  graph_hash_val_ = graph_hash_(graph);
   input_shapes_ = input_shapes;
   arch_str_ = arch_str;
 }
@@ -97,7 +74,7 @@ bool CinnCacheKey::operator!=(const CinnCacheKey& other) const {
 }
 
 bool CinnCacheKey::operator==(const CinnCacheKey& other) const {
-  return graph_serialize_str_ == other.graph_serialize_str_ &&
+  return graph_hash_val_ == other.graph_hash_val_ &&
          input_shapes_ == other.input_shapes_ && arch_str_ == other.arch_str_;
 }
@@ -114,11 +91,48 @@ size_t CinnCacheKey::Hash::operator()(const CinnCacheKey& key) const {
     ret = hash_combine(ret, string_hasher(name_shape.second.to_str()));
   }
 
-  ret = hash_combine(ret, string_hasher(key.graph_serialize_str_));
+  ret = hash_combine(ret, key.graph_hash_val_);
   ret = hash_combine(ret, string_hasher(key.arch_str_));
   return ret;
 }
 
+size_t CinnCacheKeyByStructure::HashGraph(const ir::Graph& graph) {
+  // sort the graph nodes by name, breaking ties by id
+  auto compare = [](ir::Node* n1, ir::Node* n2) {
+    return (n1->Name() == n2->Name()) ? (n1->id() < n2->id())
+                                      : (n1->Name() < n2->Name());
+  };
+
+  // graph.Nodes() returns an unordered_set; use an ordered set here so that
+  // the same graph always serializes to the same string
+  std::set<ir::Node *, bool (*)(ir::Node *, ir::Node *)> node_set(compare),
+      output_set(compare);
+  node_set.insert(graph.Nodes().begin(), graph.Nodes().end());
+
+  std::string hash_str;
+  for (ir::Node* n : node_set) {
+    hash_str.append(n->Name());
+
+    output_set.clear();
+    output_set.insert(n->outputs.begin(), n->outputs.end());
+    for (auto* out : output_set) {
+      hash_str.append(out->Name());
+    }
+  }
+
+  VLOG(1) << "The hash graph:\n" << hash_str;
+
+  size_t hash_val = std::hash<std::string>()(hash_str);
+  VLOG(4) << "The graph's hash value by graph structure is: " << hash_val;
+  return hash_val;
+}
+
+size_t CinnCacheKeyByAddress::HashGraph(const ir::Graph& graph) {
+  size_t hash_val = reinterpret_cast<size_t>(&graph);
+  VLOG(4) << "The graph's hash value by graph address is: " << hash_val;
+  return hash_val;
+}
+
 }  // namespace paddle2cinn
 }  // namespace framework
 }  // namespace paddle
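`Hash::operator()` above folds every key component into one value with `hash_combine`, which this diff does not show. A boost-style definition consistent with those call sites (an assumption for illustration, not the verbatim Paddle helper) would be:

```cpp
#include <cstddef>

// Assumed boost-style mixer: combines a running seed with one more hash
// value, matching the call pattern ret = hash_combine(ret, <size_t>).
inline size_t hash_combine(size_t seed, size_t value) {
  return seed ^ (value + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}
```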
--- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.h
+++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.h
@@ -14,6 +14,7 @@
 #pragma once
 
+#include <functional>
 #include <map>
 
 #include "paddle/fluid/framework/ddim.h"
@@ -33,14 +34,18 @@ namespace paddle2cinn {
 // shapes.
 class CinnCacheKey {
  public:
+  using GraphHashStrategy = std::function<size_t(const ir::Graph&)>;
+
+  explicit CinnCacheKey(GraphHashStrategy graph_hash);
+
   CinnCacheKey(const ir::Graph& graph,
                const std::map<std::string, const LoDTensor*>& input_tensors,
-               const std::string& arch_str);
+               const std::string& arch_str, GraphHashStrategy graph_hash);
+
   CinnCacheKey(const ir::Graph& graph,
                const std::map<std::string, DDim>& input_shapes,
-               const std::string& arch_str);
+               const std::string& arch_str, GraphHashStrategy graph_hash);
 
-  ~CinnCacheKey() {}
+  ~CinnCacheKey() = default;
 
   void SetKey(const ir::Graph& graph,
               const std::map<std::string, const LoDTensor*>& input_tensors,
@@ -58,13 +63,38 @@ class CinnCacheKey {
   };
 
  private:
-  size_t HashGraph(const ir::Graph& graph);
-
-  std::string graph_serialize_str_;
+  GraphHashStrategy graph_hash_;
+  size_t graph_hash_val_;
   std::map<std::string, DDim> input_shapes_;
   std::string arch_str_;
 };
 
+#define CINN_CACHE_KEY_CREATE(NAME)                                    \
+  class NAME : public CinnCacheKey {                                   \
+   public:                                                             \
+    NAME() : CinnCacheKey(HashGraph) {}                                \
+                                                                       \
+    NAME(const ir::Graph& graph,                                       \
+         const std::map<std::string, const LoDTensor*>& input_tensors, \
+         const std::string& arch_str)                                  \
+        : CinnCacheKey(graph, input_tensors, arch_str, HashGraph) {}   \
+                                                                       \
+    NAME(const ir::Graph& graph,                                       \
+         const std::map<std::string, DDim>& input_shapes,              \
+         const std::string& arch_str)                                  \
+        : CinnCacheKey(graph, input_shapes, arch_str, HashGraph) {}    \
+                                                                       \
+   private:                                                            \
+    static size_t HashGraph(const ir::Graph& graph);                   \
+  };
+
+// Class to store the keys by graph address for compiling CINN.
+CINN_CACHE_KEY_CREATE(CinnCacheKeyByAddress)
+// Class to store the keys by graph structure for compiling CINN.
+CINN_CACHE_KEY_CREATE(CinnCacheKeyByStructure)
+
+#undef CINN_CACHE_KEY_CREATE
+
 }  // namespace paddle2cinn
 }  // namespace framework
 }  // namespace paddle
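Since `CINN_CACHE_KEY_CREATE` is `#undef`ed right after its two uses, the generated classes are fixed at preprocessing time. Expanding the macro by hand, `CINN_CACHE_KEY_CREATE(CinnCacheKeyByAddress)` is equivalent to:

```cpp
class CinnCacheKeyByAddress : public CinnCacheKey {
 public:
  CinnCacheKeyByAddress() : CinnCacheKey(HashGraph) {}

  CinnCacheKeyByAddress(
      const ir::Graph& graph,
      const std::map<std::string, const LoDTensor*>& input_tensors,
      const std::string& arch_str)
      : CinnCacheKey(graph, input_tensors, arch_str, HashGraph) {}

  CinnCacheKeyByAddress(const ir::Graph& graph,
                        const std::map<std::string, DDim>& input_shapes,
                        const std::string& arch_str)
      : CinnCacheKey(graph, input_shapes, arch_str, HashGraph) {}

 private:
  static size_t HashGraph(const ir::Graph& graph);
};
```

Each generated class supplies its own static `HashGraph`, whose definitions are the two functions added to cinn_cache_key.cc above.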
--- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc
@@ -26,8 +26,8 @@ namespace paddle {
 namespace framework {
 namespace paddle2cinn {
 
-TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
-  std::unordered_set<CinnCacheKey, CinnCacheKey::Hash> test_set;
+TEST(CinnCacheKeyTest, TestAsUnorderedKeyByStructure) {
+  std::unordered_set<CinnCacheKeyByStructure, CinnCacheKey::Hash> test_set;
 
   ProgramDesc empty_program;
   ir::Graph empty_graph(empty_program);
@@ -47,19 +47,20 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
   DDim ddim = paddle::framework::make_ddim({1, 2, 3});
   std::map<std::string, DDim> feed_shapes = {{"X", ddim}};
 
-  CinnCacheKey cache_key0(empty_graph, feed_tensors, "x86");
-  CinnCacheKey cache_key1(empty_graph, feed_shapes, "x86");
+  CinnCacheKeyByStructure cache_key0(empty_graph, feed_tensors, "x86");
+  CinnCacheKeyByStructure cache_key1(empty_graph, feed_shapes, "x86");
   EXPECT_EQ(cache_key0, cache_key1);
 
-  CinnCacheKey cache_key2(graph, feed_shapes, "x86");
-  CinnCacheKey cache_key3(graph, feed_shapes, "nvgpu");
-  CinnCacheKey cache_key4(graph, feed_tensors, "nvgpu");
+  CinnCacheKeyByStructure cache_key2(graph, feed_shapes, "x86");
+  CinnCacheKeyByStructure cache_key3(graph, feed_shapes, "nvgpu");
+  CinnCacheKeyByStructure cache_key4(graph, feed_tensors, "nvgpu");
   EXPECT_NE(cache_key2, cache_key3);
   EXPECT_EQ(cache_key3, cache_key4);
 
-  CinnCacheKey cache_key5(empty_graph,
-                          std::map<std::string, const LoDTensor *>(), "unk");
-  CinnCacheKey cache_key6(empty_graph, std::map<std::string, DDim>(), "unk");
+  CinnCacheKeyByStructure cache_key5(
+      empty_graph, std::map<std::string, const LoDTensor *>(), "unk");
+  CinnCacheKeyByStructure cache_key6(empty_graph, std::map<std::string, DDim>(),
+                                     "unk");
   EXPECT_EQ(cache_key5, cache_key6);
 
   EXPECT_NE(cache_key1, cache_key3);
@@ -98,6 +99,107 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKey) {
   EXPECT_EQ(test_set.find(cache_key6), test_set.end());
 }
 
+TEST(CinnCacheKeyTest, TestAsUnorderedKeyByAddress) {
+  std::unordered_set<CinnCacheKeyByAddress, CinnCacheKey::Hash> test_set;
+
+  ProgramDesc empty_program;
+  ir::Graph empty_graph(empty_program);
+
+  ProgramDesc program;
+  auto *global_block = program.MutableBlock(0);
+  auto *x = global_block->Var("X");
+  x->SetType(proto::VarType::LOD_TENSOR);
+  ir::Graph graph(program);
+
+  LoDTensor tensor;
+  tensor.Resize({1, 2, 3});
+  const LoDTensor *tensor_pointer = &tensor;
+  std::map<std::string, const LoDTensor *> feed_tensors = {
+      {"X", tensor_pointer}};
+
+  DDim ddim = paddle::framework::make_ddim({1, 2, 3});
+  std::map<std::string, DDim> feed_shapes = {{"X", ddim}};
+
+  CinnCacheKeyByAddress cache_key0(empty_graph, feed_tensors, "x86");
+  CinnCacheKeyByAddress cache_key1(empty_graph, feed_shapes, "x86");
+  EXPECT_EQ(cache_key0, cache_key1);
+
+  CinnCacheKeyByAddress cache_key2(graph, feed_shapes, "x86");
+  CinnCacheKeyByAddress cache_key3(graph, feed_shapes, "nvgpu");
+  CinnCacheKeyByAddress cache_key4(graph, feed_tensors, "nvgpu");
+  EXPECT_NE(cache_key2, cache_key3);
+  EXPECT_EQ(cache_key3, cache_key4);
+
+  CinnCacheKeyByAddress cache_key5(
+      empty_graph, std::map<std::string, const LoDTensor *>(), "unk");
+  CinnCacheKeyByAddress cache_key6(empty_graph, std::map<std::string, DDim>(),
+                                   "unk");
+  EXPECT_EQ(cache_key5, cache_key6);
+
+  EXPECT_NE(cache_key1, cache_key3);
+  EXPECT_NE(cache_key4, cache_key2);
+
+  EXPECT_NE(cache_key3, cache_key5);
+  EXPECT_NE(cache_key6, cache_key4);
+
+  EXPECT_NE(cache_key5, cache_key1);
+  EXPECT_NE(cache_key2, cache_key6);
+
+  test_set.insert(cache_key0);
+  test_set.insert(cache_key1);
+  test_set.insert(cache_key3);
+  test_set.insert(cache_key4);
+  test_set.insert(cache_key5);
+  test_set.insert(cache_key6);
+  EXPECT_EQ(test_set.size(), 3U);
+
+  auto iter = test_set.find(cache_key0);
+  EXPECT_NE(iter, test_set.end());
+  test_set.erase(iter);
+  EXPECT_EQ(test_set.size(), 2U);
+  EXPECT_EQ(test_set.find(cache_key1), test_set.end());
+
+  iter = test_set.find(cache_key3);
+  EXPECT_NE(iter, test_set.end());
+  test_set.erase(iter);
+  EXPECT_EQ(test_set.size(), 1U);
+  EXPECT_EQ(test_set.find(cache_key4), test_set.end());
+
+  iter = test_set.find(cache_key5);
+  EXPECT_NE(iter, test_set.end());
+  test_set.erase(iter);
+  EXPECT_EQ(test_set.size(), 0U);
+  EXPECT_EQ(test_set.find(cache_key6), test_set.end());
+}
+
+TEST(CinnCacheKeyTest, TestSameGraph) {
+  ProgramDesc program1;
+  auto *global_block1 = program1.MutableBlock(0);
+  auto *x1 = global_block1->Var("X");
+  x1->SetType(proto::VarType::LOD_TENSOR);
+  ir::Graph graph1(program1);
+
+  ProgramDesc program2;
+  auto *global_block2 = program2.MutableBlock(0);
+  auto *x2 = global_block2->Var("X");
+  x2->SetType(proto::VarType::LOD_TENSOR);
+  ir::Graph graph2(program2);
+
+  LoDTensor tensor;
+  tensor.Resize({1, 2, 3});
+  const LoDTensor *tensor_pointer = &tensor;
+  std::map<std::string, const LoDTensor *> feed_tensors = {
+      {"X", tensor_pointer}};
+
+  CinnCacheKeyByAddress cache_key_by_address1(graph1, feed_tensors, "x86");
+  CinnCacheKeyByAddress cache_key_by_address2(graph2, feed_tensors, "x86");
+  EXPECT_NE(cache_key_by_address1, cache_key_by_address2);
+
+  CinnCacheKeyByStructure cache_key_by_struct1(graph1, feed_tensors, "x86");
+  CinnCacheKeyByStructure cache_key_by_struct2(graph2, feed_tensors, "x86");
+  EXPECT_EQ(cache_key_by_struct1, cache_key_by_struct2);
+}
+
 }  // namespace paddle2cinn
 }  // namespace framework
 }  // namespace paddle
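`TestSameGraph` passes because `CinnCacheKeyByStructure::HashGraph` visits nodes in a deterministic (name, id) order, independent of `unordered_set` iteration order, so two separately built but identical programs serialize identically. A distilled illustration of the comparator, with `FakeNode` as a hypothetical stand-in for `ir::Node`:

```cpp
#include <set>
#include <string>

// FakeNode is a hypothetical stand-in for ir::Node.
struct FakeNode {
  std::string name;
  int id;
};

bool NodeLess(const FakeNode* n1, const FakeNode* n2) {
  return (n1->name == n2->name) ? (n1->id < n2->id) : (n1->name < n2->name);
}

int main() {
  FakeNode a{"relu", 2}, b{"conv", 7}, c{"relu", 1};
  std::set<const FakeNode*, bool (*)(const FakeNode*, const FakeNode*)>
      ordered(NodeLess);
  for (const FakeNode* n : {&a, &b, &c}) ordered.insert(n);
  // Iteration order is conv(7), relu(1), relu(2) no matter how the nodes
  // were produced, so a serialization built from this order is stable.
}
```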
--- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
@@ -69,23 +69,41 @@ const CinnCompiledObject& CinnCompiler::Compile(
     const std::map<std::string, const LoDTensor*>& input_tensors,
     const Target& target, void* stream) {
   VLOG(1) << "-- The graph to be compiled is:\n" << VizGraph(graph);
-  CinnCacheKey cur_key(graph, input_tensors, target.arch_str());
+  CinnCacheKeyByAddress cur_key_by_address(graph, input_tensors,
+                                           target.arch_str());
+  CinnCacheKeyByStructure cur_key_by_struct;
+
   bool exist = false;
   {
     AutoRDLock r_guard{&rwlock_};
-    exist = cache_.count(cur_key) != 0;
+    exist = cache_by_address_.count(cur_key_by_address) != 0;
+    // if the graph cannot be found by address, check whether the same graph
+    // structure has already been stored in the cache
+    if (!exist) {
+      // generate the structure cache key
+      cur_key_by_struct.SetKey(graph, input_tensors, target.arch_str());
+      // if the graph structure is found, store the graph address in the
+      // cache so that the next query hits the fast path
+      if (cache_by_struct_.count(cur_key_by_struct) != 0) {
+        exist = true;
+        cache_by_address_[cur_key_by_address] =
+            cache_by_struct_.at(cur_key_by_struct).get();
+      }
+    }
   }
   if (!exist) {
     std::int64_t compiled_num = real_compiled_num_.fetch_add(1);
     auto compiled_res =
         CompileGraph(graph, input_tensors, target, compiled_num, stream);
     AutoWRLock w_guard{&rwlock_};
-    if (!cache_.count(cur_key)) {
-      cache_[cur_key] = std::move(compiled_res);
+    if (!cache_by_struct_.count(cur_key_by_struct)) {
+      cache_by_address_[cur_key_by_address] = compiled_res.get();
+      cache_by_struct_[cur_key_by_struct] = std::move(compiled_res);
     }
   }
   AutoRDLock guard{&rwlock_};
-  const auto& cached_boj = *cache_[cur_key];
+  const auto& cached_boj = *cache_by_address_[cur_key_by_address];
   return cached_boj;
 }
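The lookup above reduces to a small pattern: consult the O(1) address cache first, fall back to the structural cache, and backfill the address cache on a structural hit so that the expensive structural hash is computed at most once per graph object. A simplified sketch under stated assumptions (keys reduced to `size_t`, locking omitted, `CompiledObject` standing in for `CinnCompiledObject`):

```cpp
#include <cstddef>
#include <memory>
#include <unordered_map>

struct CompiledObject {};  // stand-in for CinnCompiledObject

class TwoLevelCache {
 public:
  // Returns the cached object, or nullptr if the caller must compile.
  CompiledObject* Lookup(size_t addr_key, size_t struct_key) {
    auto it = by_address_.find(addr_key);  // fast path: graph seen before
    if (it != by_address_.end()) return it->second;
    auto sit = by_struct_.find(struct_key);  // slow path: same structure?
    if (sit == by_struct_.end()) return nullptr;
    by_address_[addr_key] = sit->second.get();  // backfill the fast path
    return sit->second.get();
  }

  void Store(size_t addr_key, size_t struct_key,
             std::unique_ptr<CompiledObject> obj) {
    by_address_[addr_key] = obj.get();
    by_struct_[struct_key] = std::move(obj);
  }

 private:
  std::unordered_map<size_t, CompiledObject*> by_address_;  // non-owning
  std::unordered_map<size_t, std::unique_ptr<CompiledObject>>
      by_struct_;  // owning
};
```

Note the ownership split mirrored in the header diff below: `cache_by_struct_` owns the compiled objects while `cache_by_address_` only indexes them, which is why `Clear()` must drop both maps together.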
@@ -182,7 +200,8 @@ void CinnCompiler::Clear() {
   {
     AutoWRLock guard{&rwlock_};
     graphs_.clear();
-    cache_.clear();
+    cache_by_address_.clear();
+    cache_by_struct_.clear();
   }
   real_compiled_num_.store(0);
 }
......
--- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h
+++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h
@@ -95,9 +95,12 @@ class CinnCompiler {
                void* stream = nullptr) const;
 
   std::unordered_map<std::string, std::unique_ptr<ir::Graph>> graphs_;
-  std::unordered_map<CinnCacheKey, std::unique_ptr<CinnCompiledObject>,
+  std::unordered_map<CinnCacheKeyByAddress, CinnCompiledObject*,
                      CinnCacheKey::Hash>
-      cache_;
+      cache_by_address_;
+  std::unordered_map<CinnCacheKeyByStructure,
+                     std::unique_ptr<CinnCompiledObject>, CinnCacheKey::Hash>
+      cache_by_struct_;
   std::atomic_int64_t real_compiled_num_{0};
   mutable RWLock rwlock_;
......