From dce3465da518641ee177187fbc0c0d36faea28f2 Mon Sep 17 00:00:00 2001
From: Tian Zheng <tizheng@nvidia.com>
Date: Thu, 27 Oct 2022 20:33:16 -0700
Subject: [PATCH] patch for paddle

---
 include/cudnn_frontend_ExecutionPlan.h      | 10 +++++++---
 include/cudnn_frontend_ExecutionPlanCache.h |  2 +-
 include/cudnn_frontend_OperationGraph.h     |  2 +-
 include/cudnn_frontend_find_plan.h          |  6 +++---
 include/cudnn_frontend_get_plan.h           |  4 ++--
 5 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/cudnn_frontend_ExecutionPlan.h b/include/cudnn_frontend_ExecutionPlan.h
index 7bed4b4..3314b5c 100644
--- a/include/cudnn_frontend_ExecutionPlan.h
+++ b/include/cudnn_frontend_ExecutionPlan.h
@@ -167,6 +167,10 @@ class ExecutionPlan_v8 : public BackendDescriptor {
         return json_string;
 #endif
     }
+    
+    ManagedOpaqueDescriptor GetEngineConfig() const {
+        return engine_config;
+    }
 
     ExecutionPlan_v8(ExecutionPlan_v8 const &) = default;
     ExecutionPlan_v8 &
@@ -182,7 +186,7 @@ class ExecutionPlan_v8 : public BackendDescriptor {
                                  CUDNN_TYPE_NUMERICAL_NOTE,
                                  CUDNN_NUMERICAL_NOTE_TYPE_COUNT,
                                  &elem_count,
-                                 NULL);
+                                 nullptr);
         numeric_notes_vec.resize(elem_count);
         status = cudnnBackendGetAttribute(extractedEngine_,
                                  CUDNN_ATTR_ENGINE_NUMERICAL_NOTE,
@@ -206,7 +210,7 @@ class ExecutionPlan_v8 : public BackendDescriptor {
                                  CUDNN_TYPE_BEHAVIOR_NOTE,
                                  CUDNN_BEHAVIOR_NOTE_TYPE_COUNT,
                                  &elem_count,
-                                 NULL);
+                                 nullptr);
         behavior_notes_vec.resize(elem_count);
         status = cudnnBackendGetAttribute(extractedEngine_,
                                  CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE,
@@ -310,7 +314,7 @@ class ExecutionPlan_v8 : public BackendDescriptor {
                 CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE,
                 CUDNN_TYPE_INT64,
                 1,
-                NULL,
+                nullptr,
                 &workSpaceSize);
         if (status != CUDNN_STATUS_SUCCESS) {
             set_error_and_throw_exception(this,
diff --git a/include/cudnn_frontend_ExecutionPlanCache.h b/include/cudnn_frontend_ExecutionPlanCache.h
index 99a157c..741c490 100644
--- a/include/cudnn_frontend_ExecutionPlanCache.h
+++ b/include/cudnn_frontend_ExecutionPlanCache.h
@@ -94,7 +94,7 @@ class ExecutionPlanCache_v1 {
 
     /// String to map of feature_vector to execution plan
     /// For a given FeatureVector of type T according to the Operation Graph, we get the plan. 
-    using FeatureVectorToPlanMap = std::map<cudnn_frontend::feature_vector_t, cudnn_frontend::ExecutionPlan, cudnn_frontend::ExecutionPlanCache_v1::compare>;
+    using FeatureVectorToPlanMap = std::map<cudnn_frontend::feature_vector_t, cudnn_frontend::ExecutionPlan>;
     FeatureVectorToPlanMap  cache;
 
     mutable std::mutex cache_mutex;
diff --git a/include/cudnn_frontend_OperationGraph.h b/include/cudnn_frontend_OperationGraph.h
index 1478ce8..7894080 100644
--- a/include/cudnn_frontend_OperationGraph.h
+++ b/include/cudnn_frontend_OperationGraph.h
@@ -78,7 +78,7 @@ class OperationGraph_v8 : public BackendDescriptor {
                                                CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT,
                                                CUDNN_TYPE_INT64,
                                                1,
-                                               NULL,
+                                               nullptr,
                                                &global_count);
         if (status != CUDNN_STATUS_SUCCESS) {
             set_error_and_throw_exception(this,
diff --git a/include/cudnn_frontend_find_plan.h b/include/cudnn_frontend_find_plan.h
index 02a08a1..5f94e45 100644
--- a/include/cudnn_frontend_find_plan.h
+++ b/include/cudnn_frontend_find_plan.h
@@ -53,7 +53,7 @@ time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const
     cudaDeviceSynchronize();
 
     cudaStream_t stream = nullptr;
-    ::cudnnGetStream(handle, &stream);
+    cudnnGetStream(handle, &stream);
 
     for (auto &plan : plans) {
         float time_ms       = 0.0f;
@@ -61,7 +61,7 @@ time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const
         float min_time_ms   = std::numeric_limits<float>::max();
 
         // Warm-up run
-        auto warmup_status = ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
+        auto warmup_status = cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
         if (warmup_status != CUDNN_STATUS_SUCCESS) {
             getLogger() << "[cudnn_frontend] Plan " << plan.getTag() << " failed with " << to_string(warmup_status) << std::endl;
             continue;
@@ -71,7 +71,7 @@ time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const
         for (int i = 0; i < maxIterCount; i++) {
             cudaEventRecord(start, stream);
 
-            ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
+            cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc());
 
             cudaEventRecord(stop, stream);
             cudaEventSynchronize(stop);
diff --git a/include/cudnn_frontend_get_plan.h b/include/cudnn_frontend_get_plan.h
index 50535ab..c43eec9 100644
--- a/include/cudnn_frontend_get_plan.h
+++ b/include/cudnn_frontend_get_plan.h
@@ -26,7 +26,7 @@
 
 namespace cudnn_frontend {
 
-auto
+inline auto
 EngineConfigGenerator::cudnnGetPlan(cudnnHandle_t handle, OperationGraph & opGraph)
     -> executionPlans_t {
     // Creating a set of execution plans that are supported.
@@ -47,7 +47,7 @@ EngineConfigGenerator::cudnnGetPlan(cudnnHandle_t handle, OperationGraph & opGra
     return plans;
 }
 
-auto
+inline auto
 EngineConfigGenerator::cudnnGetPlan(cudnnHandle_t handle, OperationGraph & opGraph, Predicate pred)
     -> executionPlans_t {
     // Creating a set of execution plans that are supported.
-- 
2.25.1