From dce3465da518641ee177187fbc0c0d36faea28f2 Mon Sep 17 00:00:00 2001 From: Tian Zheng Date: Thu, 27 Oct 2022 20:33:16 -0700 Subject: [PATCH] patch for paddle --- include/cudnn_frontend_ExecutionPlan.h | 10 +++++++--- include/cudnn_frontend_ExecutionPlanCache.h | 2 +- include/cudnn_frontend_OperationGraph.h | 2 +- include/cudnn_frontend_find_plan.h | 6 +++--- include/cudnn_frontend_get_plan.h | 4 ++-- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/cudnn_frontend_ExecutionPlan.h b/include/cudnn_frontend_ExecutionPlan.h index 7bed4b4..3314b5c 100644 --- a/include/cudnn_frontend_ExecutionPlan.h +++ b/include/cudnn_frontend_ExecutionPlan.h @@ -167,6 +167,10 @@ class ExecutionPlan_v8 : public BackendDescriptor { return json_string; #endif } + + ManagedOpaqueDescriptor GetEngineConfig() const { + return engine_config; + } ExecutionPlan_v8(ExecutionPlan_v8 const &) = default; ExecutionPlan_v8 & @@ -182,7 +186,7 @@ class ExecutionPlan_v8 : public BackendDescriptor { CUDNN_TYPE_NUMERICAL_NOTE, CUDNN_NUMERICAL_NOTE_TYPE_COUNT, &elem_count, - NULL); + nullptr); numeric_notes_vec.resize(elem_count); status = cudnnBackendGetAttribute(extractedEngine_, CUDNN_ATTR_ENGINE_NUMERICAL_NOTE, @@ -206,7 +210,7 @@ class ExecutionPlan_v8 : public BackendDescriptor { CUDNN_TYPE_BEHAVIOR_NOTE, CUDNN_BEHAVIOR_NOTE_TYPE_COUNT, &elem_count, - NULL); + nullptr); behavior_notes_vec.resize(elem_count); status = cudnnBackendGetAttribute(extractedEngine_, CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE, @@ -310,7 +314,7 @@ class ExecutionPlan_v8 : public BackendDescriptor { CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE, CUDNN_TYPE_INT64, 1, - NULL, + nullptr, &workSpaceSize); if (status != CUDNN_STATUS_SUCCESS) { set_error_and_throw_exception(this, diff --git a/include/cudnn_frontend_ExecutionPlanCache.h b/include/cudnn_frontend_ExecutionPlanCache.h index 99a157c..741c490 100644 --- a/include/cudnn_frontend_ExecutionPlanCache.h +++ b/include/cudnn_frontend_ExecutionPlanCache.h @@ -94,7 +94,7 @@ class ExecutionPlanCache_v1 { /// String to map of feature_vector to execution plan /// For a given FeatureVector of type T according to the Operation Graph, we get the plan. - using FeatureVectorToPlanMap = std::map; + using FeatureVectorToPlanMap = std::map; FeatureVectorToPlanMap cache; mutable std::mutex cache_mutex; diff --git a/include/cudnn_frontend_OperationGraph.h b/include/cudnn_frontend_OperationGraph.h index 1478ce8..7894080 100644 --- a/include/cudnn_frontend_OperationGraph.h +++ b/include/cudnn_frontend_OperationGraph.h @@ -78,7 +78,7 @@ class OperationGraph_v8 : public BackendDescriptor { CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT, CUDNN_TYPE_INT64, 1, - NULL, + nullptr, &global_count); if (status != CUDNN_STATUS_SUCCESS) { set_error_and_throw_exception(this, diff --git a/include/cudnn_frontend_find_plan.h b/include/cudnn_frontend_find_plan.h index 02a08a1..5f94e45 100644 --- a/include/cudnn_frontend_find_plan.h +++ b/include/cudnn_frontend_find_plan.h @@ -53,7 +53,7 @@ time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const cudaDeviceSynchronize(); cudaStream_t stream = nullptr; - ::cudnnGetStream(handle, &stream); + cudnnGetStream(handle, &stream); for (auto &plan : plans) { float time_ms = 0.0f; @@ -61,7 +61,7 @@ time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const float min_time_ms = std::numeric_limits::max(); // Warm-up run - auto warmup_status = ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc()); + auto warmup_status = cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc()); if (warmup_status != CUDNN_STATUS_SUCCESS) { getLogger() << "[cudnn_frontend] Plan " << plan.getTag() << " failed with " << to_string(warmup_status) << std::endl; continue; @@ -71,7 +71,7 @@ time_sorted_plan(cudnnHandle_t handle, executionPlans_t plans, VariantPack const for (int i = 0; i < maxIterCount; i++) { cudaEventRecord(start, stream); - ::cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc()); + cudnnBackendExecute(handle, plan.get_raw_desc(), variantPack.get_raw_desc()); cudaEventRecord(stop, stream); cudaEventSynchronize(stop); diff --git a/include/cudnn_frontend_get_plan.h b/include/cudnn_frontend_get_plan.h index 50535ab..c43eec9 100644 --- a/include/cudnn_frontend_get_plan.h +++ b/include/cudnn_frontend_get_plan.h @@ -26,7 +26,7 @@ namespace cudnn_frontend { -auto +inline auto EngineConfigGenerator::cudnnGetPlan(cudnnHandle_t handle, OperationGraph & opGraph) -> executionPlans_t { // Creating a set of execution plans that are supported. @@ -47,7 +47,7 @@ EngineConfigGenerator::cudnnGetPlan(cudnnHandle_t handle, OperationGraph & opGra return plans; } -auto +inline auto EngineConfigGenerator::cudnnGetPlan(cudnnHandle_t handle, OperationGraph & opGraph, Predicate pred) -> executionPlans_t { // Creating a set of execution plans that are supported. -- 2.25.1