diff --git a/mindspore/_akg/op_build.py b/mindspore/_akg/op_build.py index 44a250bd9e3794083d8f89a16c78cd7184252c9c..aa6a65cff13b15f91743f7ce66ed050261cb7fc2 100644 --- a/mindspore/_akg/op_build.py +++ b/mindspore/_akg/op_build.py @@ -24,13 +24,13 @@ import _akg from _akg import save_gpu_param as gpu_utils from _akg.utils import validation_check as vc_util -MS_CUDA_KERNEL_PATH = "/tmp/cuda_meta/" @vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict) def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs): """op_build""" + kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/" if device == "cuda": - cuda_path = os.path.realpath(MS_CUDA_KERNEL_PATH) + cuda_path = os.path.realpath(kernel_meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path) if not opnames: @@ -43,7 +43,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr logging.error("no schedule func found %s", str(schedule_name)) return None - ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx") + ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx") if os.path.exists(ptx_file): os.chmod(ptx_file, 0o600) try: @@ -55,7 +55,8 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr foo = _akg.tvm.build(s, args, device, name=kernel_name) ptx_code = foo.imported_modules[0].get_source("ptx") file.write(ptx_code) - json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json") + json_file = os.path.realpath( + kernel_meta_path + kernel_name + ".json") kernel_info = (ptx_code, json_file, kernel_name) gpu_utils.save_gpu_params(s, args, kernel_info) os.chmod(ptx_file, 0o400) diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc index 2a2a2be06593343fe79499c8080900bbc247caf8..f9d2cb878fcd5912d5b94ede3d22672f81dfedee 100644 --- 
a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc @@ -28,11 +28,7 @@ namespace gpu { namespace py = pybind11; void GpuBuild(const KernelGraphPtr &kernel_graph) { kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); - if (!bin_map->ReadIndex(kernel::kGpuKernelMeta)) { - MS_LOG(INFO) << "kernel cache miss, cache directory will be created later."; - } else { - MS_LOG(INFO) << "cache initialize to[" << kernel::kGpuKernelMeta << "]."; - } + bin_map->Initialize(); MS_EXCEPTION_IF_NULL(kernel_graph); auto kernels = kernel_graph->execution_order(); for (const auto &kernel : kernels) { diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 7eea5501d5af0f82a586d3d8d83f0b9ec60cf496..2d53097dd8d0debe8115fb83565b1b623d7a2c95 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -27,6 +27,7 @@ #include "device/gpu/gpu_common.h" #include "common/utils.h" #include "device/gpu/gpu_memory_manager.h" +#include "kernel/common_utils.h" namespace mindspore { namespace device { @@ -104,6 +105,7 @@ void GPUKernelRuntime::ReleaseDeviceRes() { if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } + kernel::KernelMeta::GetInstance()->RemoveKernelCache(); } void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) { diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc index 5abaff412ecc90317c6a578cdb4ad509162e04b0..8316116486ac7dcbdfdfca81c4cb8b9fe5599ab8 100644 --- a/mindspore/ccsrc/kernel/common_utils.cc +++ b/mindspore/ccsrc/kernel/common_utils.cc @@ -114,53 +114,35 @@ bool IsAtomicNode(const CNodePtr &kernel_node) { return atomic_flag; } -bool KernelMeta::ReadIndex(const std::string &bin_dir) { - DIR *dir = opendir(bin_dir.c_str()); - if (dir == nullptr) { +void KernelMeta::Initialize() { + kernel_meta_path_ = std::string(kGpuKernelMeta) + 
"_" + std::to_string(getpid()) + "/"; + // remove old kernel cache + RemoveKernelCache(); + #if defined(_WIN32) || defined(_WIN64) - auto ret = mkdir(bin_dir.c_str()); + auto ret = mkdir(kernel_meta_path_.c_str()); #else - auto ret = mkdir(bin_dir.c_str(), S_IRWXG | S_IRWXU); + auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU); #endif - if (ret != 0) { - MS_LOG(INFO) << "kernel dir not exist[" << bin_dir << "]."; - return false; - } - dir = opendir(bin_dir.c_str()); + if (ret != 0) { + MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later"; } + initialized_ = true; +} - struct dirent *entry; - while ((entry = readdir(dir)) != nullptr) { - string bin_dir_tmp = bin_dir; - std::string cce_json = entry->d_name; - if (cce_json.length() <= 5) { - continue; - } - - std::string suffix = cce_json.substr(cce_json.length() - 5); - if (suffix != kJsonSuffix) { - continue; - } - - auto sp = cce_json.rfind('/'); - if (sp != std::string::npos) { - continue; - } - - sp = cce_json.rfind('.'); - if (sp == std::string::npos) { - continue; +void KernelMeta::RemoveKernelCache() { + if (access(kernel_meta_path_.c_str(), 0) == 0) { + DIR *dir = opendir(kernel_meta_path_.c_str()); + MS_EXCEPTION_IF_NULL(dir); + struct dirent *entry; + while ((entry = readdir(dir)) != nullptr) { + std::string kernel_file = entry->d_name; + std::string kernel_file_realpath = kernel_meta_path_ + kernel_file; + (void)remove(kernel_file_realpath.c_str()); } - auto kernel_name = cce_json.substr(0, sp); - (void)bin_dir_tmp.append("/"); - (void)bin_dir_tmp.append(cce_json); - kernel_meta_map_[kernel_name] = bin_dir_tmp; + (void)closedir(dir); + (void)rmdir(kernel_meta_path_.c_str()); } - (void)closedir(dir); - - MS_LOG(INFO) << "Cache kernel initialized, kernel size[" << kernel_meta_map_.size() << "]."; - initialized_ = true; - return true; } std::string KernelMeta::Search(const std::string &kernel_name) const { @@ -176,11 +158,11 @@ std::string KernelMeta::Search(const 
std::string &kernel_name) const { } } -bool KernelMeta::Insert(const std::string &kernel_name, const std::string &cce_json) { +bool KernelMeta::Insert(const std::string &kernel_name, const std::string &kernel_json) { if (!initialized_) { return false; } - kernel_meta_map_[kernel_name] = cce_json; + kernel_meta_map_[kernel_name] = kernel_json; return true; } @@ -191,8 +173,8 @@ bool CheckCache(const std::string &kernel_name) { MS_LOG(DEBUG) << "kernel cache is invalid."; return false; } - std::string cce_json = bin_map->Search(kernel_name); - bool ret = (!cce_json.empty()); + std::string kernel_json = bin_map->Search(kernel_name); + bool ret = (!kernel_json.empty()); if (ret) { MS_LOG(INFO) << "Kernel name:" << kernel_name << " has registed."; } else { @@ -209,12 +191,12 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro return nullptr; } - std::string cce_json = bin_map->Search(kernel_name); - if (!cce_json.empty()) { + std::string kernel_json = bin_map->Search(kernel_name); + if (!kernel_json.empty()) { KernelPackPtr kernel_pack = std::make_shared<KernelPack>(); // just a tmp solution. 
- if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) { - MS_LOG(DEBUG) << "Read cache json and bin file failed[" << cce_json << "]."; + if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { + MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "]."; return nullptr; } else { return kernel_pack; } @@ -227,26 +209,26 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) { MS_LOG(INFO) << "kernel name:" << kernel_name << ", processr:" << processor; - std::string cce_json; + KernelMeta *bin_map = KernelMeta::GetInstance(); + std::string kernel_json; if (processor == kProcessorAiCore || processor == kProcessorAiCpu) { - cce_json = kCceKernelMeta; + kernel_json = kCceKernelMeta; } else { - cce_json = kGpuKernelMeta; + kernel_json = bin_map->GetKernelMetaPath(); } - (void)cce_json.append(kernel_name).append(kJsonSuffix); + (void)kernel_json.append(kernel_name).append(kJsonSuffix); KernelPackPtr kernel_pack = std::make_shared<KernelPack>(); - if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) { - MS_LOG(DEBUG) << "Read json and bin file failed[" << cce_json << "]."; + if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { + MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "]."; return nullptr; } - KernelMeta *bin_map = KernelMeta::GetInstance(); if (bin_map == nullptr) { MS_LOG(DEBUG) << "kernel cache is invalid."; return nullptr; } - if (bin_map->Insert(kernel_name, cce_json)) { - MS_LOG(INFO) << "Insert to cache success[" << cce_json << "], kernelname[" << kernel_name << "]."; + if (bin_map->Insert(kernel_name, kernel_json)) { + MS_LOG(INFO) << "Insert to cache success[" << kernel_json << "], kernelname[" << kernel_name << "]."; } return kernel_pack; } diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/kernel/common_utils.h index 
07f191cc7b7eced0b83fedc7161a0c6d6bad273a..47fe96c4c918009f4db1ae980186b01ae9e3c709 100644 --- a/mindspore/ccsrc/kernel/common_utils.h +++ b/mindspore/ccsrc/kernel/common_utils.h @@ -30,7 +30,7 @@ namespace mindspore { namespace kernel { constexpr auto kCceKernelMeta = "./kernel_meta/"; -constexpr auto kGpuKernelMeta = "/tmp/cuda_meta/"; +constexpr auto kGpuKernelMeta = "./cuda_meta"; constexpr auto kProcessorAiCore = "aicore"; constexpr auto kProcessorAiCpu = "aicpu"; constexpr auto kProcessorCuda = "cuda"; @@ -51,9 +51,11 @@ using KernelMetaPtr = std::shared_ptr<KernelMeta>; class KernelMeta { public: KernelMeta() = default; - bool ReadIndex(const std::string &bin_dir); + void Initialize(); + void RemoveKernelCache(); std::string Search(const std::string &kernel_name) const; - bool Insert(const std::string &kernel_name, const std::string &cce_json); + bool Insert(const std::string &kernel_name, const std::string &kernel_json); + std::string GetKernelMetaPath() { return kernel_meta_path_; } static KernelMeta *GetInstance() { static KernelMeta kernel_meta; @@ -63,6 +65,7 @@ class KernelMeta { private: bool initialized_ = false; + std::string kernel_meta_path_; std::unordered_map<std::string, std::string> kernel_meta_map_; };