Commit 3a4c28fa authored by lizhenyu

Change the directory of the AKG CUDA kernel cache

The CUDA kernel meta files are no longer written to the shared /tmp/cuda_meta/ directory. Each process now writes them to its own ./cuda_meta_<pid>/ directory, and the GPU kernel runtime removes that directory when it releases its device resources.

Parent: a62c3e5c
@@ -24,13 +24,13 @@ import _akg
 from _akg import save_gpu_param as gpu_utils
 from _akg.utils import validation_check as vc_util
-MS_CUDA_KERNEL_PATH = "/tmp/cuda_meta/"
 @vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict)
 def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs):
     """op_build"""
+    kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/"
     if device == "cuda":
-        cuda_path = os.path.realpath(MS_CUDA_KERNEL_PATH)
+        cuda_path = os.path.realpath(kernel_meta_path)
         if not os.path.isdir(cuda_path):
             os.makedirs(cuda_path)
         if not opnames:
@@ -43,7 +43,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs):
             logging.error("no schedule func found %s", str(schedule_name))
             return None
-        ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx")
+        ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx")
         if os.path.exists(ptx_file):
             os.chmod(ptx_file, 0o600)
         try:
@@ -55,7 +55,8 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs):
                 foo = _akg.tvm.build(s, args, device, name=kernel_name)
                 ptx_code = foo.imported_modules[0].get_source("ptx")
                 file.write(ptx_code)
-                json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json")
+                json_file = os.path.realpath(
+                    kernel_meta_path + kernel_name + ".json")
                 kernel_info = (ptx_code, json_file, kernel_name)
                 gpu_utils.save_gpu_params(s, args, kernel_info)
             os.chmod(ptx_file, 0o400)
......
@@ -28,11 +28,7 @@ namespace gpu {
 namespace py = pybind11;
 void GpuBuild(const KernelGraphPtr &kernel_graph) {
   kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
-  if (!bin_map->ReadIndex(kernel::kGpuKernelMeta)) {
-    MS_LOG(INFO) << "kernel cache miss, cache directory will be created later.";
-  } else {
-    MS_LOG(INFO) << "cache initialize to[" << kernel::kGpuKernelMeta << "].";
-  }
+  bin_map->Initialize();
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto kernels = kernel_graph->execution_order();
   for (const auto &kernel : kernels) {
......
@@ -27,6 +27,7 @@
 #include "device/gpu/gpu_common.h"
 #include "common/utils.h"
 #include "device/gpu/gpu_memory_manager.h"
+#include "kernel/common_utils.h"
 namespace mindspore {
 namespace device {
@@ -104,6 +105,7 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
   if (mem_manager_ != nullptr) {
     mem_manager_->FreeDeviceMemory();
   }
+  kernel::KernelMeta::GetInstance()->RemoveKernelCache();
 }
 void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
......
@@ -114,53 +114,35 @@ bool IsAtomicNode(const CNodePtr &kernel_node) {
   return atomic_flag;
 }
-bool KernelMeta::ReadIndex(const std::string &bin_dir) {
-  DIR *dir = opendir(bin_dir.c_str());
-  if (dir == nullptr) {
-#if defined(_WIN32) || defined(_WIN64)
-    auto ret = mkdir(bin_dir.c_str());
-#else
-    auto ret = mkdir(bin_dir.c_str(), S_IRWXG | S_IRWXU);
-#endif
-    if (ret != 0) {
-      MS_LOG(INFO) << "kernel dir not exist[" << bin_dir << "].";
-      return false;
-    }
-    dir = opendir(bin_dir.c_str());
-  }
-  struct dirent *entry;
-  while ((entry = readdir(dir)) != nullptr) {
-    string bin_dir_tmp = bin_dir;
-    std::string cce_json = entry->d_name;
-    if (cce_json.length() <= 5) {
-      continue;
-    }
-    std::string suffix = cce_json.substr(cce_json.length() - 5);
-    if (suffix != kJsonSuffix) {
-      continue;
-    }
-    auto sp = cce_json.rfind('/');
-    if (sp != std::string::npos) {
-      continue;
-    }
-    sp = cce_json.rfind('.');
-    if (sp == std::string::npos) {
-      continue;
-    }
-    auto kernel_name = cce_json.substr(0, sp);
-    (void)bin_dir_tmp.append("/");
-    (void)bin_dir_tmp.append(cce_json);
-    kernel_meta_map_[kernel_name] = bin_dir_tmp;
-  }
-  (void)closedir(dir);
-  MS_LOG(INFO) << "Cache kernel initialized, kernel size[" << kernel_meta_map_.size() << "].";
-  initialized_ = true;
-  return true;
-}
+void KernelMeta::Initialize() {
+  kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/";
+  // remove old kernel cache
+  RemoveKernelCache();
+#if defined(_WIN32) || defined(_WIN64)
+  auto ret = mkdir(kernel_meta_path_.c_str());
+#else
+  auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU);
+#endif
+  if (ret != 0) {
+    MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later";
+  }
+  initialized_ = true;
+}
+
+void KernelMeta::RemoveKernelCache() {
+  if (access(kernel_meta_path_.c_str(), 0) == 0) {
+    DIR *dir = opendir(kernel_meta_path_.c_str());
+    MS_EXCEPTION_IF_NULL(dir);
+    struct dirent *entry;
+    while ((entry = readdir(dir)) != nullptr) {
+      std::string kernel_file = entry->d_name;
+      std::string kernel_file_realpath = kernel_meta_path_ + kernel_file;
+      (void)remove(kernel_file_realpath.c_str());
+    }
+    (void)closedir(dir);
+    (void)rmdir(kernel_meta_path_.c_str());
+  }
+}
 std::string KernelMeta::Search(const std::string &kernel_name) const {
@@ -176,11 +158,11 @@ std::string KernelMeta::Search(const std::string &kernel_name) const {
   }
 }
-bool KernelMeta::Insert(const std::string &kernel_name, const std::string &cce_json) {
+bool KernelMeta::Insert(const std::string &kernel_name, const std::string &kernel_json) {
   if (!initialized_) {
     return false;
   }
-  kernel_meta_map_[kernel_name] = cce_json;
+  kernel_meta_map_[kernel_name] = kernel_json;
   return true;
 }
@@ -191,8 +173,8 @@ bool CheckCache(const std::string &kernel_name) {
     MS_LOG(DEBUG) << "kernel cache is invalid.";
     return false;
   }
-  std::string cce_json = bin_map->Search(kernel_name);
-  bool ret = (!cce_json.empty());
+  std::string kernel_json = bin_map->Search(kernel_name);
+  bool ret = (!kernel_json.empty());
   if (ret) {
     MS_LOG(INFO) << "Kernel name:" << kernel_name << " has registed.";
   } else {
@@ -209,12 +191,12 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor) {
     return nullptr;
   }
-  std::string cce_json = bin_map->Search(kernel_name);
-  if (!cce_json.empty()) {
+  std::string kernel_json = bin_map->Search(kernel_name);
+  if (!kernel_json.empty()) {
     KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
     // just a tmp solution.
-    if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) {
-      MS_LOG(DEBUG) << "Read cache json and bin file failed[" << cce_json << "].";
+    if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
+      MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "].";
       return nullptr;
     } else {
       return kernel_pack;
@@ -227,26 +209,26 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor) {
 KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) {
   MS_LOG(INFO) << "kernel name:" << kernel_name << ", processr:" << processor;
-  std::string cce_json;
+  KernelMeta *bin_map = KernelMeta::GetInstance();
+  std::string kernel_json;
   if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
-    cce_json = kCceKernelMeta;
+    kernel_json = kCceKernelMeta;
   } else {
-    cce_json = kGpuKernelMeta;
+    kernel_json = bin_map->GetKernelMetaPath();
   }
-  (void)cce_json.append(kernel_name).append(kJsonSuffix);
+  (void)kernel_json.append(kernel_name).append(kJsonSuffix);
   KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
-  if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) {
-    MS_LOG(DEBUG) << "Read json and bin file failed[" << cce_json << "].";
+  if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
+    MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "].";
     return nullptr;
   }
-  KernelMeta *bin_map = KernelMeta::GetInstance();
   if (bin_map == nullptr) {
     MS_LOG(DEBUG) << "kernel cache is invalid.";
     return nullptr;
   }
-  if (bin_map->Insert(kernel_name, cce_json)) {
-    MS_LOG(INFO) << "Insert to cache success[" << cce_json << "], kernelname[" << kernel_name << "].";
+  if (bin_map->Insert(kernel_name, kernel_json)) {
+    MS_LOG(INFO) << "Insert to cache success[" << kernel_json << "], kernelname[" << kernel_name << "].";
   }
   return kernel_pack;
 }
......
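For readers who want to see the new cache-directory handling in isolation, below is a minimal standalone sketch of what KernelMeta::Initialize and KernelMeta::RemoveKernelCache do, written with plain POSIX calls. It is not MindSpore code: the names CreateKernelCacheDir and RemoveKernelCacheDir are made up for illustration, and the Windows mkdir variant from the commit's _WIN32/_WIN64 branch is omitted.

// Standalone sketch of the per-process kernel-cache directory pattern (POSIX only).
// CreateKernelCacheDir / RemoveKernelCacheDir are illustrative names, not MindSpore APIs.
#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cstdio>
#include <string>

std::string CreateKernelCacheDir() {
  // Same naming scheme as the commit: "./cuda_meta_" + pid + "/".
  std::string path = "./cuda_meta_" + std::to_string(getpid()) + "/";
  // Same permission bits as KernelMeta::Initialize; the commit only logs and
  // continues when mkdir fails (e.g. the directory already exists).
  if (mkdir(path.c_str(), S_IRWXG | S_IRWXU) != 0) {
    std::perror("mkdir");
  }
  return path;
}

void RemoveKernelCacheDir(const std::string &path) {
  if (access(path.c_str(), F_OK) != 0) {
    return;  // nothing to clean up
  }
  DIR *dir = opendir(path.c_str());
  if (dir == nullptr) {
    return;
  }
  struct dirent *entry;
  while ((entry = readdir(dir)) != nullptr) {
    // Remove every entry; as in the commit, remove() simply fails on "." and ".."
    // and the result is ignored, so rmdir succeeds once the real files are gone.
    std::string file = path + entry->d_name;
    (void)remove(file.c_str());
  }
  (void)closedir(dir);
  (void)rmdir(path.c_str());
}

int main() {
  std::string cache = CreateKernelCacheDir();
  // A real build would write <kernel>.ptx and <kernel>.json into `cache` here.
  RemoveKernelCacheDir(cache);
  return 0;
}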
@@ -30,7 +30,7 @@
 namespace mindspore {
 namespace kernel {
 constexpr auto kCceKernelMeta = "./kernel_meta/";
-constexpr auto kGpuKernelMeta = "/tmp/cuda_meta/";
+constexpr auto kGpuKernelMeta = "./cuda_meta";
 constexpr auto kProcessorAiCore = "aicore";
 constexpr auto kProcessorAiCpu = "aicpu";
 constexpr auto kProcessorCuda = "cuda";
@@ -51,9 +51,11 @@ using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
 class KernelMeta {
  public:
   KernelMeta() = default;
-  bool ReadIndex(const std::string &bin_dir);
+  void Initialize();
+  void RemoveKernelCache();
   std::string Search(const std::string &kernel_name) const;
-  bool Insert(const std::string &kernel_name, const std::string &cce_json);
+  bool Insert(const std::string &kernel_name, const std::string &kernel_json);
+  std::string GetKernelMetaPath() { return kernel_meta_path_; }

   static KernelMeta *GetInstance() {
     static KernelMeta kernel_meta;
@@ -63,6 +65,7 @@ class KernelMeta {
  private:
   bool initialized_ = false;
+  std::string kernel_meta_path_;
   std::unordered_map<std::string, std::string> kernel_meta_map_;
 };
......
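With kGpuKernelMeta reduced to a prefix and the full path held in kernel_meta_path_, GPU kernel JSON paths are now composed per process, mirroring what InsertCache does with GetKernelMetaPath(). A small sketch of that composition follows; the kernel name "Add_123" and the local kJsonSuffix constant are illustrative stand-ins (the real suffix constant is defined alongside the code above).

#include <unistd.h>

#include <iostream>
#include <string>

int main() {
  // Illustrative values; in MindSpore the path comes from KernelMeta::GetKernelMetaPath().
  const std::string kJsonSuffix = ".json";  // length-5 suffix implied by the old ReadIndex check
  std::string kernel_meta_path = "./cuda_meta_" + std::to_string(getpid()) + "/";
  std::string kernel_name = "Add_123";  // hypothetical kernel name

  // Mirrors InsertCache: start from the per-process meta path, then append "<kernel_name>.json".
  std::string kernel_json = kernel_meta_path;
  (void)kernel_json.append(kernel_name).append(kJsonSuffix);

  std::cout << kernel_json << std::endl;  // e.g. ./cuda_meta_12345/Add_123.json
  return 0;
}

Because the directory name embeds the pid, concurrently running processes no longer share a single /tmp/cuda_meta/ cache, and each process removes only its own directory when ReleaseDeviceRes runs.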