// Copyright (c) 2021 CINN Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/cinn/backends/nvrtc/nvrtc_util.h" #include #include #include #include #include #include #include #include #include "paddle/cinn/backends/cuda_util.h" #include "paddle/cinn/backends/nvrtc/header_generator.h" #include "paddle/cinn/common/common.h" #include "paddle/cinn/runtime/flags.h" #include "paddle/cinn/utils/string.h" DECLARE_string(cinn_nvcc_cmd_path); DECLARE_bool(nvrtc_compile_to_cubin); namespace cinn { namespace backends { namespace nvrtc { std::string Compiler::operator()(const std::string& code, bool include_headers) { if (runtime::CanUseNvccCompiler()) { return CompileWithNvcc(code); } return CompileCudaSource(code, include_headers); } Compiler::Compiler() { if (FLAGS_nvrtc_compile_to_cubin) { #if CUDA_VERSION >= 11010 compile_to_cubin_ = true; #endif } VLOG(4) << "FLAGS_nvrtc_compile_to_cubin: " << FLAGS_nvrtc_compile_to_cubin << ", compile_to_cubin_: " << compile_to_cubin_; } bool Compiler::compile_to_cubin() { return compile_to_cubin_; } std::vector Compiler::FindCUDAIncludePaths() { const std::string delimiter = "/"; std::string cuda_include_path; const char* cuda_path_env = std::getenv("CUDA_PATH"); if (cuda_path_env != nullptr) { cuda_include_path += cuda_path_env; cuda_include_path += delimiter + "include"; return {cuda_include_path}; } #if defined(__linux__) struct stat st; cuda_include_path = "/usr/local/cuda/include"; if (stat(cuda_include_path.c_str(), &st) == 0) { return {cuda_include_path}; } #endif LOG(FATAL) << "Cannot find cuda include path." << "CUDA_PATH is not set or CUDA is not installed in the default installation path." << "In other than linux, it is necessary to set CUDA_PATH."; return {cuda_include_path}; } std::vector Compiler::FindCINNRuntimeIncludePaths() { return {Context::Global().runtime_include_dir()}; } std::string Compiler::CompileCudaSource(const std::string& code, bool include_headers) { const auto& header_gen = JitSafeHeaderGenerator::GetInstance(); std::vector compile_options; std::vector param_cstrings{}; nvrtcProgram prog; std::string cc = "30"; int major, minor; cudaError_t e1 = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 0); cudaError_t e2 = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, 0); if (e1 == cudaSuccess && e2 == cudaSuccess) { cc = std::to_string(major) + std::to_string(minor); } else { LOG(WARNING) << "cannot detect compute capability from your device, " << "fall back to compute_30."; } if (compile_to_cubin_) { compile_options.push_back("-arch=sm_" + cc); } else { compile_options.push_back("-arch=compute_" + cc); } compile_options.push_back("-std=c++14"); compile_options.push_back("-default-device"); if (include_headers) { // prepare include headers auto cuda_headers = FindCUDAIncludePaths(); auto cinn_headers = FindCINNRuntimeIncludePaths(); std::vector include_paths; for (auto& header : cuda_headers) { include_paths.push_back("--include-path=" + header); } for (auto& header : cinn_headers) { include_paths.push_back("--include-path=" + header); } compile_options.insert(std::end(compile_options), include_paths.begin(), include_paths.end()); } for (const auto& option : compile_options) { param_cstrings.push_back(option.c_str()); } VLOG(3) << "compile options: " << utils::Join(compile_options, " "); NVRTC_CALL(nvrtcCreateProgram( &prog, code.c_str(), nullptr, header_gen.size(), header_gen.headers().data(), header_gen.include_names().data())); nvrtcResult compile_res = nvrtcCompileProgram(prog, param_cstrings.size(), param_cstrings.data()); { // get log size_t log_size; NVRTC_CALL(nvrtcGetProgramLogSize(prog, &log_size)); std::string log; log.resize(log_size); NVRTC_CALL(nvrtcGetProgramLog(prog, &log[0])); CHECK_EQ(compile_res, NVRTC_SUCCESS) << log; } size_t size; std::string data; if (compile_to_cubin_) { NVRTC_CALL(nvrtcGetCUBINSize(prog, &size)); data.resize(size); NVRTC_CALL(nvrtcGetCUBIN(prog, &data[0])); } else { NVRTC_CALL(nvrtcGetPTXSize(prog, &size)); data.resize(size); NVRTC_CALL(nvrtcGetPTX(prog, &data[0])); } NVRTC_CALL(nvrtcDestroyProgram(&prog)); return data; } std::string Compiler::CompileWithNvcc(const std::string& cuda_c) { // read dir source std::string dir = "./source"; if (access(dir.c_str(), 0) == -1) { CHECK(mkdir(dir.c_str(), 7) != -1) << "Fail to mkdir " << dir; } // get unqiue prefix name prefix_name_ = dir + "/" + common::UniqName("rtc_tmp"); auto cuda_c_file = prefix_name_ + ".cu"; std::ofstream ofs(cuda_c_file, std::ios::out); CHECK(ofs.is_open()) << "Fail to open file " << cuda_c_file; ofs << cuda_c; ofs.close(); CompileToPtx(); CompileToCubin(); return prefix_name_ + ".cubin"; } // std::string Compiler::GetPtx() { return ReadFile(prefix_name_ + ".ptx", std::ios::in); } void Compiler::CompileToPtx() { auto include_dir = common::Context::Global().runtime_include_dir(); std::string include_dir_str = ""; for (auto dir : include_dir) { if (include_dir_str.empty()) { include_dir_str = dir; } else { include_dir_str += ":" + dir; } } std::string options = std::string("export PATH=") + FLAGS_cinn_nvcc_cmd_path + std::string(":$PATH && nvcc -std=c++14 --ptx -O3 -I ") + include_dir_str; options += " -arch=" + GetDeviceArch(); options += " -o " + prefix_name_ + ".ptx"; options += " " + prefix_name_ + ".cu"; VLOG(2) << "Nvcc Compile Options : " << options; CHECK(system(options.c_str()) == 0) << options; } void Compiler::CompileToCubin() { std::string options = std::string("export PATH=") + FLAGS_cinn_nvcc_cmd_path + std::string(":$PATH && nvcc --cubin -O3"); options += " -arch=" + GetDeviceArch(); options += " -o " + prefix_name_ + ".cubin"; options += " " + prefix_name_ + ".ptx"; VLOG(2) << "Nvcc Compile Options : " << options; CHECK(system(options.c_str()) == 0) << options; } std::string Compiler::GetDeviceArch() { int major = 0, minor = 0; if (cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, 0) == cudaSuccess && cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, 0) == cudaSuccess) { return "sm_" + std::to_string(major) + std::to_string(minor); } else { LOG(WARNING) << "cannot detect compute capability from your device, " << "fall back to compute_30."; return "sm_30"; } } std::string Compiler::ReadFile(const std::string& file_name, std::ios_base::openmode mode) { // open cubin file std::ifstream ifs(file_name, mode); CHECK(ifs.is_open()) << "Fail to open file " << file_name; ifs.seekg(std::ios::end); auto len = ifs.tellg(); ifs.seekg(0); // read cubin file std::string file_data(len, ' '); ifs.read(&file_data[0], len); ifs.close(); return std::move(file_data); } } // namespace nvrtc } // namespace backends } // namespace cinn