Unverified commit 560c8153, authored by Yiqun Liu and committed by GitHub

Add some checks for the CUDA Driver API and NVRTC (#22719)

* Add a check for whether the CUDA Driver API and NVRTC are available on the runtime system.

* Call cuInit to initialize the CUDA Driver API before any CUDA calls.
test=develop

* Change the behavior when libnvrtc.so cannot be found: print a warning instead of exiting.
test=develop

* Do not initialize the CUDA Driver API on Windows and macOS.
test=develop

* Remove the call to cuInit when entering Paddle and enable test_code_generator.
test=develop

* Add some built-in functions for __half.
test=develop

* Change save_intermediate_out to false in the unit test.
test=develop

* Fix an erroneous reference to a temporary variable when setting the include path for device_code.
test=develop
Parent 9ec5eb0e
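The patch centers on a one-time runtime check for the CUDA Driver API and NVRTC before any JIT compilation is attempted. The following sketch (not part of the commit) shows how a caller might guard compilation with the CUDADeviceCode::CheckAvailableStatus()/IsAvailable() interface added below; the function TryJitCompile and its fallback comment are illustrative assumptions.

    // Hypothetical usage sketch built on the interface added in this commit.
    // CheckAvailableStatus() probes libcuda/libnvrtc once (DeviceCodePool also
    // calls it in this patch); IsAvailable() returns the cached result.
    bool TryJitCompile(const paddle::platform::CUDAPlace& place,
                       const std::string& func_name,
                       const std::string& code_str) {
      namespace plat = paddle::platform;
      plat::CUDADeviceCode::CheckAvailableStatus();
      if (!plat::CUDADeviceCode::IsAvailable()) {
        // NVRTC or the CUDA driver is missing; only a warning has been logged,
        // so the caller can fall back to the unfused execution path.
        return false;
      }
      plat::CUDADeviceCode device_code(place, func_name, code_str);
      return device_code.Compile(/*include_path=*/false);
    }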
@@ -2,7 +2,7 @@ cc_library(code_generator
     SRCS operation.cc code_generator.cc code_generator_helper.cc
     DEPS graph subgraph_detector)
 if(WITH_GPU)
-  # cc_test(test_code_generator SRCS code_generator_tester.cc DEPS code_generator device_code lod_tensor graph_viz_pass)
+  cc_test(test_code_generator SRCS code_generator_tester.cc DEPS code_generator device_code lod_tensor graph_viz_pass)
 endif()
 cc_library(fusion_group_pass
......
@@ -209,7 +209,10 @@ std::set<int> CodeGenerator::DistilIntermediateIds(
   for (size_t i = 0; i < expressions.size(); i++) {
     for (auto id : expressions[i].GetOutputIds()) {
       auto intermediate_state = expressions[i].GetIntermediateState();
-      if (intermediate_state[id]) intermediate_ids.insert(id);
+      if (intermediate_state.find(id) != intermediate_state.end() &&
+          intermediate_state[id]) {
+        intermediate_ids.insert(id);
+      }
     }
   }
   return intermediate_ids;
......
@@ -44,10 +44,11 @@ static inline std::string VarName(int index) {
 class OperationExpression {
  public:
-  explicit OperationExpression(std::string op_type, std::vector<int> input_ids,
-                               std::vector<int> output_ids,
-                               std::string rhs_type, std::string lhs_type,
-                               std::unordered_map<int, bool> intermediate_state)
+  explicit OperationExpression(
+      std::string op_type, const std::vector<int>& input_ids,
+      const std::vector<int>& output_ids, std::string rhs_type,
+      std::string lhs_type,
+      const std::unordered_map<int, bool>& intermediate_state = {})
       : op_type_(op_type),
         input_ids_(input_ids),
         output_ids_(output_ids),
......
@@ -176,7 +176,6 @@ void TestMainImpl(std::string func_name, std::string code_str,
   bool is_float16 = std::type_index(typeid(T)) ==
                     std::type_index(typeid(paddle::platform::float16));
 
-  paddle::framework::InitDevices(false, {0});
   paddle::platform::CUDAPlace place = paddle::platform::CUDAPlace(0);
   paddle::platform::CUDADeviceCode device_code(place, func_name, code_str);
   device_code.Compile(is_float16);
@@ -266,7 +265,7 @@ void TestElementwiseMain(
   }
 
   int n = cpu_tensors[0].numel();
-  if (dtype == "float16") {
+  if (dtype == "__half") {
     TestMainImpl<paddle::platform::float16>(func_name, code_str, cpu_tensors, n,
                                             input_ids, output_ids);
   } else {
@@ -275,7 +274,7 @@ void TestElementwiseMain(
   }
 
   // Check the results
-  float eps = (dtype == "float16") ? 1E-2 : 1E-5;
+  float eps = (dtype == "__half") ? 1E-2 : 1E-5;
   for (int i = 0; i < n; i++) {
     fusion_group::CheckOutput(expressions, cpu_tensors, input_ids, output_ids,
                               i, eps);
@@ -312,7 +311,7 @@ void TestMain(fusion_group::SubGraph* subgraph, std::vector<int> input_ids,
 }
 
 TEST(code_generator, elementwise) {
-  for (std::string dtype : {"float", "float16"}) {
+  for (std::string dtype : {"float", "__half"}) {
     // t2 = t0 * t1
     // t4 = t2 + t3
     // t6 = t4 - t5
@@ -342,7 +341,7 @@ TEST(code_generator, elementwise) {
 }
 
 TEST(code_generator, elementwise_grad) {
-  for (std::string dtype : {"float", "float16"}) {
+  for (std::string dtype : {"float", "__half"}) {
     // The var order: t0, t1, t2, t3, t0', t1', t2', t3'
     // t2 = t0 * t1
     // t3 = relu(t2)
@@ -407,7 +406,7 @@ std::unique_ptr<paddle::framework::ir::Graph> BuildGraph(bool backward,
   std::unique_ptr<paddle::framework::ir::Graph> graph(
       new paddle::framework::ir::Graph(layers.main_program()));
-  auto proto_dtype = (dtype == "float16")
+  auto proto_dtype = (dtype == "__half")
                          ? paddle::framework::proto::VarType::FP16
                          : paddle::framework::proto::VarType::FP32;
   for (auto* n : graph->Nodes()) {
@@ -463,10 +462,10 @@ std::unordered_set<paddle::framework::ir::Node*> DistilGradNodes(
 }
 
 TEST(code_generator, subgraph) {
-  for (std::string dtype : {"float", "float16"}) {
+  for (std::string dtype : {"float", "__half"}) {
     std::unique_ptr<paddle::framework::ir::Graph> graph =
         BuildGraph(false, dtype);
-    fusion_group::SubGraph subgraph(0, "elementwise_kernel_1", true,
+    fusion_group::SubGraph subgraph(0, "elementwise_kernel_1", false,
                                     graph->Nodes());
 
     // Expressions generated by code_generator (they may be different):
@@ -482,10 +481,10 @@ TEST(code_generator, subgraph) {
 }
 
 TEST(code_generator, subgraph_grad) {
-  for (std::string dtype : {"float", "float16"}) {
+  for (std::string dtype : {"float", "__half"}) {
     std::unique_ptr<paddle::framework::ir::Graph> graph =
         BuildGraph(true, dtype);
-    fusion_group::SubGraph subgraph(0, "elementwise_grad_kernel_1", true,
+    fusion_group::SubGraph subgraph(0, "elementwise_grad_kernel_1", false,
                                     DistilGradNodes(graph));
 
     // Expressions generated by code_generator (they may be different):
......
@@ -35,6 +35,7 @@ __device__ inline double Sqrt(double x) { return sqrt(x); }
 )";
 
+// List some built-in functions of __half implemented in cuda_fp16.hpp
 static constexpr char predefined_cuda_functions_fp16[] = R"(
 #define __HALF_TO_US(var) *(reinterpret_cast<unsigned short *>(&(var)))
 #define __HALF_TO_CUS(var) *(reinterpret_cast<const unsigned short *>(&(var)))
@@ -193,6 +194,12 @@ __CUDA_FP16_DECL__ __half __hdiv(__half a, __half b) {
   return v;
 }
 
+__CUDA_FP16_DECL__ __half __hneg(const __half a)
+{
+  __half zero;
+  zero = __float2half(0.0);
+  return __hsub(zero, a);
+}
 /* Some basic arithmetic operations expected of a builtin */
 __device__ __forceinline__ __half operator+(const __half &lh, const __half &rh) { return __hadd(lh, rh); }
@@ -200,6 +207,10 @@ __device__ __forceinline__ __half operator-(const __half &lh, const __half &rh)
 __device__ __forceinline__ __half operator*(const __half &lh, const __half &rh) { return __hmul(lh, rh); }
 __device__ __forceinline__ __half operator/(const __half &lh, const __half &rh) { return __hdiv(lh, rh); }
+
+/* Unary plus and inverse operators */
+__device__ __forceinline__ __half operator+(const __half &h) { return h; }
+__device__ __forceinline__ __half operator-(const __half &h) { return __hneg(h); }
 /* Some basic comparison operations to make it look like a builtin */
 __device__ __forceinline__ bool operator==(const __half &lh, const __half &rh) { return __heq(lh, rh); }
 __device__ __forceinline__ bool operator!=(const __half &lh, const __half &rh) { return __hne(lh, rh); }
......
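The fp16 preamble above now also provides __hneg and the unary +/- operators, so NVRTC-compiled kernels can negate __half values directly. A hypothetical fragment of such generated device code (the variable names are made up) could read:

    // Compiled together with the predefined __half helpers above; unary minus
    // resolves to __hneg, '*' to __hmul, and '+' to __hadd.
    __device__ __half ExampleExpr(__half tmp_0, __half tmp_1) {
      __half tmp_2 = -tmp_0;
      return tmp_2 * tmp_1 + tmp_0;
    }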
@@ -30,6 +30,14 @@ namespace ir {
 void FusionGroupPass::ApplyImpl(ir::Graph* graph) const {
   FusePassBase::Init("fusion_group_pass", graph);
 
   if (Get<bool>("use_gpu")) {
+    // TODO(liuyiqun): open this check.
+    // if (!platform::CUDADeviceCode::IsAvailable()) {
+    //   LOG(WARNING)
+    //       << "Disable fusion_group because CUDA Driver or NVRTC is not "
+    //          "available.";
+    //   return;
+    // }
     fusion_group::OperationMap::Init();
     int num_elementwise_groups = DetectFusionGroup(graph, 0);
     AddStatis(num_elementwise_groups);
@@ -61,7 +69,7 @@ int FusionGroupPass::DetectFusionGroup(Graph* graph, int type) const {
       subgraph.DetectIntermediateOutWithGraph(graph);
     }
     if (subgraph.IsValid(min_subgraph_size)) {
-      subgraph.SetFuncName("FusedElementwise" + std::to_string(index++));
+      subgraph.SetFuncName("fused_elementwise_" + std::to_string(index++));
       if (GenerateCode(&subgraph)) {
         InsertFusionGroupOp(graph, &subgraph);
         num_subgraphs++;
......
@@ -79,9 +79,61 @@ DeviceCodePool::DeviceCodePool(const std::vector<platform::Place>& places) {
 #endif
     }
   }
+
+#ifdef PADDLE_WITH_CUDA
+  CUDADeviceCode::CheckAvailableStatus();
+#endif
 }
 
 #ifdef PADDLE_WITH_CUDA
+static bool CheckCUDADriverResult(CUresult result, std::string caller,
+                                  std::string kernel_name = "") {
+  if (result != CUDA_SUCCESS) {
+    const char* error = nullptr;
+    dynload::cuGetErrorString(result, &error);
+    LOG_FIRST_N(WARNING, 1) << "Call " << caller << " for < " << kernel_name
+                            << " > failed: " << error << " (" << result << ")";
+    return false;
+  }
+  return true;
+}
+
+bool CUDADeviceCode::available_ = false;
+void CUDADeviceCode::CheckAvailableStatus() {
+  available_ = false;
+  if (!dynload::HasNVRTC() || !dynload::HasCUDADriver()) {
+    LOG_FIRST_N(WARNING, 1)
+        << "NVRTC and CUDA driver are needed for JIT compiling of CUDA code.";
+    return;
+  }
+
+  int nvrtc_major = 0;
+  int nvrtc_minor = 0;
+  nvrtcResult nvrtc_result = dynload::nvrtcVersion(&nvrtc_major, &nvrtc_minor);
+
+  int driver_version = 0;
+  int driver_major = 0;
+  int driver_minor = 0;
+  CUresult driver_result = dynload::cuDriverGetVersion(&driver_version);
+  if (driver_result == CUDA_SUCCESS) {
+    driver_major = driver_version / 1000;
+    driver_minor = (driver_version % 1000) / 10;
+  }
+
+  LOG_FIRST_N(INFO, 1) << "CUDA Driver Version: " << driver_major << "."
+                       << driver_minor << "; NVRTC Version: " << nvrtc_major
+                       << "." << nvrtc_minor;
+  if (nvrtc_result != NVRTC_SUCCESS || driver_result != CUDA_SUCCESS) {
+    return;
+  }
+
+  int count = 0;
+  if (CheckCUDADriverResult(dynload::cuDeviceGetCount(&count),
+                            "cuDeviceGetCount")) {
+    available_ = true;
+  }
+}
+
 static std::string FindCUDAIncludePath() {
   auto EndWith = [](std::string str, std::string substr) -> bool {
     size_t pos = str.rfind(substr);
@@ -137,7 +189,7 @@ CUDADeviceCode::CUDADeviceCode(const Place& place, const std::string& name,
 bool CUDADeviceCode::Compile(bool include_path) {
   is_compiled_ = false;
   if (!dynload::HasNVRTC() || !dynload::HasCUDADriver()) {
-    LOG(WARNING)
+    LOG_FIRST_N(WARNING, 1)
         << "NVRTC and CUDA driver are needed for JIT compiling of CUDA code.";
     return false;
   }
@@ -160,10 +212,11 @@ bool CUDADeviceCode::Compile(bool include_path) {
   std::string compute_flag =
       "--gpu-architecture=compute_" + std::to_string(compute_capability);
   std::vector<const char*> options = {"--std=c++11", compute_flag.c_str()};
+  std::string include_option;
   if (include_path) {
     std::string cuda_include_path = FindCUDAIncludePath();
     if (!cuda_include_path.empty()) {
-      std::string include_option = "--include-path=" + cuda_include_path;
+      include_option = "--include-path=" + cuda_include_path;
       options.push_back(include_option.c_str());
     }
   }
@@ -209,13 +262,13 @@ bool CUDADeviceCode::Compile(bool include_path) {
   }
 
   if (!CheckCUDADriverResult(dynload::cuModuleLoadData(&module_, ptx_.data()),
-                             "cuModuleLoadData")) {
+                             "cuModuleLoadData", name_)) {
     return false;
   }
 
   if (!CheckCUDADriverResult(
           dynload::cuModuleGetFunction(&function_, module_, name_.c_str()),
-          "cuModuleGetFunction")) {
+          "cuModuleGetFunction", name_)) {
     return false;
   }
 
@@ -253,19 +306,9 @@ void CUDADeviceCode::Launch(const size_t n, std::vector<void*>* args) const {
 bool CUDADeviceCode::CheckNVRTCResult(nvrtcResult result,
                                       std::string function) {
   if (result != NVRTC_SUCCESS) {
-    LOG(WARNING) << "Call " << function
-                 << " failed: " << dynload::nvrtcGetErrorString(result);
-    return false;
-  }
-  return true;
-}
-
-bool CUDADeviceCode::CheckCUDADriverResult(CUresult result,
-                                           std::string function) {
-  if (result != CUDA_SUCCESS) {
-    const char* error = nullptr;
-    LOG(WARNING) << "Call " << function
-                 << " failed: " << dynload::cuGetErrorString(result, &error);
+    LOG_FIRST_N(WARNING, 1)
+        << "Call " << function << " for < " << name_
+        << " > failed: " << dynload::nvrtcGetErrorString(result);
     return false;
   }
   return true;
......
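CheckAvailableStatus above folds the integer reported by cuDriverGetVersion into a major.minor pair before logging it next to the NVRTC version. A small standalone example of that arithmetic (the sample value 10020 is assumed, corresponding to CUDA 10.2):

    int driver_version = 10020;                       // assumed sample value
    int driver_major = driver_version / 1000;         // 10
    int driver_minor = (driver_version % 1000) / 10;  // 2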
@@ -56,9 +56,13 @@ class CUDADeviceCode : public DeviceCode {
     workload_per_thread_ = workload_per_thread;
   }
 
+  static void CheckAvailableStatus();
+  static bool IsAvailable() { return available_; }
+
  private:
   bool CheckNVRTCResult(nvrtcResult result, std::string function);
-  bool CheckCUDADriverResult(CUresult result, std::string function);
+
+  static bool available_;
 
   bool is_compiled_{false};
   int max_threads_{0};
......
@@ -60,6 +60,8 @@ extern bool HasCUDADriver();
  * include all needed cuda driver functions
  **/
 #define CUDA_ROUTINE_EACH(__macro)     \
+  __macro(cuInit);                     \
+  __macro(cuDriverGetVersion);         \
   __macro(cuGetErrorString);           \
   __macro(cuModuleLoadData);           \
   __macro(cuModuleGetFunction);        \
@@ -68,7 +70,7 @@ extern bool HasCUDADriver();
   __macro(cuLaunchKernel);             \
   __macro(cuCtxCreate);                \
   __macro(cuCtxGetCurrent);            \
-  __macro(cuDeviceGet);                \
+  __macro(cuDeviceGetCount);           \
   __macro(cuDevicePrimaryCtxGetState)
 
 CUDA_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDA_WRAP);
......
@@ -164,10 +164,10 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
   }
   auto error_msg =
       "Failed to find dynamic library: %s ( %s ) \n Please specify "
-      "its path correctly using following ways: \n Method. set "
+      "its path correctly using following ways: \n set "
       "environment variable LD_LIBRARY_PATH on Linux or "
       "DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
      "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
       "using the DYLD_LIBRARY_PATH is impossible unless System "
       "Integrity Protection (SIP) is disabled.";
 #if !defined(_WIN32)
@@ -238,17 +238,17 @@ void* GetCusolverDsoHandle() {
 
 void* GetNVRTCDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
-  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvrtc.dylib");
+  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvrtc.dylib", false);
 #else
-  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvrtc.so");
+  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libnvrtc.so", false);
 #endif
 }
 
 void* GetCUDADsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
-  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcuda.dylib");
+  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcuda.dylib", false);
 #else
-  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcuda.so");
+  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcuda.so", false);
 #endif
 }
......
@@ -60,6 +60,7 @@ extern bool HasNVRTC();
  * include all needed nvrtc functions
  **/
 #define NVRTC_ROUTINE_EACH(__macro)   \
+  __macro(nvrtcVersion);              \
   __macro(nvrtcGetErrorString);       \
   __macro(nvrtcCompileProgram);       \
   __macro(nvrtcCreateProgram);        \
......