未验证 提交 0617a3ed 编写于 作者: S sneaxiy 提交者: GitHub

Fix GELU compilation on CUDA 10 by guarding the `tanh.approx.f32` fast path with `CUDA_VERSION >= 11000` (#39045)

上级 0837a2cc
...@@ -24,7 +24,7 @@ namespace operators { ...@@ -24,7 +24,7 @@ namespace operators {
#ifdef __NVCC__ #ifdef __NVCC__
template <bool FastMode> template <bool FastMode>
static __device__ __forceinline__ float FP32FastTanh(float x) { static __device__ __forceinline__ float FP32FastTanh(float x) {
#if __CUDA_ARCH__ >= 750 && !defined(_WIN32) #if __CUDA_ARCH__ >= 750 && CUDA_VERSION >= 11000 && !defined(_WIN32)
if (FastMode) { if (FastMode) {
float y; float y;
asm("tanh.approx.f32 %0,%1; \n\t" : "=f"(y) : "f"(x)); asm("tanh.approx.f32 %0,%1; \n\t" : "=f"(y) : "f"(x));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册