未验证 提交 771a4144 编写于 作者: S sneaxiy 提交者: GitHub

fix ln (#41918)

上级 6bd39b5e
...@@ -130,7 +130,9 @@ __forceinline__ __device__ U BlockReduceSum(U val, U *shared) { ...@@ -130,7 +130,9 @@ __forceinline__ __device__ U BlockReduceSum(U val, U *shared) {
##__VA_ARGS__) ##__VA_ARGS__)
static __device__ __forceinline__ float real_sqrt(float x) { return sqrtf(x); } static __device__ __forceinline__ float real_sqrt(float x) { return sqrtf(x); }
static __device__ __forceinline__ double real_sqrt(double x) { return sqrt(x); } static __device__ __forceinline__ double real_sqrt(double x) {
return ::sqrt(x);
}
template <typename T> template <typename T>
struct PairForLayerNorm { struct PairForLayerNorm {
...@@ -162,7 +164,7 @@ __inline__ __device__ float rsqrt_(const float val) { ...@@ -162,7 +164,7 @@ __inline__ __device__ float rsqrt_(const float val) {
template <> template <>
__inline__ __device__ double rsqrt_(const double val) { __inline__ __device__ double rsqrt_(const double val) {
return rsqrt(val); return ::rsqrt(val);
} }
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__) #if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册