未验证 提交 771a4144 编写于 作者: S sneaxiy 提交者: GitHub

fix ln (#41918)

上级 6bd39b5e
......@@ -130,7 +130,9 @@ __forceinline__ __device__ U BlockReduceSum(U val, U *shared) {
##__VA_ARGS__)
// Single-precision overload of real_sqrt: forwards to sqrtf so the
// computation stays in float.
static __device__ __forceinline__ float real_sqrt(float x) {
  const float root = sqrtf(x);
  return root;
}
// Double-precision overload of real_sqrt.
//
// The leading `::` pins the call to the sqrt declared in the global
// namespace, so unqualified lookup cannot pick up an identically named
// function from an enclosing namespace. There must be exactly one definition
// of this overload; a second copy with the same signature is a redefinition
// error.
static __device__ __forceinline__ double real_sqrt(double x) {
  return ::sqrt(x);
}
template <typename T>
struct PairForLayerNorm {
......@@ -162,7 +164,7 @@ __inline__ __device__ float rsqrt_(const float val) {
// Explicit specialization of rsqrt_ for double: returns the reciprocal
// square root of `val`.
//
// The call is qualified with `::` so that it resolves to the global-namespace
// rsqrt rather than to any same-named function visible through unqualified
// lookup. The body holds a single return statement; a second return after it
// would be unreachable.
template <>
__inline__ __device__ double rsqrt_(const double val) {
  return ::rsqrt(val);
}
#if CUDA_ARCH_FP16_SUPPORTED(__CUDA_ARCH__)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册