Commit caa4027d authored by Yu Yang

Follow comments

Parent 4db43c6c
@@ -126,14 +126,9 @@ inline void Blas<platform::CUDADeviceContext>::GEMM(
                                     CUDA_R_32F, algo));
 #else
   // CUDA 7.5 does not support cublasGemmEx, hence we fall back to use hgemm
-  const half h_alpha = static_cast<const half>(alpha);
-  const half h_beta = static_cast<const half>(beta);
-  const half *h_A = reinterpret_cast<const half *>(A);
-  const half *h_B = reinterpret_cast<const half *>(B);
-  half *h_C = reinterpret_cast<half *>(C);
-  CUBlas<platform::float16>(context_.cublas_handle(), cuTransB, cuTransA, N, M,
-                            K, &h_alpha, h_B, ldb, h_A, lda, &h_beta, h_C, N);
+  CUBlas<platform::float16>::GEMM(context_.cublas_handle(), cuTransB, cuTransA,
+                                  N, M, K, &h_alpha, h_B, ldb, h_A, lda,
+                                  &h_beta, h_C, N);
 #endif  // CUDA_VERSION >= 8000
 }
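For readers following the change: the removed lines cast the float16 scalars and pointers to half by hand and then wrote CUBlas<platform::float16>(...) without naming the GEMM member, while the added lines call the static CUBlas<platform::float16>::GEMM wrapper directly, which on this branch falls back to hgemm because CUDA 7.5 has no cublasGemmEx. The sketch below is a minimal, hypothetical reconstruction of such a wrapper, not Paddle's actual implementation; the float16 struct, the error handling, and the exact signature are assumptions made for illustration.

// Hypothetical sketch (not Paddle's actual code): a CUBlas<float16>
// specialization whose static GEMM forwards half-precision operands to
// cublasHgemm, i.e. the CUDA < 8.0 fallback named in the diff's comment.
#include <cublas_v2.h>
#include <cuda_fp16.h>

#include <cstdio>
#include <cstdlib>

namespace sketch {

// Assumed 16-bit storage type standing in for platform::float16; it only
// needs to be layout-compatible with __half.
struct float16 {
  unsigned short x;
};

template <typename T>
struct CUBlas;

template <>
struct CUBlas<float16> {
  // cuBLAS is column-major, so a row-major caller computes C = A * B by
  // requesting C^T = B^T * A^T; that is why the call site in the diff passes
  // cuTransB and h_B before cuTransA and h_A, and swaps M and N.
  static void GEMM(cublasHandle_t handle, cublasOperation_t transa,
                   cublasOperation_t transb, int m, int n, int k,
                   const __half *alpha, const __half *A, int lda,
                   const __half *B, int ldb, const __half *beta, __half *C,
                   int ldc) {
    cublasStatus_t status = cublasHgemm(handle, transa, transb, m, n, k,
                                        alpha, A, lda, B, ldb, beta, C, ldc);
    if (status != CUBLAS_STATUS_SUCCESS) {
      std::fprintf(stderr, "cublasHgemm failed with status %d\n",
                   static_cast<int>(status));
      std::abort();
    }
  }
};

}  // namespace sketch

Routing the call through a wrapper like this keeps the half-specific plumbing in one place, so the calling GEMM template only has to choose between the cublasGemmEx path (CUDA >= 8.0) and the hgemm fallback shown here.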