diff --git a/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu
index 068a3e5a9b41c24c330665d6556f0726356d798c..e752b21aaa688f5571b6939a97ca156a71787f6b 100644
--- a/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu
+++ b/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu
@@ -276,9 +276,9 @@ __global__ void InplaceAddReluAddLayerNormKernel(const float16* y_data,
     half tmp_0 = __hdiv(__hsub(save_ptr[save_index], mean_i), std_i);
     half tmp_1 = scale ? __hmul(scale[j], tmp_0) : tmp_0;
 #else
-    half tmp_0 = static_cast<half>(static_cast<float>(save_ptr[save_index]) -
-                                   static_cast<float>(mean_i) /
-                                   static_cast<float>(std_i));
+    half tmp_0 = static_cast<half>((static_cast<float>(save_ptr[save_index]) -
+                                    static_cast<float>(mean_i)) /
+                                   static_cast<float>(std_i));
     half tmp_1 = scale ? static_cast<half>(static_cast<float>(scale[j]) *
                                            static_cast<float>(tmp_0))
                        : tmp_0;
@@ -394,19 +394,16 @@ class FusedFCElementwiseLayerNormOpKernel : public framework::OpKernel<T> {
     auto* out_data = dev_ctx.template Alloc<T>(out, out->numel() * sizeof(T));
 
     auto blas = phi::funcs::GetBlas<phi::GPUContext, T>(dev_ctx);
-    blas.GEMM(false,
-              false,
+    blas.GEMM(CblasNoTrans,
+              CblasNoTrans,
               M,
               N,
               K,
               static_cast<T>(1.0),
               x_data,
-              K,
               w_data,
-              N,
               static_cast<T>(0.0),
-              out_data,
-              N);
+              out_data);
     auto* y = ctx.Input<phi::DenseTensor>("Y");
     auto* bias_0 = ctx.Input<phi::DenseTensor>("Bias0");
     auto* bias_1 = ctx.Input<phi::DenseTensor>("Bias1");
diff --git a/paddle/phi/kernels/funcs/fc_functor.cu b/paddle/phi/kernels/funcs/fc_functor.cu
index 3a749f3444ca5fe38ba8e9c4cce24ea4a1daa9dc..f78def371d81c88cfb9fcc06d86a801212ab9620 100644
--- a/paddle/phi/kernels/funcs/fc_functor.cu
+++ b/paddle/phi/kernels/funcs/fc_functor.cu
@@ -345,19 +345,16 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
         errors::PermissionDenied(
             "Weight padding in fc can not be used in GPU scope."));
     auto blas = phi::funcs::GetBlas<DeviceContext, T>(context);
-    blas.GEMM(false,
-              false,
+    blas.GEMM(CblasNoTrans,
+              CblasNoTrans,
               M,
               N,
               K,
               static_cast<T>(1.0),
               X,
-              K,
               W,
-              N,
               static_cast<T>(0.0),
-              Y,
-              N);
+              Y);
     if (B == NULL) {
       return;
     }