From 91635de3904a0f34e9b8c6aa06cc4e69bf36a41a Mon Sep 17 00:00:00 2001 From: cucuzg Date: Mon, 1 Mar 2021 23:06:44 +0800 Subject: [PATCH] opt matmul and matmul_v2 on kunlun, *test=kunlun (#31326) * add clip_by_norm on kunlun, *test=kunlun * opt matmul and matmul_v2 on kunlun, *test=kunlun --- paddle/fluid/operators/matmul_op_xpu.cc | 25 +++++++--------------- paddle/fluid/operators/matmul_v2_op_xpu.cc | 24 +++++++-------------- 2 files changed, 16 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/operators/matmul_op_xpu.cc b/paddle/fluid/operators/matmul_op_xpu.cc index 8834e95758..f92cff2f6c 100644 --- a/paddle/fluid/operators/matmul_op_xpu.cc +++ b/paddle/fluid/operators/matmul_op_xpu.cc @@ -159,23 +159,14 @@ static void MatMulXPUFunction(const Tensor *x, const Tensor *y, Tensor *out, "XPU fc_fusion kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); } else { - // batch matmul - int x_stride = mat_dim_a.stride_; - int y_stride = mat_dim_b.stride_; - int out_stride = m * n; - for (int i = 0; i < batch_size; ++i) { - const float *x_data = x->data() + x_stride * i; - const float *y_data = y->data() + y_stride * i; - float *out_data = data_c + out_stride * i; - int r = xpu::fc_fusion( - dev_ctx.x_context(), x_data, y_data, out_data, m, n, k, - mat_dim_a.trans_, mat_dim_b.trans_, nullptr, nullptr, nullptr, ldx, - ldy, ldout, alpha, 0, nullptr, xpu::Activation_t::LINEAR); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, - platform::errors::External( - "XPU fc_fusion kernel return wrong value[%d %s]", r, - XPUAPIErrorMsg[r])); - } + int r = xpu::fc_batched( + dev_ctx.x_context(), batch_size, mat_dim_a.trans_, mat_dim_b.trans_, m, + n, k, alpha, x->data(), mat_dim_a.stride_, y->data(), + mat_dim_b.stride_, 0.0, data_c, m * n, nullptr, nullptr); + PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, + platform::errors::External( + "XPU fc_batched kernel return wrong value[%d %s]", r, + XPUAPIErrorMsg[r])); } } diff --git a/paddle/fluid/operators/matmul_v2_op_xpu.cc b/paddle/fluid/operators/matmul_v2_op_xpu.cc index 765a380c6b..dbb1d7bfb0 100644 --- a/paddle/fluid/operators/matmul_v2_op_xpu.cc +++ b/paddle/fluid/operators/matmul_v2_op_xpu.cc @@ -79,22 +79,14 @@ static void MatMulXPUFunction(const Tensor* x, const Tensor* y, Tensor* out, "XPU fc_fusion kernel return wrong value[%d %s]", r, XPUAPIErrorMsg[r])); } else { - // batch matmul - int x_stride = mat_dim_a.stride_; - int y_stride = mat_dim_b.stride_; - int out_stride = m * n; - for (int i = 0; i < batch_size; ++i) { - const float* x_data = x->data() + x_stride * i; - const float* y_data = y->data() + y_stride * i; - float* out_data = data_c + out_stride * i; - int r = xpu::fc( - dev_ctx.x_context(), x_data, y_data, out_data, m, n, k, - mat_dim_a.trans_, mat_dim_b.trans_, nullptr, nullptr, nullptr); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, - platform::errors::External( - "XPU fc_fusion kernel return wrong value[%d %s]", r, - XPUAPIErrorMsg[r])); - } + int r = xpu::fc_batched( + dev_ctx.x_context(), batch_size, mat_dim_a.trans_, mat_dim_b.trans_, m, + n, k, 1.0, x->data(), mat_dim_a.stride_, y->data(), + mat_dim_b.stride_, 0.0, data_c, m * n, nullptr, nullptr); + PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, + platform::errors::External( + "XPU fc_batched kernel return wrong value[%d %s]", r, + XPUAPIErrorMsg[r])); } } -- GitLab