未验证 提交 de436f07 编写于 作者: Z zyfncg 提交者: GitHub

Move matmul_v2 kernel of xpu from fluid to phi (#45446)

* move matmul_v2 kernel of xpu from fluid to phi, test=kunlun

* fix complie bug, test=kunlun

* fix complie bug, test=kunlun

* fix complie bug, test=kunlun
上级 d3ec3fe3
......@@ -44,13 +44,14 @@ class MatMulXPUKernel : public framework::OpKernel<T> {
auto x_dims = x->dims();
auto y_dims = y->dims();
XpuFcInfo fc_info;
GetFCInfo(x_dims, y_dims, trans_x, trans_y, &fc_info);
phi::XpuFcInfo fc_info;
phi::GetFCInfo(x_dims, y_dims, trans_x, trans_y, &fc_info);
auto& dev_ctx =
context.template device_context<paddle::platform::XPUDeviceContext>();
xpu::Context* xpu_ctx = dev_ctx.x_context();
MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, alpha);
phi::MatMulXPUFunction<XPUType>(
xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, alpha);
}
};
......@@ -109,8 +110,8 @@ class MatMulGradXPUKernel : public framework::OpKernel<T> {
xpu::Context* xpu_ctx = dev_ctx.x_context();
XpuFcInfo info_forward;
GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
phi::XpuFcInfo info_forward;
phi::GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
xpu::ctx_guard RAII_GUARD(xpu_ctx);
// begin calculate
const XPUType* a_1 = reinterpret_cast<const XPUType*>(NULL);
......@@ -121,15 +122,15 @@ class MatMulGradXPUKernel : public framework::OpKernel<T> {
: reinterpret_cast<XPUType*>(dx->data<T>());
XPUType* c_2 = (dy == NULL) ? reinterpret_cast<XPUType*>(NULL)
: reinterpret_cast<XPUType*>(dy->data<T>());
XpuFcInfo info_dx;
XpuFcInfo info_dy;
std::tuple<XpuFcInfo,
XpuFcInfo,
phi::XpuFcInfo info_dx;
phi::XpuFcInfo info_dy;
std::tuple<phi::XpuFcInfo,
phi::XpuFcInfo,
const XPUType*,
const XPUType*,
const XPUType*,
const XPUType*>
fc_info = MatmulGradFcInfo(xpu_ctx,
fc_info = phi::MatmulGradFcInfo(xpu_ctx,
&RAII_GUARD,
info_forward,
transpose_x,
......@@ -139,10 +140,10 @@ class MatMulGradXPUKernel : public framework::OpKernel<T> {
dout_ptr);
std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
if (dx) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, alpha);
phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, alpha);
}
if (dy) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, alpha);
phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, alpha);
}
}
};
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef PADDLE_WITH_XPU
#include <string>
#include <vector>
#include "paddle/fluid/operators/matmul_v2_op.h"
#include "paddle/fluid/operators/xpu_api_wrapper.h"
namespace paddle {
namespace operators {
template <typename T>
class MatMulV2XPUKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* out = ctx.Output<Tensor>("Out");
bool trans_x = ctx.Attr<bool>("trans_x");
bool trans_y = ctx.Attr<bool>("trans_y");
out->mutable_data<T>(ctx.GetPlace());
const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y->data<T>());
XPUType* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());
auto x_dims = x->dims();
auto y_dims = y->dims();
XpuFcInfo fc_info;
GetFCInfo(x_dims, y_dims, trans_x, trans_y, &fc_info);
auto& dev_ctx =
ctx.template device_context<paddle::platform::XPUDeviceContext>();
xpu::Context* xpu_ctx = dev_ctx.x_context();
MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
}
};
template <typename T>
class MatMulV2XPUGradKernel : public framework::OpKernel<T> {
using XPUType = typename XPUTypeTrait<T>::Type;
public:
void Compute(const framework::ExecutionContext& context) const override {
bool transpose_x = context.Attr<bool>("trans_x");
bool transpose_y = context.Attr<bool>("trans_y");
auto x = *context.Input<framework::Tensor>("X");
auto y = *context.Input<framework::Tensor>("Y");
auto dout =
*context.Input<framework::Tensor>(framework::GradVarName("Out"));
auto* dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
auto* dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));
if (dx) {
dx->mutable_data<T>(context.GetPlace());
}
if (dy) {
dy->mutable_data<T>(context.GetPlace());
}
auto& dev_ctx =
context.template device_context<paddle::platform::XPUDeviceContext>();
const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout.data<T>());
const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());
xpu::Context* xpu_ctx = dev_ctx.x_context();
XpuFcInfo info_forward;
GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
xpu::ctx_guard RAII_GUARD(xpu_ctx);
// begin calculate
const XPUType* a_1 = reinterpret_cast<const XPUType*>(NULL);
const XPUType* b_1 = reinterpret_cast<const XPUType*>(NULL);
const XPUType* a_2 = reinterpret_cast<const XPUType*>(NULL);
const XPUType* b_2 = reinterpret_cast<const XPUType*>(NULL);
XPUType* c_1 = (dx == NULL) ? reinterpret_cast<XPUType*>(NULL)
: reinterpret_cast<XPUType*>(dx->data<T>());
XPUType* c_2 = (dy == NULL) ? reinterpret_cast<XPUType*>(NULL)
: reinterpret_cast<XPUType*>(dy->data<T>());
XpuFcInfo info_dx;
XpuFcInfo info_dy;
std::tuple<XpuFcInfo,
XpuFcInfo,
const XPUType*,
const XPUType*,
const XPUType*,
const XPUType*>
fc_info = MatmulGradFcInfo(xpu_ctx,
&RAII_GUARD,
info_forward,
transpose_x,
transpose_y,
x_ptr,
y_ptr,
dout_ptr);
std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
if (dx) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
}
if (dy) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_XPU_KERNEL(matmul_v2,
ops::MatMulV2XPUKernel<float>,
ops::MatMulV2XPUKernel<plat::float16>);
REGISTER_OP_XPU_KERNEL(matmul_v2_grad,
ops::MatMulV2XPUGradKernel<float>,
ops::MatMulV2XPUGradKernel<plat::float16>);
#endif
......@@ -59,13 +59,14 @@ class MulXPUKernel : public framework::OpKernel<T> {
auto x_dims = x_matrix.dims();
auto y_dims = y_matrix.dims();
XpuFcInfo fc_info;
GetFCInfo(x_dims, y_dims, trans_a, trans_b, &fc_info);
phi::XpuFcInfo fc_info;
phi::GetFCInfo(x_dims, y_dims, trans_a, trans_b, &fc_info);
auto& dev_ctx =
context.template device_context<paddle::platform::XPUDeviceContext>();
xpu::Context* xpu_ctx = dev_ctx.x_context();
MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
phi::MatMulXPUFunction<XPUType>(
xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
}
};
......@@ -99,8 +100,9 @@ class MulGradXPUKernel : public framework::OpKernel<T> {
}
auto& dev_ctx = ctx.template device_context<DeviceContext>();
XpuFcInfo info_forward;
GetFCInfo(x_matrix.dims(), y_matrix.dims(), false, false, &info_forward);
phi::XpuFcInfo info_forward;
phi::GetFCInfo(
x_matrix.dims(), y_matrix.dims(), false, false, &info_forward);
const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout->data<T>());
const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
......@@ -121,15 +123,15 @@ class MulGradXPUKernel : public framework::OpKernel<T> {
(dy == NULL)
? reinterpret_cast<XPUType*>(NULL)
: reinterpret_cast<XPUType*>(dy->mutable_data<T>(ctx.GetPlace()));
XpuFcInfo info_dx;
XpuFcInfo info_dy;
std::tuple<XpuFcInfo,
XpuFcInfo,
phi::XpuFcInfo info_dx;
phi::XpuFcInfo info_dy;
std::tuple<phi::XpuFcInfo,
phi::XpuFcInfo,
const XPUType*,
const XPUType*,
const XPUType*,
const XPUType*>
fc_info = MatmulGradFcInfo(xpu_ctx,
fc_info = phi::MatmulGradFcInfo(xpu_ctx,
&RAII_GUARD,
info_forward,
false,
......@@ -139,10 +141,10 @@ class MulGradXPUKernel : public framework::OpKernel<T> {
dout_ptr);
std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
if (dx) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
}
if (dy) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
}
}
};
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/matmul_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h"
namespace phi {
template <typename T, typename Context>
void MatmulGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
bool transpose_x,
bool transpose_y,
DenseTensor* dx,
DenseTensor* dy) {
using XPUType = typename XPUTypeTrait<T>::Type;
if (dx) {
dev_ctx.template Alloc<T>(dx);
}
if (dy) {
dev_ctx.template Alloc<T>(dy);
}
const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout.data<T>());
const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());
xpu::Context* xpu_ctx = dev_ctx.x_context();
XpuFcInfo info_forward;
GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
xpu::ctx_guard RAII_GUARD(xpu_ctx);
// begin calculate
const XPUType* a_1 = reinterpret_cast<const XPUType*>(NULL);
const XPUType* b_1 = reinterpret_cast<const XPUType*>(NULL);
const XPUType* a_2 = reinterpret_cast<const XPUType*>(NULL);
const XPUType* b_2 = reinterpret_cast<const XPUType*>(NULL);
XPUType* c_1 = (dx == NULL) ? reinterpret_cast<XPUType*>(NULL)
: reinterpret_cast<XPUType*>(dx->data<T>());
XPUType* c_2 = (dy == NULL) ? reinterpret_cast<XPUType*>(NULL)
: reinterpret_cast<XPUType*>(dy->data<T>());
XpuFcInfo info_dx;
XpuFcInfo info_dy;
std::tuple<XpuFcInfo,
XpuFcInfo,
const XPUType*,
const XPUType*,
const XPUType*,
const XPUType*>
fc_info = MatmulGradFcInfo(xpu_ctx,
&RAII_GUARD,
info_forward,
transpose_x,
transpose_y,
x_ptr,
y_ptr,
dout_ptr);
std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
if (dx) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
}
if (dy) {
MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
}
}
} // namespace phi
PD_REGISTER_KERNEL(matmul_grad,
XPU,
ALL_LAYOUT,
phi::MatmulGradKernel,
float,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/matmul_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h"
namespace phi {
template <typename T, typename Context>
void MatmulKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
bool transpose_x,
bool transpose_y,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(out);
const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());
XPUType* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());
auto x_dims = x.dims();
auto y_dims = y.dims();
XpuFcInfo fc_info;
GetFCInfo(x_dims, y_dims, transpose_x, transpose_y, &fc_info);
xpu::Context* xpu_ctx = dev_ctx.x_context();
MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
}
} // namespace phi
PD_REGISTER_KERNEL(
matmul, XPU, ALL_LAYOUT, phi::MatmulKernel, float, phi::dtype::float16) {}
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册