// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE(review): the text this file was restored from had lost every
// angle-bracketed token (template parameter/argument lists and the targets of
// the two standard #include lines). They are filled in below as <string>,
// <vector>, framework::Tensor, T/XPUType and float/float16 -- confirm against
// the build before merging.

#ifdef PADDLE_WITH_XPU

#include <string>
#include <tuple>
#include <vector>

#include "paddle/fluid/operators/matmul_v2_op.h"
#include "paddle/fluid/operators/xpu_api_wrapper.h"

namespace paddle {
namespace operators {

// Forward XPU kernel for the matmul_v2 operator.
//
// Reads inputs "X" and "Y" and the boolean attributes "trans_x" / "trans_y",
// allocates output "Out" on the execution place, and hands the raw device
// pointers to MatMulXPUFunction together with the XpuFcInfo produced by
// GetFCInfo from the two input shapes.
template <typename T>
class MatMulV2XPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<framework::Tensor>("X");
    auto* y = ctx.Input<framework::Tensor>("Y");
    auto* out = ctx.Output<framework::Tensor>("Out");
    const bool trans_x = ctx.Attr<bool>("trans_x");
    const bool trans_y = ctx.Attr<bool>("trans_y");

    // Allocate the output before taking its data pointer.
    out->mutable_data<T>(ctx.GetPlace());

    const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
    const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y->data<T>());
    XPUType* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());

    // Describe the multiplication from the operand shapes and transpose flags.
    XpuFcInfo fc_info;
    GetFCInfo(x->dims(), y->dims(), trans_x, trans_y, &fc_info);

    auto& dev_ctx =
        ctx.template device_context<paddle::platform::XPUDeviceContext>();
    xpu::Context* xpu_ctx = dev_ctx.x_context();

    // The trailing 1.0f is presumably the scaling factor (alpha) -- confirm
    // against MatMulXPUFunction in xpu_api_wrapper.h.
    MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
  }
};

// Backward XPU kernel for the matmul_v2 operator.
//
// Reads "X", "Y" and the gradient of "Out"; writes the gradients of "X" and
// "Y". Either output gradient may be absent (null), in which case it is
// neither allocated nor computed.
template <typename T>
class MatMulV2XPUGradKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const bool transpose_x = context.Attr<bool>("trans_x");
    const bool transpose_y = context.Attr<bool>("trans_y");

    // Bind by reference: the inputs are only read, so copying the tensor
    // objects is unnecessary.
    const auto& x = *context.Input<framework::Tensor>("X");
    const auto& y = *context.Input<framework::Tensor>("Y");
    const auto& dout =
        *context.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
    auto* dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));

    if (dx) {
      dx->mutable_data<T>(context.GetPlace());
    }
    if (dy) {
      dy->mutable_data<T>(context.GetPlace());
    }

    auto& dev_ctx =
        context.template device_context<paddle::platform::XPUDeviceContext>();
    xpu::Context* xpu_ctx = dev_ctx.x_context();

    const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout.data<T>());
    const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
    const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());

    XpuFcInfo info_forward;
    GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);

    // Handed to MatmulGradFcInfo below; presumably it owns any scratch
    // buffers that call creates, so it must outlive both multiplications --
    // confirm against xpu_api_wrapper.h.
    xpu::ctx_guard RAII_GUARD(xpu_ctx);

    // Destination buffers; left null when the matching gradient is not
    // requested.
    XPUType* c_1 = dx ? reinterpret_cast<XPUType*>(dx->data<T>()) : nullptr;
    XPUType* c_2 = dy ? reinterpret_cast<XPUType*>(dy->data<T>()) : nullptr;

    // Operands and descriptors of the two backward multiplications:
    // (a_1, b_1, info_dx) produce dX and (a_2, b_2, info_dy) produce dY.
    const XPUType* a_1 = nullptr;
    const XPUType* b_1 = nullptr;
    const XPUType* a_2 = nullptr;
    const XPUType* b_2 = nullptr;
    XpuFcInfo info_dx;
    XpuFcInfo info_dy;
    std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) =
        MatmulGradFcInfo(xpu_ctx,
                         &RAII_GUARD,
                         info_forward,
                         transpose_x,
                         transpose_y,
                         x_ptr,
                         y_ptr,
                         dout_ptr);

    if (dx) {
      MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
    }
    if (dy) {
      MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_XPU_KERNEL(matmul_v2,
                       ops::MatMulV2XPUKernel<float>,
                       ops::MatMulV2XPUKernel<plat::float16>);
REGISTER_OP_XPU_KERNEL(matmul_v2_grad,
                       ops::MatMulV2XPUGradKernel<float>,
                       ops::MatMulV2XPUGradKernel<plat::float16>);
#endif