diff --git a/paddle/fluid/operators/abs_op_mlu.cc b/paddle/fluid/operators/abs_op_mlu.cc deleted file mode 100644 index e635b9547b4fca4c2f82d129dd3b61f61e45a92f..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/abs_op_mlu.cc +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the Licnse. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class AbsMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - - output->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - - MLUCnnl::Abs(ctx, - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -template -class AbsGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*x); - MLUCnnlOpTensorDesc mul_op_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - - phi::DenseTensor sign_x; - sign_x.mutable_data(x->dims(), ctx.GetPlace()); - - MLUCnnl::Sign(ctx, - input_desc.get(), - GetBasePtr(x), - input_desc.get(), - GetBasePtr(&sign_x)); - MLUCnnl::OpTensor(ctx, - mul_op_desc.get(), - input_desc.get(), - GetBasePtr(&sign_x), - input_desc.get(), - GetBasePtr(dout), - input_desc.get(), - GetBasePtr(dx), - ToCnnlDataType()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(abs, - ops::AbsMLUKernel, - ops::AbsMLUKernel); - -REGISTER_OP_MLU_KERNEL(abs_grad, - ops::AbsGradMLUKernel, - ops::AbsGradMLUKernel); diff --git a/paddle/fluid/operators/activation_op_mlu.cc b/paddle/fluid/operators/activation_op_mlu.cc deleted file mode 100644 index f26af0a5b9743eed88be4e5fe7baf9f8c6d1fd3d..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/activation_op_mlu.cc +++ /dev/null @@ -1,612 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the Licnse. */ - -#include -#include - -#include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ActivationMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; - - output->mutable_data(ctx.GetPlace()); - - MLUCnnlActivationDesc act_desc(act_mode, alpha); - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - - MLUCnnl::Active(ctx, - act_desc.get(), - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -// For gelu, leaky_relu -template -class ActivationGradMLUKernelV1 : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlActivationDesc act_desc(act_mode, alpha); - MLUCnnl::ActiveGrad(ctx, - act_desc.get(), - nullptr, - nullptr, - nullptr, - nullptr, - dout_desc.get(), - GetBasePtr(dout), - x_desc.get(), - GetBasePtr(x), - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -// For tanh, sigmoid -template -class ActivationGradMLUKernelV2 : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc out_desc(*out); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlActivationDesc act_desc(act_mode, alpha); - MLUCnnl::ActiveGrad(ctx, - act_desc.get(), - nullptr, - nullptr, - out_desc.get(), - GetBasePtr(out), - dout_desc.get(), - GetBasePtr(dout), - nullptr, - nullptr, - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -// For relu, relu6 -template -class ActivationGradMLUKernelV3 : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc out_desc(*out); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlActivationDesc act_desc(act_mode, alpha); - MLUCnnl::ActiveGrad(ctx, - act_desc.get(), - nullptr, - nullptr, - nullptr, - nullptr, - dout_desc.get(), - GetBasePtr(dout), - out_desc.get(), - GetBasePtr(out), - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -// For sqrt -template -class SqrtMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto place = ctx.GetPlace(); - - out->mutable_data(place); - - MLUCnnlTensorDesc input_desc(*x); - MLUCnnlTensorDesc output_desc(*out); - - cnnlComputationPreference_t prefer = CNNL_COMPUTATION_FAST; - MLUCnnl::Sqrt(ctx, - prefer, - input_desc.get(), - GetBasePtr(x), - output_desc.get(), - GetBasePtr(out)); - } -}; - -template -class SqrtGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto place = ctx.GetPlace(); - - dx->mutable_data(place); - - MLUCnnlTensorDesc data_desc(*out); - MLUCnnl::SqrtGrad(ctx, - data_desc.get(), - GetBasePtr(out), - GetBasePtr(dout), - GetBasePtr(dx)); - } -}; - -// CNNL_LOG_E = 0, -// CNNL_LOG_2 = 1, -// CNNL_LOG_10 = 2, -template -class LogMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - output->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - cnnlComputationPreference_t prefer = CNNL_COMPUTATION_HIGH_PRECISION; - - MLUCnnl::Log(ctx, - prefer, - Log_base, - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -template -class ExpMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - output->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - cnnlComputationPreference_t prefer = CNNL_COMPUTATION_HIGH_PRECISION; - - MLUCnnl::Exp(ctx, - prefer, - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -template -class ExpGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - dx->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlTensorDesc out_desc(*out); - - MLUCnnlOpTensorDesc op_tensor_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - - MLUCnnl::OpTensor(ctx, - op_tensor_desc.get(), - dout_desc.get(), - GetBasePtr(dout), - out_desc.get(), - GetBasePtr(out), - dx_desc.get(), - GetBasePtr(dx), - ToCnnlDataType()); - } -}; - -template -class HardSwishMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - output->mutable_data(ctx.GetPlace()); - float threshold = ctx.Attr("threshold"); - float scale = ctx.Attr("scale"); - float offset = ctx.Attr("offset"); - PADDLE_ENFORCE_EQ(threshold, - 6.0f, - platform::errors::External( - "Not support threshold [%f] in MLU", threshold)); - PADDLE_ENFORCE_EQ( - scale, - 6.0f, - platform::errors::External("Not support scale [%f] in MLU", scale)); - PADDLE_ENFORCE_EQ( - offset, - 3.0f, - platform::errors::External("Not support offset [%f] in MLU", offset)); - - MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_HARDSWISH, - 1.0f /*ceof useless*/); - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - - MLUCnnl::Active(ctx, - act_desc.get(), - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -template -class HardSwishGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - float threshold = ctx.Attr("threshold"); - float scale = ctx.Attr("scale"); - float offset = ctx.Attr("offset"); - PADDLE_ENFORCE_EQ(threshold, - 6.0f, - platform::errors::External( - "Not support threshold [%f] in MLU", threshold)); - PADDLE_ENFORCE_EQ( - scale, - 6.0f, - platform::errors::External("Not support scale [%f] in MLU", scale)); - PADDLE_ENFORCE_EQ( - offset, - 3.0f, - platform::errors::External("Not support offset [%f] in MLU", offset)); - auto* out = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc out_desc(*out); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_HARDSWISH, - 1.0f /*ceof useless*/); - MLUCnnl::ActiveGrad(ctx, - act_desc.get(), - nullptr, - nullptr, - nullptr, - nullptr, - dout_desc.get(), - GetBasePtr(dout), - out_desc.get(), - GetBasePtr(out), - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -template -class HardSigmoidMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - float slope = ctx.Attr("slope"); - float offset = ctx.Attr("offset"); - output->mutable_data(ctx.GetPlace()); - - MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_HARDSIGMOID, - 1.0f /*ceof useless*/, - 1.0f /*sliced_dim useless*/, - slope, - offset); - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - - MLUCnnl::Active(ctx, - act_desc.get(), - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -template -class HardSigmoidGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* x = ctx.Input("X"); - auto* dx = ctx.Output(framework::GradVarName("X")); - float slope = ctx.Attr("slope"); - float offset = ctx.Attr("offset"); - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlActivationDesc act_desc(CNNL_ACTIVATION_HARDSIGMOID, - 1.0f /*ceof useless*/, - 1.0f /*sliced_dim useless*/, - slope, - offset); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnl::ActiveGrad(ctx, - act_desc.get(), - nullptr, - nullptr, - nullptr, - nullptr, - dout_desc.get(), - GetBasePtr(dout), - x_desc.get(), - GetBasePtr(x), - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -template -class FloorMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - output->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - - MLUCnnl::Floor(ctx, - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; - -template -class ReciprocalMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto place = ctx.GetPlace(); - out->mutable_data(place); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::Reciprocal( - ctx, x_desc.get(), GetBasePtr(x), out_desc.get(), GetBasePtr(out)); - } -}; - -template -class ReciprocalGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto place = ctx.GetPlace(); - dx->mutable_data(place); - phi::DenseTensor square_out; - square_out.Resize(out->dims()); - square_out.mutable_data(place); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlTensorDesc square_out_desc(square_out); - MLUCnnl::Square(ctx, - out_desc.get(), - GetBasePtr(out), - square_out_desc.get(), - GetBasePtr(&square_out)); - cnnlOpTensorDesc_t op_tensor_op = CNNL_OP_TENSOR_MUL; - cnnlDataType_t op_tensor_comp_type = CNNL_DTYPE_FLOAT; - cnnlNanPropagation_t op_tensor_nan_opt = CNNL_NOT_PROPAGATE_NAN; - MLUCnnlOpTensorDesc op_tensor_desc( - op_tensor_op, op_tensor_comp_type, op_tensor_nan_opt); - float alpha1_float = -1; - float alpha2_float = 1; - float beta_float = 0; - MLUCnnl::OpTensor(ctx, - op_tensor_desc.get(), - dout_desc.get(), - GetBasePtr(dout), - square_out_desc.get(), - GetBasePtr(&square_out), - dx_desc.get(), - GetBasePtr(dx), - op_tensor_comp_type, - alpha1_float, - alpha2_float, - beta_float); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -// reciprocal -REGISTER_OP_MLU_KERNEL( - reciprocal, - ops::ReciprocalMLUKernel, - ops::ReciprocalMLUKernel); - -REGISTER_OP_MLU_KERNEL( - reciprocal_grad, - ops::ReciprocalGradMLUKernel, - ops::ReciprocalGradMLUKernel); -// relu -REGISTER_OP_MLU_KERNEL( - relu, - ops::ActivationMLUKernel, - ops::ActivationMLUKernel); -REGISTER_OP_MLU_KERNEL( - relu_grad, - ops::ActivationGradMLUKernelV3, - ops::ActivationGradMLUKernelV3); - -// relu6 -REGISTER_OP_MLU_KERNEL( - relu6, - ops::ActivationMLUKernel, - ops::ActivationMLUKernel); -REGISTER_OP_MLU_KERNEL( - relu6_grad, - ops::ActivationGradMLUKernelV3, - ops::ActivationGradMLUKernelV3); - -// sigmoid -REGISTER_OP_MLU_KERNEL(sigmoid, - ops::ActivationMLUKernel, - ops::ActivationMLUKernel); -REGISTER_OP_MLU_KERNEL( - sigmoid_grad, - ops::ActivationGradMLUKernelV2, - ops::ActivationGradMLUKernelV2); - -// tanh -REGISTER_OP_MLU_KERNEL( - tanh, - ops::ActivationMLUKernel, - ops::ActivationMLUKernel); -REGISTER_OP_MLU_KERNEL( - tanh_grad, - ops::ActivationGradMLUKernelV2, - ops::ActivationGradMLUKernelV2); - -// gelu -REGISTER_OP_MLU_KERNEL( - gelu, - ops::ActivationMLUKernel, - ops::ActivationMLUKernel); -REGISTER_OP_MLU_KERNEL( - gelu_grad, - ops::ActivationGradMLUKernelV1, - ops::ActivationGradMLUKernelV1); - -// leaky_relu -REGISTER_OP_MLU_KERNEL( - leaky_relu, - ops::ActivationMLUKernel, - ops::ActivationMLUKernel); -REGISTER_OP_MLU_KERNEL( - leaky_relu_grad, - ops::ActivationGradMLUKernelV1, - ops::ActivationGradMLUKernelV1); - -// sqrt -REGISTER_OP_MLU_KERNEL(sqrt, - ops::SqrtMLUKernel, - ops::SqrtMLUKernel); -REGISTER_OP_MLU_KERNEL(sqrt_grad, - ops::SqrtGradMLUKernel, - ops::SqrtGradMLUKernel); - -// log log2 log10 -REGISTER_OP_MLU_KERNEL( - log, - ops::LogMLUKernel, - ops::LogMLUKernel); - -REGISTER_OP_MLU_KERNEL( - log2, - ops::LogMLUKernel, - ops::LogMLUKernel); - -REGISTER_OP_MLU_KERNEL( - log10, - ops::LogMLUKernel, - ops::LogMLUKernel); - -REGISTER_OP_MLU_KERNEL(exp, - ops::ExpMLUKernel, - ops::ExpMLUKernel); - -REGISTER_OP_MLU_KERNEL(exp_grad, - ops::ExpGradMLUKernel, - ops::ExpGradMLUKernel); - -REGISTER_OP_MLU_KERNEL(hard_swish, - ops::HardSwishMLUKernel, - ops::HardSwishMLUKernel); - -REGISTER_OP_MLU_KERNEL(hard_swish_grad, - ops::HardSwishGradMLUKernel, - ops::HardSwishGradMLUKernel); - -REGISTER_OP_MLU_KERNEL(hard_sigmoid, - ops::HardSigmoidMLUKernel, - ops::HardSigmoidMLUKernel); - -REGISTER_OP_MLU_KERNEL( - hard_sigmoid_grad, - ops::HardSigmoidGradMLUKernel, - ops::HardSigmoidGradMLUKernel); - -REGISTER_OP_MLU_KERNEL(floor, - ops::FloorMLUKernel, - ops::FloorMLUKernel); diff --git a/paddle/fluid/operators/arg_max_op_mlu.cc b/paddle/fluid/operators/arg_max_op_mlu.cc deleted file mode 100644 index 6d61526bc0c9614763eb7ad7023ac63610caa7a2..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/arg_max_op_mlu.cc +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ArgMaxMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto axis = static_cast(ctx.Attr("axis")); - auto dtype = ctx.Attr("dtype"); - const bool& flatten = ctx.Attr("flatten"); - - if (x->numel() == 0) return; - PADDLE_ENFORCE_EQ( - (dtype == 2 || dtype == 3), - true, - platform::errors::InvalidArgument( - "The attribute of dtype in argmax op must be [%s] or [%s], " - "but " - "received [%s]", - paddle::framework::DataTypeToString( - framework::proto::VarType::INT64), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT32), - paddle::framework::DataTypeToString( - static_cast(dtype)))); - - if (axis < 0) { - framework::DDim x_dims; - x_dims = x->dims(); - axis += x_dims.size(); - } - - phi::DenseTensor flatten_x(x->type()); - flatten_x.ShareDataWith(*x); - if (flatten) { - flatten_x.Resize(phi::make_ddim({x->numel()})); - // if flatten, the axis just as 0 - axis = 0; - } - std::vector reduce_dims; - reduce_dims.push_back(axis); - - auto out_dims = out->dims(); - int out_count = out_dims[0]; - for (int i = 1; i < out_dims.size(); i++) { - out_count = out_count * out_dims[i]; - } - size_t indices_size_inbytes = out_count * sizeof(int32_t); - auto& dev_ctx = ctx.template device_context(); - phi::DenseTensor value_out = - ctx.AllocateTmpTensor(out->dims(), dev_ctx); - MLUCnnlTensorDesc value_out_desc(value_out); - MLUCnnlTensorDesc input_desc( - flatten_x, CNNL_LAYOUT_ARRAY, ToCnnlDataType(flatten_x.dtype())); - MLUCnnlReduceDesc reduction_desc(reduce_dims, - CNNL_REDUCE_MAX, - ToCnnlDataType(), - CNNL_NOT_PROPAGATE_NAN, - CNNL_REDUCE_ONLY_INDICES, - CNNL_32BIT_INDICES); - - if (dtype == 2) { - out->template mutable_data(ctx.GetPlace()); - MLUCnnl::Reduce(ctx, - true /*need_workspace*/, - reduction_desc.get(), - nullptr, - input_desc.get(), - GetBasePtr(&flatten_x), - indices_size_inbytes /*indices_size*/, - GetBasePtr(out), - nullptr, - value_out_desc.get(), - GetBasePtr(&value_out)); - } else { - out->template mutable_data(ctx.GetPlace()); - phi::DenseTensor out_int32 = - ctx.AllocateTmpTensor(out->dims(), - dev_ctx); - MLUCnnl::Reduce(ctx, - true /*need_workspace*/, - reduction_desc.get(), - nullptr, - input_desc.get(), - GetBasePtr(&flatten_x), - indices_size_inbytes /*indices_size*/, - GetBasePtr(&out_int32), - nullptr, - value_out_desc.get(), - GetBasePtr(&value_out)); - - // cast indices type to int64 - MLUCnnlTensorDesc out_int32_desc(out_int32); - MLUCnnlTensorDesc cast_output_desc(*out); - cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64); - MLUCnnl::Cast(ctx, - cast_type, - out_int32_desc.get(), - GetBasePtr(&out_int32), - cast_output_desc.get(), - GetBasePtr(out)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_MLU_KERNEL(arg_max, - ops::ArgMaxMLUKernel, - ops::ArgMaxMLUKernel, - ops::ArgMaxMLUKernel); diff --git a/paddle/fluid/operators/argsort_op_mlu.cc b/paddle/fluid/operators/argsort_op_mlu.cc deleted file mode 100644 index 36cf2a81a2b0fa089b74629958e4cabaa22529e0..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/argsort_op_mlu.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ArgsortMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); - const auto& place = ctx.GetPlace(); - - const auto& sorted = true; - const bool descending = ctx.Attr("descending"); - - // axis < 0, cacluate the real axis - int axis = static_cast(ctx.Attr("axis")); - if (axis < 0) { - const auto& in_dims = input->dims(); - axis += in_dims.size(); - } - - auto in_dims = input->dims(); - size_t k = in_dims[axis]; - - output->mutable_data(place); - indices->mutable_data(place); - - // cnnl only support int32/int16 type of indices - phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); - indices_int32.Resize(indices->dims()); - indices_int32.mutable_data(place); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc values_output_desc(*output); - MLUCnnlTensorDesc indices_int32_desc(indices_int32); - MLUCnnl::TopK(ctx, - k, - axis, - descending, - sorted, - input_desc.get(), - GetBasePtr(input), - values_output_desc.get(), - GetBasePtr(output), - indices_int32_desc.get(), - GetBasePtr(&indices_int32)); - - // cast indices type to int64 - MLUCnnlTensorDesc cast_output_desc(*indices); - cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64); - MLUCnnl::Cast(ctx, - cast_type, - indices_int32_desc.get(), - GetBasePtr(&indices_int32), - cast_output_desc.get(), - GetBasePtr(indices)); - } -}; - -template -class ArgsortGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* indices = ctx.Input("Indices"); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dout = ctx.Input(framework::GradVarName("Out")); - int axis = ctx.Attr("axis"); - dx->mutable_data(ctx.GetPlace()); - - auto in_dims = indices->dims(); - axis = (axis < 0) ? (in_dims.size() + axis) : axis; - if (dout->numel() == 0) return; - - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnlTensorDesc indices_desc(*indices); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnl::ScatterFunctor(ctx, - dx_desc.get(), - GetBasePtr(dx), - dout_desc.get(), - GetBasePtr(dout), - indices_desc.get(), - GetBasePtr(indices), - axis); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(argsort, - ops::ArgsortMLUKernel, - ops::ArgsortMLUKernel, - ops::ArgsortMLUKernel, - ops::ArgsortMLUKernel, - ops::ArgsortMLUKernel, - ops::ArgsortMLUKernel); - -REGISTER_OP_MLU_KERNEL(argsort_grad, - ops::ArgsortGradMLUKernel, - ops::ArgsortGradMLUKernel, - ops::ArgsortGradMLUKernel, - ops::ArgsortGradMLUKernel, - ops::ArgsortGradMLUKernel, - ops::ArgsortGradMLUKernel); diff --git a/paddle/fluid/operators/assign_op_mlu.cc b/paddle/fluid/operators/assign_op_mlu.cc deleted file mode 100644 index 5a4c29447b0bd1d538c3bbd1b297a33764b8d8ae..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/assign_op_mlu.cc +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/operators/assign_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/platform/float16.h" - -namespace paddle { -namespace operators { -template -class AssignMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::Assign( - ctx, x_desc.get(), GetBasePtr(x), out_desc.get(), GetBasePtr(out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(assign, - ops::AssignMLUKernel, - ops::AssignMLUKernel, - ops::AssignMLUKernel, - ops::AssignMLUKernel) diff --git a/paddle/fluid/operators/assign_value_op_mlu.cc b/paddle/fluid/operators/assign_value_op_mlu.cc deleted file mode 100644 index f408a1a7b40de1cd6aef95cc5ac7419929ea0d7d..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/assign_value_op_mlu.cc +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/assign_value_op.h" - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(assign_value, - ops::AssignValueKernel, - ops::AssignValueKernel, - ops::AssignValueKernel, - ops::AssignValueKernel); diff --git a/paddle/fluid/operators/batch_norm_op_mlu.cc b/paddle/fluid/operators/batch_norm_op_mlu.cc deleted file mode 100644 index 77397552333d4bb7e9b093801bd4d5cf3fd3c357..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/batch_norm_op_mlu.cc +++ /dev/null @@ -1,336 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/amp/fp16_type_traits.h" -#include "paddle/fluid/operators/batch_norm_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class MLUBatchNormOpKernel : public framework::OpKernel { - using MPDType = typename details::MPTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext &ctx) const override { - const auto &place = ctx.GetPlace(); - const float epsilon = ctx.Attr("epsilon"); - float momentum = ctx.Attr("momentum"); - const bool is_test = ctx.Attr("is_test"); - const bool use_global_stats = ctx.Attr("use_global_stats"); - const bool trainable_stats = ctx.Attr("trainable_statistics"); - bool test_mode = is_test && (!trainable_stats); - - bool global_stats = test_mode || use_global_stats; - - const std::string data_layout_str = ctx.Attr("data_layout"); - DataLayout data_layout = phi::StringToDataLayout(data_layout_str); - - const auto *x = ctx.Input("X"); - const auto &x_dims = x->dims(); - PADDLE_ENFORCE_GE( - x_dims.size(), - 2, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be larger than 1." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - PADDLE_ENFORCE_LE( - x_dims.size(), - 5, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be less than 6." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - const int N = x_dims[0]; - const int C = - (data_layout == DataLayout::kNCHW ? x_dims[1] - : x_dims[x_dims.size() - 1]); - const int sample_size = x->numel() / N / C; - - const auto *running_mean = ctx.Input("Mean"); - const auto *running_var = ctx.Input("Variance"); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - - auto *y = ctx.Output("Y"); - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); - - // alloc memory - y->mutable_data(place); - mean_out->mutable_data(place); - variance_out->mutable_data(place); - saved_mean->mutable_data(place); - saved_variance->mutable_data(place); - - phi::DenseTensor transformed_x; - phi::DenseTensor transformed_y; - const int transformed_dim_size = 4; - const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C}; - MLUCnnlTensorDesc transformed_desc(transformed_dim_size, - transformed_shape, - ToCnnlDataType(), - CNNL_LAYOUT_NHWC); - MLUCnnlTensorDesc others_input_desc(*scale); - // input dimension is 2 and the format is NCHW. The input can be regarded as - // NHWC format. Don't need to transpose. - bool need_transpose = - (data_layout == DataLayout::kNCHW && x_dims.size() != 2); - if (need_transpose) { - auto &dev_ctx = ctx.template device_context(); - transformed_x = ctx.AllocateTmpTensor( - framework::DDim(transformed_shape, transformed_dim_size), dev_ctx); - transformed_y = ctx.AllocateTmpTensor( - framework::DDim(transformed_shape, transformed_dim_size), dev_ctx); - - const int x_reshaped[] = {N, C, sample_size, 1}; - MLUCnnlTensorDesc x_reshaped_desc( - transformed_dim_size, x_reshaped, ToCnnlDataType()); - const std::vector perm = {0, 2, 3, 1}; - MLUCnnl::Transpose(ctx, - perm, - transformed_dim_size, - x_reshaped_desc.get(), - GetBasePtr(x), - transformed_desc.get(), - GetBasePtr(&transformed_x)); - } else { - transformed_x = *x; - transformed_y = *y; - } - - if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); - phi::DenseTensor mom_cpu; - framework::TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu); - momentum = mom_cpu.data()[0]; - } - - MLUCnnl::FusedBatchNorm(ctx, - !global_stats, - transformed_desc.get(), - GetBasePtr(&transformed_x), - others_input_desc.get(), - GetBasePtr(scale), - GetBasePtr(bias), - GetBasePtr(running_mean), - GetBasePtr(running_var), - epsilon, - momentum, - transformed_desc.get(), - GetBasePtr(&transformed_y), - GetBasePtr(mean_out), - GetBasePtr(variance_out), - GetBasePtr(saved_mean), - GetBasePtr(saved_variance)); - - if (need_transpose) { - const int y_reshaped[] = {N, C, sample_size, 1}; - MLUCnnlTensorDesc y_reshaped_desc( - transformed_dim_size, y_reshaped, ToCnnlDataType()); - const std::vector perm = {0, 3, 1, 2}; - MLUCnnl::Transpose(ctx, - perm, - transformed_y.dims().size(), - transformed_desc.get(), - GetBasePtr(&transformed_y), - y_reshaped_desc.get(), - GetBasePtr(y)); - } - } -}; - -template -class MLUBatchNormGradOpKernel : public framework::OpKernel { - using MPDType = typename details::MPTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext &ctx) const override { - const auto *x = ctx.Input("X"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *saved_mean = ctx.Input("SavedMean"); - // SavedVariance have been reverted in forward operator - const auto *saved_inv_variance = - ctx.Input("SavedVariance"); - const std::string data_layout_str = ctx.Attr("data_layout"); - bool use_global_stats = ctx.Attr("use_global_stats"); - const bool is_test = ctx.Attr("is_test"); - const float epsilon = ctx.Attr("epsilon"); - DataLayout data_layout = phi::StringToDataLayout(data_layout_str); - - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = - ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - - auto &dev_ctx = ctx.template device_context(); - auto d_x_tmp = - ctx.AllocateTmpTensor(x->dims(), dev_ctx); - auto scale_grad_tmp = ctx.AllocateTmpTensor( - scale->dims(), dev_ctx); - auto bias_grad_tmp = - ctx.AllocateTmpTensor(bias->dims(), dev_ctx); - - if (d_x == nullptr) { - d_x = &d_x_tmp; - } - if (d_scale == nullptr) { - d_scale = &scale_grad_tmp; - } - if (d_bias == nullptr) { - d_bias = &bias_grad_tmp; - } - - const auto &place = ctx.GetPlace(); - d_x->mutable_data(place); - d_scale->mutable_data(place); - d_bias->mutable_data(place); - - use_global_stats = is_test || use_global_stats; - - const auto &x_dims = x->dims(); - PADDLE_ENFORCE_GE( - x_dims.size(), - 2, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be larger than 1." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - PADDLE_ENFORCE_LE( - x_dims.size(), - 5, - platform::errors::InvalidArgument( - "The size of input X's dimensions should be less than 6." - "But received: the size of input X's dimensions is [%d]", - x_dims.size())); - const int N = x_dims[0]; - const int C = - (data_layout == DataLayout::kNCHW ? x_dims[1] - : x_dims[x_dims.size() - 1]); - const int sample_size = x->numel() / N / C; - - phi::DenseTensor transformed_d_y; - phi::DenseTensor transformed_x; - phi::DenseTensor transformed_d_x; - const int transformed_dim_size = 4; - const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C}; - - MLUCnnlTensorDesc transformed_desc(transformed_dim_size, - transformed_shape, - ToCnnlDataType(), - CNNL_LAYOUT_NHWC); - MLUCnnlTensorDesc others_input_desc(*scale); - - bool need_transpose = - (data_layout == DataLayout::kNCHW && x_dims.size() != 2); - if (need_transpose) { - transformed_d_y = ctx.AllocateTmpTensor( - framework::DDim(transformed_shape, transformed_dim_size), dev_ctx); - transformed_x = ctx.AllocateTmpTensor( - framework::DDim(transformed_shape, transformed_dim_size), dev_ctx); - transformed_d_x = ctx.AllocateTmpTensor( - framework::DDim(transformed_shape, transformed_dim_size), dev_ctx); - const int org_reshaped[] = {N, C, sample_size, 1}; - MLUCnnlTensorDesc org_reshaped_desc( - transformed_dim_size, org_reshaped, ToCnnlDataType()); - const std::vector perm = {0, 2, 3, 1}; - MLUCnnl::Transpose(ctx, - perm, - transformed_dim_size, - org_reshaped_desc.get(), - GetBasePtr(d_y), - transformed_desc.get(), - GetBasePtr(&transformed_d_y)); - MLUCnnl::Transpose(ctx, - perm, - transformed_dim_size, - org_reshaped_desc.get(), - GetBasePtr(x), - transformed_desc.get(), - GetBasePtr(&transformed_x)); - } else { - transformed_d_y = *d_y; - transformed_x = *x; - transformed_d_x = *d_x; - } - - if (use_global_stats) { - const auto *running_mean = ctx.Input("Mean"); - const auto *running_variance = ctx.Input("Variance"); - MLUCnnl::FusedBatchNormGrad(ctx, - false /*is_training*/, - transformed_desc.get(), - GetBasePtr(&transformed_d_y), - transformed_desc.get(), - GetBasePtr(&transformed_x), - others_input_desc.get(), - GetBasePtr(scale), - GetBasePtr(running_mean), - GetBasePtr(running_variance), - epsilon, - transformed_desc.get(), - GetBasePtr(&transformed_d_x), - GetBasePtr(d_scale), - GetBasePtr(d_bias)); - } else { - MLUCnnl::FusedBatchNormGrad(ctx, - true /*is_training*/, - transformed_desc.get(), - GetBasePtr(&transformed_d_y), - transformed_desc.get(), - GetBasePtr(&transformed_x), - others_input_desc.get(), - GetBasePtr(scale), - GetBasePtr(saved_mean), - GetBasePtr(saved_inv_variance), - epsilon, - transformed_desc.get(), - GetBasePtr(&transformed_d_x), - GetBasePtr(d_scale), - GetBasePtr(d_bias)); - } - - if (need_transpose) { - const int d_x_reshaped[] = {N, C, sample_size, 1}; - MLUCnnlTensorDesc d_x_reshaped_desc( - transformed_dim_size, d_x_reshaped, ToCnnlDataType()); - const std::vector perm = {0, 3, 1, 2}; - MLUCnnl::Transpose(ctx, - perm, - transformed_dim_size, - transformed_desc.get(), - GetBasePtr(&transformed_d_x), - d_x_reshaped_desc.get(), - GetBasePtr(d_x)); - } - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(batch_norm, - ops::MLUBatchNormOpKernel, - ops::MLUBatchNormOpKernel); -REGISTER_OP_MLU_KERNEL(batch_norm_grad, - ops::MLUBatchNormGradOpKernel, - ops::MLUBatchNormGradOpKernel); diff --git a/paddle/fluid/operators/bce_loss_op_mlu.cc b/paddle/fluid/operators/bce_loss_op_mlu.cc deleted file mode 100644 index 6541de153d4be7b9e7e34b2669842e28d2f2b2a3..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/bce_loss_op_mlu.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class BCELossMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* out = ctx.Output("Out"); - - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc label_desc(*labels); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::BceLoss(ctx, - CNNL_BCE_LOSS_NONE, - x_desc.get(), - GetBasePtr(x), - label_desc.get(), - GetBasePtr(labels), - nullptr, - nullptr, - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class BCELossGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc label_desc(*labels); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnl::BceLossBackward(ctx, - CNNL_BCE_LOSS_NONE, - dout_desc.get(), - GetBasePtr(dout), - x_desc.get(), - GetBasePtr(x), - label_desc.get(), - GetBasePtr(labels), - nullptr, - nullptr, - x_desc.get(), - GetBasePtr(dx)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(bce_loss, - ops::BCELossMLUKernel, - ops::BCELossMLUKernel); - -REGISTER_OP_MLU_KERNEL(bce_loss_grad, - ops::BCELossGradMLUKernel, - ops::BCELossGradMLUKernel); diff --git a/paddle/fluid/operators/cast_op_mlu.cc b/paddle/fluid/operators/cast_op_mlu.cc deleted file mode 100644 index cf1d7f39304c1e33d9ec943863976cf298b4fbad..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/cast_op_mlu.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/platform/device/mlu/device_context.h" - -namespace paddle { -namespace operators { - -template -class CastMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto src_type = static_cast(ctx.Attr("in_dtype")); - auto dst_type = static_cast(ctx.Attr("out_dtype")); - auto place = ctx.GetPlace(); - - if (src_type == dst_type) { - auto& dev_ctx = ctx.template device_context(); - output->mutable_data(place); - framework::TensorCopy(*input, place, dev_ctx, output); - return; - } - - PADDLE_ENFORCE_EQ(MLUSupportsCast(src_type, dst_type), - true, - platform::errors::InvalidArgument( - "MLU not support cast [%d] to [%d]", - framework::DataTypeToString(src_type), - framework::DataTypeToString(dst_type))); - - output->mutable_data(place, framework::TransToPhiDataType(dst_type)); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc output_desc(*output); - cnnlCastDataType_t cast_type = GetCastDataType(src_type, dst_type); - - MLUCnnl::Cast(ctx, - cast_type, - input_desc.get(), - GetBasePtr(input), - output_desc.get(), - GetBasePtr(output)); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(cast, - ops::CastMLUKernel, - ops::CastMLUKernel, - ops::CastMLUKernel, - ops::CastMLUKernel, - ops::CastMLUKernel, - ops::CastMLUKernel, - ops::CastMLUKernel); diff --git a/paddle/fluid/operators/clip_op_mlu.cc b/paddle/fluid/operators/clip_op_mlu.cc deleted file mode 100644 index f84a493d6d399dc770715e0e9586c08f415daf1e..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/clip_op_mlu.cc +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ClipMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - - auto min = static_cast(ctx.Attr("min")); - auto max = static_cast(ctx.Attr("max")); - - if (ctx.HasInput("Min")) { - phi::DenseTensor min_cpu; - auto* min_tensor = ctx.Input("Min"); - auto* min_data = min_tensor->data(); - if (platform::is_mlu_place(min_tensor->place())) { - paddle::framework::TensorCopySync( - *min_tensor, platform::CPUPlace(), &min_cpu); - min_data = min_cpu.data(); - } - min = min_data[0]; - } - - if (ctx.HasInput("Max")) { - phi::DenseTensor max_cpu; - auto* max_tensor = ctx.Input("Max"); - auto* max_data = max_tensor->data(); - if (platform::is_mlu_place(max_tensor->place())) { - paddle::framework::TensorCopySync( - *max_tensor, platform::CPUPlace(), &max_cpu); - max_data = max_cpu.data(); - } - max = max_data[0]; - } - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::Clip(ctx, - x_desc.get(), - GetBasePtr(x), - static_cast(&min), - static_cast(&max), - GetBasePtr(out)); - } -}; - -template -class ClipGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - dx->mutable_data(ctx.GetPlace()); - - auto* min_tensor = - ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; - auto* max_tensor = - ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; - - auto min_val = ctx.Attr("min"); - if (min_tensor) { - phi::DenseTensor min_data; - framework::TensorCopy( - *min_tensor, - platform::CPUPlace(), - ctx.template device_context(), - &min_data); - ctx.template device_context().Wait(); - min_val = static_cast(min_data.data()[0]); - } - auto max_val = ctx.Attr("max"); - if (max_tensor) { - phi::DenseTensor max_data; - framework::TensorCopy( - *max_tensor, - platform::CPUPlace(), - ctx.template device_context(), - &max_data); - ctx.template device_context().Wait(); - max_val = static_cast(max_data.data()[0]); - } - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc dx_desc(*dx); - MLUCnnlTensorDesc dout_desc(*dout); - - MLUCnnl::HardtanhBackward(ctx, - x_desc.get(), - GetBasePtr(x), - dout_desc.get(), - GetBasePtr(dout), - max_val, - min_val, - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(clip, - ops::ClipMLUKernel, - ops::ClipMLUKernel); - -REGISTER_OP_MLU_KERNEL(clip_grad, - ops::ClipGradMLUKernel, - ops::ClipGradMLUKernel); diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc deleted file mode 100644 index 26bc2f190ce70e3aef0eb53f9179717503f305f8..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ /dev/null @@ -1,170 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/concat_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/core/tensor_utils.h" - -namespace paddle { -namespace operators { - -template -class ConcatMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - phi::DenseTensor* out = ctx.Output("Out"); - PADDLE_ENFORCE_NOT_NULL(ins[0], - platform::errors::NotFound( - "The first input tensor is not initalized.")); - auto axis = ctx.Attr("axis"); - auto ins_size = ins.size(); - bool need_resize_out_dims = false; - if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); - axis = phi::GetVectorFromTensor(axis_tensor)[0]; - need_resize_out_dims = true; - } - axis = ComputeAxis(static_cast(axis), - static_cast(ins[0]->dims().size())); - - if (need_resize_out_dims) { - const size_t n = ins.size(); - std::vector ins_dims(n); - for (size_t i = 0; i < n; i++) { - ins_dims[i] = ins[i]->dims(); - } - - framework::DDim out_dims = - phi::funcs::ComputeAndCheckShape(true, ins_dims, axis); - out->Resize(out_dims); - } - const int axis_t = axis; - const int ins_size_t = ins_size; - auto place = ctx.GetPlace(); - out->mutable_data(place); - - // mlu should do sth - // init ins tensors - std::vector inputs; - std::vector input_descs; - std::vector desc_vector; - for (size_t i = 0; i < ins_size; i++) { - input_descs.emplace_back(MLUCnnlTensorDesc( - *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->dtype()))); - desc_vector.push_back(input_descs.back().get()); - inputs.push_back(GetBasePtr(ins[i])); - } - // init out tensors - MLUCnnlTensorDesc output_desc( - *out, CNNL_LAYOUT_ARRAY, ToCnnlDataType(out->dtype())); - - // MLU should do sth - MLUCnnl::Concat(ctx, - ins_size_t, - axis_t, - desc_vector.data(), - inputs.data(), - output_desc.get(), - GetBasePtr(out)); - } -}; - -template -class ConcatGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_grad = ctx.Input(framework::GradVarName("Out")); - auto ins = ctx.MultiInput("X"); - auto out_var_names = ctx.OutputNames(framework::GradVarName("X")); - auto outs = ctx.MultiOutput(framework::GradVarName("X")); - auto axis = ctx.Attr("axis"); - int split_num = ins.size(); - - PADDLE_ENFORCE_NOT_NULL(ins[0], - platform::errors::NotFound( - "The first input tensor is not initalized.")); - - if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); - axis = phi::GetVectorFromTensor(axis_tensor)[0]; - } - - axis = ComputeAxis(static_cast(axis), - static_cast(ins[0]->dims().size())); - PADDLE_ENFORCE_GE(axis, - 0, - platform::errors::InvalidArgument( - "concat_grad: axis should be larger than or " - "equal to 0, but received axis is %d.", - axis)); - PADDLE_ENFORCE_LT( - axis, - out_grad->dims().size(), - platform::errors::InvalidArgument( - "concat_grad: axis should be less than ins[0]->dims()!" - "But received axis is %d, while ins[0]->dims()" - "size is %d.", - axis, - out_grad->dims().size())); - // get output tensor that the name is not kEmptyVarName - std::vector outputs_vec; - std::vector tmp_outputs_vec; - std::vector output_descs; - std::vector descs_vec; - for (size_t j = 0; j < outs.size(); ++j) { - if (out_var_names[j] != framework::kEmptyVarName && - outs[j]->numel() != 0UL) { - outs[j]->mutable_data(ctx.GetPlace()); - output_descs.emplace_back(MLUCnnlTensorDesc(*outs[j])); - outputs_vec.push_back(GetBasePtr(outs[j])); - } else { - phi::DenseTensor tmp_tensor; - tmp_tensor.mutable_data(ins[j]->dims(), ctx.GetPlace()); - tmp_outputs_vec.push_back(tmp_tensor); - output_descs.emplace_back(MLUCnnlTensorDesc(*ins[j])); - outputs_vec.push_back(GetBasePtr(&(tmp_outputs_vec.back()))); - } - descs_vec.push_back(output_descs.back().get()); - } - - MLUCnnlTensorDesc out_grad_desc(*out_grad); - MLUCnnl::Split(ctx, - static_cast(split_num), - static_cast(axis), - out_grad_desc.get(), - GetBasePtr(out_grad), - descs_vec.data(), - outputs_vec.data()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(concat, - ops::ConcatMLUKernel, - ops::ConcatMLUKernel, - ops::ConcatMLUKernel, - ops::ConcatMLUKernel, - ops::ConcatMLUKernel, - ops::ConcatMLUKernel); -REGISTER_OP_MLU_KERNEL(concat_grad, - ops::ConcatGradMLUKernel, - ops::ConcatGradMLUKernel, - ops::ConcatGradMLUKernel, - ops::ConcatGradMLUKernel, - ops::ConcatGradMLUKernel, - ops::ConcatGradMLUKernel); diff --git a/paddle/fluid/operators/conv_op_mlu.cc b/paddle/fluid/operators/conv_op_mlu.cc deleted file mode 100644 index 214af06bbd7c70efb49f7cc76f76816e7de7f087..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/conv_op_mlu.cc +++ /dev/null @@ -1,590 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/conv_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -using DataLayout = phi::DataLayout; - -template -class MLUConvOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); - output->mutable_data(ctx.GetPlace()); - const std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - int groups = ctx.Attr("groups"); - const std::string padding_algorithm = - ctx.Attr("padding_algorithm"); - const std::string data_format = ctx.Attr("data_format"); - - const bool channel_last = data_format == "NHWC"; - - // update padding and dilation - auto in_dims = input->dims(); - auto filter_dims = filter->dims(); - auto in_dims_size = in_dims.size(); - framework::DDim in_data_dims; - framework::DDim filter_data_dims; - - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); - UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - phi::DenseTensor input_tensor(input->type()); - phi::DenseTensor output_tensor(output->type()); - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - if (channel_last) { - input_tensor.ShareDataWith(*input); - output_tensor.ShareDataWith(*output); - } else { - // transpose input from NCHW to NHWC - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - input, - &input_tensor, - true /*need_reshape_or_alloc*/); - auto output_dims = output->dims(); - output_tensor.mutable_data( - {output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, - ctx.GetPlace()); - } - input_tensor.set_layout(DataLayout::kNHWC); - output_tensor.set_layout(DataLayout::kNHWC); - - // transpose filter from MCHW to MHWC - phi::DenseTensor trans_filter(filter->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc( - input_tensor, data_layout, ToCnnlDataType(input_tensor.dtype())); - MLUCnnlTensorDesc filter_desc( - trans_filter, data_layout, ToCnnlDataType(trans_filter.type())); - MLUCnnlTensorDesc output_desc( - output_tensor, data_layout, ToCnnlDataType(output_tensor.dtype())); - - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - ToCnnlDataType()); - - MLUCnnl::ConvolutionForward(ctx, - conv_desc.get(), - nullptr /*alpha*/, - nullptr /*beta*/, - nullptr /*bias_desc*/, - nullptr /*bias_ptr*/, - input_desc.get(), - GetBasePtr(&input_tensor), - filter_desc.get(), - GetBasePtr(&trans_filter), - output_desc.get(), - GetBasePtr(&output_tensor)); - - if (!channel_last) { - // transpose output from NHWC to NCHW - const std::vector perm_to_nchw = {0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &output_tensor, - output, - false /*need_reshape_or_alloc*/); - } - } -}; - -template -class MLUConvGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("Input"); - auto filter = ctx.Input("Filter"); - auto output_grad = - ctx.Input(framework::GradVarName("Output")); - auto input_grad = - ctx.Output(framework::GradVarName("Input")); - auto filter_grad = - ctx.Output(framework::GradVarName("Filter")); - - const std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - int groups = ctx.Attr("groups"); - const std::string padding_algorithm = - ctx.Attr("padding_algorithm"); - const std::string data_format = ctx.Attr("data_format"); - - const bool channel_last = data_format == "NHWC"; - - // update padding and dilation - auto in_dims = input->dims(); - auto filter_dims = filter->dims(); - auto in_dims_size = in_dims.size(); - framework::DDim in_data_dims; - framework::DDim filter_data_dims; - - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size()); - - std::vector ksize = phi::vectorize(filter_data_dims); - UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - phi::DenseTensor input_tensor(input->type()); - phi::DenseTensor output_grad_tensor(output_grad->type()); - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - const std::vector perm_to_nchw = {0, 3, 1, 2}; - if (channel_last) { - input_tensor.ShareDataWith(*input); - output_grad_tensor.ShareDataWith(*output_grad); - } else { - // transpose input and output_grad from NCHW to NHWC - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - input, - &input_tensor, - true /*need_reshape_or_alloc*/); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - output_grad, - &output_grad_tensor, - true /*need_reshape_or_alloc*/); - } - input_tensor.set_layout(DataLayout::kNHWC); - output_grad_tensor.set_layout(DataLayout::kNHWC); - - if (filter_grad) { - filter_grad->mutable_data(ctx.GetPlace()); - - auto filter_grad_dims = filter_grad->dims(); - phi::DenseTensor temp_filter_grad(filter_grad->type()); - temp_filter_grad.mutable_data({filter_grad_dims[0], - filter_grad_dims[2], - filter_grad_dims[3], - filter_grad_dims[1]}, - ctx.GetPlace()); - - cnnlDataType_t tensor_dtype = ToCnnlDataType(); - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc(input_tensor, data_layout, tensor_dtype); - MLUCnnlTensorDesc out_grad_desc( - output_grad_tensor, data_layout, tensor_dtype); - MLUCnnlTensorDesc temp_filter_grad_desc( - temp_filter_grad, data_layout, tensor_dtype); - - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - tensor_dtype); - - MLUCnnl::ConvBackpropFilter(ctx, - conv_desc.get(), - input_desc.get(), - GetBasePtr(&input_tensor), - out_grad_desc.get(), - GetBasePtr(&output_grad_tensor), - temp_filter_grad_desc.get(), - GetBasePtr(&temp_filter_grad)); - - // transpose filter_grad from MHWC to MCHW - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &temp_filter_grad, - filter_grad, - false /*need_reshape_or_alloc*/); - } - if (input_grad) { - input_grad->mutable_data(ctx.GetPlace()); - - phi::DenseTensor input_grad_tensor(input_grad->type()); - if (channel_last) { - input_grad_tensor.ShareDataWith(*input_grad); - } else { - auto input_grad_dims = input_grad->dims(); - input_grad_tensor.mutable_data({input_grad_dims[0], - input_grad_dims[2], - input_grad_dims[3], - input_grad_dims[1]}, - ctx.GetPlace()); - } - input_grad_tensor.set_layout(DataLayout::kNHWC); - - // transpose filter from MCHW to MHWC - phi::DenseTensor trans_filter(filter->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - cnnlDataType_t tensor_dtype = ToCnnlDataType(); - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc filter_desc(trans_filter, data_layout, tensor_dtype); - MLUCnnlTensorDesc out_grad_desc( - output_grad_tensor, data_layout, tensor_dtype); - MLUCnnlTensorDesc in_grad_desc( - input_grad_tensor, data_layout, tensor_dtype); - - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - tensor_dtype); - - MLUCnnl::ConvBackpropInput(ctx, - conv_desc.get(), - filter_desc.get(), - GetBasePtr(&trans_filter), - out_grad_desc.get(), - GetBasePtr(&output_grad_tensor), - in_grad_desc.get(), - GetBasePtr(&input_grad_tensor)); - - if (!channel_last) { - // transpose input_grad from NHWC to NCHW - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &input_grad_tensor, - input_grad, - false /*need_reshape_or_alloc*/); - } - } - } -}; - -template -class MLUDepthwiseConvOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); - output->mutable_data(ctx.GetPlace()); - const std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - const std::string padding_algorithm = - ctx.Attr("padding_algorithm"); - const std::string data_format = ctx.Attr("data_format"); - - const bool channel_last = data_format == "NHWC"; - int groups; - - // update padding and dilation - auto in_dims = input->dims(); - auto filter_dims = filter->dims(); - auto in_dims_size = in_dims.size(); - framework::DDim in_data_dims; - framework::DDim filter_data_dims; - - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); - UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - phi::DenseTensor input_tensor(input->type()); - phi::DenseTensor output_tensor(output->type()); - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - if (channel_last) { - groups = in_dims[3]; - input_tensor.ShareDataWith(*input); - output_tensor.ShareDataWith(*output); - } else { - // transpose input from NCHW to NHWC - groups = in_dims[1]; - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - input, - &input_tensor, - true /*need_reshape_or_alloc*/); - auto output_dims = output->dims(); - output_tensor.mutable_data( - {output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, - ctx.GetPlace()); - } - input_tensor.set_layout(DataLayout::kNHWC); - output_tensor.set_layout(DataLayout::kNHWC); - - // transpose filter from MCHW to MHWC - phi::DenseTensor trans_filter(filter->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc( - input_tensor, data_layout, ToCnnlDataType(input_tensor.dtype())); - MLUCnnlTensorDesc filter_desc( - trans_filter, data_layout, ToCnnlDataType(trans_filter.type())); - MLUCnnlTensorDesc output_desc( - output_tensor, data_layout, ToCnnlDataType(output_tensor.dtype())); - - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - ToCnnlDataType()); - - MLUCnnl::ConvolutionForward(ctx, - conv_desc.get(), - nullptr /*alpha*/, - nullptr /*beta*/, - nullptr /*bias_desc*/, - nullptr /*bias_ptr*/, - input_desc.get(), - GetBasePtr(&input_tensor), - filter_desc.get(), - GetBasePtr(&trans_filter), - output_desc.get(), - GetBasePtr(&output_tensor)); - - if (!channel_last) { - // transpose output from NHWC to NCHW - const std::vector perm_to_nchw = {0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &output_tensor, - output, - false /*need_reshape_or_alloc*/); - } - } -}; - -template -class MLUDepthwiseConvGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("Input"); - auto filter = ctx.Input("Filter"); - auto output_grad = - ctx.Input(framework::GradVarName("Output")); - auto input_grad = - ctx.Output(framework::GradVarName("Input")); - auto filter_grad = - ctx.Output(framework::GradVarName("Filter")); - - const std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - const std::string padding_algorithm = - ctx.Attr("padding_algorithm"); - const std::string data_format = ctx.Attr("data_format"); - - const bool channel_last = data_format == "NHWC"; - - // update padding and dilation - auto in_dims = input->dims(); - auto filter_dims = filter->dims(); - auto in_dims_size = in_dims.size(); - framework::DDim in_data_dims; - framework::DDim filter_data_dims; - int groups; - - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size()); - - std::vector ksize = phi::vectorize(filter_data_dims); - UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - phi::DenseTensor input_tensor(input->type()); - phi::DenseTensor output_grad_tensor(output_grad->type()); - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - const std::vector perm_to_nchw = {0, 3, 1, 2}; - const std::vector perm_hwcm_to_mchw = {3, 2, 0, 1}; - const std::vector perm_mchw_to_hwcm = {2, 3, 1, 0}; - if (channel_last) { - input_tensor.ShareDataWith(*input); - output_grad_tensor.ShareDataWith(*output_grad); - groups = in_dims[3]; - } else { - groups = in_dims[1]; - // transpose input and output_grad from NCHW to NHWC - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - input, - &input_tensor, - true /*need_reshape_or_alloc*/); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - output_grad, - &output_grad_tensor, - true /*need_reshape_or_alloc*/); - } - input_tensor.set_layout(DataLayout::kNHWC); - output_grad_tensor.set_layout(DataLayout::kNHWC); - - if (filter_grad) { - filter_grad->mutable_data(ctx.GetPlace()); - - auto filter_grad_dims = filter_grad->dims(); - phi::DenseTensor temp_filter_grad(filter_grad->type()); - // Details about setting diff_w hwcn for better performance, see the CNNL - // documentation. - temp_filter_grad.mutable_data({filter_grad_dims[perm_mchw_to_hwcm[0]], - filter_grad_dims[perm_mchw_to_hwcm[1]], - filter_grad_dims[perm_mchw_to_hwcm[2]], - filter_grad_dims[perm_mchw_to_hwcm[3]]}, - ctx.GetPlace()); - - cnnlDataType_t tensor_dtype = ToCnnlDataType(); - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc(input_tensor, data_layout, tensor_dtype); - MLUCnnlTensorDesc out_grad_desc( - output_grad_tensor, data_layout, tensor_dtype); - MLUCnnlTensorDesc temp_filter_grad_desc( - temp_filter_grad, CNNL_LAYOUT_HWCN, tensor_dtype); - - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - tensor_dtype); - - MLUCnnl::ConvBackpropFilter(ctx, - conv_desc.get(), - input_desc.get(), - GetBasePtr(&input_tensor), - out_grad_desc.get(), - GetBasePtr(&output_grad_tensor), - temp_filter_grad_desc.get(), - GetBasePtr(&temp_filter_grad)); - - // transpose filter_grad from HWCM to MCHW - TransposeFromMLUTensor(ctx, - perm_hwcm_to_mchw, - &temp_filter_grad, - filter_grad, - false /*need_reshape_or_alloc*/); - } - if (input_grad) { - input_grad->mutable_data(ctx.GetPlace()); - - phi::DenseTensor input_grad_tensor(input_grad->type()); - if (channel_last) { - input_grad_tensor.ShareDataWith(*input_grad); - } else { - auto input_grad_dims = input_grad->dims(); - input_grad_tensor.mutable_data({input_grad_dims[0], - input_grad_dims[2], - input_grad_dims[3], - input_grad_dims[1]}, - ctx.GetPlace()); - } - input_grad_tensor.set_layout(DataLayout::kNHWC); - - // transpose filter from MCHW to MHWC - phi::DenseTensor trans_filter(filter->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - cnnlDataType_t tensor_dtype = ToCnnlDataType(); - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc filter_desc(trans_filter, data_layout, tensor_dtype); - MLUCnnlTensorDesc out_grad_desc( - output_grad_tensor, data_layout, tensor_dtype); - MLUCnnlTensorDesc in_grad_desc( - input_grad_tensor, data_layout, tensor_dtype); - - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - tensor_dtype); - - MLUCnnl::ConvBackpropInput(ctx, - conv_desc.get(), - filter_desc.get(), - GetBasePtr(&trans_filter), - out_grad_desc.get(), - GetBasePtr(&output_grad_tensor), - in_grad_desc.get(), - GetBasePtr(&input_grad_tensor)); - - if (!channel_last) { - // transpose input_grad from NHWC to NCHW - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &input_grad_tensor, - input_grad, - false /*need_reshape_or_alloc*/); - } - } - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(conv2d, - ops::MLUConvOpKernel, - ops::MLUConvOpKernel); - -REGISTER_OP_MLU_KERNEL(conv2d_grad, - ops::MLUConvGradOpKernel, - ops::MLUConvGradOpKernel); - -REGISTER_OP_MLU_KERNEL(depthwise_conv2d, - ops::MLUDepthwiseConvOpKernel, - ops::MLUDepthwiseConvOpKernel); - -REGISTER_OP_MLU_KERNEL(depthwise_conv2d_grad, - ops::MLUDepthwiseConvGradOpKernel, - ops::MLUDepthwiseConvGradOpKernel); diff --git a/paddle/fluid/operators/conv_transpose_op_mlu.cc b/paddle/fluid/operators/conv_transpose_op_mlu.cc deleted file mode 100644 index 36d0be10575d18115bfa840b550ac87085e4f8c1..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/conv_transpose_op_mlu.cc +++ /dev/null @@ -1,311 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/conv_transpose_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/kernels/cpu/conv_util.h" - -namespace paddle { -namespace operators { - -using DataLayout = phi::DataLayout; - -template -class Conv2DTransposeMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* input = ctx.Input("Input"); - const phi::DenseTensor* filter = ctx.Input("Filter"); - phi::DenseTensor* output = ctx.Output("Output"); - output->mutable_data(ctx.GetPlace()); - std::vector output_padding = - ctx.Attr>("output_padding"); - const std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - const std::string data_format = ctx.Attr("data_format"); - int groups = ctx.Attr("groups"); - const std::string padding_algorithm = - ctx.Attr("padding_algorithm"); - - // check dimension - const bool channel_last = data_format == "NHWC"; - - auto in_dims = input->dims(); - auto filter_dims = filter->dims(); - auto in_dims_size = in_dims.size(); - framework::DDim in_data_dims; - framework::DDim filter_data_dims; - - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size()); - - std::vector ksize = phi::vectorize(filter_data_dims); - phi::UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - phi::DenseTensor input_tensor(input->type()); - phi::DenseTensor output_tensor(output->type()); - input_tensor.set_layout(DataLayout::kNHWC); - output_tensor.set_layout(DataLayout::kNHWC); - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - - if (channel_last) { - input_tensor.ShareDataWith(*input); - output_tensor.ShareDataWith(*output); - } else { - // transpose input from NCHW to NHWC - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - input, - &input_tensor, - true /*need_reshape_or_alloc*/); - auto output_dims = output->dims(); - output_tensor.mutable_data( - {output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, - ctx.GetPlace()); - } - - // transpose filter from MCHW to MHWC - phi::DenseTensor trans_filter(filter->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - // construct MLU attr - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc( - input_tensor, data_layout, ToCnnlDataType(input_tensor.dtype())); - MLUCnnlTensorDesc filter_desc( - trans_filter, data_layout, ToCnnlDataType(trans_filter.type())); - MLUCnnlTensorDesc output_desc( - output_tensor, data_layout, ToCnnlDataType(output_tensor.dtype())); - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - ToCnnlDataType()); - - MLUCnnl::ConvBackpropInput(ctx, - conv_desc.get(), - filter_desc.get(), - GetBasePtr(&trans_filter), - input_desc.get(), - GetBasePtr(&input_tensor), - output_desc.get(), - GetBasePtr(&output_tensor)); - - if (!channel_last) { - // transpose output from NHWC to NCHW - const std::vector perm_to_nchw = {0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &output_tensor, - output, - false /*need_reshape_or_alloc*/); - } - } -}; - -template -class Conv2DTransposeGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* input = ctx.Input("Input"); - const phi::DenseTensor* filter = ctx.Input("Filter"); - const phi::DenseTensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - phi::DenseTensor* input_grad = - ctx.Output(framework::GradVarName("Input")); - phi::DenseTensor* filter_grad = - ctx.Output(framework::GradVarName("Filter")); - - if ((!input_grad) && (!filter_grad)) return; - - std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - const int groups = ctx.Attr("groups"); - std::string padding_algorithm = ctx.Attr("padding_algorithm"); - const std::string data_format = ctx.Attr("data_format"); - const phi::DataLayout data_layout = phi::StringToDataLayout(data_format); - - auto in_dims = input->dims(); - auto filter_dims = filter->dims(); - auto in_dims_size = in_dims.size(); - - const bool channel_last = (data_layout == phi::DataLayout::kNHWC); - - framework::DDim in_data_dims; - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - framework::DDim filter_data_dims = - phi::slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); - phi::UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - phi::DenseTensor input_tensor(input->type()); - phi::DenseTensor output_grad_tensor(output_grad->type()); - output_grad_tensor.set_layout(DataLayout::kNHWC); - - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - if (channel_last) { - input_tensor.ShareDataWith(*input); - output_grad_tensor.ShareDataWith(*output_grad); - } else { - // transpose input from NCHW to NHWC - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - input, - &input_tensor, - true /*need_reshape_or_alloc*/); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - output_grad, - &output_grad_tensor, - true /*need_reshape_or_alloc*/); - } - - // transpose filter from MCHW to MHWC - phi::DenseTensor trans_filter(filter->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - // MLU descs - cnnlTensorLayout_t data_layout_mlu = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc( - input_tensor, data_layout_mlu, ToCnnlDataType(input_tensor.dtype())); - MLUCnnlTensorDesc trans_filter_desc( - trans_filter, data_layout_mlu, ToCnnlDataType(trans_filter.type())); - MLUCnnlTensorDesc output_grad_desc( - output_grad_tensor, - data_layout_mlu, - ToCnnlDataType(output_grad_tensor.dtype())); - MLUCnnlConvolutionDesc conv_desc(in_dims_size, - paddings.data(), - strides.data(), - dilations.data(), - groups, - ToCnnlDataType()); - - if (filter_grad) { - filter_grad->mutable_data(ctx.GetPlace()); - phi::DenseTensor filter_grad_tensor(filter_grad->type()); - // filter_grad always MCHW - // filter_grad_tensor always MHWC - auto filter_grad_dims = filter_grad->dims(); - filter_grad_tensor.mutable_data({filter_grad_dims[0], - filter_grad_dims[2], - filter_grad_dims[3], - filter_grad_dims[1]}, - ctx.GetPlace()); - //} - filter_grad_tensor.set_layout(DataLayout::kNHWC); - - MLUCnnlTensorDesc filter_grad_desc( - filter_grad_tensor, - data_layout_mlu, - ToCnnlDataType(filter_grad_tensor.dtype())); - - MLUCnnl::ConvBackpropFilter(ctx, - conv_desc.get(), - output_grad_desc.get(), - GetBasePtr(output_grad), - input_desc.get(), - GetBasePtr(&input_tensor), - filter_grad_desc.get(), - GetBasePtr(&filter_grad_tensor)); - // transpose output from MHWC to MCHW - const std::vector perm_to_mchw = {0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm_to_mchw, - &filter_grad_tensor, - filter_grad, - false /*need_reshape_or_alloc*/); - } - - if (input_grad) { - input_grad->mutable_data(ctx.GetPlace()); - phi::DenseTensor input_grad_tensor(input_grad->type()); - input_tensor.set_layout(DataLayout::kNHWC); - - if (channel_last) { - input_grad_tensor.ShareDataWith(*input_grad); - } else { - auto input_grad_dims = input_grad->dims(); - input_grad_tensor.mutable_data({input_grad_dims[0], - input_grad_dims[2], - input_grad_dims[3], - input_grad_dims[1]}, - ctx.GetPlace()); - } - - MLUCnnlTensorDesc input_grad_desc( - input_grad_tensor, - data_layout_mlu, - ToCnnlDataType(input_grad_tensor.dtype())); - - MLUCnnl::ConvolutionForward(ctx, - conv_desc.get(), - nullptr /*alpha*/, - nullptr /*beta*/, - nullptr /*bias_desc*/, - nullptr /*bias_ptr*/, - output_grad_desc.get(), - GetBasePtr(&output_grad_tensor), - trans_filter_desc.get(), - GetBasePtr(&trans_filter), - input_grad_desc.get(), - GetBasePtr(&input_grad_tensor)); - if (!channel_last) { - // transpose output from NHWC to NCHW - const std::vector perm_to_nchw = {0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &input_grad_tensor, - input_grad, - false /*need_reshape_or_alloc*/); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(conv2d_transpose, - ops::Conv2DTransposeMLUKernel, - ops::Conv2DTransposeMLUKernel); - -REGISTER_OP_MLU_KERNEL(conv2d_transpose_grad, - ops::Conv2DTransposeGradMLUKernel, - ops::Conv2DTransposeGradMLUKernel); diff --git a/paddle/fluid/operators/cumsum_op_mlu.cc b/paddle/fluid/operators/cumsum_op_mlu.cc deleted file mode 100644 index fb586b9585e033a2519fbe1414ffcff182080b16..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/cumsum_op_mlu.cc +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class CumSumMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - int axis = ctx.Attr("axis"); - bool exclusive = ctx.Attr("exclusive"); - bool reverse = ctx.Attr("reverse"); - bool flatten = ctx.Attr("flatten"); - - out->mutable_data(ctx.GetPlace()); - - phi::DenseTensor* input_ptr = const_cast(x); - phi::DenseTensor flat_x(x->type()); - if (flatten) { - PADDLE_ENFORCE_EQ( - axis, - -1, - platform::errors::InvalidArgument( - "when flatten is true, attr axis must be default %d, but got %d", - -1, - axis)); - - flat_x.ShareDataWith(*x); - flat_x.Resize(phi::make_ddim({x->numel()})); - input_ptr = &flat_x; - } - - const int true_axis = (axis < 0) ? input_ptr->dims().size() + axis : axis; - MLUCnnlTensorDesc input_desc(*input_ptr); - MLUCnnlTensorDesc out_desc(*out); - - MLUCnnl::Cumsum(ctx, - true_axis, - exclusive, - reverse, - input_desc.get(), - GetBasePtr(input_ptr), - out_desc.get(), - GetBasePtr(out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(cumsum, - ops::CumSumMLUKernel, - ops::CumSumMLUKernel, - ops::CumSumMLUKernel); diff --git a/paddle/fluid/operators/deformable_conv_op_mlu.cc b/paddle/fluid/operators/deformable_conv_op_mlu.cc deleted file mode 100644 index f5814efb3f49161a23d26af6df3190dc5cbb9a65..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/deformable_conv_op_mlu.cc +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class DeformableConvMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* offset = ctx.Input("Offset"); - auto* mask = ctx.Input("Mask"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); - output->mutable_data(ctx.GetPlace()); - - const int groups = ctx.Attr("groups"); - const int deformable_groups = ctx.Attr("deformable_groups"); - const int im2col_step = ctx.Attr("im2col_step"); - const std::vector strides = ctx.Attr>("strides"); - const std::vector paddings = ctx.Attr>("paddings"); - const std::vector dilations = ctx.Attr>("dilations"); - - // TODO(fwg): Remove this check when cnnl fix the bug that groups > 1. - PADDLE_ENFORCE_EQ( - groups == 1, - true, - platform::errors::InvalidArgument( - "MLU deformable_conv kernel only support groups == 1, but get %d.", - groups)); - - // transform paddings from {h, w} to {top, bottom, left, right}. - const std::vector trans_paddings{ - paddings[0], paddings[0], paddings[1], paddings[1]}; - MLUCnnlDCNDesc dcn_desc(input->dims().size(), - trans_paddings.data(), - strides.data(), - dilations.data(), - deformable_groups, - groups, - im2col_step); - - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - phi::DenseTensor trans_input(input->dtype()); - TransposeFromMLUTensor( - ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_offset(offset->dtype()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - offset, - &trans_offset, - true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_mask(mask->dtype()); - TransposeFromMLUTensor( - ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_filter(filter->dtype()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - phi::DenseTensor tmp_output(output->dtype()); - auto output_dims = output->dims(); - tmp_output.mutable_data( - {output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, - ctx.GetPlace()); - - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc( - trans_input, data_layout, ToCnnlDataType(trans_input.dtype())); - MLUCnnlTensorDesc offset_desc( - trans_offset, data_layout, ToCnnlDataType(trans_offset.dtype())); - MLUCnnlTensorDesc mask_desc( - trans_mask, data_layout, ToCnnlDataType(trans_mask.dtype())); - MLUCnnlTensorDesc filter_desc( - trans_filter, data_layout, ToCnnlDataType(trans_filter.dtype())); - MLUCnnlTensorDesc output_desc( - tmp_output, data_layout, ToCnnlDataType(tmp_output.dtype())); - MLUCnnl::DCNForward(ctx, - dcn_desc.get(), - input_desc.get(), - GetBasePtr(&trans_input), - offset_desc.get(), - GetBasePtr(&trans_offset), - mask_desc.get(), - GetBasePtr(&trans_mask), - filter_desc.get(), - GetBasePtr(&trans_filter), - nullptr, - nullptr, - output_desc.get(), - GetBasePtr(&tmp_output)); - - const std::vector perm_to_nchw = {0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &tmp_output, - output, - false /*need_reshape_or_alloc*/); - } -}; - -template -class DeformableConvGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const phi::DenseTensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - auto* input_grad = - ctx.Output(framework::GradVarName("Input")); - auto* filter_grad = - ctx.Output(framework::GradVarName("Filter")); - auto* offset_grad = - ctx.Output(framework::GradVarName("Offset")); - auto* mask_grad = - ctx.Output(framework::GradVarName("Mask")); - - const phi::DenseTensor* input = ctx.Input("Input"); - auto* offset = ctx.Input("Offset"); - auto* mask = ctx.Input("Mask"); - auto* filter = ctx.Input("Filter"); - - int groups = ctx.Attr("groups"); - int deformable_groups = ctx.Attr("deformable_groups"); - int im2col_step = ctx.Attr("im2col_step"); - std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::vector dilations = ctx.Attr>("dilations"); - - // TODO(fwg): Remove this check when cnnl fix the bug that groups > 1. - PADDLE_ENFORCE_EQ(groups == 1, - true, - platform::errors::InvalidArgument( - "MLU deformable_conv_grad kernel only support groups " - "== 1, but get %d.", - groups)); - - // transform paddings from {h, w} to {top, bottom, left, right}. - const std::vector trans_paddings{ - paddings[0], paddings[0], paddings[1], paddings[1]}; - MLUCnnlDCNDesc dcn_desc(input->dims().size(), - trans_paddings.data(), - strides.data(), - dilations.data(), - deformable_groups, - groups, - im2col_step); - - phi::DenseTensor tmp_input_grad; - auto input_dims = input->dims(); - tmp_input_grad.mutable_data( - {input_dims[0], input_dims[2], input_dims[3], input_dims[1]}, - ctx.GetPlace()); - - phi::DenseTensor tmp_filter_grad; - auto filter_dims = filter->dims(); - tmp_filter_grad.mutable_data( - {filter_dims[0], filter_dims[2], filter_dims[3], filter_dims[1]}, - ctx.GetPlace()); - - phi::DenseTensor tmp_offset_grad; - auto offset_dims = offset->dims(); - tmp_offset_grad.mutable_data( - {offset_dims[0], offset_dims[2], offset_dims[3], offset_dims[1]}, - ctx.GetPlace()); - - phi::DenseTensor tmp_mask_grad; - auto mask_dims = mask->dims(); - tmp_mask_grad.mutable_data( - {mask_dims[0], mask_dims[2], mask_dims[3], mask_dims[1]}, - ctx.GetPlace()); - - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - phi::DenseTensor trans_output_grad(output_grad->dtype()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - output_grad, - &trans_output_grad, - true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_input(input->dtype()); - TransposeFromMLUTensor( - ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_offset(offset->dtype()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - offset, - &trans_offset, - true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_mask(mask->dtype()); - TransposeFromMLUTensor( - ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/); - - phi::DenseTensor trans_filter(filter->dtype()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - filter, - &trans_filter, - true /*need_reshape_or_alloc*/); - - cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc output_grad_desc( - trans_output_grad, - data_layout, - ToCnnlDataType(trans_output_grad.dtype())); - MLUCnnlTensorDesc input_desc( - trans_input, data_layout, ToCnnlDataType(trans_input.dtype())); - MLUCnnlTensorDesc offset_desc( - trans_offset, data_layout, ToCnnlDataType(trans_offset.dtype())); - MLUCnnlTensorDesc mask_desc( - trans_mask, data_layout, ToCnnlDataType(trans_mask.dtype())); - MLUCnnlTensorDesc filter_desc( - trans_filter, data_layout, ToCnnlDataType(trans_filter.dtype())); - - MLUCnnl::DCNBackwardData(ctx, - dcn_desc.get(), - input_desc.get(), - GetBasePtr(&trans_input), - offset_desc.get(), - GetBasePtr(&trans_offset), - mask_desc.get(), - GetBasePtr(&trans_mask), - filter_desc.get(), - GetBasePtr(&trans_filter), - output_grad_desc.get(), - GetBasePtr(&trans_output_grad), - input_desc.get(), - GetBasePtr(&tmp_input_grad), - offset_desc.get(), - GetBasePtr(&tmp_offset_grad), - mask_desc.get(), - GetBasePtr(&tmp_mask_grad)); - - MLUCnnl::DCNBackwardWeight(ctx, - dcn_desc.get(), - input_desc.get(), - GetBasePtr(&trans_input), - offset_desc.get(), - GetBasePtr(&trans_offset), - mask_desc.get(), - GetBasePtr(&trans_mask), - output_grad_desc.get(), - GetBasePtr(&trans_output_grad), - filter_desc.get(), - GetBasePtr(&tmp_filter_grad), - nullptr, - nullptr); - - const std::vector perm_to_nchw = {0, 3, 1, 2}; - if (input_grad) { - input_grad->mutable_data(ctx.GetPlace()); - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &tmp_input_grad, - input_grad, - false /*need_reshape_or_alloc*/); - } - - if (filter_grad) { - filter_grad->mutable_data(ctx.GetPlace()); - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &tmp_filter_grad, - filter_grad, - false /*need_reshape_or_alloc*/); - } - - if (offset_grad) { - offset_grad->mutable_data(ctx.GetPlace()); - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &tmp_offset_grad, - offset_grad, - false /*need_reshape_or_alloc*/); - } - - if (mask_grad) { - mask_grad->mutable_data(ctx.GetPlace()); - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &tmp_mask_grad, - mask_grad, - false /*need_reshape_or_alloc*/); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(deformable_conv, ops::DeformableConvMLUKernel); -REGISTER_OP_MLU_KERNEL(deformable_conv_grad, - ops::DeformableConvGradMLUKernel); diff --git a/paddle/fluid/operators/dropout_op_mlu.cc b/paddle/fluid/operators/dropout_op_mlu.cc deleted file mode 100644 index 0d0686026da4b365dd9d02fa1df15cb808aaf177..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/dropout_op_mlu.cc +++ /dev/null @@ -1,207 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class DropoutMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto dropout_prob = ctx.Attr("dropout_prob"); - auto is_test = ctx.Attr("is_test"); - auto* seed_tensor = - ctx.HasInput("Seed") ? ctx.Input("Seed") : nullptr; - auto dropout_implementation = - ctx.Attr("dropout_implementation"); - - const bool is_upscale = (dropout_implementation == "upscale_in_train"); - - out->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc out_desc(*out); - - if (is_test && is_upscale) { - // dropout op for inference: out = input. - framework::TensorCopy( - *x, - ctx.GetPlace(), - ctx.template device_context(), - out); - return; - } else if (!is_test) { - // dropout op for training: out = input * mask / ( 1.0 - dropout_prob ) or - // out = input * mask. - int seed_data = 0; - if (seed_tensor) { - if (platform::is_mlu_place(seed_tensor->place())) { - memory::Copy(platform::CPUPlace(), - &seed_data, - seed_tensor->place(), - seed_tensor->data(), - sizeof(int)); - } else { - seed_data = *(seed_tensor->data()); - } - } else { - seed_data = ctx.Attr("fix_seed") ? ctx.Attr("seed") : 0; - } - - auto* mask = ctx.Output("Mask"); - mask->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc mask_desc(*mask); - // Special case when dropout_prob is 1.0 - if (dropout_prob == 1.0f) { - auto value_t = static_cast(0.0f); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &value_t, - out_desc.get(), - GetBasePtr(out)); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &value_t, - mask_desc.get(), - GetBasePtr(mask)); - return; - } - - // create mlu random generator - const int device_id = ctx.GetPlace().GetDeviceId(); - auto mlu_gen_random = GetMLURandomGenerator(ctx, device_id, seed_data); - - // compute out = input * mask / ( 1.0 - dropout_prob ) - MLUCnnl::FusedDropout(ctx, - mlu_gen_random->get(), - x_desc.get(), - GetBasePtr(x), - dropout_prob, - GetBasePtr(&(mlu_gen_random->get_state())), - mask_desc.get(), - GetBasePtr(mask), - out_desc.get(), - GetBasePtr(out)); - - if (is_upscale) { - return; - } - } - - // In downgrade_in_infer mode, need to multiply (1.0f - dropout_prob). - phi::DenseTensor scale_tensor(x->dtype()); - phi::DenseTensor bias_tensor(x->dtype()); - scale_tensor.mutable_data({1}, ctx.GetPlace()); - bias_tensor.mutable_data({1}, ctx.GetPlace()); - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnlTensorDesc bias_desc(bias_tensor); - FillMLUTensorWithHostValue( - ctx, static_cast(1.0f - dropout_prob), &scale_tensor); - FillMLUTensorWithHostValue(ctx, static_cast(0.0f), &bias_tensor); - - MLUCnnl::Scale(ctx, - 0, - is_test ? x_desc.get() : out_desc.get(), - is_test ? GetBasePtr(x) : GetBasePtr(out), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class DropoutGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(!ctx.Attr("is_test"), - true, - platform::errors::InvalidArgument( - "GradOp is only callable when is_test is false")); - auto* grad_x = ctx.Output(framework::GradVarName("X")); - auto* grad_out = ctx.Input(framework::GradVarName("Out")); - auto* mask = ctx.Input("Mask"); - auto dropout_prob = ctx.Attr("dropout_prob"); - auto dropout_impl = ctx.Attr("dropout_implementation"); - - grad_x->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc grad_x_desc(*grad_x); - - if (dropout_prob == 1.) { - auto value_t = static_cast(0.0f); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &value_t, - grad_x_desc.get(), - GetBasePtr(grad_x)); - return; - } - - // cast mask from uint8 to float32/float16 - phi::DenseTensor cast_mask(grad_x->dtype()); - cast_mask.Resize(mask->dims()); - cast_mask.mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc mask_desc(*mask); - MLUCnnlTensorDesc cast_mask_desc(cast_mask); - cnnlCastDataType_t cast_type = - GetCastDataType(framework::TransToProtoVarType(mask->dtype()), - framework::TransToProtoVarType(cast_mask.dtype())); - - MLUCnnl::Cast(ctx, - cast_type, - mask_desc.get(), - GetBasePtr(mask), - cast_mask_desc.get(), - GetBasePtr(&cast_mask)); - - const bool is_upscale = (dropout_impl == "upscale_in_train"); - const float scale = is_upscale ? (1.0f / (1.0f - dropout_prob)) : (1.0f); - - auto data_type = ToCnnlDataType(); - MLUCnnlTensorDesc grad_out_desc(*grad_out); - MLUCnnlOpTensorDesc op_tensor_desc( - CNNL_OP_TENSOR_MUL, data_type, CNNL_NOT_PROPAGATE_NAN); - MLUCnnl::OpTensor(ctx, - op_tensor_desc.get(), - cast_mask_desc.get(), - GetBasePtr(&cast_mask), - grad_out_desc.get(), - GetBasePtr(grad_out), - grad_x_desc.get(), - GetBasePtr(grad_x), - data_type, - scale); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(dropout, - ops::DropoutMLUKernel, - ops::DropoutMLUKernel); - -REGISTER_OP_MLU_KERNEL(dropout_grad, - ops::DropoutGradMLUKernel, - ops::DropoutGradMLUKernel); diff --git a/paddle/fluid/operators/expand_as_v2_op_mlu.cc b/paddle/fluid/operators/expand_as_v2_op_mlu.cc deleted file mode 100644 index 71b154ff02274973d8fccf5aafee1e64718ee9d2..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/expand_as_v2_op_mlu.cc +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/expand_as_v2_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ExpandAsV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); - auto target_shape = context.Attr>("target_shape"); - auto target_rank = target_shape.size(); - PADDLE_ENFORCE_GE(target_rank, - rank, - platform::errors::InvalidArgument( - "The rank (%d) of the input 'target_tensor' for " - "expand_as_v2 op must be greater than or equal to " - "the rank (%d) of the input 'x'.", - target_rank, - rank)); - PADDLE_ENFORCE_GE( - rank, - 1, - platform::errors::InvalidArgument("The rank (%d) of the input 'x' for " - "expand_as_v2 op must be positive.", - rank)); - PADDLE_ENFORCE_LE(target_rank, - MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( - "The rank (%d) of the input 'target_tensor' for " - "expand_as_v2 op must be less than or equal to %d.", - target_rank, - MAX_RANK_SUPPORTED)); - ExpandAs(context); - } - - protected: - void ExpandAs(const framework::ExecutionContext& context) const { - auto* in0 = context.Input("X"); - auto in_dims = in0->dims(); - auto target_shape = context.Attr>("target_shape"); - auto vec_in_dims = phi::vectorize(in_dims); - auto diff = target_shape.size() - vec_in_dims.size(); - vec_in_dims.insert(vec_in_dims.begin(), diff, 1); - - for (size_t i = 0; i < vec_in_dims.size(); ++i) { - PADDLE_ENFORCE_NE(target_shape[i], - 0, - platform::errors::InvalidArgument( - "The value of target shape cannot be zero.")); - if (vec_in_dims[i] != 1) { - PADDLE_ENFORCE_EQ( - vec_in_dims[i], - target_shape[i], - platform::errors::InvalidArgument( - "The value (%d) of the non-singleton dimension does not match" - " the corresponding value (%d) in " - "target tensor for expand_as_v2 op.", - vec_in_dims[i], - target_shape[i])); - } - } - auto* out0 = context.Output("Out"); - - framework::DDim out_dims = phi::make_ddim(target_shape); - - out0->Resize(out_dims); - out0->mutable_data(context.GetPlace()); - - MLUCnnlTensorDesc x_desc(*in0); - MLUCnnlTensorDesc out_desc(*out0); - - MLUCnnl::BroadcastTo(context, - x_desc.get(), - GetBasePtr(in0), - out_desc.get(), - GetBasePtr(out0)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(expand_as_v2, - ops::ExpandAsV2MLUKernel, - ops::ExpandAsV2MLUKernel, - ops::ExpandAsV2MLUKernel, - ops::ExpandAsV2MLUKernel, - ops::ExpandAsV2MLUKernel, - ops::ExpandAsV2MLUKernel, - ops::ExpandAsV2MLUKernel); diff --git a/paddle/fluid/operators/expand_v2_op_mlu.cc b/paddle/fluid/operators/expand_v2_op_mlu.cc deleted file mode 100644 index 4ae0b4192ab535528650e5c926e9ea9dbcf2fe13..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/expand_v2_op_mlu.cc +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_MLU - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/expand_v2_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ExpandV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Out = ctx.Output("Out"); - auto in_dims = X->dims(); - auto expand_shape = get_expand_shape(ctx); - auto vec_in_dims = phi::vectorize(in_dims); - auto diff = expand_shape.size() - vec_in_dims.size(); - vec_in_dims.insert(vec_in_dims.begin(), diff, 1); - std::vector final_expand_shape(vec_in_dims.size()); - for (size_t i = 0; i < vec_in_dims.size(); ++i) { - PADDLE_ENFORCE_NE(expand_shape[i], - 0, - platform::errors::InvalidArgument( - "The expanded size cannot be zero.")); - if (i < diff) { // expand_shape = [3,4,-1,-1], X = [10,2] --> - // final_expand_shape = [3,4,10,2] - PADDLE_ENFORCE_GT( - expand_shape[i], - 0, - platform::errors::InvalidArgument( - "The expanded size (%d) for non-existing dimensions must be " - "positive for expand_v2 op.", - expand_shape[i])); - final_expand_shape[i] = expand_shape[i]; - } else if (expand_shape[i] > 0) { // expand_shape = [3,4,10,4], X = - // [10,1] --> final_expand_shape = - // [3,4,10,4] - if (vec_in_dims[i] != 1) { - PADDLE_ENFORCE_EQ( - vec_in_dims[i], - expand_shape[i], - platform::errors::InvalidArgument( - "The value (%d) of the non-singleton dimension does not match" - " the corresponding value (%d) in shape for expand_v2 op.", - vec_in_dims[i], - expand_shape[i])); - final_expand_shape[i] = expand_shape[i]; - } else { - final_expand_shape[i] = expand_shape[i]; - } - } else { // expand_shape = [3,4,-1,-1], X = [10,2] --> final_expand_shape - // = [3,4,10,2] - PADDLE_ENFORCE_EQ( - expand_shape[i], - -1, - platform::errors::InvalidArgument( - "When the value in shape is negative for expand_v2 op, " - "only -1 is supported, but the value received is %d.", - expand_shape[i])); - final_expand_shape[i] = vec_in_dims[i]; - } - } - - auto rank = X->dims().size(); - PADDLE_ENFORCE_GE( - rank, - 1, - platform::errors::InvalidArgument( - "The rank of the input 'X' for expand_v2_mlu op must be positive, " - "but the value received is %d.", - rank)); - auto shape_size = final_expand_shape.size(); - PADDLE_ENFORCE_GE( - shape_size, - rank, - platform::errors::InvalidArgument( - "The number (%d) of elements of 'shape' for expand_v2_mlu op must " - "be " - "greater than or equal to the rank (%d) of the input 'X'.", - shape_size, - rank)); - - framework::DDim out_dims = phi::make_ddim(final_expand_shape); - Out->Resize(out_dims); - auto place = ctx.GetPlace(); - Out->mutable_data(place); - MLUCnnlTensorDesc x_desc(*X); - MLUCnnlTensorDesc out_desc(*Out); - MLUCnnl::BroadcastTo( - ctx, x_desc.get(), GetBasePtr(X), out_desc.get(), GetBasePtr(Out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(expand_v2, - ops::ExpandV2MLUKernel, - ops::ExpandV2MLUKernel, - ops::ExpandV2MLUKernel, - ops::ExpandV2MLUKernel, - ops::ExpandV2MLUKernel); - -#endif diff --git a/paddle/fluid/operators/fill_any_like_op_mlu.cc b/paddle/fluid/operators/fill_any_like_op_mlu.cc deleted file mode 100644 index 5ef52d7b07ec8b615996027118c410875cffea9f..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/fill_any_like_op_mlu.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class FillAnyLikeMLUKernel : public framework::OpKernel { - public: - using CommonType = typename std::common_type< - float, - typename std::conditional::value, - float, - T>::type>::type; - - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - float value = ctx.Attr("value"); - - auto common_type_value = static_cast(value); - - PADDLE_ENFORCE_EQ( - (common_type_value >= - static_cast(std::numeric_limits::lowest())) && - (common_type_value <= - static_cast(std::numeric_limits::max())), - true, - platform::errors::InvalidArgument( - "The filled value is out of range for target type, " - "current kernel type is %s, the range should between %f " - "and %f, but now value is %f.", - typeid(T).name(), - static_cast(std::numeric_limits::lowest()), - static_cast(std::numeric_limits::max()), - value)); - - PADDLE_ENFORCE_EQ( - std::isnan(value), - false, - platform::errors::InvalidArgument("The filled value is NaN.")); - - auto value_t = static_cast(value); - MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnl::Fill( - ctx, CNNL_POINTER_MODE_HOST, &value_t, out_desc.get(), GetBasePtr(out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(fill_any_like, - ops::FillAnyLikeMLUKernel, - ops::FillAnyLikeMLUKernel, - ops::FillAnyLikeMLUKernel, - ops::FillAnyLikeMLUKernel); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc deleted file mode 100644 index 34b760252bece7ac4f5e595bc4260948927e0eb4..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/utils.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { -template -class FillConstantBatchSizeLikeOpMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto data_type = - static_cast(ctx.Attr("dtype")); - auto float_value = ctx.Attr("value"); - auto str_value = ctx.Attr("str_value"); - auto force_cpu = ctx.Attr("force_cpu"); - - auto *out = ctx.Output("Out"); - auto *in = ctx.Input("Input"); - if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { - // set the correct batch size for the phi::DenseTensor. - auto odims = out->dims(); - int output_dim_idx = ctx.Attr("output_dim_idx"); - odims[output_dim_idx] = static_cast(in->lod().back().size()) - 1; - out->mutable_data(odims, ctx.GetPlace()); - } - - T value; - if (str_value.empty()) { - value = static_cast(float_value); - } else { - // handle NaN/Inf first, which cannot be read from stream. - if (str_value == "inf") { - value = static_cast(std::numeric_limits::infinity()); - } else if (str_value == "-inf") { - value = static_cast(-std::numeric_limits::infinity()); - } else if (str_value == "nan") { - value = static_cast(std::numeric_limits::quiet_NaN()); - } else { - std::stringstream convert_stream(str_value); - if (std::is_same::value) { - int64_t tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } else { - double tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } - } - } - - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace(); - if (cpu_place) { - auto &dev_ctx = *pool.Get(platform::CPUPlace()); - phi::funcs::SetConstant functor; - out->mutable_data(platform::CPUPlace(), - framework::TransToPhiDataType(data_type)); - functor(reinterpret_cast(dev_ctx), - out, - static_cast(value)); - } else { - out->mutable_data(ctx.GetPlace(), - framework::TransToPhiDataType(data_type)); - const T *value_data = &value; - cnnlPointerMode_t pointer_mode = CNNL_POINTER_MODE_HOST; - MLUCnnlTensorDesc output_desc(*out); - MLUCnnl::Fill( - ctx, pointer_mode, value_data, output_desc.get(), GetBasePtr(out)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL( - fill_constant_batch_size_like, - ops::FillConstantBatchSizeLikeOpMLUKernel, - ops::FillConstantBatchSizeLikeOpMLUKernel, - ops::FillConstantBatchSizeLikeOpMLUKernel); diff --git a/paddle/fluid/operators/fill_constant_op_mlu.cc b/paddle/fluid/operators/fill_constant_op_mlu.cc deleted file mode 100644 index 8263534f4eeeb76ff99c485cab40c983249b8631..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/fill_constant_op_mlu.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/utils.h" - -namespace paddle { -namespace operators { - -template -class FillConstantMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto str_value = ctx.Attr("str_value"); - auto float_value = ctx.Attr("value"); - - auto *out_var = ctx.Output("Out"); - - T value; - if (str_value.empty()) { - value = static_cast(float_value); - } else { - // handle NaN/Inf first, which cannot be read from stream. - if (str_value == "inf") { - value = static_cast(std::numeric_limits::infinity()); - } else if (str_value == "-inf") { - value = static_cast(-std::numeric_limits::infinity()); - } else if (str_value == "nan") { - value = static_cast(std::numeric_limits::quiet_NaN()); - } else { - std::stringstream convert_stream(str_value); - if (std::is_same::value) { - int64_t tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } else { - double tmp_value; - convert_stream >> tmp_value; - value = static_cast(tmp_value); - } - } - } - const T *value_data = &value; - cnnlPointerMode_t pointer_mode = CNNL_POINTER_MODE_HOST; - if (ctx.HasInput("ValueTensor")) { - auto *value_tensor = ctx.Input("ValueTensor"); - PADDLE_ENFORCE_EQ( - value_tensor->numel(), - 1, - platform::errors::InvalidArgument( - "When use phi::DenseTensor as value to set phi::DenseTensor " - "value in fill_cosntant, " - "value input(ValueTensor) size must be 1, but get %d", - value_tensor->numel())); - value_data = value_tensor->data(); - auto tmp_place = value_tensor->place(); - if (platform::is_mlu_place(tmp_place)) { - pointer_mode = CNNL_POINTER_MODE_DEVICE; - } - } - - auto shape = GetShape(ctx); - out_var->mutable_data(shape, ctx.GetPlace()); - MLUCnnlTensorDesc output_desc(*out_var); - MLUCnnl::Fill( - ctx, pointer_mode, value_data, output_desc.get(), GetBasePtr(out_var)); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL( - fill_constant, - paddle::operators::FillConstantMLUKernel, - paddle::operators::FillConstantMLUKernel, - paddle::operators::FillConstantMLUKernel, - paddle::operators::FillConstantMLUKernel, - paddle::operators::FillConstantMLUKernel, - paddle::operators::FillConstantMLUKernel, - paddle::operators::FillConstantMLUKernel); diff --git a/paddle/fluid/operators/flatten_op_mlu.cc b/paddle/fluid/operators/flatten_op_mlu.cc deleted file mode 100644 index ecfefd47a6aa5680e4cefaaf5341000a75c658c3..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/flatten_op_mlu.cc +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "paddle/fluid/operators/flatten_op.h" - -namespace paddle { -namespace operators { - -template -class FlattenMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); - auto *out = context.Output("Out"); - - auto &axes = context.Attr("axis"); - auto x_dims = in->dims(); - auto out_dims = phi::make_ddim(GetOutputShape(axes, x_dims)); - out->mutable_data(context.GetPlace(), in->type()); - framework::TensorCopy( - *in, - context.GetPlace(), - context.template device_context(), - out); - out->Resize(out_dims); - } - - static std::vector GetOutputShape(const int axis, - const framework::DDim &in_dims) { - int64_t outer = 1, inner = 1; - for (int i = 0; i < in_dims.size(); ++i) { - if (i < axis) { - outer *= in_dims[i]; - } else { - inner *= in_dims[i]; - } - } - std::vector out_shape(2); - out_shape[0] = outer; - out_shape[1] = inner; - return out_shape; - } -}; - -template -class FlattenGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_out = ctx.Input(framework::GradVarName("Out")); - auto in_dims = ctx.Input("X")->dims(); - - d_x->mutable_data(ctx.GetPlace(), d_out->type()); - framework::TensorCopy( - *d_out, - ctx.GetPlace(), - ctx.template device_context(), - d_x); - d_x->Resize(in_dims); - } -}; - -template -class Flatten2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto &axes = context.Attr("axis"); - - auto *in = context.Input("X"); - auto x_dims = in->dims(); - - auto *out = context.Output("Out"); - - auto out_dims = phi::make_ddim( - FlattenMLUKernel::GetOutputShape(axes, x_dims)); - - out->mutable_data(context.GetPlace(), in->type()); - framework::TensorCopy( - *in, - context.GetPlace(), - context.template device_context(), - out); - out->Resize(out_dims); - } -}; - -template -class Flatten2GradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_out = ctx.Input(framework::GradVarName("Out")); - - auto xshape_dims = ctx.Input("XShape")->dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); - - d_x->mutable_data(ctx.GetPlace(), d_out->type()); - framework::TensorCopy( - *d_out, - ctx.GetPlace(), - ctx.template device_context(), - d_x); - d_x->Resize(x_dims); - } -}; - -template -class FlattenContiguousRangeMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); - auto *out = context.Output("Out"); - out->mutable_data(context.GetPlace(), in->type()); - auto &start_axis = context.Attr("start_axis"); - auto &stop_axis = context.Attr("stop_axis"); - - // make out dims - auto in_dims = in->dims(); - auto out_dims = - phi::make_ddim(GetOutputShape(start_axis, stop_axis, in_dims)); - framework::TensorCopy( - *in, - context.GetPlace(), - context.template device_context(), - out); - out->Resize(out_dims); - } - static std::vector GetOutputShape(const int start_axis, - const int stop_axis, - const framework::DDim &in_dims) { - int64_t outer = 1; - std::vector out_shape; - int in_dims_size = in_dims.size(); - out_shape.reserve(in_dims_size - stop_axis + start_axis); - int real_start_axis = start_axis, real_stop_axis = stop_axis; - if (start_axis < 0) { - real_start_axis = start_axis + in_dims_size; - } - if (stop_axis < 0) { - real_stop_axis = stop_axis + in_dims_size; - } - - for (int i = 0; i < real_start_axis; ++i) { - out_shape.push_back(in_dims[i]); - } - for (int i = real_start_axis; i <= real_stop_axis; i++) { - if (in_dims[i] == -1 || outer == -1) { - outer = -1; - } else { - outer *= in_dims[i]; - } - } - out_shape.push_back(outer); - for (int i = real_stop_axis + 1; i < in_dims_size; i++) { - out_shape.push_back(in_dims[i]); - } - - return out_shape; - } -}; - -template -class FlattenContiguousRangeGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_out = ctx.Input(framework::GradVarName("Out")); - - auto xshape_dims = ctx.Input("XShape")->dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); - - d_x->mutable_data(ctx.GetPlace(), d_out->type()); - framework::TensorCopy( - *d_out, - ctx.GetPlace(), - ctx.template device_context(), - d_x); - d_x->Resize(x_dims); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL( - flatten, - ops::FlattenMLUKernel, - ops::FlattenMLUKernel, - ops::FlattenMLUKernel, - ops::FlattenMLUKernel, - ops::FlattenMLUKernel, - ops::FlattenMLUKernel); -REGISTER_OP_MLU_KERNEL( - flatten_grad, - ops::FlattenGradMLUKernel, - ops::FlattenGradMLUKernel, - ops::FlattenGradMLUKernel, - ops::FlattenGradMLUKernel, - ops::FlattenGradMLUKernel, - ops::FlattenGradMLUKernel); -REGISTER_OP_MLU_KERNEL( - flatten2, - ops::Flatten2MLUKernel, - ops::Flatten2MLUKernel, - ops::Flatten2MLUKernel, - ops::Flatten2MLUKernel, - ops::Flatten2MLUKernel, - ops::Flatten2MLUKernel); -REGISTER_OP_MLU_KERNEL( - flatten2_grad, - ops::Flatten2GradMLUKernel, - ops::Flatten2GradMLUKernel, - ops::Flatten2GradMLUKernel, - ops::Flatten2GradMLUKernel, - ops::Flatten2GradMLUKernel, - ops::Flatten2GradMLUKernel); -REGISTER_OP_MLU_KERNEL( - flatten_contiguous_range, - ops::FlattenContiguousRangeMLUKernel, - ops::FlattenContiguousRangeMLUKernel, - ops::FlattenContiguousRangeMLUKernel, - ops::FlattenContiguousRangeMLUKernel, - ops::FlattenContiguousRangeMLUKernel, - ops::FlattenContiguousRangeMLUKernel); -REGISTER_OP_MLU_KERNEL( - flatten_contiguous_range_grad, - ops::FlattenContiguousRangeGradMLUKernel, - ops::FlattenContiguousRangeGradMLUKernel, - ops::FlattenContiguousRangeGradMLUKernel, - ops::FlattenContiguousRangeGradMLUKernel, - ops::FlattenContiguousRangeGradMLUKernel, - ops::FlattenContiguousRangeGradMLUKernel); diff --git a/paddle/fluid/operators/gather_nd_op_mlu.cc b/paddle/fluid/operators/gather_nd_op_mlu.cc deleted file mode 100644 index 93b20c86af8609b83a0da08577635536b3173512..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/gather_nd_op_mlu.cc +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/platform/device_context.h" - -namespace paddle { -namespace operators { - -template -class GatherNdMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *index = ctx.Input("Index"); - auto *out = ctx.Output("Out"); - - auto place = ctx.GetPlace(); - out->template mutable_data(place); - - if (x->numel() == 0) return; - if (index->numel() == 0) { - auto &dev_ctx = ctx.template device_context(); - framework::TensorCopy(*x, place, dev_ctx, out); - return; - } - - const auto &index_type = framework::TransToProtoVarType(index->dtype()); - bool index_type_match = index_type == framework::proto::VarType::INT32 || - index_type == framework::proto::VarType::INT64; - PADDLE_ENFORCE_EQ(index_type_match, - true, - platform::errors::InvalidArgument( - "Index holds the wrong type, it holds [%s]," - "but desires to be [%s] or [%s]", - paddle::framework::DataTypeToString(index_type), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT32), - paddle::framework::DataTypeToString( - framework::proto::VarType::INT64))); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc index_desc(*index); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::GatherNd(ctx, - x_desc.get(), - GetBasePtr(x), - index_desc.get(), - GetBasePtr(index), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class GatherNdGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *index = ctx.Input("Index"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); - auto *x = ctx.Input("X"); - - if (dx->numel() == 0) return; - if (index->numel() == 0) { - auto &dev_ctx = ctx.template device_context(); - framework::TensorCopy(*dout, ctx.GetPlace(), dev_ctx, dx); - return; - } - - phi::DenseTensor tmp_tensor(index->type()); - phi::DenseTensor tmp_tensor2(dout->type()); - const auto index_dims = index->dims(); - if (index_dims.size() == 1) { - tmp_tensor.ShareDataWith(*index); - std::vector new_dim = {1, index_dims[0]}; - tmp_tensor.Resize(phi::make_ddim(new_dim)); - index = &tmp_tensor; - - tmp_tensor2.ShareDataWith(*dout); - std::vector new_dim2{1}; - for (int i = index->numel(); i < x->dims().size(); i++) { - new_dim2.push_back(x->dims()[i]); - } - tmp_tensor2.Resize(phi::make_ddim(new_dim2)); - dout = &tmp_tensor2; - } - - dx->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc dx_desc(*dx); - auto value = static_cast(0); - MLUCnnl::Fill( - ctx, CNNL_POINTER_MODE_HOST, &value, dx_desc.get(), GetBasePtr(dx)); - - MLUCnnlTensorDesc index_desc(*index); - MLUCnnlTensorDesc dout_desc(*dout); - - const cnnlScatterNdMode_t mode = CNNL_SCATTERND_ADD; - MLUCnnl::ScatterNd(ctx, - mode, - index_desc.get(), - GetBasePtr(index), - dout_desc.get(), - GetBasePtr(dout), - dx_desc.get(), - GetBasePtr(dx), - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(gather_nd, - ops::GatherNdMLUKernel, - ops::GatherNdMLUKernel); - -REGISTER_OP_MLU_KERNEL(gather_nd_grad, - ops::GatherNdGradMLUKernel, - ops::GatherNdGradMLUKernel); diff --git a/paddle/fluid/operators/gather_op_mlu.cc b/paddle/fluid/operators/gather_op_mlu.cc deleted file mode 100644 index 20a108c981d7efc13040e549abfb8ef3480204b6..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/gather_op_mlu.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class GatherOpMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *index = ctx.Input("Index"); - auto axis = ctx.Attr("axis"); - - const auto index_dims = index->dims(); - if (index_dims.size() == 2) { - PADDLE_ENFORCE_EQ( - index_dims[1], - 1, - platform::errors::InvalidArgument( - "The last dim of index should be 1 when it is 2D, but we get %d", - index_dims[1])); - } else { - PADDLE_ENFORCE_EQ( - index_dims.size(), - 1, - platform::errors::InvalidArgument( - "The index should be 1D, when it is not 2D, but we get %d", - index_dims.size())); - } - - auto *out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(*x); - int index_shape_1d[1] = {static_cast(index_dims[0])}; - MLUCnnlTensorDesc index_desc( - 1, index_shape_1d, ToCnnlDataType(index->dtype())); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::GatherFunctor(ctx, - axis, - 0 /*batch_dims*/, - x_desc.get(), - GetBasePtr(x), - index_desc.get(), - GetBasePtr(index), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class GatherGradOpMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *index = ctx.Input("Index"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); - - const auto index_dims = index->dims(); - if (index_dims.size() == 2) { - PADDLE_ENFORCE_EQ( - index_dims[1], - 1, - platform::errors::InvalidArgument( - "The last dim of index should be 1 when it is 2D, but we get %d", - index_dims[1])); - } else { - PADDLE_ENFORCE_EQ( - index_dims.size(), - 1, - platform::errors::InvalidArgument( - "The index should be 1D, when it is not 2D, but we get %d", - index_dims.size())); - } - - dx->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc dx_desc(*dx); - auto value = static_cast(0); - MLUCnnl::Fill( - ctx, CNNL_POINTER_MODE_HOST, &value, dx_desc.get(), GetBasePtr(dx)); - - int index_shape_1d[1] = {static_cast(index_dims[0])}; - MLUCnnlTensorDesc index_desc( - 1, index_shape_1d, ToCnnlDataType(index->dtype())); - MLUCnnlTensorDesc dout_desc(*dout); - const cnnlScatterRefMode_t mode = CNNL_SCATTERREF_UPDATE; - MLUCnnl::ScatterRefFunctor(ctx, - dx_desc.get(), - GetBasePtr(dx), - dout_desc.get(), - GetBasePtr(dout), - index_desc.get(), - GetBasePtr(index), - mode); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(gather, - ops::GatherOpMLUKernel, - ops::GatherOpMLUKernel, - ops::GatherOpMLUKernel); - -REGISTER_OP_MLU_KERNEL(gather_grad, - ops::GatherGradOpMLUKernel, - ops::GatherGradOpMLUKernel, - ops::GatherGradOpMLUKernel); diff --git a/paddle/fluid/operators/gaussian_random_op_mlu.cc b/paddle/fluid/operators/gaussian_random_op_mlu.cc deleted file mode 100644 index dad21a23661d93cc8af55b8fbba1a23622f5a57a..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/gaussian_random_op_mlu.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/generator.h" - -namespace paddle { -namespace operators { - -template -class MLUGaussianRandomKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - float mean = context.Attr("mean"); - float std = context.Attr("std"); - auto* tensor = context.Output("Out"); - tensor->mutable_data(context.GetPlace()); - - phi::DenseTensor cpu_tensor(tensor->type()); - cpu_tensor.Resize(tensor->dims()); - T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); - std::normal_distribution dist(mean, std); - - int64_t size = tensor->numel(); - - unsigned int seed = static_cast(context.Attr("seed")); - auto engine = phi::GetCPURandomEngine(seed); - for (int64_t i = 0; i < size; ++i) { - cpu_data[i] = dist(*engine); - } - auto& dev_ctx = - context.template device_context(); - framework::TensorCopy(cpu_tensor, context.GetPlace(), dev_ctx, tensor); - dev_ctx.Wait(); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(gaussian_random, ops::MLUGaussianRandomKernel); diff --git a/paddle/fluid/operators/grid_sampler_op_mlu.cc b/paddle/fluid/operators/grid_sampler_op_mlu.cc deleted file mode 100644 index 07aa025a9a26c2048e65d1dfc34091a216eb1c09..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/grid_sampler_op_mlu.cc +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class GridSamplerMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ( - platform::is_mlu_place(ctx.GetPlace()), - true, - platform::errors::Unavailable("This kernel only runs on MLU.")); - - // input and output data - const phi::DenseTensor* input = ctx.Input("X"); - const phi::DenseTensor* grid = ctx.Input("Grid"); - phi::DenseTensor* output = ctx.Output("Output"); - - int n = input->dims()[0]; - int c = input->dims()[1]; - int out_h = grid->dims()[1]; - int out_w = grid->dims()[2]; - - output->mutable_data({n, c, out_h, out_w}, ctx.GetPlace()); - - // attrs - // paddle.nn.functional.grid_sample(x, grid, mode='bilinear', - // padding_mode='zeros', align_corners=True, name=None) - const std::string mode = ctx.Attr("mode"); - const std::string padding_mode = ctx.Attr("padding_mode"); - bool align_corners = ctx.Attr("align_corners"); - const std::string data_format = phi::DataLayoutToString(input->layout()); - - PADDLE_ENFORCE_EQ( - mode == "bilinear", - true, - platform::errors::Unavailable( - "Only support bilinear mode in mlu grid_sample kernel.")); - PADDLE_ENFORCE_EQ( - padding_mode == "zeros", - true, - platform::errors::Unavailable( - "Only support zeros padding_mode in mlu grid_sample kernel.")); - - phi::DenseTensor trans_input(input->dtype()); - // transpose input from NCHW to NHWC - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - TransposeFromMLUTensor( - ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); - - phi::DenseTensor tmp_output(output->dtype()); - tmp_output.mutable_data({n, out_h, out_w, c}, ctx.GetPlace()); - - MLUCnnlGridSampleDesc grid_sample_desc(mode, padding_mode, align_corners); - MLUCnnlTensorDesc input_desc( - trans_input, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnlTensorDesc grid_desc(*grid, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnlTensorDesc tmp_output_desc( - tmp_output, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - - MLUCnnl::GridSample(ctx, - grid_sample_desc.get(), - input_desc.get(), - GetBasePtr(&trans_input), - grid_desc.get(), - GetBasePtr(grid), - tmp_output_desc.get(), - GetBasePtr(&tmp_output)); - - // transpose output from NHWC to NCHW - const std::vector perm_to_nchw = { - 0, - 3, - 1, - 2, - }; - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &tmp_output, - output, - false /*need_reshape_or_alloc*/); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(grid_sampler, - ops::GridSamplerMLUKernel, - ops::GridSamplerMLUKernel); diff --git a/paddle/fluid/operators/huber_loss_op_mlu.cc b/paddle/fluid/operators/huber_loss_op_mlu.cc deleted file mode 100644 index 4dc542b675f54c089321e0357b07c1eda4e9dc45..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/huber_loss_op_mlu.cc +++ /dev/null @@ -1,185 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class HuberLossMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* residual = ctx.Output("Residual"); - auto* out = ctx.Output("Out"); - auto delta = ctx.Attr("delta"); - - auto place = ctx.GetPlace(); - - // compute y-x - cnnlDataType_t data_type = ToCnnlDataType(); - residual->mutable_data(x->dims(), place); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlOpTensorDesc sub_op_desc( - CNNL_OP_TENSOR_SUB, data_type, CNNL_NOT_PROPAGATE_NAN); - MLUCnnl::OpTensor(ctx, - sub_op_desc.get(), - x_desc.get(), - GetBasePtr(y), - x_desc.get(), - GetBasePtr(x), - x_desc.get(), - GetBasePtr(residual), - data_type); - - // compute smoothl1loss - out->mutable_data(x->dims(), place); - cnnlSmoothL1LossAlgorithm_t smoothl1_algo = - CNNL_SMOOTHL1LOSS_REDUCTION_NONE; // defines whether to do reduction - // here - MLUCnnl::SmoothL1LossForward(ctx, - x_desc.get(), - GetBasePtr(x), - x_desc.get(), /* target has same shape as x */ - GetBasePtr(y), - static_cast(delta), - smoothl1_algo, - x_desc.get(), /* out has same shape as x */ - GetBasePtr(out)); - - // compute multiply by delta - phi::DenseTensor scale_tensor, bias_tensor; - scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); - bias_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); - FillMLUTensorWithHostValue(ctx, static_cast(delta), &scale_tensor); - FillMLUTensorWithHostValue(ctx, static_cast(0.f), &bias_tensor); - const int axis = std::max(out->dims().size() - 1, 0); - - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnlTensorDesc bias_desc(bias_tensor); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::Scale(ctx, - axis, - out_desc.get(), - GetBasePtr(out), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class HuberLossGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* residual = ctx.Input("Residual"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); - auto delta = ctx.Attr("delta"); - - auto place = ctx.GetPlace(); - - phi::DenseTensor t_grad_rd; - t_grad_rd = - ctx.AllocateTmpTensor(residual->dims(), dev_ctx); - MLUCnnlTensorDesc t_grad_rd_desc(t_grad_rd); - if (dx || dy) { - phi::DenseTensor t_zero; - t_zero = - ctx.AllocateTmpTensor(residual->dims(), dev_ctx); - FillMLUTensorWithHostValue(ctx, static_cast(0.f), &t_zero); - - MLUCnnlTensorDesc residual_desc(*residual); - MLUCnnlTensorDesc dout_desc(*dout); - - cnnlSmoothL1LossAlgorithm_t smoothl1_algo = - CNNL_SMOOTHL1LOSS_REDUCTION_NONE; // defines whether to do reduction - // here - MLUCnnl::SmoothL1LossBackward(ctx, - residual_desc.get(), - GetBasePtr(residual), - residual_desc.get(), - GetBasePtr(&t_zero), - dout_desc.get(), - GetBasePtr(dout), - static_cast(delta), - smoothl1_algo, - t_grad_rd_desc.get(), - GetBasePtr(&t_grad_rd)); - } - // compute multiply by delta - phi::DenseTensor scale_tensor, bias_tensor; - scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); - bias_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); - - FillMLUTensorWithHostValue(ctx, static_cast(0.f), &bias_tensor); - const int axis = std::max(t_grad_rd.dims().size() - 1, 0); - - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnlTensorDesc bias_desc(bias_tensor); - - if (dx) { - dx->mutable_data(place); - FillMLUTensorWithHostValue(ctx, static_cast(-delta), &scale_tensor); - MLUCnnlTensorDesc out_desc(*dx); - MLUCnnl::Scale(ctx, - axis, - t_grad_rd_desc.get(), - GetBasePtr(&t_grad_rd), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - out_desc.get(), - GetBasePtr(dx)); - } - if (dy) { - dy->mutable_data(place); - FillMLUTensorWithHostValue(ctx, static_cast(delta), &scale_tensor); - MLUCnnlTensorDesc out_desc(*dy); - MLUCnnl::Scale(ctx, - axis, - t_grad_rd_desc.get(), - GetBasePtr(&t_grad_rd), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - out_desc.get(), - GetBasePtr(dy)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(huber_loss, - ops::HuberLossMLUKernel, - ops::HuberLossMLUKernel); -REGISTER_OP_MLU_KERNEL(huber_loss_grad, - ops::HuberLossGradMLUKernel, - ops::HuberLossGradMLUKernel); diff --git a/paddle/fluid/operators/interpolate_v2_op_mlu.cc b/paddle/fluid/operators/interpolate_v2_op_mlu.cc deleted file mode 100644 index 7072f050681f10bd4acda32584755bbd1fa665bb..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/interpolate_v2_op_mlu.cc +++ /dev/null @@ -1,546 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/interpolate_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/core/tensor_utils.h" - -namespace paddle { -namespace operators { - -using DataLayout = phi::DataLayout; - -inline std::vector get_new_shape_mlu( - const std::vector& list_new_shape_tensor) { - // get tensor from - std::vector vec_new_shape; - for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { - auto tensor = list_new_shape_tensor[i]; - PADDLE_ENFORCE_EQ( - tensor->dims(), - phi::make_ddim({1}), - platform::errors::InvalidArgument("shape of dim tensor should be [1]")); - phi::DenseTensor temp; - paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); - vec_new_shape.push_back(static_cast(*temp.data())); - } - - return vec_new_shape; -} - -template -class InterpolateV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = ctx.template device_context(); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - - auto input_dims = input->dims(); - PADDLE_ENFORCE_GE( - input_dims.size(), - 4, - platform::errors::External("MLU Interpolate kernel supports input " - "range greater or equal than 4.")); - PADDLE_ENFORCE_LE( - input_dims.size(), - 5, - platform::errors::External("MLU Interpolate kernel supports input " - "range less or equal than 5. ")); - - const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); - int n, c, in_d, in_h, in_w; - ExtractNCDWH(input_dims, data_layout, &n, &c, &in_d, &in_h, &in_w); - - auto interp_method = ctx.Attr("interp_method"); - bool align_corners = ctx.Attr("align_corners"); - int align_mode = ctx.Attr("align_mode"); - int align_center = align_corners ? 0 : (align_mode == 1 ? 0 : 1); - - int out_d = ctx.Attr("out_d"); - int out_h = ctx.Attr("out_h"); - int out_w = ctx.Attr("out_w"); - float scale_d = -1; - float scale_h = -1; - float scale_w = -1; - - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); - if (list_new_size_tensor.size() > 0) { - // have size tensor - auto new_size = get_new_shape_mlu(list_new_size_tensor); - if (new_size.size() <= 2) { - // default NCHW - out_h = new_size[0]; - out_w = new_size[1]; - } else { - // rank of input is 5, HCDHW - out_d = new_size[0]; - out_h = new_size[1]; - out_w = new_size[2]; - } - } else { - auto scale_tensor = ctx.Input("Scale"); - auto scale = ctx.Attr>("scale"); - if (scale_tensor != nullptr) { - std::vector scale_data; - scale_data = phi::GetVectorFromTensor(scale_tensor); - - if (scale_data.size() > 1 && scale_data.size() <= 2) { - scale_h = scale_data[0]; - scale_w = scale_data[1]; - } else if (scale_data.size() > 2) { - scale_d = scale_data[0]; - scale_h = scale_data[1]; - scale_w = scale_data[2]; - } else { - scale_d = scale_data[0]; - scale_h = scale_data[0]; - scale_w = scale_data[0]; - } - PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, - true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); - } else { - if (scale.size() > 1 && scale.size() <= 2) { - scale_h = scale[0]; - scale_w = scale[1]; - - PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, - true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); - } else if (scale.size() > 2) { - scale_d = scale[0]; - scale_h = scale[1]; - scale_w = scale[2]; - PADDLE_ENFORCE_EQ( - scale_d > 0 && scale_w > 0 && scale_h > 0, - true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); - } - } - if (scale_h > 0. && scale_w > 0.) { - out_h = static_cast(in_h * scale_h); - out_w = static_cast(in_w * scale_w); - } - - if (scale_d > 0.) { - out_d = static_cast(in_d * scale_d); - } - auto out_size = ctx.Input("OutSize"); - if (out_size != nullptr) { - std::vector out_size_data; - out_size_data = phi::GetVectorFromTensor(out_size); - if (out_size_data.size() <= 2) { - out_h = out_size_data[0]; - out_w = out_size_data[1]; - } else { - out_d = out_size_data[0]; - out_h = out_size_data[1]; - out_w = out_size_data[2]; - } - } - } - PADDLE_ENFORCE_GT( - out_h, - 0, - platform::errors::InvalidArgument("out_h in Attr(out_shape) of " - "Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT( - out_w, - 0, - platform::errors::InvalidArgument("out_w in Attr(out_shape) of " - "Op(interpolate) " - "should be greater than 0.")); - - // do transpose according to cnnl's constraints - // cnnlInterp_v2 only accepts NHWC when mode is CNNL_INTERP_BILINEAR and - // CNNL_INTERP_NEAREST, - framework::DDim dim_in, dim_in_trans, dim_out, dim_out_trans; - phi::DenseTensor transformed_input, transformed_output; - bool need_transpose = input_dims.size() != 2; - if (input_dims.size() == 4) { - // need to do transpose if layout is kNCHW - need_transpose &= data_layout == DataLayout::kNCHW; - if (need_transpose) { - // if need_transpose, do the following - // 1. transpose input NCHW -> NHWC - // 2. interpolation in(NHWC) -> out(NHWC) - // 3. transpose output NHWC -> HCHW - // dim_in = {n, c, in_h, in_w}; - dim_in_trans = {n, in_h, in_w, c}; - dim_out = {n, c, out_h, out_w}; - dim_out_trans = {n, out_h, out_w, c}; - output->mutable_data(dim_out, ctx.GetPlace()); - - if (in_h == out_h && in_w == out_w) { - framework::TensorCopy(*input, ctx.GetPlace(), output); - return; - } - // do transpose on input tensor, then do interpolation - MLUCnnlTensorDesc input_desc( - *input, CNNL_LAYOUT_NCHW, ToCnnlDataType(input->dtype())); - - transformed_input = - ctx.AllocateTmpTensor(dim_in_trans, dev_ctx); - transformed_output = - ctx.AllocateTmpTensor(dim_out_trans, dev_ctx); - - MLUCnnlTensorDesc input_reshaped_desc( - transformed_input, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(transformed_input.dtype())); - const std::vector perm = {0, 2, 3, 1}; - MLUCnnl::Transpose(ctx, - perm, - input_dims.size(), - input_desc.get(), - GetBasePtr(input), - input_reshaped_desc.get(), - GetBasePtr(&transformed_input)); - } else { - // if no need_transpose, do the following - // 1. interpolation in(NHWC) -> out(NHWC) - // dim_in = {n, in_h, in_w, c}; - dim_out = {n, out_h, out_w, c}; - output->mutable_data(dim_out, ctx.GetPlace()); - - if (in_h == out_h && in_w == out_w) { - framework::TensorCopy(*input, ctx.GetPlace(), output); - return; - } - transformed_input = *input; - transformed_output = *output; - } - - MLUCnnlTensorDesc input_desc(transformed_input, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(transformed_input.dtype())); - MLUCnnlTensorDesc output_desc(transformed_output, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(transformed_output.dtype())); - MLUCnnl::Interp(ctx, - GetMLUCnnlInterpMode(interp_method), - align_corners, - align_center, - input_desc.get(), - GetBasePtr(&transformed_input), - output_desc.get(), - GetBasePtr(&transformed_output)); - - if (need_transpose) { - // if need_transpose, reshape output back to NCHW - const std::vector perm = {0, 3, 1, 2}; - MLUCnnlTensorDesc output_reshape_desc( - *output, CNNL_LAYOUT_NCHW, ToCnnlDataType(output->dtype())); - MLUCnnl::Transpose(ctx, - perm, - dim_out_trans.size(), - output_desc.get(), - GetBasePtr(&transformed_output), - output_reshape_desc.get(), - GetBasePtr(output)); - } - } else { - PADDLE_ENFORCE_EQ( - interp_method, - "trilinear", - platform::errors::External("MLU Interpolate kernel only supports 5D " - "data in trilinear mode.")); - - // need to do transpose if layout is kNCDHW - need_transpose &= data_layout == DataLayout::kNCHW; - if (need_transpose) { - // if need_transpose, do the following - // 1. transpose input NCDHW -> NDHWC - // 2. interpolation in(NDHWC) -> out(NDHWC) - // 3. transpose output NDHWC -> HCDHW - // dim_in = {n, c, in_d, in_h, in_w}; - dim_in_trans = {n, in_d, in_h, in_w, c}; - dim_out = {n, c, out_d, out_h, out_w}; - dim_out_trans = {n, out_d, out_h, out_w, c}; - output->mutable_data(dim_out, ctx.GetPlace()); - - if (in_h == out_h && in_w == out_w && in_d == out_d) { - framework::TensorCopy(*input, ctx.GetPlace(), output); - return; - } - // do transpose on input tensor (HCDHW -> NDHWC), then do interpolation - MLUCnnlTensorDesc input_desc( - *input, CNNL_LAYOUT_NCDHW, ToCnnlDataType(input->dtype())); - - transformed_input = - ctx.AllocateTmpTensor(dim_in_trans, dev_ctx); - transformed_output = - ctx.AllocateTmpTensor(dim_out_trans, dev_ctx); - - MLUCnnlTensorDesc input_reshaped_desc( - transformed_input, - CNNL_LAYOUT_NDHWC, - ToCnnlDataType(transformed_input.dtype())); - const std::vector perm = {0, 2, 3, 4, 1}; - MLUCnnl::Transpose(ctx, - perm, - input_dims.size(), - input_desc.get(), - GetBasePtr(input), - input_reshaped_desc.get(), - GetBasePtr(&transformed_input)); - } else { - // if no need_transpose, do the following - // 1. interpolation in(NDHWC) -> out(NDHWC) - // dim_in = {n, in_d, in_h, in_w, c}; - dim_out = {n, out_d, out_h, out_w, c}; - output->mutable_data(dim_out, ctx.GetPlace()); - - if (in_h == out_h && in_w == out_w && in_d == out_d) { - framework::TensorCopy(*input, ctx.GetPlace(), output); - return; - } - transformed_input = *input; - transformed_output = *output; - } - - MLUCnnlTensorDesc input_desc(transformed_input, - CNNL_LAYOUT_NDHWC, - ToCnnlDataType(transformed_input.dtype())); - MLUCnnlTensorDesc output_desc(transformed_output, - CNNL_LAYOUT_NDHWC, - ToCnnlDataType(transformed_output.dtype())); - // use trilinear mode in HCDHW layout - MLUCnnl::Interp(ctx, - GetMLUCnnlInterpMode(interp_method), - align_corners, - align_center, - input_desc.get(), - GetBasePtr(&transformed_input), - output_desc.get(), - GetBasePtr(&transformed_output)); - - if (need_transpose) { - // if need_transpose, reshape output back (NDHWC -> NCDHW) - const std::vector perm = {0, 4, 1, 2, 3}; - MLUCnnlTensorDesc output_reshape_desc( - *output, CNNL_LAYOUT_NCDHW, ToCnnlDataType(output->dtype())); - MLUCnnl::Transpose(ctx, - perm, - dim_out_trans.size(), - output_desc.get(), - GetBasePtr(&transformed_output), - output_reshape_desc.get(), - GetBasePtr(output)); - } - } - } -}; - -template -class InterpolateV2GradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = ctx.template device_context(); - auto* input_grad = - ctx.Output(framework::GradVarName("X")); - auto* output_grad = - ctx.Input(framework::GradVarName("Out")); - - auto output_grad_dims = output_grad->dims(); - - PADDLE_ENFORCE_EQ(output_grad_dims.size(), - 4, - platform::errors::External( - "XPU Interpolategrad kernel only support 2d")); - - auto* input = ctx.Input("X"); - auto input_dims = input->dims(); - const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); - int n, c, in_d, in_h, in_w; - ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); - - auto interp_method = ctx.Attr("interp_method"); - bool align_corners = ctx.Attr("align_corners"); - int align_mode = ctx.Attr("align_mode"); - int align_center = align_corners ? 0 : (align_mode == 0 ? 0 : 1); - align_center = 0; - - int out_h = ctx.Attr("out_h"); - int out_w = ctx.Attr("out_w"); - float scale_h = -1; - float scale_w = -1; - - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); - if (list_new_size_tensor.size() > 0) { - // have size tensor - auto new_size = get_new_shape_mlu(list_new_size_tensor); - out_h = new_size[0]; - out_w = new_size[1]; - } else { - auto scale_tensor = ctx.Input("Scale"); - auto scale = ctx.Attr>("scale"); - if (scale_tensor != nullptr) { - std::vector scale_data; - scale_data = phi::GetVectorFromTensor(scale_tensor); - if (scale_data.size() > 1) { - scale_h = scale_data[0]; - scale_w = scale_data[1]; - } else { - scale_h = scale_data[0]; - scale_w = scale_data[0]; - } - PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, - true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); - } else { - if (scale.size() > 1) { - scale_h = scale[0]; - scale_w = scale[1]; - - PADDLE_ENFORCE_EQ( - scale_w > 0 && scale_h > 0, - true, - platform::errors::InvalidArgument("scale of Op(interpolate) " - "should be greater than 0.")); - } - } - if (scale_h > 0. && scale_w > 0.) { - out_h = static_cast(in_h * scale_h); - out_w = static_cast(in_w * scale_w); - } - auto out_size = ctx.Input("OutSize"); - if (out_size != nullptr) { - std::vector out_size_data; - out_size_data = phi::GetVectorFromTensor(out_size); - out_h = out_size_data[0]; - out_w = out_size_data[1]; - } - } - - framework::DDim dim_grad; - framework::DDim dim_out_grad, dim_out_trans_grad, dim_in_grad, - dim_in_trans_grad; - phi::DenseTensor transformed_output_grad, transformed_input_grad; - bool need_transpose = - input_dims.size() != 2 && data_layout == DataLayout::kNCHW; - - if (need_transpose) { - // if need_transpose, do the following - // 1. transpose output_grad NCHW -> NHWC - // 2. InterpBackward output_grad(NHWC) -> input_grad(NHWC) - // 3. transpose input_grad NHWC -> HCHW - // dim_out_grad = {n, c, out_h, out_w}; - dim_out_trans_grad = {n, out_h, out_w, c}; - dim_in_grad = {n, c, in_h, in_w}; - dim_in_trans_grad = {n, in_h, in_w, c}; - input_grad->mutable_data(dim_in_grad, ctx.GetPlace()); - - if (in_h == out_h && in_w == out_w) { - framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad); - return; - } - // do transpose on input tensor, then do interpolation - MLUCnnlTensorDesc input_desc( - *output_grad, CNNL_LAYOUT_NCHW, ToCnnlDataType(output_grad->dtype())); - - transformed_output_grad = ctx.AllocateTmpTensor( - dim_out_trans_grad, dev_ctx); - transformed_input_grad = ctx.AllocateTmpTensor( - dim_in_trans_grad, dev_ctx); - - MLUCnnlTensorDesc input_reshaped_desc( - transformed_output_grad, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(transformed_output_grad.dtype())); - const std::vector perm = {0, 2, 3, 1}; - MLUCnnl::Transpose(ctx, - perm, - input_dims.size(), - input_desc.get(), - GetBasePtr(output_grad), - input_reshaped_desc.get(), - GetBasePtr(&transformed_output_grad)); - } else { - // if no need_transpose, do the following - // 1. InterpBackward output_grad(NHWC) -> input_grad(NHWC) - dim_in_grad = {n, in_h, in_w, c}; - input_grad->mutable_data(dim_in_grad, ctx.GetPlace()); - - if (in_h == out_h && in_w == out_w) { - framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad); - return; - } - transformed_output_grad = *output_grad; - transformed_input_grad = *input_grad; - } - - MLUCnnlTensorDesc input_desc( - transformed_output_grad, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(transformed_output_grad.dtype())); - MLUCnnlTensorDesc output_desc( - transformed_input_grad, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(transformed_input_grad.dtype())); - MLUCnnl::InterpBackward(ctx, - GetMLUCnnlInterpBackwardMode(interp_method), - align_corners, - align_center, - input_desc.get(), - GetBasePtr(&transformed_output_grad), - output_desc.get(), - GetBasePtr(&transformed_input_grad)); - - if (need_transpose) { - const std::vector perm = {0, 3, 1, 2}; - MLUCnnlTensorDesc output_reshape_desc( - *input_grad, CNNL_LAYOUT_NCHW, ToCnnlDataType(input_grad->dtype())); - MLUCnnl::Transpose(ctx, - perm, - dim_in_trans_grad.size(), - output_desc.get(), - GetBasePtr(&transformed_input_grad), - output_reshape_desc.get(), - GetBasePtr(input_grad)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(bilinear_interp_v2, - ops::InterpolateV2MLUKernel, - ops::InterpolateV2MLUKernel); -REGISTER_OP_MLU_KERNEL(nearest_interp_v2, - ops::InterpolateV2MLUKernel, - ops::InterpolateV2MLUKernel); - -REGISTER_OP_MLU_KERNEL(nearest_interp_v2_grad, - ops::InterpolateV2GradMLUKernel, - ops::InterpolateV2GradMLUKernel); -REGISTER_OP_MLU_KERNEL(bilinear_interp_v2_grad, - ops::InterpolateV2GradMLUKernel, - ops::InterpolateV2GradMLUKernel); diff --git a/paddle/fluid/operators/label_smooth_op_mlu.cc b/paddle/fluid/operators/label_smooth_op_mlu.cc deleted file mode 100644 index 96f629e14df5c3157da61aa6fc05026ce7809c99..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/label_smooth_op_mlu.cc +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class LabelSmoothMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in_t = ctx.Input("X"); - auto* dist_t = ctx.Input("PriorDist"); - auto* out_t = ctx.Output("Out"); - auto epsilon = ctx.Attr("epsilon"); - auto epsilon_gt = 1.0f - epsilon; - - if (in_t->numel() == 0) return; - out_t->mutable_data(ctx.GetPlace()); - auto label_dim = in_t->dims()[in_t->dims().size() - 1]; - - MLUCnnlTensorDesc x_desc(*in_t); - MLUCnnlTensorDesc out_desc(*out_t); - auto data_type = ToCnnlDataType(); - MLUCnnlOpTensorDesc op_tensor_desc( - CNNL_OP_TENSOR_ADD, data_type, CNNL_NOT_PROPAGATE_NAN); - if (ctx.HasInput("PriorDist")) { - MLUCnnlTensorDesc dist_desc(*dist_t); - MLUCnnl::OpTensor(ctx, - op_tensor_desc.get(), - x_desc.get(), - GetBasePtr(in_t), - dist_desc.get(), - GetBasePtr(dist_t), - out_desc.get(), - GetBasePtr(out_t), - data_type, - epsilon_gt, - epsilon); - } else { - auto& dev_ctx = ctx.template device_context(); - phi::DenseTensor dist_tensor = - ctx.AllocateTmpTensor({1, label_dim}, dev_ctx); - MLUCnnlTensorDesc dist_desc(dist_tensor); - auto value = static_cast(1.0f / label_dim); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &value, - dist_desc.get(), - GetBasePtr(&dist_tensor)); - MLUCnnl::OpTensor(ctx, - op_tensor_desc.get(), - x_desc.get(), - GetBasePtr(in_t), - dist_desc.get(), - GetBasePtr(&dist_tensor), - out_desc.get(), - GetBasePtr(out_t), - data_type, - epsilon_gt, - epsilon); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(label_smooth, - ops::LabelSmoothMLUKernel, - ops::LabelSmoothMLUKernel); diff --git a/paddle/fluid/operators/layer_norm_op_mlu.cc b/paddle/fluid/operators/layer_norm_op_mlu.cc deleted file mode 100644 index deb7bb5045eba26b4ddeb1552b9314d84f7cb539..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/layer_norm_op_mlu.cc +++ /dev/null @@ -1,280 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/amp/fp16_type_traits.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -using DDim = framework::DDim; - -template -class LayerNormMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); - const auto epsilon = ctx.Attr("epsilon"); - const auto* x = ctx.Input("X"); - const auto* scale = ctx.Input("Scale"); - const auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Y"); - auto* mean = ctx.Output("Mean"); - auto* variance = ctx.Output("Variance"); - - auto place = ctx.GetPlace(); - - y->mutable_data(place); - mean->mutable_data(place); - variance->mutable_data(place); - - const auto& x_dims = x->dims(); - std::vector scale_bias_axes; - std::vector mean_var_axes; - for (auto i = 0; i < x_dims.size(); ++i) { - if (i >= begin_norm_axis) { - scale_bias_axes.push_back(x_dims[i]); - } else { - mean_var_axes.push_back(x_dims[i]); - } - } - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc y_desc(*y); - MLUCnnlTensorDesc mean_var_desc( - mean_var_axes.size(), mean_var_axes.data(), ToCnnlDataType()); - // cnnl only support both of scale and bias is NULL or not. - if (!scale && !bias) { - MLUCnnl::LayerNormForward(ctx, - begin_norm_axis, - x_desc.get(), - GetBasePtr(x), - nullptr /*scale_bias_desc*/, - nullptr /*scale*/, - nullptr /*bias*/, - epsilon, - y_desc.get(), - GetBasePtr(y), - mean_var_desc.get(), - GetBasePtr(mean), - GetBasePtr(variance)); - } else { - phi::DenseTensor tmp_scale(x->dtype()); - if (!scale) { - tmp_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); - FillMLUTensorWithHostValue(ctx, static_cast(1), &tmp_scale); - } else { - tmp_scale = *scale; - } - - phi::DenseTensor tmp_bias(x->dtype()); - if (!bias) { - tmp_bias.mutable_data(phi::make_ddim(scale_bias_axes), place); - FillMLUTensorWithHostValue(ctx, static_cast(0), &tmp_bias); - } else { - tmp_bias = *bias; - } - - // scale and bias should have same type with x/y - MLUCnnlTensorDesc float32_desc( - scale_bias_axes.size(), scale_bias_axes.data(), CNNL_DTYPE_FLOAT); - MLUCnnlTensorDesc float16_desc( - scale_bias_axes.size(), scale_bias_axes.data(), CNNL_DTYPE_HALF); - cnnlCastDataType_t cast_type = GetCastDataType(VT::FP32, VT::FP16); - - phi::DenseTensor final_scale(x->dtype()); - if (final_scale.dtype() == DataType::FLOAT16 && - tmp_scale.dtype() == DataType::FLOAT32) { - final_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); - // cast scale to fp16 - MLUCnnl::Cast(ctx, - cast_type, - float32_desc.get(), - GetBasePtr(&tmp_scale), - float16_desc.get(), - GetBasePtr(&final_scale)); - } else { - final_scale = tmp_scale; - } - - phi::DenseTensor final_bias(x->dtype()); - if (final_bias.dtype() == DataType::FLOAT16 && - tmp_bias.dtype() == DataType::FLOAT32) { - final_bias.mutable_data(phi::make_ddim(scale_bias_axes), place); - // cast bias to fp16 - MLUCnnl::Cast(ctx, - cast_type, - float32_desc.get(), - GetBasePtr(&tmp_bias), - float16_desc.get(), - GetBasePtr(&final_bias)); - } else { - final_bias = tmp_bias; - } - - MLUCnnlTensorDesc scale_bias_desc( - scale_bias_axes.size(), scale_bias_axes.data(), ToCnnlDataType()); - MLUCnnl::LayerNormForward(ctx, - begin_norm_axis, - x_desc.get(), - GetBasePtr(x), - scale_bias_desc.get(), - GetBasePtr(&final_scale), - GetBasePtr(&final_bias), - epsilon, - y_desc.get(), - GetBasePtr(y), - mean_var_desc.get(), - GetBasePtr(mean), - GetBasePtr(variance)); - } - } -}; - -template -class LayerNormGradMLUKernel : public framework::OpKernel { - using MPDType = typename details::MPTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); - const auto* x = ctx.Input("X"); - const auto* mean = ctx.Input("Mean"); - const auto* variance = ctx.Input("Variance"); - const auto* scale = ctx.Input("Scale"); - const auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dscale = - ctx.Output(framework::GradVarName("Scale")); - auto* dbias = ctx.Output(framework::GradVarName("Bias")); - - auto place = ctx.GetPlace(); - dx->mutable_data(place); - - const auto& x_dims = x->dims(); - std::vector scale_bias_axes; - std::vector mean_var_axes; - for (auto i = 0; i < x_dims.size(); ++i) { - if (i >= begin_norm_axis) { - scale_bias_axes.push_back(x_dims[i]); - } else { - mean_var_axes.push_back(x_dims[i]); - } - } - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc dy_desc(*dy); - MLUCnnlTensorDesc mean_var_desc( - mean_var_axes.size(), mean_var_axes.data(), ToCnnlDataType()); - MLUCnnlTensorDesc dx_desc(*dx); - - phi::DenseTensor tmp_scale(x->dtype()); - if (!scale) { - tmp_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); - FillMLUTensorWithHostValue(ctx, static_cast(1), &tmp_scale); - } else { - tmp_scale = *scale; - } - - MLUCnnlTensorDesc float32_desc( - scale_bias_axes.size(), scale_bias_axes.data(), CNNL_DTYPE_FLOAT); - MLUCnnlTensorDesc float16_desc( - scale_bias_axes.size(), scale_bias_axes.data(), CNNL_DTYPE_HALF); - cnnlCastDataType_t cast_fp32_to_fp16 = GetCastDataType(VT::FP32, VT::FP16); - cnnlCastDataType_t cast_fp16_to_fp32 = GetCastDataType(VT::FP16, VT::FP32); - - phi::DenseTensor final_scale(x->dtype()); - if (final_scale.dtype() == DataType::FLOAT16 && - tmp_scale.dtype() == DataType::FLOAT32) { - final_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); - // cast scale to fp16 - MLUCnnl::Cast(ctx, - cast_fp32_to_fp16, - float32_desc.get(), - GetBasePtr(&tmp_scale), - float16_desc.get(), - GetBasePtr(&final_scale)); - } else { - final_scale = tmp_scale; - } - - phi::DenseTensor tmp_dscale(x->dtype()); - if (dscale && (tmp_dscale.dtype() == dscale->dtype())) { - dscale->mutable_data(place); - tmp_dscale = *dscale; - } else { - tmp_dscale.mutable_data(phi::make_ddim(scale_bias_axes), place); - } - phi::DenseTensor tmp_dbias(x->dtype()); - if (dbias && (tmp_dbias.dtype() == dbias->dtype())) { - dbias->mutable_data(place); - tmp_dbias = *dbias; - } else { - tmp_dbias.mutable_data(phi::make_ddim(scale_bias_axes), place); - } - - MLUCnnlTensorDesc scale_desc( - scale_bias_axes.size(), scale_bias_axes.data(), ToCnnlDataType()); - MLUCnnl::LayerNormBackward(ctx, - begin_norm_axis, - x_desc.get(), - GetBasePtr(x), - dy_desc.get(), - GetBasePtr(dy), - scale_desc.get(), - GetBasePtr(&final_scale), - mean_var_desc.get(), - GetBasePtr(mean), - GetBasePtr(variance), - dx_desc.get(), - GetBasePtr(dx), - GetBasePtr(&tmp_dscale), - GetBasePtr(&tmp_dbias)); - - if (dscale && (tmp_dscale.dtype() == DataType::FLOAT16 && - dscale->dtype() == DataType::FLOAT32)) { - dscale->mutable_data(place); - MLUCnnl::Cast(ctx, - cast_fp16_to_fp32, - float16_desc.get(), - GetBasePtr(&tmp_dscale), - float32_desc.get(), - GetBasePtr(dscale)); - } - if (dbias && (tmp_dbias.dtype() == DataType::FLOAT16 && - dbias->dtype() == DataType::FLOAT32)) { - dbias->mutable_data(place); - MLUCnnl::Cast(ctx, - cast_fp16_to_fp32, - float16_desc.get(), - GetBasePtr(&tmp_dbias), - float32_desc.get(), - GetBasePtr(dbias)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(layer_norm, - ops::LayerNormMLUKernel, - ops::LayerNormMLUKernel); -REGISTER_OP_MLU_KERNEL(layer_norm_grad, - ops::LayerNormGradMLUKernel, - ops::LayerNormGradMLUKernel); diff --git a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc deleted file mode 100644 index c407d91e6b80d722e2ed224eb608a2acdc341bce..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc +++ /dev/null @@ -1,129 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class LookupTableV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *ids_t = ctx.Input("Ids"); // int tensor - auto *output_t = ctx.Output("Out"); // float tensor - auto *table_t = ctx.Input("W"); - int padding_idx = static_cast(ctx.Attr("padding_idx")); - - auto *table_var = ctx.InputVar("W"); - PADDLE_ENFORCE_EQ( - table_var->IsType(), - true, - platform::errors::InvalidArgument("mlu only accept phi::DenseTensor")); - output_t->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc ids_desc(*ids_t); - MLUCnnlTensorDesc table_desc(*table_t); - MLUCnnlTensorDesc output_desc(*output_t); - - MLUCnnl::EmbeddingForward(ctx, - padding_idx, - table_desc.get(), - GetBasePtr(table_t), - ids_desc.get(), - static_cast(GetBasePtr(ids_t)), - output_desc.get(), - GetBasePtr(output_t)); - } -}; - -template -class LookupTableV2GradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *table_var = ctx.InputVar("W"); - PADDLE_ENFORCE_EQ( - table_var->IsType(), - true, - platform::errors::PermissionDenied( - "Unsupported Variable Type , idx in " - "LookupTableV2GradMLUKernel should be phi::DenseTensor.")); - bool is_sparse = ctx.Attr("is_sparse"); - PADDLE_ENFORCE_EQ( - is_sparse, - false, - platform::errors::InvalidArgument( - "LookupTableV2GradMLUKernel dose NOT support is_sparse = True.")); - auto *ids_t = ctx.Input("Ids"); - auto *output_grad_t = - ctx.Input(framework::GradVarName("Out")); - auto *table_grad_t = - ctx.Output(framework::GradVarName("W")); - table_grad_t->mutable_data(ctx.GetPlace()); - - int padding_idx = static_cast(ctx.Attr("padding_idx")); - - int64_t ids_numel = ids_t->numel(); - PADDLE_ENFORCE_EQ( - ids_numel <= std::numeric_limits::max(), - true, - platform::errors::OutOfRange( - "Number of ids greater than int32_t::max , please check " - "number of ids in LookupTableV2GradMLUKernel.")); - - phi::DenseTensor ids_int32(ids_t->dtype()); - if (ids_t->dtype() != DataType::INT32) { - ids_int32.mutable_data(ids_t->dims(), ctx.GetPlace()); - MLUCnnlTensorDesc ids_desc(*ids_t); - MLUCnnlTensorDesc ids_int32_desc(ids_int32); - auto cast_type = GetCastDataType(ids_t->dtype(), DataType::INT32); - MLUCnnl::Cast(ctx, - cast_type, - ids_desc.get(), - GetBasePtr(ids_t), - ids_int32_desc.get(), - GetBasePtr(&ids_int32)); - } else { - ids_int32 = *ids_t; - } - - MLUCnnlTensorDesc ids_int32_desc(ids_int32); - MLUCnnlTensorDesc output_grad_desc(*output_grad_t); - MLUCnnlTensorDesc table_grad_desc(*table_grad_t); - - MLUCnnl::EmbeddingBackward(ctx, - padding_idx, - false, - ids_int32_desc.get(), - GetBasePtr(&ids_int32), - output_grad_desc.get(), - GetBasePtr(output_grad_t), - table_grad_desc.get(), - GetBasePtr(table_grad_t)); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(lookup_table_v2, - ops::LookupTableV2MLUKernel, - ops::LookupTableV2MLUKernel, - ops::LookupTableV2MLUKernel); - -REGISTER_OP_MLU_KERNEL(lookup_table_v2_grad, - ops::LookupTableV2GradMLUKernel, - ops::LookupTableV2GradMLUKernel); diff --git a/paddle/fluid/operators/masked_select_op_mlu.cc b/paddle/fluid/operators/masked_select_op_mlu.cc deleted file mode 100644 index 86e4029512b07d517544bf24d49acde402a30c1d..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/masked_select_op_mlu.cc +++ /dev/null @@ -1,204 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class MaskedSelectedMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("X"); - auto mask = ctx.Input("Mask"); - auto out = ctx.Output("Y"); - - auto input_dim = input->dims(); - auto mask_dim = mask->dims(); - PADDLE_ENFORCE_EQ( - input_dim, - mask_dim, - platform::errors::InvalidArgument( - "The dim size of input and mask in OP(masked_selected) " - "must be equal, but got input dim:(%ld), mask dim: " - "(%ld). Please check input " - "value.", - input_dim, - mask_dim)); - - phi::DenseTensor number(framework::TransToPhiDataType(VT::INT32)); - void* number_ptr = number.mutable_data({1}, ctx.GetPlace()); - - out->Resize(mask->dims()); - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc mask_desc(*mask); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::Mask(ctx, - CNNL_MASKED_SELECT, - input_desc.get(), - GetBasePtr(input), - mask_desc.get(), - GetBasePtr(mask), - nullptr, - nullptr, - out_desc.get(), - GetBasePtr(out), - static_cast(number_ptr)); - } -}; - -template -class MaskedSelectedGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto mask = ctx.Input("Mask"); - auto y_grad = ctx.Input(framework::GradVarName("Y")); - auto x_grad = ctx.Output(framework::GradVarName("X")); - - auto& dev_ctx = - ctx.template device_context(); - phi::DenseTensor mask_int32, out_size; - std::vector out_size_vec; - mask_int32.mutable_data(mask->dims(), ctx.GetPlace()); - out_size.mutable_data({1}, ctx.GetPlace()); - - MLUCnnlTensorDesc mask_desc(*mask); - MLUCnnlTensorDesc mask_int32_desc(mask_int32); - MLUCnnlTensorDesc out_size_desc(out_size); - auto cast_type = GetCastDataType(mask->dtype(), DataType::INT32); - MLUCnnl::Cast(ctx, - cast_type, - mask_desc.get(), - GetBasePtr(mask), - mask_int32_desc.get(), - GetBasePtr(&mask_int32)); - - auto mask_int32_dim = phi::vectorize(mask_int32.dims()); - std::vector reduce_dims; - for (size_t i = 0; i < mask_int32_dim.size(); i++) { - reduce_dims.push_back(static_cast(i)); - } - - std::string reduce_name = "reduce_sum"; - cnnlReduceOp_t reduce_op = GetMLUCnnlReduceOp(reduce_name); - MLUCnnlReduceDesc reduce_desc(reduce_dims, - reduce_op, - ToCnnlDataType(), - CNNL_NOT_PROPAGATE_NAN, - CNNL_REDUCE_NO_INDICES, - CNNL_32BIT_INDICES); - - MLUCnnl::Reduce(ctx, - true, - reduce_desc.get(), - nullptr, - mask_int32_desc.get(), - GetBasePtr(&mask_int32), - 0, - nullptr, - nullptr, - out_size_desc.get(), - GetBasePtr(&out_size)); - - paddle::framework::TensorToVector(out_size, dev_ctx, &out_size_vec); - dev_ctx.Wait(); - - phi::DenseTensor mask_int32_tmp; - mask_int32_tmp.ShareDataWith(mask_int32); - mask_int32_tmp.Resize({mask_int32.numel()}); - phi::DenseTensor topk_v2_out(framework::TransToPhiDataType(VT::INT32)), - indices_int32(framework::TransToPhiDataType(VT::INT32)); - topk_v2_out.mutable_data({mask_int32.numel()}, ctx.GetPlace()); - indices_int32.mutable_data({mask_int32.numel()}, ctx.GetPlace()); - - MLUCnnlTensorDesc topk_v2_out_desc(topk_v2_out); - MLUCnnlTensorDesc indices_int32_desc(indices_int32); - MLUCnnlTensorDesc mask_int32_tmp_desc(mask_int32_tmp); - - const int dim = 0; - MLUCnnl::TopK(ctx, - mask_int32.numel(), - dim, - true, - false, - mask_int32_tmp_desc.get(), - GetBasePtr(&mask_int32_tmp), - topk_v2_out_desc.get(), - GetBasePtr(&topk_v2_out), - indices_int32_desc.get(), - GetBasePtr(&indices_int32)); - - auto stream = ctx.template device_context().stream(); - - phi::DenseTensor indices_int32_out; - indices_int32_out.mutable_data({out_size_vec[0]}, ctx.GetPlace()); - memory::Copy(ctx.GetPlace(), - GetBasePtr(&indices_int32_out), - ctx.GetPlace(), - GetBasePtr(&indices_int32), - out_size_vec[0] * sizeof(int32_t), - stream); - - phi::DenseTensor y_grad_tmp_out; - y_grad_tmp_out.mutable_data({out_size_vec[0]}, ctx.GetPlace()); - MLUCnnlTensorDesc y_grad_tmp_out_desc(y_grad_tmp_out); - memory::Copy(ctx.GetPlace(), - GetBasePtr(&y_grad_tmp_out), - ctx.GetPlace(), - GetBasePtr(y_grad), - out_size_vec[0] * sizeof(T), - stream); - - phi::DenseTensor indices_int32_tmp; - indices_int32_tmp.ShareDataWith(indices_int32_out); - indices_int32_tmp.Resize({out_size_vec[0], 1}); - MLUCnnlTensorDesc indices_int32_tmp_desc(indices_int32_tmp); - - const cnnlScatterNdMode_t mode = CNNL_SCATTERND_UPDATE; - x_grad->Resize({x_grad->numel()}); - x_grad->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc x_grad_desc(*x_grad); - MLUCnnl::ScatterNd(ctx, - mode, - indices_int32_tmp_desc.get(), - GetBasePtr(&indices_int32_tmp), - y_grad_tmp_out_desc.get(), - GetBasePtr(&y_grad_tmp_out), - nullptr, - nullptr, - x_grad_desc.get(), - GetBasePtr(x_grad)); - x_grad->Resize(mask->dims()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(masked_select, - ops::MaskedSelectedMLUKernel, - ops::MaskedSelectedMLUKernel, - ops::MaskedSelectedMLUKernel); - -REGISTER_OP_MLU_KERNEL(masked_select_grad, - ops::MaskedSelectedGradMLUKernel, - ops::MaskedSelectedGradMLUKernel, - ops::MaskedSelectedGradMLUKernel); diff --git a/paddle/fluid/operators/matmul_op_mlu.cc b/paddle/fluid/operators/matmul_op_mlu.cc deleted file mode 100644 index 84d2f031d4bcbaa3bba70e53c3f46a3ecf90482d..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/matmul_op_mlu.cc +++ /dev/null @@ -1,389 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -static void Mul(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out, - const float alpha) { - Out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(Y, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnlOpTensorDesc mul_op_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - MLUCnnl::OpTensor(ctx, - mul_op_desc.get(), - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out), - ToCnnlDataType(), - alpha); -} - -template -static void MatMul2D(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out, - const bool trans_x, - const bool trans_y, - const float alpha) { - Out->mutable_data(ctx.GetPlace()); - - PADDLE_ENFORCE_LT(fabs(alpha - 1.0), - std::numeric_limits::epsilon(), - platform::errors::InvalidArgument( - "MLU(matmul): alpha should be equal to 1.0! " - "Other values are not supported yet." - "But received alpha is %d.", - alpha)); - - MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(Y, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnl::Matmul(ctx, - trans_x, - trans_y, - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out)); -} - -template -static void MatMulND(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out, - const bool trans_x, - const bool trans_y, - const float alpha) { - if (!Out->initialized()) { - Out->mutable_data(ctx.GetPlace()); - } - - PADDLE_ENFORCE_LT(fabs(alpha - 1.0), - std::numeric_limits::epsilon(), - platform::errors::InvalidArgument( - "MLU(matmul): alpha should be equal to 1.0! " - "Other values are not supported yet." - "But received alpha is %d.", - alpha)); - - MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(Y, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnl::BatchMatmul(ctx, - trans_x, - trans_y, - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out)); -} - -template -static void ReduceDims(const framework::ExecutionContext& ctx, - const std::vector& dims, - const std::vector& bcast_dims, - const phi::DenseTensor& in, - phi::DenseTensor* out) { - std::vector axes; - int64_t size = bcast_dims.size(); - int64_t diff = bcast_dims.size() - dims.size(); - for (int64_t i = 0; i < size; ++i) { - if (i < diff) { - axes.push_back(i); - continue; - } - if (bcast_dims[i] > dims[i - diff]) { - axes.push_back(i); - } - } - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc in_desc(in, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - std::vector reduce_dims(axes.begin(), axes.end()); - MLUCnnlReduceDesc reduce_desc(reduce_dims, - CNNL_REDUCE_ADD, - ToCnnlDataType(), - CNNL_NOT_PROPAGATE_NAN, - CNNL_REDUCE_NO_INDICES, - CNNL_32BIT_INDICES); - - MLUCnnl::Reduce(ctx, - true /*need_workspace*/, - reduce_desc.get(), - nullptr, - in_desc.get(), - GetBasePtr(&in), - 0 /*indices_size*/, - nullptr, - nullptr, - out_desc.get(), - GetBasePtr(out)); -} - -template -class MatMulMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* Out = ctx.Output("Out"); - bool transpose_x = ctx.Attr("transpose_X"); - bool transpose_y = ctx.Attr("transpose_Y"); - float alpha = static_cast(ctx.Attr("alpha")); - - std::vector x_dims = phi::vectorize(X->dims()); - std::vector y_dims = phi::vectorize(Y->dims()); - std::vector out_dims = phi::vectorize(Out->dims()); - int x_ndim = x_dims.size(); - int y_ndim = y_dims.size(); - - // Case 1: [K] x [K] = [1] - // Equal: [1, K] x [K, 1] = [1, 1] => [1] - const bool all_one_dim = (x_ndim == 1 && y_ndim == 1); - if (all_one_dim) { - Out->Resize({1, 1}); - } - - // Resize dim 1 to 2 - phi::DenseTensor x_temp, y_temp; - x_temp.ShareDataWith(*X); - y_temp.ShareDataWith(*Y); - if (x_ndim == 1) { - x_dims.insert(x_dims.begin(), 1); - x_temp.Resize(phi::make_ddim(x_dims)); - x_ndim = 2; - // matmul op of mlu needs `std::max(x->dim, y->dim) == out->dim` - if (out_dims.size() < y_dims.size()) { - std::vector temp_out_dims(out_dims.begin(), out_dims.end()); - temp_out_dims.insert(temp_out_dims.end() - 1, 1); - Out->Resize(phi::make_ddim(temp_out_dims)); - } - } - if (y_ndim == 1) { - y_dims.push_back(1); - y_temp.Resize(phi::make_ddim(y_dims)); - y_ndim = 2; - // matmul op of mlu needs `std::max(x->dim, y->dim) == out->dim` - if (out_dims.size() < x_dims.size()) { - std::vector temp_out_dims(out_dims.begin(), out_dims.end()); - temp_out_dims.push_back(1); - Out->Resize(phi::make_ddim(temp_out_dims)); - } - } - - const int K = transpose_x ? x_dims[x_ndim - 2] : x_dims[x_ndim - 1]; - if (transpose_y) { - PADDLE_ENFORCE_EQ( - y_dims[y_ndim - 1], - K, - platform::errors::InvalidArgument("Input(Y) has error dim." - "Y'dims[%d] must be equal to %d" - "But received Y'dims[%d] is %d", - y_ndim - 1, - K, - y_ndim - 1, - y_dims[y_ndim - 1])); - } else { - PADDLE_ENFORCE_EQ( - y_dims[y_ndim - 2], - K, - platform::errors::InvalidArgument("Input(Y) has error dim." - "Y'dims[%d] must be equal to %d" - "But received Y'dims[%d] is %d", - y_ndim - 2, - K, - y_ndim - 2, - y_dims[y_ndim - 2])); - } - - if (x_ndim == 2 && y_ndim == 2) { - // Case 2: [M, K] x [K, N] = [M, N] - MatMul2D(ctx, x_temp, y_temp, Out, transpose_x, transpose_y, alpha); - } else { - // Case 3: [B, M, K] x [K, N] = [B, M, N] - // Case 4: [B, M, K] x [B, K, N] = [B, M, N] - MatMulND(ctx, x_temp, y_temp, Out, transpose_x, transpose_y, alpha); - } - - if (phi::vectorize(Out->dims()) != out_dims) { - Out->Resize(phi::make_ddim(out_dims)); - } - } -}; - -template -class MatMulGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dY = ctx.Output(framework::GradVarName("Y")); - bool transpose_x = ctx.Attr("transpose_X"); - bool transpose_y = ctx.Attr("transpose_Y"); - float alpha = static_cast(ctx.Attr("alpha")); - - std::vector x_dims = phi::vectorize(X->dims()); - std::vector y_dims = phi::vectorize(Y->dims()); - std::vector out_dims = phi::vectorize(dOut->dims()); - int x_ndim = x_dims.size(); - int y_ndim = y_dims.size(); - int out_ndim = out_dims.size(); - - // Case 1: [K] x [K] = [1] - if (x_ndim == 1 && y_ndim == 1) { - if (dX) { - Mul(ctx, *dOut, *Y, dX, alpha); - } - if (dY) { - Mul(ctx, *dOut, *X, dY, alpha); - } - return; - } - - // Resize dim 1 to 2 - phi::DenseTensor x_temp, y_temp, dout_temp; - x_temp.ShareDataWith(*X); - y_temp.ShareDataWith(*Y); - dout_temp.ShareDataWith(*dOut); - if (x_ndim == 1) { - x_dims.insert(x_dims.begin(), 1); - out_dims.insert(out_dims.end() - 1, 1); - x_temp.Resize(phi::make_ddim(x_dims)); - dout_temp.Resize(phi::make_ddim(out_dims)); - x_ndim = 2; - out_ndim += 1; - } - if (y_ndim == 1) { - y_dims.push_back(1); - out_dims.push_back(1); - y_temp.Resize(phi::make_ddim(y_dims)); - dout_temp.Resize(phi::make_ddim(out_dims)); - y_ndim = 2; - out_ndim += 1; - } - - // Case 2: [M, K] x [K, N] = [M, N] - if (out_ndim == 2) { - if (dX) { - dX->Resize(phi::make_ddim(x_dims)); - if (transpose_x) { - MatMul2D(ctx, y_temp, dout_temp, dX, transpose_y, true, alpha); - } else { - MatMul2D(ctx, dout_temp, y_temp, dX, false, !transpose_y, alpha); - } - dX->Resize(X->dims()); - } - if (dY) { - dY->Resize(phi::make_ddim(y_dims)); - if (transpose_y) { - MatMul2D(ctx, dout_temp, x_temp, dY, true, transpose_x, alpha); - } else { - MatMul2D(ctx, x_temp, dout_temp, dY, !transpose_x, false, alpha); - } - dY->Resize(Y->dims()); - } - return; - } - - // Case 3: [B, M, K] x [K, N] = [B, M, N] - // Case 4: [B, M, K] x [B, K, N] = [B, M, N] - std::vector x_bcast_dims(out_ndim, 1); - std::vector y_bcast_dims(out_ndim, 1); - std::copy(out_dims.begin(), out_dims.end() - 2, x_bcast_dims.begin()); - std::copy(out_dims.begin(), out_dims.end() - 2, y_bcast_dims.begin()); - std::copy(x_dims.end() - 2, x_dims.end(), x_bcast_dims.end() - 2); - std::copy(y_dims.end() - 2, y_dims.end(), y_bcast_dims.end() - 2); - - if (dX) { - phi::DenseTensor dx_temp(X->type()); - if (x_dims != x_bcast_dims) { - dx_temp.Resize(phi::make_ddim(x_bcast_dims)); - } else { - dX->mutable_data(ctx.GetPlace()); - dx_temp.ShareDataWith(*dX); - } - - if (transpose_x) { - MatMulND(ctx, y_temp, dout_temp, &dx_temp, transpose_y, true, alpha); - } else { - MatMulND( - ctx, dout_temp, y_temp, &dx_temp, false, !transpose_y, alpha); - } - - if (x_dims != x_bcast_dims) { - ReduceDims(ctx, x_dims, x_bcast_dims, dx_temp, dX); - } - } - - if (dY) { - phi::DenseTensor dy_temp(Y->type()); - if (y_dims != y_bcast_dims) { - dy_temp.Resize(phi::make_ddim(y_bcast_dims)); - } else { - dY->mutable_data(ctx.GetPlace()); - dy_temp.ShareDataWith(*dY); - } - - if (transpose_y) { - MatMulND(ctx, dout_temp, x_temp, &dy_temp, true, transpose_x, alpha); - } else { - MatMulND( - ctx, x_temp, dout_temp, &dy_temp, !transpose_x, false, alpha); - } - - if (y_dims != y_bcast_dims) { - ReduceDims(ctx, y_dims, y_bcast_dims, dy_temp, dY); - } - } - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(matmul, - ops::MatMulMLUKernel, - ops::MatMulMLUKernel); -REGISTER_OP_MLU_KERNEL(matmul_grad, - ops::MatMulGradMLUKernel, - ops::MatMulGradMLUKernel); diff --git a/paddle/fluid/operators/matmul_v2_op_mlu.cc b/paddle/fluid/operators/matmul_v2_op_mlu.cc deleted file mode 100644 index db7a92409bf6c5c2cc9acc35eea2f3fa6d958f0e..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/matmul_v2_op_mlu.cc +++ /dev/null @@ -1,406 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/matmul_v2_op.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -static void Mul(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out) { - Out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(Y, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnlOpTensorDesc mul_op_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - MLUCnnl::OpTensor(ctx, - mul_op_desc.get(), - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out), - ToCnnlDataType()); -} - -template -static void MatMul2D(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out, - const bool trans_x, - const bool trans_y) { - Out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(Y, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnl::Matmul(ctx, - trans_x, - trans_y, - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out)); -} - -template -static void MatMul2DwithReduceBatch(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out, - const bool trans_x, - const bool trans_y) { - if (!Out->initialized()) { - Out->mutable_data(ctx.GetPlace()); - } - // reshape to 2D matmul - std::vector x_dims = phi::vectorize(X.dims()); - std::vector y_dims = phi::vectorize(Y.dims()); - std::vector realx_dims( - {static_cast(x_dims[0] * x_dims[1]), static_cast(x_dims[2])}); - std::vector realy_dims( - {static_cast(y_dims[0] * y_dims[1]), static_cast(y_dims[2])}); - MLUCnnlTensorDesc x_desc(2, realx_dims.data(), ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(2, realy_dims.data(), ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnl::Matmul(ctx, - trans_x, - trans_y, - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out)); -} - -template -static void MatMulND(const framework::ExecutionContext& ctx, - const phi::DenseTensor& X, - const phi::DenseTensor& Y, - phi::DenseTensor* Out, - const bool trans_x, - const bool trans_y) { - if (!Out->initialized()) { - Out->mutable_data(ctx.GetPlace()); - } - - MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc y_desc(Y, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*Out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - MLUCnnl::BatchMatmul(ctx, - trans_x, - trans_y, - x_desc.get(), - GetBasePtr(&X), - y_desc.get(), - GetBasePtr(&Y), - out_desc.get(), - GetBasePtr(Out)); -} - -template -static void ReduceDims(const framework::ExecutionContext& ctx, - const std::vector& dims, - const std::vector& bcast_dims, - const phi::DenseTensor& in, - phi::DenseTensor* out) { - std::vector axes; - int64_t size = bcast_dims.size(); - int64_t diff = bcast_dims.size() - dims.size(); - for (int64_t i = 0; i < size; ++i) { - if (i < diff) { - axes.push_back(i); - continue; - } - if (bcast_dims[i] > dims[i - diff]) { - axes.push_back(i); - } - } - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc in_desc(in, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); - - std::vector reduce_dims(axes.begin(), axes.end()); - MLUCnnlReduceDesc reduce_desc(reduce_dims, - CNNL_REDUCE_ADD, - ToCnnlDataType(), - CNNL_NOT_PROPAGATE_NAN, - CNNL_REDUCE_NO_INDICES, - CNNL_32BIT_INDICES); - - MLUCnnl::Reduce(ctx, - true /*need_workspace*/, - reduce_desc.get(), - nullptr, - in_desc.get(), - GetBasePtr(&in), - 0 /*indices_size*/, - nullptr, - nullptr, - out_desc.get(), - GetBasePtr(out)); -} - -template -class MatMulV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* Out = ctx.Output("Out"); - const bool trans_x = ctx.Attr("trans_x"); - const bool trans_y = ctx.Attr("trans_y"); - - std::vector x_dims = phi::vectorize(X->dims()); - std::vector y_dims = phi::vectorize(Y->dims()); - std::vector out_dims = phi::vectorize(Out->dims()); - int x_ndim = x_dims.size(); - int y_ndim = y_dims.size(); - - // Case 1: [K] x [K] = [1] - // Equal: [1, K] x [K, 1] = [1, 1] => [1] - const bool all_one_dim = (x_ndim == 1 && y_ndim == 1); - if (all_one_dim) { - Out->Resize({1, 1}); - } - - // Resize dim 1 to 2 - phi::DenseTensor x_temp, y_temp; - x_temp.ShareDataWith(*X); - y_temp.ShareDataWith(*Y); - if (x_ndim == 1) { - x_dims.insert(x_dims.begin(), 1); - x_temp.Resize(phi::make_ddim(x_dims)); - x_ndim = 2; - // matmul op of mlu needs `std::max(x->dim, y->dim) == out->dim` - if (out_dims.size() < y_dims.size()) { - std::vector temp_out_dims(out_dims.begin(), out_dims.end()); - temp_out_dims.insert(temp_out_dims.end() - 1, 1); - Out->Resize(phi::make_ddim(temp_out_dims)); - } - } - if (y_ndim == 1) { - y_dims.push_back(1); - y_temp.Resize(phi::make_ddim(y_dims)); - y_ndim = 2; - // matmul op of mlu needs `std::max(x->dim, y->dim) == out->dim` - if (out_dims.size() < x_dims.size()) { - std::vector temp_out_dims(out_dims.begin(), out_dims.end()); - temp_out_dims.push_back(1); - Out->Resize(phi::make_ddim(temp_out_dims)); - } - } - - const int K = trans_x ? x_dims[x_ndim - 2] : x_dims[x_ndim - 1]; - if (trans_y) { - PADDLE_ENFORCE_EQ( - y_dims[y_ndim - 1], - K, - platform::errors::InvalidArgument("Input(Y) has error dim." - "Y'dims[%d] must be equal to %d" - "But received Y'dims[%d] is %d", - y_ndim - 1, - K, - y_ndim - 1, - y_dims[y_ndim - 1])); - } else { - PADDLE_ENFORCE_EQ( - y_dims[y_ndim - 2], - K, - platform::errors::InvalidArgument("Input(Y) has error dim." - "Y'dims[%d] must be equal to %d" - "But received Y'dims[%d] is %d", - y_ndim - 2, - K, - y_ndim - 2, - y_dims[y_ndim - 2])); - } - - if (x_ndim == 2 && y_ndim == 2) { - // Case 2: [M, K] x [K, N] = [M, N] - MatMul2D(ctx, x_temp, y_temp, Out, trans_x, trans_y); - } else { - // Case 3: [B, M, K] x [K, N] = [B, M, N] - // Case 4: [B, M, K] x [B, K, N] = [B, M, N] - MatMulND(ctx, x_temp, y_temp, Out, trans_x, trans_y); - } - - if (phi::vectorize(Out->dims()) != out_dims) { - Out->Resize(phi::make_ddim(out_dims)); - } - } -}; - -template -class MatMulGradV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dY = ctx.Output(framework::GradVarName("Y")); - const bool trans_x = ctx.Attr("trans_x"); - const bool trans_y = ctx.Attr("trans_y"); - - std::vector x_dims = phi::vectorize(X->dims()); - std::vector y_dims = phi::vectorize(Y->dims()); - std::vector out_dims = phi::vectorize(dOut->dims()); - int x_ndim = x_dims.size(); - int y_ndim = y_dims.size(); - int out_ndim = out_dims.size(); - - // Case 1: [K] x [K] = [1] - if (x_ndim == 1 && y_ndim == 1) { - if (dX) { - Mul(ctx, *dOut, *Y, dX); - } - if (dY) { - Mul(ctx, *dOut, *X, dY); - } - return; - } - - // Resize dim 1 to 2 - phi::DenseTensor x_temp, y_temp, dout_temp; - x_temp.ShareDataWith(*X); - y_temp.ShareDataWith(*Y); - dout_temp.ShareDataWith(*dOut); - if (x_ndim == 1) { - x_dims.insert(x_dims.begin(), 1); - out_dims.insert(out_dims.end() - 1, 1); - x_temp.Resize(phi::make_ddim(x_dims)); - dout_temp.Resize(phi::make_ddim(out_dims)); - x_ndim = 2; - out_ndim += 1; - } - if (y_ndim == 1) { - y_dims.push_back(1); - out_dims.push_back(1); - y_temp.Resize(phi::make_ddim(y_dims)); - dout_temp.Resize(phi::make_ddim(out_dims)); - y_ndim = 2; - out_ndim += 1; - } - - // Case 2: [M, K] x [K, N] = [M, N] - if (out_ndim == 2) { - if (dX) { - dX->Resize(phi::make_ddim(x_dims)); - if (trans_x) { - MatMul2D(ctx, y_temp, dout_temp, dX, trans_y, true); - } else { - MatMul2D(ctx, dout_temp, y_temp, dX, false, !trans_y); - } - dX->Resize(X->dims()); - } - if (dY) { - dY->Resize(phi::make_ddim(y_dims)); - if (trans_y) { - MatMul2D(ctx, dout_temp, x_temp, dY, true, trans_x); - } else { - MatMul2D(ctx, x_temp, dout_temp, dY, !trans_x, false); - } - dY->Resize(Y->dims()); - } - return; - } - - // Case 3: [B, M, K] x [K, N] = [B, M, N] - // Case 4: [B, M, K] x [B, K, N] = [B, M, N] - std::vector x_bcast_dims(out_ndim, 1); - std::vector y_bcast_dims(out_ndim, 1); - std::copy(out_dims.begin(), out_dims.end() - 2, x_bcast_dims.begin()); - std::copy(out_dims.begin(), out_dims.end() - 2, y_bcast_dims.begin()); - std::copy(x_dims.end() - 2, x_dims.end(), x_bcast_dims.end() - 2); - std::copy(y_dims.end() - 2, y_dims.end(), y_bcast_dims.end() - 2); - - if (dX) { - phi::DenseTensor dx_temp(X->type()); - if (x_dims != x_bcast_dims) { - dx_temp.Resize(phi::make_ddim(x_bcast_dims)); - } else { - dX->mutable_data(ctx.GetPlace()); - dx_temp.ShareDataWith(*dX); - } - - if (trans_x) { - MatMulND(ctx, y_temp, dout_temp, &dx_temp, trans_y, true); - } else { - MatMulND(ctx, dout_temp, y_temp, &dx_temp, false, !trans_y); - } - - if (x_dims != x_bcast_dims) { - ReduceDims(ctx, x_dims, x_bcast_dims, dx_temp, dX); - } - } - - if (dY) { - // Case 3: [B, M, K] x [K, N] = [B, M, N] better performance - // otherwise, tensor dy_temp in else branch might encounter - // numel overflow due to cnnlTensorDescriptor limitation - if (x_dims.size() == 3 && phi::vectorize(Y->dims()).size() == 2) { - if (trans_y) { - MatMul2DwithReduceBatch(ctx, dout_temp, x_temp, dY, true, trans_x); - } else { - MatMul2DwithReduceBatch( - ctx, x_temp, dout_temp, dY, !trans_x, false); - } - } else { - phi::DenseTensor dy_temp(Y->type()); - if (y_dims != y_bcast_dims) { - dy_temp.Resize(phi::make_ddim(y_bcast_dims)); - } else { - dY->mutable_data(ctx.GetPlace()); - dy_temp.ShareDataWith(*dY); - } - if (trans_y) { - MatMulND(ctx, dout_temp, x_temp, &dy_temp, true, trans_x); - } else { - MatMulND(ctx, x_temp, dout_temp, &dy_temp, !trans_x, false); - } - if (y_dims != y_bcast_dims) { - ReduceDims(ctx, y_dims, y_bcast_dims, dy_temp, dY); - } - } - } - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(matmul_v2, - ops::MatMulV2MLUKernel, - ops::MatMulV2MLUKernel); -REGISTER_OP_MLU_KERNEL(matmul_v2_grad, - ops::MatMulGradV2MLUKernel, - ops::MatMulGradV2MLUKernel); diff --git a/paddle/fluid/operators/mean_op_mlu.cc b/paddle/fluid/operators/mean_op_mlu.cc deleted file mode 100644 index e9266b30fcd01926a6f0a9531b6e5afdf1e592ed..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/mean_op_mlu.cc +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/platform/device/mlu/device_context.h" -#include "paddle/fluid/platform/float16.h" - -namespace paddle { -namespace operators { - -template -class MeanMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output = context.Output("Out"); - - const T* in_data = input->data(); - T* out_data = output->mutable_data(context.GetPlace()); - auto numel = input->numel(); - auto rank = input->dims().size(); - auto place = context.GetPlace(); - auto stream = context.template device_context().stream(); - - if (rank == 0) { // scalar - memory::Copy(place, out_data, place, in_data, numel * sizeof(T), stream); - return; - } - - std::vector reduce_dims; - reduce_dims.reserve(rank); - for (decltype(rank) i = 0; i < rank; ++i) { - reduce_dims.push_back(i); - } - - MLUCnnlTensorDesc input_desc( - *input, CNNL_LAYOUT_ARRAY, ToCnnlDataType(input->dtype())); - MLUCnnlTensorDesc output_desc( - *output, CNNL_LAYOUT_ARRAY, ToCnnlDataType(output->dtype())); - - MLUCnnlReduceDesc reduction_desc(reduce_dims, - CNNL_REDUCE_AVG, - ToCnnlDataType(), - CNNL_NOT_PROPAGATE_NAN, - CNNL_REDUCE_NO_INDICES, - CNNL_32BIT_INDICES); - - MLUCnnl::Reduce(context, - true /*need_workspace*/, - reduction_desc.get(), - nullptr, - input_desc.get(), - reinterpret_cast(in_data), - 0 /*indices_size*/, - nullptr, - nullptr, - output_desc.get(), - reinterpret_cast(out_data)); - } -}; - -template -class MeanMLUGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto output_grad = - context.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ( - output_grad->numel(), - 1, - platform::errors::InvalidArgument( - "Mean Gradient Input phi::DenseTensor len should be 1. But " - "received Out@Grad's elements num is %d.", - output_grad->numel())); - auto input_grad = - context.Output(framework::GradVarName("X")); - input_grad->mutable_data(context.GetPlace()); - - auto in_data = output_grad->data(); - auto numel = input_grad->numel(); - auto rank = input_grad->dims().size(); - auto out_data = input_grad->data(); - auto place = context.GetPlace(); - auto stream = context.template device_context().stream(); - - if (rank == 0) { // scalar - memory::Copy(place, out_data, place, in_data, numel * sizeof(T), stream); - return; - } - - // means - phi::DenseTensor mean_var(output_grad->dtype()); - mean_var.mutable_data(input_grad->dims(), context.GetPlace()); - MLUCnnlTensorDesc mean_var_desc( - mean_var, CNNL_LAYOUT_ARRAY, ToCnnlDataType(mean_var.dtype())); - auto value = static_cast(1.0 / static_cast(input_grad->numel())); - MLUCnnl::Fill(context, - CNNL_POINTER_MODE_HOST, - &value, - mean_var_desc.get(), - GetBasePtr(&mean_var)); - - // means mul output_grad - MLUCnnlTensorDesc in_desc( - *output_grad, CNNL_LAYOUT_ARRAY, ToCnnlDataType(output_grad->dtype())); - MLUCnnlTensorDesc out_desc( - *input_grad, CNNL_LAYOUT_ARRAY, ToCnnlDataType(input_grad->dtype())); - - MLUCnnlOpTensorDesc op_tensor_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - - MLUCnnl::OpTensor(context, - op_tensor_desc.get(), - in_desc.get(), - reinterpret_cast(in_data), - mean_var_desc.get(), - GetBasePtr(&mean_var), - out_desc.get(), - reinterpret_cast(out_data), - ToCnnlDataType()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(mean, - ops::MeanMLUKernel, - ops::MeanMLUKernel); -REGISTER_OP_MLU_KERNEL(mean_grad, - ops::MeanMLUGradKernel, - ops::MeanMLUGradKernel); diff --git a/paddle/fluid/operators/meshgrid_op_mlu.cc b/paddle/fluid/operators/meshgrid_op_mlu.cc deleted file mode 100644 index f0103afbb0bc554baaa786489d5d647ad31d8210..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/meshgrid_op_mlu.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class MeshgridMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - auto outs = ctx.MultiOutput("Out"); - PADDLE_ENFORCE_EQ((ins.size() > 1) && (ins.size() < 7), - true, - platform::errors::InvalidArgument( - "Excepted phi::DenseTensor numbers between 2 and 6, " - "but only received d% .", - ins.size())); - - int64_t size = ins.size(); - std::vector shape(size); - - for (int64_t i = 0; i < size; i++) { - switch (ins[i]->dims().size()) { - case 0: - shape[i] = 1; - break; - case 1: - shape[i] = ins[i]->dims()[0]; - break; - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Expected scalar or 1D tensor in the tensor list but got tensor " - "%d: ", - i)); - } - } - - MLUCnnlTensorDesc out_desc(size, shape.data(), ToCnnlDataType()); - framework::DDim out_dims = phi::make_ddim(shape); - for (int64_t i = 0; i < size; i++) { - std::vector view_shape(size, 1); - view_shape[i] = shape[i]; - - outs[i]->Resize(out_dims); - outs[i]->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc in_desc(size, view_shape.data(), ToCnnlDataType()); - MLUCnnl::BroadcastTo(ctx, - in_desc.get(), - GetBasePtr(ins[i]), - out_desc.get(), - GetBasePtr(outs[i])); - } - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_MLU_KERNEL( - meshgrid, - paddle::operators::MeshgridMLUKernel, - paddle::operators::MeshgridMLUKernel, - paddle::operators::MeshgridMLUKernel, - paddle::operators::MeshgridMLUKernel); diff --git a/paddle/fluid/operators/one_hot_v2_op_mlu.cc b/paddle/fluid/operators/one_hot_v2_op_mlu.cc deleted file mode 100644 index 92c76e14e1a8484bd524b18a485f7c038d3bb980..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/one_hot_v2_op_mlu.cc +++ /dev/null @@ -1,103 +0,0 @@ - -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/core/tensor_utils.h" - -namespace paddle { -namespace operators { - -template -class OneHotV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = - ctx.template device_context(); - auto* in = ctx.Input("X"); - auto* out = ctx.Output("Out"); - int depth = ctx.Attr("depth"); - if (ctx.HasInput("depth_tensor")) { - std::vector depth_data; - depth_data = phi::GetVectorFromTensor( - ctx.Input("depth_tensor")); - depth = depth_data[0]; - - auto out_dims = out->dims(); - out_dims[out_dims.size() - 1] = depth; - out->Resize(out_dims); - } - out->mutable_data(ctx.GetPlace()); - - float on_value = 1.0f, off_value = 0.0f; - const int in_off_dim[1] = {1}; - phi::DenseTensor on_value_tensor = - ctx.AllocateTmpTensor( - framework::DDim(in_off_dim, 1), dev_ctx); - phi::DenseTensor off_value_tensor = - ctx.AllocateTmpTensor( - framework::DDim(in_off_dim, 1), dev_ctx); - FillMLUTensorWithHostValue(ctx, on_value, &on_value_tensor); - FillMLUTensorWithHostValue(ctx, off_value, &off_value_tensor); - - if (framework::TransToProtoVarType(in->dtype()) == - framework::proto::VarType::INT32) { - MLUCnnlTensorDesc desc_indices(*in); - MLUCnnl::OneHot(ctx, - desc_indices.get(), - GetBasePtr(in), - depth, - GetBasePtr(&on_value_tensor), - GetBasePtr(&off_value_tensor), - -1, - ToCnnlDataType(out->dtype()), - GetBasePtr(out)); - } else { - phi::DenseTensor transformed_in; - transformed_in.mutable_data(in->dims(), dev_ctx.GetPlace()); - // use cnnlCast to cast int64_t to int32_t then do one_hot - MLUCnnlTensorDesc in_desc(*in); - MLUCnnlTensorDesc transformed_in_desc(transformed_in); - cnnlCastDataType_t cast_type = GetCastDataType( - framework::TransToProtoVarType(in->dtype()), - framework::TransToProtoVarType(transformed_in.dtype())); - MLUCnnl::Cast(ctx, - cast_type, - in_desc.get(), - GetBasePtr(in), - transformed_in_desc.get(), - GetBasePtr(&transformed_in)); - MLUCnnl::OneHot(ctx, - transformed_in_desc.get(), - GetBasePtr(&transformed_in), - depth, - GetBasePtr(&on_value_tensor), - GetBasePtr(&off_value_tensor), - -1, - ToCnnlDataType(out->dtype()), - GetBasePtr(out)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(one_hot_v2, - ops::OneHotV2MLUKernel, - ops::OneHotV2MLUKernel); diff --git a/paddle/fluid/operators/pool_op_mlu.cc b/paddle/fluid/operators/pool_op_mlu.cc deleted file mode 100644 index 6e422a645fffb61f81f185b62344c54f97ac6198..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/pool_op_mlu.cc +++ /dev/null @@ -1,384 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/kernels/funcs/pooling.h" - -namespace paddle { -namespace operators { - -namespace { - -cnnlPoolingMode_t ToCnnlPoolingMode(const std::string &pooling_type, - bool exclusive, - bool adaptive) { - cnnlPoolingMode_t pooling_mode; - if (pooling_type == "max") { - pooling_mode = CNNL_POOLING_MAX; - } else if (pooling_type == "avg") { - if (exclusive && !adaptive) { - pooling_mode = CNNL_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; - } else { - pooling_mode = CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; - } - } else { - PADDLE_THROW(platform::errors::InvalidArgument("Unknown pooling_type: %s", - pooling_type)); - } - return pooling_mode; -} -} // namespace - -template -class MLUPoolOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto &dev_ctx = ctx.template device_context(); - const phi::DenseTensor *in_x = ctx.Input("X"); - phi::DenseTensor *out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - std::string pooling_type = ctx.Attr("pooling_type"); - std::vector ksize = ctx.Attr>("ksize"); - std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - std::string data_format = ctx.Attr("data_format"); - - bool global_pooling = ctx.Attr("global_pooling"); - bool ceil_mode = ctx.Attr("ceil_mode"); - bool exclusive = ctx.Attr("exclusive"); - bool adaptive = ctx.Attr("adaptive"); - std::string padding_algorithm = ctx.Attr("padding_algorithm"); - - PADDLE_ENFORCE_EQ(in_x->dims().size(), - 4, - platform::errors::InvalidArgument( - "Only support 4-dims for mlu pool2d kernel.")); - - const bool channel_last = data_format == "NHWC"; - // default - cnnlTensorLayout_t cnnl_layout = CNNL_LAYOUT_NCHW; - auto out_dims = out->dims(); - int64_t out_h = out_dims[2]; - int64_t out_w = out_dims[3]; - auto in_x_dims = in_x->dims(); - framework::DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size()); - - if (channel_last) { - cnnl_layout = CNNL_LAYOUT_NHWC; - out_h = out_dims[1]; - out_w = out_dims[2]; - data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); - } - - phi::funcs::UpdatePadding(&paddings, - global_pooling, - adaptive, - padding_algorithm, - data_dims, - strides, - ksize); - if (global_pooling) { - phi::funcs::UpdateKernelSize(&ksize, data_dims); - } - - MLUCnnlTensorDesc in_x_desc(*in_x, cnnl_layout, ToCnnlDataType()); - MLUCnnlTensorDesc out_desc(*out, cnnl_layout, ToCnnlDataType()); - - cnnlPoolingMode_t pool_mode = - ToCnnlPoolingMode(pooling_type, exclusive, adaptive); - - // transpose NCHW to NHWC since cnnl pool2d has worse performance in that - // layout. - phi::DenseTensor trans_in_x; - phi::DenseTensor trans_out; - if (channel_last) { - trans_in_x = *in_x; - trans_out = *out; - } else { - std::vector perm{0, 2, 3, 1}; - TransposeFromMLUTensor( - ctx, perm, in_x, &trans_in_x, true /*need_reshape_or_alloc*/); - trans_out = ctx.AllocateTmpTensor( - {out_dims[0], out_dims[2], out_dims[3], out_dims[1]}, dev_ctx); - } - MLUCnnlTensorDesc trans_in_x_desc( - trans_in_x, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnlTensorDesc trans_out_desc( - trans_out, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - - if (!adaptive) { - MLUCnnlPoolingDesc pool_desc(pool_mode, - CNNL_NOT_PROPAGATE_NAN, - ksize[0], - ksize[1], - paddings[0], - paddings[1], - paddings[2], - paddings[3], - strides[0], - strides[1], - 1 /*row_dilation*/, - 1 /*col_dilation*/, - ceil_mode); - - size_t extra_input_size = 0; - cnnlHandle_t handle = - ctx.template device_context().cnnl_handle(); - cnnlGetPoolingExtraInputSize( - handle, pool_mode, out_w, out_h, &extra_input_size); - - if (extra_input_size > 0) { - phi::DenseTensor extra_host_tensor; - extra_host_tensor.mutable_data( - {static_cast(extra_input_size)}, platform::CPUPlace()); - cnnlInitPoolingExtraInput(handle, - pool_desc.get(), - trans_in_x_desc.get(), - trans_out_desc.get(), - GetBasePtr(&extra_host_tensor)); - phi::DenseTensor extra_device_tensor = - ctx.AllocateTmpTensor( - {static_cast(extra_input_size)}, dev_ctx); - framework::TensorCopy( - extra_host_tensor, ctx.GetPlace(), &extra_device_tensor); - // Increase extra_host_tensor holder_ reference count until copy - // complete. - auto increase_ref_count = [extra_host_tensor]() { - VLOG(4) << "Finished copying extra_host_tensor[" - << GetBasePtr(&extra_host_tensor) - << "] in mlu pooling kernel."; - }; - dev_ctx.AddStreamCallback(increase_ref_count); - MLUCnnl::PoolingForward( - ctx, - pool_mode, - out_h, - out_w, - pool_desc.get(), - nullptr /*alpha*/, - trans_in_x_desc.get(), - GetBasePtr(&trans_in_x), - nullptr /*beta*/, - GetBasePtr(&extra_device_tensor) /*params_shape_ptr*/, - trans_out_desc.get(), - GetBasePtr(&trans_out)); - } else { - MLUCnnl::PoolingForward(ctx, - pool_mode, - out_h, - out_w, - pool_desc.get(), - nullptr /*alpha*/, - trans_in_x_desc.get(), - GetBasePtr(&trans_in_x), - nullptr /*beta*/, - nullptr /*params_shape_ptr*/, - trans_out_desc.get(), - GetBasePtr(&trans_out)); - } - } else { - MLUCnnl::AdaptivePoolingForward(ctx, - pool_mode, - trans_in_x_desc.get(), - GetBasePtr(&trans_in_x), - trans_out_desc.get(), - GetBasePtr(&trans_out), - nullptr, - nullptr); - } - if (!channel_last) { - std::vector perm{0, 3, 1, 2}; - TransposeFromMLUTensor( - ctx, perm, &trans_out, out, false /*need_reshape_or_alloc*/); - } - } -}; - -template -class MLUPoolGradOpKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto &dev_ctx = ctx.template device_context(); - const phi::DenseTensor *in_x = ctx.Input("X"); - const phi::DenseTensor *out = ctx.Input("Out"); - const phi::DenseTensor *out_grad = - ctx.Input(framework::GradVarName("Out")); - phi::DenseTensor *in_x_grad = - ctx.Output(framework::GradVarName("X")); - in_x_grad->mutable_data(ctx.GetPlace()); - - std::string pooling_type = ctx.Attr("pooling_type"); - std::vector ksize = ctx.Attr>("ksize"); - std::vector strides = ctx.Attr>("strides"); - std::vector paddings = ctx.Attr>("paddings"); - bool ceil_mode = ctx.Attr("ceil_mode"); - bool exclusive = ctx.Attr("exclusive"); - bool adaptive = ctx.Attr("adaptive"); - std::string data_format = ctx.Attr("data_format"); - bool global_pooling = ctx.Attr("global_pooling"); - std::string padding_algorithm = ctx.Attr("padding_algorithm"); - - const bool channel_last = data_format == "NHWC"; - - auto in_x_dims = in_x->dims(); - framework::DDim data_dims = phi::slice_ddim(in_x_dims, 2, in_x_dims.size()); - if (channel_last) { - data_dims = phi::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); - } - - phi::funcs::UpdatePadding(&paddings, - global_pooling, - adaptive, - padding_algorithm, - data_dims, - strides, - ksize); - if (global_pooling) { - phi::funcs::UpdateKernelSize(&ksize, data_dims); - } - - // inputs need with NHWC layout - phi::DenseTensor trans_in_x; - phi::DenseTensor trans_out; - phi::DenseTensor trans_out_grad; - phi::DenseTensor trans_in_x_grad; - if (channel_last) { - trans_in_x = *in_x; - trans_out = *out; - trans_out_grad = *out_grad; - trans_in_x_grad = *in_x_grad; - } else { - std::vector perm{0, 2, 3, 1}; - TransposeFromMLUTensor( - ctx, perm, in_x, &trans_in_x, true /*need_reshape_or_alloc*/); - TransposeFromMLUTensor( - ctx, perm, out, &trans_out, true /*need_reshape_or_alloc*/); - TransposeFromMLUTensor( - ctx, perm, out_grad, &trans_out_grad, true /*need_reshape_or_alloc*/); - auto in_x_grad_dims = in_x_grad->dims(); - trans_in_x_grad = - ctx.AllocateTmpTensor({in_x_grad_dims[0], - in_x_grad_dims[2], - in_x_grad_dims[3], - in_x_grad_dims[1]}, - dev_ctx); - } - MLUCnnlTensorDesc trans_in_x_desc( - trans_in_x, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnlTensorDesc trans_out_desc( - trans_out, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnlTensorDesc trans_out_grad_desc( - trans_out_grad, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnlTensorDesc trans_in_x_grad_desc( - trans_in_x_grad, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - - cnnlPoolingMode_t pool_mode = - ToCnnlPoolingMode(pooling_type, exclusive, adaptive); - MLUCnnlPoolingDesc pool_desc(pool_mode, - CNNL_NOT_PROPAGATE_NAN, - ksize[0], - ksize[1], - paddings[0], - paddings[1], - paddings[2], - paddings[3], - strides[0], - strides[1], - 1 /*row_dilation*/, - 1 /*col_dilation*/, - ceil_mode); - - if (pooling_type == "max") { - phi::DenseTensor index_tensor = - ctx.AllocateTmpTensor(trans_out_grad.dims(), - dev_ctx); - MLUCnnlTensorDesc index_tensor_desc( - index_tensor, CNNL_LAYOUT_NHWC, ToCnnlDataType()); - MLUCnnl::PoolingIndex(ctx, - pool_desc.get(), - trans_in_x_desc.get(), - GetBasePtr(&trans_in_x), - index_tensor_desc.get(), - GetBasePtr(&index_tensor)); - if (adaptive) { - MLUCnnl::AdaptivePoolingBackward(ctx, - pool_mode, - trans_out_grad_desc.get(), - GetBasePtr(&trans_out_grad), - index_tensor_desc.get(), - GetBasePtr(&index_tensor), - trans_in_x_grad_desc.get(), - GetBasePtr(&trans_in_x_grad)); - } else { - MLUCnnl::PoolingBackward(ctx, - pool_desc.get(), - nullptr /*alpha*/, - index_tensor_desc.get(), - GetBasePtr(&index_tensor), - trans_out_grad_desc.get(), - GetBasePtr(&trans_out_grad), - trans_in_x_desc.get(), - GetBasePtr(&trans_in_x), - nullptr /*beta*/, - trans_in_x_grad_desc.get(), - GetBasePtr(&trans_in_x_grad)); - } - } else { - if (adaptive) { - MLUCnnl::AdaptivePoolingBackward(ctx, - pool_mode, - trans_out_grad_desc.get(), - GetBasePtr(&trans_out_grad), - nullptr /*index_tensor_desc.get()*/, - nullptr /*GetBasePtr(&index_tensor)*/, - trans_in_x_grad_desc.get(), - GetBasePtr(&trans_in_x_grad)); - } else { - MLUCnnl::PoolingBackward(ctx, - pool_desc.get(), - nullptr /*alpha*/, - nullptr, - nullptr, - trans_out_grad_desc.get(), - GetBasePtr(&trans_out_grad), - nullptr, - nullptr, - nullptr /*beta*/, - trans_in_x_grad_desc.get(), - GetBasePtr(&trans_in_x_grad)); - } - } - if (!channel_last) { - std::vector perm{0, 3, 1, 2}; - TransposeFromMLUTensor(ctx, - perm, - &trans_in_x_grad, - in_x_grad, - false /*need_reshape_or_alloc*/); - } - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_MLU_KERNEL(pool2d, - ops::MLUPoolOpKernel, - ops::MLUPoolOpKernel); -REGISTER_OP_MLU_KERNEL(pool2d_grad, - ops::MLUPoolGradOpKernel, - ops::MLUPoolGradOpKernel); diff --git a/paddle/fluid/operators/randperm_op_mlu.cc b/paddle/fluid/operators/randperm_op_mlu.cc deleted file mode 100644 index 2dcb0ff27e1cae49c24a3742e032ded9aea2a2b3..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/randperm_op_mlu.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/randperm_op.h" - -namespace paddle { -namespace operators { - -template -class RandpermMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - int n = ctx.Attr("n"); - unsigned int seed = static_cast(ctx.Attr("seed")); - framework::Variable* out_var = ctx.OutputVar("Out"); - phi::DenseTensor* out_tensor = - framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); - - phi::DenseTensor tmp_tensor; - tmp_tensor.Resize(phi::make_ddim({n})); - T* tmp_data = tmp_tensor.mutable_data(platform::CPUPlace()); - random_permate(tmp_data, n, seed); - framework::TensorCopySync(tmp_tensor, ctx.GetPlace(), out_tensor); - } -}; - -} // namespace operators -} // namespace paddle - -template -using kernel = paddle::operators::RandpermMLUKernel; - -REGISTER_OP_MLU_KERNEL( - randperm, kernel, kernel, kernel, kernel); diff --git a/paddle/fluid/operators/range_op_mlu.cc b/paddle/fluid/operators/range_op_mlu.cc deleted file mode 100644 index 13d067f8421ad4d80bbdbd53c000cd5f3e623809..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/range_op_mlu.cc +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/range_op.h" - -namespace paddle { -namespace operators { - -template -class RangeMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* start_t = context.Input("Start"); - auto* end_t = context.Input("End"); - auto* step_t = context.Input("Step"); - auto* out = context.Output("Out"); - - phi::DenseTensor n; - framework::TensorCopy( - *start_t, - platform::CPUPlace(), - context.template device_context(), - &n); - context.template device_context() - .Wait(); - T start = n.data()[0]; - framework::TensorCopy( - *end_t, - platform::CPUPlace(), - context.template device_context(), - &n); - context.template device_context() - .Wait(); - T end = n.data()[0]; - framework::TensorCopy( - *step_t, - platform::CPUPlace(), - context.template device_context(), - &n); - context.template device_context() - .Wait(); - T step = n.data()[0]; - - int64_t size = 0; - GetSize(start, end, step, &size); - - out->Resize(phi::make_ddim({size})); - out->mutable_data(context.GetPlace()); - - std::vector odata; - T value = start; - for (int64_t i = 0; i < size; ++i) { - odata.push_back(value); - value += step; - } - - framework::TensorFromVector(odata, context.device_context(), out); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_MLU_KERNEL(range, - paddle::operators::RangeMLUKernel, - paddle::operators::RangeMLUKernel, - paddle::operators::RangeMLUKernel, - paddle::operators::RangeMLUKernel) diff --git a/paddle/fluid/operators/reshape_op_mlu.cc b/paddle/fluid/operators/reshape_op_mlu.cc deleted file mode 100644 index acba35f1bce2e053bd82faa3136b5a5182e19b9c..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/reshape_op_mlu.cc +++ /dev/null @@ -1,158 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/tensor_utils.h" - -namespace paddle { -namespace operators { - -template -class Reshape2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - - std::vector target_shape_vector; - auto shape_tensor_vector = ctx.MultiInput("ShapeTensor"); - if (shape_tensor_vector.size() > 0) { - for (auto* shape_tensor : shape_tensor_vector) { - PADDLE_ENFORCE_EQ( - shape_tensor->dims().size(), - 1, - platform::errors::InvalidArgument( - "If the element type of 'shape' in Reshape Op is Tensor, " - "the element's shape must be [1]. But received the element's " - "shape is [%d]", - shape_tensor->dims().size())); - - target_shape_vector.push_back( - phi::GetVectorFromTensor(shape_tensor)[0]); - } - } else { - auto* shape_tensor = ctx.HasInput("Shape") - ? ctx.Input("Shape") - : nullptr; - if (shape_tensor) { - target_shape_vector = phi::GetVectorFromTensor(shape_tensor); - } else { - target_shape_vector = ctx.Attr>("shape"); - PADDLE_ENFORCE_GT( - target_shape_vector.size(), - 0, - platform::errors::InvalidArgument( - "The length of shape attribute should be larger than 0 when " - "input ShapeTensor and Shape are empty!")); - } - } - - int num_negative = - std::count(target_shape_vector.begin(), target_shape_vector.end(), -1); - PADDLE_ENFORCE_LE( - num_negative, - 1, - platform::errors::InvalidArgument( - "The max number of -1 in shape attribute or shape tensor is 1 " - "but received %d.", - num_negative)); - auto it_zero = - std::find(target_shape_vector.begin(), target_shape_vector.end(), 0); - if (it_zero != target_shape_vector.end()) { - int x_rank = x->dims().size(); - for (size_t i = 0; i < target_shape_vector.size(); i++) { - if (target_shape_vector[i] == 0) { - PADDLE_ENFORCE_LT( - i, - x_rank, - platform::errors::InvalidArgument( - "The index of 0 in shape attribute or shape tensor", - "should be less than input dim size, ", - "but the index is %d and input dim size is %d", - i, - x_rank)); - target_shape_vector[i] = x->dims().at(i); - } - } - } - - auto it = - std::find(target_shape_vector.begin(), target_shape_vector.end(), -1); - if (it != target_shape_vector.end()) { - auto ddim_out_vec = phi::vectorize(x->dims()); - int ddim_out_product = std::accumulate( - ddim_out_vec.begin(), ddim_out_vec.end(), 1, std::multiplies()); - int reshape_out_product = std::accumulate(target_shape_vector.begin(), - target_shape_vector.end(), - -1, - std::multiplies()); - int index = std::distance(target_shape_vector.begin(), it); - target_shape_vector[index] = ddim_out_product / reshape_out_product; - } - - auto out_dims = phi::make_ddim(target_shape_vector); - out->mutable_data(out_dims, ctx.GetPlace()); - - // output should copy to mlu - framework::TensorCopy( - *x, - ctx.GetPlace(), - ctx.template device_context(), - out); - out->Resize(out_dims); - } -}; - -template -class Reshape2GradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_out = ctx.Input(framework::GradVarName("Out")); - auto in_dims = d_x->dims(); - - d_x->mutable_data(ctx.GetPlace(), d_out->type()); - framework::TensorCopy( - *d_out, - ctx.GetPlace(), - ctx.template device_context(), - d_x); - d_x->Resize(in_dims); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL( - reshape2, - ops::Reshape2MLUKernel, - ops::Reshape2MLUKernel, - ops::Reshape2MLUKernel, - ops::Reshape2MLUKernel, - ops::Reshape2MLUKernel, - ops::Reshape2MLUKernel, - ops::Reshape2MLUKernel); -REGISTER_OP_MLU_KERNEL( - reshape2_grad, - ops::Reshape2GradMLUKernel, - ops::Reshape2GradMLUKernel, - ops::Reshape2GradMLUKernel, - ops::Reshape2GradMLUKernel, - ops::Reshape2GradMLUKernel, - ops::Reshape2GradMLUKernel, - ops::Reshape2GradMLUKernel); diff --git a/paddle/fluid/operators/rnn_op_mlu.cc b/paddle/fluid/operators/rnn_op_mlu.cc deleted file mode 100644 index 0f4c81d25d8cc8c35f61eaf597984e7d4f7296b0..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/rnn_op_mlu.cc +++ /dev/null @@ -1,745 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/fluid/platform/device/xpu/xpu_header.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -using DDim = framework::DDim; -using TensorList = std::vector; -template -void reset_parameter_vector( - const std::vector& raw_params_vec, - const int& num_layers, - const bool& is_bidirec, - std::vector>>* params_vec) { - // the parameter raw seuquence is [FWhi, FWhh, BWhi, BWhh] * num_layers - // + [FBhi, FBhh, BBhi, BBhh] * num_layers, we will reset the parameter to - // ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]) * num_layers - const int& direction_num = is_bidirec ? 2 : 1; - const int& layer_weight_size = 4 * direction_num; - const int& all_weight_size = num_layers * layer_weight_size; - const int& bias_start_idx = all_weight_size / 2; - for (int i = 0; i < num_layers; i++) { - params_vec->at(i).resize(layer_weight_size); - for (int j = 0; j < layer_weight_size; j++) { - int k = j % 4; - const int& section = j / 4; - int tensor_idx = i * 2 * direction_num + section * 2 + k % 2; - if (k >= 2) { - tensor_idx += bias_start_idx; - } - using remove_cv_t = typename std::remove_cv::type; - params_vec->at(i)[j] = std::make_pair( - const_cast( - raw_params_vec[tensor_idx]->template data()), - raw_params_vec[tensor_idx]->numel() * sizeof(T)); - } - } -} - -template -class RNNMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - // Input - auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* input = ctx.Input("Input"); - auto pre_state = ctx.MultiInput("PreState"); - auto weight_list = ctx.MultiInput("WeightList"); - bool has_seq_length = ctx.HasInput("SequenceLength"); - // Output - auto state = ctx.MultiOutput("State"); - auto* output = ctx.Output("Out"); - auto* reserve_data = ctx.Output("Reserve"); - // Attributes - const int& num_layers = ctx.Attr("num_layers"); - const bool& is_bidirec = ctx.Attr("is_bidirec"); - const int& hidden_size = ctx.Attr("hidden_size"); - const std::string& mode = ctx.Attr("mode"); - - const phi::DenseTensor* sequence_length = nullptr; - if (has_seq_length) { - sequence_length = ctx.Input("SequenceLength"); - } - - auto init_h = pre_state[0]; // -> hx - auto init_c = pre_state[1]; // -> cx - auto last_h = state[0]; - auto last_c = state[1]; - - // check shape - const int in_out_dim_num = input->dims().size(); - const int& seq_len = input->dims()[0]; // time_step - const int& batch_size = input->dims()[1]; - const int& input_dim = input->dims()[2]; - const int& direction_num = is_bidirec ? 2 : 1; - int in_dim_arr[in_out_dim_num] = {seq_len, batch_size, input_dim}; - int out_dim_arr[in_out_dim_num] = { - seq_len, batch_size, direction_num * hidden_size}; - int proj_size = hidden_size; - - std::vector seq_len_vec(batch_size, seq_len); - if (has_seq_length) { // set seq_len if no padding, otherwise seq_len for - // each element. - seq_len_vec = phi::GetVectorFromTensor(sequence_length); - } - cnnlDirectionMode_t direction = - is_bidirec ? CNNL_RNN_BIDIRECTIONAL : CNNL_RNN_UNIDIRECTIONAL; - - PADDLE_ENFORCE_EQ( - mode, - "LSTM", - platform::errors::InvalidArgument( - "MLU only support LSTM mode now, current mode is %s", mode)); - PADDLE_ENFORCE_EQ( - num_layers, - 1, - platform::errors::InvalidArgument( - "MLU only support 1 num_layers, current num_layers is %s", - num_layers)); - PADDLE_ENFORCE_EQ( - init_h->dims()[0], - num_layers * direction_num, - platform::errors::InvalidArgument("The num_layers of in RNN layer must" - " be the same as first dim of init " - "hidden, but received num_layers:%d," - " dim:%d", - num_layers, - init_h->dims()[0])); - - PADDLE_ENFORCE_EQ( - init_c->dims()[0], - num_layers * direction_num, - platform::errors::InvalidArgument( - "The num_layers of in RNN layer must" - " be the same as first dim of cell state hidden, but received" - " num_layers:%d, dim:%d", - num_layers, - init_c->dims()[0])); - - // weightlist - std::vector>> parameter_lists; - parameter_lists.resize(num_layers); - reset_parameter_vector( - weight_list, num_layers, is_bidirec, ¶meter_lists); - - // init the output and allocate the memory - output->mutable_data(ctx.GetPlace()); // -> y in cnnl - last_h->mutable_data(ctx.GetPlace()); // -> hy in cnnl - last_c->mutable_data(ctx.GetPlace()); // -> cy in cnnl - - MLUSeqDataDesc input_seq_data_desc(CNNL_SEQDATA_TNC, - ToCnnlDataType(input->dtype()), - in_out_dim_num, - in_dim_arr, - static_cast(seq_len_vec.size()), - seq_len_vec.data(), - nullptr); - MLUSeqDataDesc out_seq_data_desc(CNNL_SEQDATA_TNC, - ToCnnlDataType(input->dtype()), - in_out_dim_num, - out_dim_arr, - static_cast(seq_len_vec.size()), - seq_len_vec.data(), - nullptr); - MLUCnnlTensorDesc hx_desc(*init_h); - MLUCnnlTensorDesc cx_desc(*init_c); - - MLURNNDesc rnn_desc(CNNL_LSTM, - CNNL_RNN_DOUBLE_BIAS, - direction, - CNNL_RNN_LINEAR_INPUT, - ToCnnlDataType(input->dtype()), - ToCnnlDataType(input->dtype()), - input_dim, - hidden_size, - /*projection*/ proj_size, - num_layers, - nullptr, - CNNL_RNN_PADDED_IO_DISABLED); - rnn_desc.SetRNNMaskMode(CNNL_LSTM_MASK_ENABLED); - - // copy weight params - size_t weightspace_size; - phi::DenseTensor weightspace; - PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNWeightSpaceSize( - GetHandleFromCTX(ctx), rnn_desc.get(), &weightspace_size)); - - weightspace = ctx.AllocateTmpTensor( - {static_cast(weightspace_size)}, dev_ctx); - void* weightspace_ptr = weightspace.mutable_data(ctx.GetPlace()); - auto w_x = parameter_lists[0][0]; - auto w_h = parameter_lists[0][1]; - auto b_x = parameter_lists[0][2]; - auto b_h = parameter_lists[0][3]; - auto actual_total_w_size = - w_x.second + w_h.second + b_x.second + b_h.second; - - void* w_x_ptr = weightspace_ptr; - void* w_h_ptr = static_cast(weightspace_ptr) + w_x.second; - void* b_x_ptr = - static_cast(weightspace_ptr) + w_x.second + w_h.second; - void* b_h_ptr = static_cast(weightspace_ptr) + w_x.second + - w_h.second + b_x.second; - - memory::Copy(weightspace.place(), - w_x_ptr, - weightspace.place(), - w_x.first, - w_x.second, - nullptr); - memory::Copy(weightspace.place(), - w_h_ptr, - weightspace.place(), - w_h.first, - w_h.second, - nullptr); - memory::Copy(weightspace.place(), - b_x_ptr, - weightspace.place(), - b_x.first, - b_x.second, - nullptr); - memory::Copy(weightspace.place(), - b_h_ptr, - weightspace.place(), - b_h.first, - b_h.second, - nullptr); - - if (is_bidirec) { - auto bw_x = parameter_lists[0][4]; - auto bw_h = parameter_lists[0][5]; - auto bb_x = parameter_lists[0][6]; - auto bb_h = parameter_lists[0][7]; - void* bw_x_ptr = - static_cast(weightspace_ptr) + actual_total_w_size; - void* bw_h_ptr = static_cast(weightspace_ptr) + - actual_total_w_size + bw_x.second; - void* bb_x_ptr = static_cast(weightspace_ptr) + - actual_total_w_size + bw_x.second + bw_h.second; - void* bb_h_ptr = static_cast(weightspace_ptr) + - actual_total_w_size + bw_x.second + bw_h.second + - bb_x.second; - actual_total_w_size += - bw_x.second + bw_h.second + bb_x.second + bb_h.second; - - memory::Copy(weightspace.place(), - bw_x_ptr, - weightspace.place(), - bw_x.first, - bw_x.second, - nullptr); - memory::Copy(weightspace.place(), - bw_h_ptr, - weightspace.place(), - bw_h.first, - bw_h.second, - nullptr); - memory::Copy(weightspace.place(), - bb_x_ptr, - weightspace.place(), - bb_x.first, - bb_x.second, - nullptr); - memory::Copy(weightspace.place(), - bb_h_ptr, - weightspace.place(), - bb_h.first, - bb_h.second, - nullptr); - } - - PADDLE_ENFORCE_EQ(weightspace_size, - actual_total_w_size, - platform::errors::InvalidArgument( - "The weightsize doesn't match" - " weightspace_size:%d, actual_total_w_size:%d", - weightspace_size, - actual_total_w_size)); - - // get reservespace_ptr - int gate_num = 4; - int hidden_data_idx = (num_layers - 1); - hidden_data_idx += (gate_num + 1) * num_layers; - const int& block_size = direction_num * seq_len * batch_size * hidden_size; - reserve_data->Resize({hidden_data_idx, block_size}); - - reserve_data->mutable_data(ctx.GetPlace()); - - MLUCnnl::RNNForward(ctx, - rnn_desc.get(), - seq_len_vec.data(), - weightspace_ptr, - weightspace_size, - input_seq_data_desc.get(), - GetBasePtr(input), - out_seq_data_desc.get(), - GetBasePtr(output), - hx_desc.get(), - GetBasePtr(init_h), - GetBasePtr(last_h), - cx_desc.get(), - GetBasePtr(init_c), - GetBasePtr(last_c), - GetBasePtr(reserve_data)); - - if (has_seq_length) { - // if has_seq_length, do mask out the output of cnnlRNNForwardTraining - auto masked_mode = CNNL_MASKED_FILL; - float off_value = 0.0f; - - phi::DenseTensor on_value_tensor(input->dtype()); - phi::DenseTensor masked_tensor(framework::TransToPhiDataType(VT::INT8)); - phi::DenseTensor h_masked_tensor(framework::TransToPhiDataType(VT::INT8)); - on_value_tensor.Resize({1}); - masked_tensor.Resize({seq_len, batch_size, direction_num * hidden_size}); - h_masked_tensor.Resize( - {seq_len, batch_size, direction_num * hidden_size}); - - on_value_tensor.mutable_data(ctx.GetPlace()); - masked_tensor.mutable_data(ctx.GetPlace()); - int8_t* h_masked_ptr = - h_masked_tensor.mutable_data(platform::CPUPlace()); - - for (int t = 0; t < seq_len; ++t) { - for (int n = 0; n < batch_size; ++n) { - for (int c = 0; c < direction_num * hidden_size; ++c) { - auto tmp_seq_len = seq_len_vec[n]; - auto offset = t * batch_size * direction_num * hidden_size + - n * direction_num * hidden_size + c; - *(h_masked_ptr + offset) = t >= tmp_seq_len ? 1 : 0; - } - } - } - - framework::TensorCopy( - h_masked_tensor, ctx.GetPlace(), dev_ctx, &masked_tensor); - dev_ctx.Wait(); - - FillMLUTensorWithHostValue(ctx, off_value, &on_value_tensor); - MLUCnnlTensorDesc on_value_desc(on_value_tensor); - MLUCnnlTensorDesc output_desc(*output); - MLUCnnlTensorDesc masked_desc(masked_tensor); - - MLUCnnl::Mask(ctx, - masked_mode, - output_desc.get(), - GetBasePtr(output), - masked_desc.get(), - GetBasePtr(&masked_tensor), - on_value_desc.get(), - GetBasePtr(&on_value_tensor), - output_desc.get(), - GetBasePtr(output), - nullptr); - } - } -}; - -template -class RNNMLUGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = ctx.template device_context(); - auto stream = ctx.template device_context().stream(); - // get the tensor pointer for the input - auto* input = ctx.Input("Input"); - auto pre_state = ctx.MultiInput("PreState"); - auto weight_list = ctx.MultiInput("WeightList"); - auto* output = ctx.Input("Out"); - auto* reserve_data = ctx.Input("Reserve"); - const int& num_layers = ctx.Attr("num_layers"); - const bool& is_bidirec = ctx.Attr("is_bidirec"); - const int& hidden_size = ctx.Attr("hidden_size"); - const std::string& mode = ctx.Attr("mode"); - - bool has_seq_length = ctx.HasInput("SequenceLength"); - const phi::DenseTensor* sequence_length = nullptr; - if (has_seq_length) { - sequence_length = ctx.Input("SequenceLength"); - } - - PADDLE_ENFORCE_EQ( - mode, - "LSTM", - platform::errors::InvalidArgument( - "XPU only support LSTM mode now, current mode is %s", mode)); - - auto init_h = pre_state[0]; // -> hx - auto init_c = pre_state[1]; // -> cx - - auto output_grad = - ctx.Input(framework::GradVarName("Out")); - auto state_grad = - ctx.MultiInput(framework::GradVarName("State")); - auto last_h_grad = state_grad[0]; // -> dhy - auto last_c_grad = state_grad[1]; // -> dcy - - // get the tensor pointer for the output - auto* input_grad = - ctx.Output(framework::GradVarName("Input")); - auto weight_grad_list = - ctx.MultiOutput(framework::GradVarName("WeightList")); - auto pre_state_grad = - ctx.MultiOutput(framework::GradVarName("PreState")); - phi::DenseTensor* init_h_grad = nullptr; - phi::DenseTensor* init_c_grad = nullptr; - if (pre_state_grad.size() > 0) { // has gradient - init_h_grad = pre_state_grad[0]; // -> dhx - init_c_grad = pre_state_grad[1]; // -> dcx - } - - // check shape - const int in_out_dim_num = input->dims().size(); - const int& seq_len = input->dims()[0]; - const int& batch_size = input->dims()[1]; - const int& input_dim = input->dims()[2]; - const int& direction_num = is_bidirec ? 2 : 1; - int in_dim_arr[in_out_dim_num] = {seq_len, batch_size, input_dim}; - int out_dim_arr[in_out_dim_num] = { - seq_len, batch_size, direction_num * hidden_size}; - int proj_size = hidden_size; - PADDLE_ENFORCE_EQ( - num_layers, - 1, - platform::errors::InvalidArgument( - "MLU only support 1 num_layers, current num_layers is %s", - num_layers)); - PADDLE_ENFORCE_EQ( - init_h->dims()[0], - num_layers * direction_num, - platform::errors::InvalidArgument("The num_layers of in RNN layer must" - " be the same as first dim of init" - "hidden, but received num_layers:%d," - " dim:%d", - num_layers, - init_h->dims()[0])); - PADDLE_ENFORCE_EQ( - init_c->dims()[0], - num_layers * direction_num, - platform::errors::InvalidArgument( - "The num_layers of in RNN layer must" - " be the same as first dim of cell state hidden, but received" - " num_layers:%d, dim:%d", - num_layers, - init_c->dims()[0])); - - std::vector>> parameter_lists; - parameter_lists.resize(num_layers); - reset_parameter_vector( - weight_list, num_layers, is_bidirec, ¶meter_lists); - - for (unsigned int i = 0; i < weight_grad_list.size(); ++i) { - weight_grad_list[i]->mutable_data(ctx.GetPlace()); - } - std::vector>> parameter_lists_grad; - parameter_lists_grad.resize(num_layers); - reset_parameter_vector( - weight_grad_list, num_layers, is_bidirec, ¶meter_lists_grad); - - // allocate the memory and initization the input_grad - input_grad->mutable_data(input->dims(), ctx.GetPlace()); - FillMLUTensorWithHostValue(ctx, static_cast(0.0), input_grad); - - phi::DenseTensor a, b; - phi::DenseTensor* dynamic_grad_pre_h = &a; - phi::DenseTensor* dynamic_grad_pre_c = &b; - if (init_h_grad) { - init_h_grad->mutable_data(last_h_grad->dims(), ctx.GetPlace()); - FillMLUTensorWithHostValue(ctx, static_cast(0.0), init_h_grad); - } else { - dynamic_grad_pre_h->Resize(last_h_grad->dims()); - dynamic_grad_pre_h->mutable_data(ctx.GetPlace()); - FillMLUTensorWithHostValue(ctx, static_cast(0.0), dynamic_grad_pre_h); - init_h_grad = dynamic_grad_pre_h; - } - if (init_c_grad) { - init_c_grad->mutable_data(last_c_grad->dims(), ctx.GetPlace()); - } else { - dynamic_grad_pre_c->Resize(last_h_grad->dims()); - dynamic_grad_pre_c->mutable_data(ctx.GetPlace()); - init_c_grad = dynamic_grad_pre_c; - } - - std::vector seq_len_vec(batch_size, seq_len); - if (has_seq_length) { - seq_len_vec = phi::GetVectorFromTensor(sequence_length); - } - cnnlDirectionMode_t direction = - is_bidirec ? CNNL_RNN_BIDIRECTIONAL : CNNL_RNN_UNIDIRECTIONAL; - - MLUSeqDataDesc input_seq_data_desc(CNNL_SEQDATA_TNC, - ToCnnlDataType(input->dtype()), - in_out_dim_num, - in_dim_arr, - static_cast(seq_len_vec.size()), - seq_len_vec.data(), - nullptr); - MLUSeqDataDesc out_seq_data_desc(CNNL_SEQDATA_TNC, - ToCnnlDataType(input->dtype()), - in_out_dim_num, - out_dim_arr, - static_cast(seq_len_vec.size()), - seq_len_vec.data(), - nullptr); - MLUCnnlTensorDesc hx_desc(*init_h); - MLUCnnlTensorDesc cx_desc(*init_c); - MLURNNDesc rnn_desc(CNNL_LSTM, - CNNL_RNN_DOUBLE_BIAS, - direction, - CNNL_RNN_LINEAR_INPUT, - ToCnnlDataType(input->dtype()), - ToCnnlDataType(input->dtype()), - input_dim, - hidden_size, - /*projection*/ proj_size, - num_layers, - nullptr, - CNNL_RNN_PADDED_IO_DISABLED); - rnn_desc.SetRNNMaskMode(CNNL_LSTM_MASK_ENABLED); - - // copy weight - size_t weightspace_size; - phi::DenseTensor weightspace, dweightspace; - PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNWeightSpaceSize( - GetHandleFromCTX(ctx), rnn_desc.get(), &weightspace_size)); - - weightspace = ctx.AllocateTmpTensor( - {static_cast(weightspace_size)}, dev_ctx); - dweightspace = ctx.AllocateTmpTensor( - {static_cast(weightspace_size)}, dev_ctx); - void* weightspace_ptr = weightspace.mutable_data(ctx.GetPlace()); - auto w_x = parameter_lists[0][0]; - auto w_h = parameter_lists[0][1]; - auto b_x = parameter_lists[0][2]; - auto b_h = parameter_lists[0][3]; - auto actual_total_w_size = - w_x.second + w_h.second + b_x.second + b_h.second; - - void* w_x_ptr = weightspace_ptr; - void* w_h_ptr = static_cast(weightspace_ptr) + w_x.second; - void* b_x_ptr = - static_cast(weightspace_ptr) + w_x.second + w_h.second; - void* b_h_ptr = static_cast(weightspace_ptr) + w_x.second + - w_h.second + b_x.second; - - memory::Copy(weightspace.place(), - w_x_ptr, - weightspace.place(), - w_x.first, - w_x.second, - stream); - memory::Copy(weightspace.place(), - w_h_ptr, - weightspace.place(), - w_h.first, - w_h.second, - stream); - memory::Copy(weightspace.place(), - b_x_ptr, - weightspace.place(), - b_x.first, - b_x.second, - stream); - memory::Copy(weightspace.place(), - b_h_ptr, - weightspace.place(), - b_h.first, - b_h.second, - stream); - - if (is_bidirec) { - auto bw_x = parameter_lists[0][4]; - auto bw_h = parameter_lists[0][5]; - auto bb_x = parameter_lists[0][6]; - auto bb_h = parameter_lists[0][7]; - void* bw_x_ptr = - static_cast(weightspace_ptr) + actual_total_w_size; - void* bw_h_ptr = static_cast(weightspace_ptr) + - actual_total_w_size + bw_x.second; - void* bb_x_ptr = static_cast(weightspace_ptr) + - actual_total_w_size + bw_x.second + bw_h.second; - void* bb_h_ptr = static_cast(weightspace_ptr) + - actual_total_w_size + bw_x.second + bw_h.second + - bb_x.second; - actual_total_w_size += - bw_x.second + bw_h.second + bb_x.second + bb_h.second; - - memory::Copy(weightspace.place(), - bw_x_ptr, - weightspace.place(), - bw_x.first, - bw_x.second, - stream); - memory::Copy(weightspace.place(), - bw_h_ptr, - weightspace.place(), - bw_h.first, - bw_h.second, - stream); - memory::Copy(weightspace.place(), - bb_x_ptr, - weightspace.place(), - bb_x.first, - bb_x.second, - stream); - memory::Copy(weightspace.place(), - bb_h_ptr, - weightspace.place(), - bb_h.first, - bb_h.second, - stream); - } - dev_ctx.Wait(); - - PADDLE_ENFORCE_EQ(weightspace_size, - actual_total_w_size, - platform::errors::InvalidArgument( - "The weightsize doesn't match" - " weightspace_size:%d, actual_total_w_size:%d", - weightspace_size, - actual_total_w_size)); - - MLUCnnl::RNNBackward(ctx, - rnn_desc.get(), - CNNL_WGRAD_MODE_SET, - seq_len_vec.data(), - GetBasePtr(&weightspace), - GetBasePtr(&dweightspace), - weightspace.numel() * sizeof(T), - input_seq_data_desc.get(), - GetBasePtr(input), - GetBasePtr(input_grad), - out_seq_data_desc.get(), - GetBasePtr(output), - GetBasePtr(output_grad), - hx_desc.get(), - GetBasePtr(init_h), - GetBasePtr(last_h_grad), - GetBasePtr(init_h_grad), - cx_desc.get(), - GetBasePtr(init_c), - GetBasePtr(last_c_grad), - GetBasePtr(init_c_grad), - const_cast(GetBasePtr(reserve_data)), - reserve_data->numel() * sizeof(T)); - - void* dweightspace_ptr = dweightspace.mutable_data(ctx.GetPlace()); - auto dw_x = parameter_lists_grad[0][0]; - auto dw_h = parameter_lists_grad[0][1]; - auto db_x = parameter_lists_grad[0][2]; - auto db_h = parameter_lists_grad[0][3]; - auto dactual_total_w_size = - dw_x.second + dw_h.second + db_x.second + db_h.second; - - void* dw_x_ptr = dweightspace_ptr; - void* dw_h_ptr = static_cast(dweightspace_ptr) + dw_x.second; - void* db_x_ptr = - static_cast(dweightspace_ptr) + dw_x.second + dw_h.second; - void* db_h_ptr = static_cast(dweightspace_ptr) + dw_x.second + - dw_h.second + db_x.second; - - memory::Copy(weightspace.place(), - dw_x.first, - weightspace.place(), - dw_x_ptr, - dw_x.second, - stream); - memory::Copy(weightspace.place(), - dw_h.first, - weightspace.place(), - dw_h_ptr, - dw_h.second, - stream); - memory::Copy(weightspace.place(), - db_x.first, - weightspace.place(), - db_x_ptr, - db_x.second, - stream); - memory::Copy(weightspace.place(), - db_h.first, - weightspace.place(), - db_h_ptr, - db_h.second, - stream); - - if (is_bidirec) { - auto dbw_x = parameter_lists_grad[0][4]; - auto dbw_h = parameter_lists_grad[0][5]; - auto dbb_x = parameter_lists_grad[0][6]; - auto dbb_h = parameter_lists_grad[0][7]; - void* dbw_x_ptr = - static_cast(dweightspace_ptr) + dactual_total_w_size; - void* dbw_h_ptr = static_cast(dweightspace_ptr) + - dactual_total_w_size + dbw_x.second; - void* dbb_x_ptr = static_cast(dweightspace_ptr) + - dactual_total_w_size + dbw_x.second + dbw_h.second; - void* dbb_h_ptr = static_cast(dweightspace_ptr) + - dactual_total_w_size + dbw_x.second + dbw_h.second + - dbb_x.second; - dactual_total_w_size += - dbw_x.second + dbw_h.second + dbb_x.second + dbb_h.second; - - memory::Copy(weightspace.place(), - dbw_x.first, - weightspace.place(), - dbw_x_ptr, - dbw_x.second, - stream); - memory::Copy(weightspace.place(), - dbw_h.first, - weightspace.place(), - dbw_h_ptr, - dbw_h.second, - stream); - memory::Copy(weightspace.place(), - dbb_x.first, - weightspace.place(), - dbb_x_ptr, - dbb_x.second, - stream); - memory::Copy(weightspace.place(), - dbb_h.first, - weightspace.place(), - dbb_h_ptr, - dbb_h.second, - stream); - } - dev_ctx.Wait(); - - PADDLE_ENFORCE_EQ(weightspace_size, - dactual_total_w_size, - platform::errors::InvalidArgument( - "The weightsize doesn't match" - " weightspace_size:%d, dactual_total_w_size:%d", - weightspace_size, - dactual_total_w_size)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL( - rnn, ops::RNNMLUKernel); -REGISTER_OP_MLU_KERNEL( - rnn_grad, ops::RNNMLUGradKernel); diff --git a/paddle/fluid/operators/roi_align_op_mlu.cc b/paddle/fluid/operators/roi_align_op_mlu.cc deleted file mode 100644 index de0a8be93452de8958d6d81fe72e829b45c7843a..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/roi_align_op_mlu.cc +++ /dev/null @@ -1,296 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ROIAlignOpMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - out->set_layout(phi::DataLayout::kNHWC); - - auto pooled_height = ctx.Attr("pooled_height"); - auto pooled_width = ctx.Attr("pooled_width"); - auto spatial_scale = ctx.Attr("spatial_scale"); - auto sampling_ratio = ctx.Attr("sampling_ratio"); - auto aligned = ctx.Attr("aligned"); - const auto& in_dims = in->dims(); - int batch_size = in_dims[0]; - int rois_num = rois->dims()[0]; - - if (rois_num == 0) return; - auto cplace = platform::CPUPlace(); - std::vector roi_batch_id_list(rois_num); - int rois_batch_size = 0; - if (ctx.HasInput("RoisNum")) { - auto* rois_num_t = ctx.Input("RoisNum"); - rois_batch_size = rois_num_t->numel(); - PADDLE_ENFORCE_EQ( - rois_batch_size, - batch_size, - platform::errors::InvalidArgument( - "The batch size of rois and the batch size of images " - " must be the same. But received the batch size of rois is %d, " - "and the batch size of images is %d", - rois_batch_size, - batch_size)); - std::vector rois_num_list(rois_batch_size); - memory::Copy(cplace, - rois_num_list.data(), - ctx.GetPlace(), - rois_num_t->data(), - sizeof(int) * rois_batch_size, - nullptr /*stream*/); - int last_idx = 0; - for (int i = 0; i < rois_batch_size; i++) { - int end_idx = last_idx + rois_num_list[i]; - for (int j = last_idx; j < end_idx; j++) { - roi_batch_id_list[j] = i; - } - last_idx = end_idx; - } - } else { - auto lod = rois->lod(); - PADDLE_ENFORCE_EQ(lod.empty(), - false, - platform::errors::InvalidArgument( - "Input(ROIs) phi::DenseTensor of ROIAlignOp " - "does not contain LoD information.")); - auto rois_lod = lod.back(); - rois_batch_size = rois_lod.size() - 1; - PADDLE_ENFORCE_EQ(rois_batch_size, - batch_size, - platform::errors::InvalidArgument( - "The rois_batch_size and imgs " - "batch_size must be the same. But received " - "rois_batch_size = %d, " - "batch_size = %d", - rois_batch_size, - batch_size)); - int rois_num_with_lod = rois_lod[rois_batch_size]; - PADDLE_ENFORCE_EQ( - rois_num, - rois_num_with_lod, - platform::errors::InvalidArgument( - "The actual number of rois and the number of rois " - "provided from Input(RoIsLoD) in RoIAlign must be the same." - " But received actual number of rois is %d, and the number " - "of rois from RoIsLoD is %d", - rois_num, - rois_num_with_lod)); - for (int i = 0; i < rois_batch_size; i++) { - int start_idx = rois_lod[i]; - int end_idx = rois_lod[i + 1]; - for (int j = start_idx; j < end_idx; j++) { - roi_batch_id_list[j] = i; - } - } - } - - // only support float32 for now - phi::DenseTensor rois_cpu(framework::TransToPhiDataType(VT::FP32)); - rois_cpu.Resize({rois_num, 4}); - rois_cpu.mutable_data(ctx.GetPlace()); - auto& dev_ctx = ctx.template device_context(); - framework::TensorCopy(*rois, cplace, dev_ctx, &rois_cpu); - dev_ctx.Wait(); - T* rois_cpu_ptr = rois_cpu.mutable_data(platform::CPUPlace()); - - // boxes; [batch_idx, x1, y1, x2, y2] - phi::DenseTensor boxes_cpu(framework::TransToPhiDataType(VT::FP32)); - phi::DenseTensor boxes_mlu(framework::TransToPhiDataType(VT::FP32)); - boxes_cpu.Resize({rois_num, 5}); - boxes_mlu.Resize({rois_num, 5}); - T* boxes_cpu_ptr = boxes_cpu.mutable_data(platform::CPUPlace()); - boxes_mlu.mutable_data(ctx.GetPlace()); - for (int i = 0; i < rois_num; ++i) { - boxes_cpu_ptr[i * 5 + 0] = static_cast(roi_batch_id_list[i]); - boxes_cpu_ptr[i * 5 + 1] = rois_cpu_ptr[i * 4 + 0]; - boxes_cpu_ptr[i * 5 + 2] = rois_cpu_ptr[i * 4 + 1]; - boxes_cpu_ptr[i * 5 + 3] = rois_cpu_ptr[i * 4 + 2]; - boxes_cpu_ptr[i * 5 + 4] = rois_cpu_ptr[i * 4 + 3]; - } - - // copy boxes_cpu to boxes_mlu - framework::TensorCopy(boxes_cpu, ctx.GetPlace(), dev_ctx, &boxes_mlu); - dev_ctx.Wait(); - - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - const std::vector perm_to_nchw = {0, 3, 1, 2}; - phi::DenseTensor input_nhwc(in->type()); - phi::DenseTensor output_nhwc(out->type()); - TransposeFromMLUTensor( - ctx, perm_to_nhwc, in, &input_nhwc, true /*need_reshape_or_alloc*/); - auto output_dims = out->dims(); - output_nhwc.mutable_data( - {output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, - ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc( - input_nhwc, CNNL_LAYOUT_NHWC, ToCnnlDataType(input_nhwc.dtype())); - MLUCnnlTensorDesc boxes_desc(boxes_mlu); - MLUCnnlTensorDesc out_desc( - output_nhwc, CNNL_LAYOUT_NHWC, ToCnnlDataType(output_nhwc.dtype())); - MLUCnnl::RoiAlign(ctx, - pooled_height, - pooled_width, - sampling_ratio, - spatial_scale, - aligned, - input_desc.get(), - GetBasePtr(&input_nhwc), - boxes_desc.get(), - GetBasePtr(&boxes_mlu), - out_desc.get(), - GetBasePtr(&output_nhwc)); - TransposeFromMLUTensor( - ctx, perm_to_nchw, &output_nhwc, out, false /*need_reshape_or_alloc*/); - }; -}; - -template -class ROIAlignGradOpMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* rois = ctx.Input("ROIs"); - auto* out_grad = ctx.Input(framework::GradVarName("Out")); - auto* in_grad = ctx.Output(framework::GradVarName("X")); - - auto spatial_scale = ctx.Attr("spatial_scale"); - auto sampling_ratio = ctx.Attr("sampling_ratio"); - auto aligned = ctx.Attr("aligned"); - int rois_num = rois->dims()[0]; - - if (!in_grad) { - return; - } - in_grad->mutable_data(ctx.GetPlace()); - - std::vector roi_batch_id_list(rois_num); - auto cplace = platform::CPUPlace(); - int rois_batch_size = 0; - if (ctx.HasInput("RoisNum")) { - auto* rois_num_t = ctx.Input("RoisNum"); - rois_batch_size = rois_num_t->numel(); - std::vector rois_num_list(rois_batch_size); - memory::Copy(cplace, - rois_num_list.data(), - ctx.GetPlace(), - rois_num_t->data(), - sizeof(int) * rois_batch_size, - nullptr /*stream*/); - int last_idx = 0; - for (int i = 0; i < rois_batch_size; i++) { - int end_idx = last_idx + rois_num_list[i]; - for (int j = last_idx; j < end_idx; j++) { - roi_batch_id_list[j] = i; - } - last_idx = end_idx; - } - } else { - auto rois_lod = rois->lod().back(); - rois_batch_size = rois_lod.size() - 1; - for (int i = 0; i < rois_batch_size; i++) { - int start_idx = rois_lod[i]; - int end_idx = rois_lod[i + 1]; - for (int j = start_idx; j < end_idx; j++) { - roi_batch_id_list[j] = i; - } - } - } - - phi::DenseTensor rois_cpu(framework::TransToPhiDataType(VT::FP32)); - rois_cpu.Resize({rois_num, 4}); - rois_cpu.mutable_data(ctx.GetPlace()); - auto& dev_ctx = ctx.template device_context(); - framework::TensorCopy(*rois, cplace, dev_ctx, &rois_cpu); - dev_ctx.Wait(); - T* rois_cpu_ptr = rois_cpu.mutable_data(platform::CPUPlace()); - - // boxes; [batch_idx, x1, y1, x2, y2] - phi::DenseTensor boxes_cpu(framework::TransToPhiDataType(VT::FP32)); - phi::DenseTensor boxes_mlu(framework::TransToPhiDataType(VT::FP32)); - boxes_cpu.Resize({rois_num, 5}); - boxes_mlu.Resize({rois_num, 5}); - T* boxes_cpu_ptr = boxes_cpu.mutable_data(platform::CPUPlace()); - boxes_mlu.mutable_data(ctx.GetPlace()); - for (int i = 0; i < rois_num; ++i) { - boxes_cpu_ptr[i * 5 + 0] = static_cast(roi_batch_id_list[i]); - boxes_cpu_ptr[i * 5 + 1] = rois_cpu_ptr[i * 4 + 0]; - boxes_cpu_ptr[i * 5 + 2] = rois_cpu_ptr[i * 4 + 1]; - boxes_cpu_ptr[i * 5 + 3] = rois_cpu_ptr[i * 4 + 2]; - boxes_cpu_ptr[i * 5 + 4] = rois_cpu_ptr[i * 4 + 3]; - } - - // copy boxes_cpu to boxes_mlu - framework::TensorCopy(boxes_cpu, ctx.GetPlace(), dev_ctx, &boxes_mlu); - dev_ctx.Wait(); - - const std::vector perm_to_nhwc = {0, 2, 3, 1}; - const std::vector perm_to_nchw = {0, 3, 1, 2}; - phi::DenseTensor grads_nhwc(out_grad->type()); - phi::DenseTensor grads_image_nhwc(in_grad->type()); - TransposeFromMLUTensor(ctx, - perm_to_nhwc, - out_grad, - &grads_nhwc, - true /*need_reshape_or_alloc*/); - auto grads_image_dims = in_grad->dims(); - grads_image_nhwc.mutable_data({grads_image_dims[0], - grads_image_dims[2], - grads_image_dims[3], - grads_image_dims[1]}, - ctx.GetPlace()); - - MLUCnnlTensorDesc grads_desc( - grads_nhwc, CNNL_LAYOUT_NHWC, ToCnnlDataType(grads_nhwc.dtype())); - MLUCnnlTensorDesc boxes_desc(boxes_mlu); - MLUCnnlTensorDesc grads_image_desc( - grads_image_nhwc, - CNNL_LAYOUT_NHWC, - ToCnnlDataType(grads_image_nhwc.dtype())); - MLUCnnl::RoiAlignBackward(ctx, - sampling_ratio, - spatial_scale, - aligned, - grads_desc.get(), - GetBasePtr(&grads_nhwc), - boxes_desc.get(), - GetBasePtr(&boxes_mlu), - grads_image_desc.get(), - GetBasePtr(&grads_image_nhwc)); - TransposeFromMLUTensor(ctx, - perm_to_nchw, - &grads_image_nhwc, - in_grad, - false /*need_reshape_or_alloc*/); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(roi_align, ops::ROIAlignOpMLUKernel); - -REGISTER_OP_MLU_KERNEL(roi_align_grad, ops::ROIAlignGradOpMLUKernel); diff --git a/paddle/fluid/operators/scale_op_mlu.cc b/paddle/fluid/operators/scale_op_mlu.cc deleted file mode 100644 index c9aefcfc5b1fc3fbc9b8d44662528094e4114ade..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/scale_op_mlu.cc +++ /dev/null @@ -1,137 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ScaleMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const { - auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* in_var = ctx.InputVar("X"); - auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var); - - // cnnl require input, scale, bias with same type. And all in device side. - auto scale = static_cast(ctx.Attr("scale")); - phi::DenseTensor scale_tensor; - if (ctx.HasInput("ScaleTensor")) { - phi::DenseTensor float_scale_tensor = - *ctx.Input("ScaleTensor"); - if (framework::TransToProtoVarType(float_scale_tensor.dtype()) != - framework::TransToProtoVarType(in->dtype())) { - scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); - MLUCnnlTensorDesc float_scale_desc(float_scale_tensor); - MLUCnnlTensorDesc final_scale_desc(scale_tensor); - cnnlCastDataType_t cast_type = GetCastDataType( - framework::TransToProtoVarType(float_scale_tensor.dtype()), - framework::TransToProtoVarType(scale_tensor.dtype())); - MLUCnnl::Cast(ctx, - cast_type, - float_scale_desc.get(), - GetBasePtr(&float_scale_tensor), - final_scale_desc.get(), - GetBasePtr(&scale_tensor)); - } else { - scale_tensor = float_scale_tensor; - } - } else { - scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &scale, - scale_desc.get(), - GetBasePtr(&scale_tensor)); - } - - auto bias = static_cast(ctx.Attr("bias")); - phi::DenseTensor bias_tensor = - ctx.AllocateTmpTensor({1}, dev_ctx); - MLUCnnlTensorDesc bias_desc(bias_tensor); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &bias, - bias_desc.get(), - GetBasePtr(&bias_tensor)); - - auto* out_var = ctx.OutputVar("Out"); - if (in_var->IsType() && in_var != out_var) { - auto& in_slr = in_var->Get(); - auto* out_slr = out_var->GetMutable(); - out_slr->set_rows(in_slr.rows()); - out_slr->set_height(in_slr.height()); - } - auto* out = - framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); - out->mutable_data(in->place()); - - MLUCnnlTensorDesc input_desc(*in); - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnlTensorDesc output_desc(*out); - - const int axis = std::max(in->dims().size() - 1, 0); - auto bias_after_scale = ctx.Attr("bias_after_scale"); - if (bias_after_scale) { - MLUCnnl::Scale(ctx, - axis, - input_desc.get(), - GetBasePtr(in), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - output_desc.get(), - GetBasePtr(out)); - } else { - phi::DenseTensor new_bias_tensor = - ctx.AllocateTmpTensor({1}, dev_ctx); - MLUCnnlTensorDesc new_bias_desc(new_bias_tensor); - - MLUCnnlOpTensorDesc mul_op_desc(CNNL_OP_TENSOR_MUL, - ToCnnlDataType(in->dtype()), - CNNL_NOT_PROPAGATE_NAN); - MLUCnnl::OpTensor(ctx, - mul_op_desc.get(), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - new_bias_desc.get(), - GetBasePtr(&new_bias_tensor), - ToCnnlDataType(in->dtype())); - MLUCnnl::Scale(ctx, - axis, - input_desc.get(), - GetBasePtr(in), - scale_desc.get(), - GetBasePtr(&scale_tensor), - new_bias_desc.get(), - GetBasePtr(&new_bias_tensor), - output_desc.get(), - GetBasePtr(out)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(scale, - ops::ScaleMLUKernel, - ops::ScaleMLUKernel); diff --git a/paddle/fluid/operators/scatter_op_mlu.cc b/paddle/fluid/operators/scatter_op_mlu.cc deleted file mode 100644 index 83cbbbd7b9e691f08abdb8be658b19301379ded2..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/scatter_op_mlu.cc +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class ScatterMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* indices = ctx.Input("Ids"); - auto* updates = ctx.Input("Updates"); - bool overwrite = ctx.Attr("overwrite"); - auto* out = ctx.Output("Out"); - auto place = ctx.GetPlace(); - out->mutable_data(place); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc indices_desc(*indices); - MLUCnnlTensorDesc updates_desc(*updates); - MLUCnnlTensorDesc out_desc(*out); - cnnlScatterRefMode_t mode; - if (overwrite) { - mode = CNNL_SCATTERREF_UPDATE; - MLUCnnl::ScatterRefFunctor(ctx, - x_desc.get(), - GetBasePtr(x), - updates_desc.get(), - GetBasePtr(updates), - indices_desc.get(), - GetBasePtr(indices), - mode); - } else { - phi::DenseTensor tensor_zeros(updates->type()); - tensor_zeros.mutable_data(updates->dims(), ctx.GetPlace()); - MLUCnnlTensorDesc tensor_zeros_desc(tensor_zeros); - float value = 0.0; - auto value_t = static_cast(value); - MLUCnnl::Fill(ctx, - CNNL_POINTER_MODE_HOST, - &value_t, - tensor_zeros_desc.get(), - GetBasePtr(&tensor_zeros)); - mode = CNNL_SCATTERREF_UPDATE; - MLUCnnl::ScatterRefFunctor(ctx, - x_desc.get(), - GetBasePtr(x), - tensor_zeros_desc.get(), - GetBasePtr(&tensor_zeros), - indices_desc.get(), - GetBasePtr(indices), - mode); - mode = CNNL_SCATTERREF_ADD; - MLUCnnl::ScatterRefFunctor(ctx, - x_desc.get(), - GetBasePtr(x), - updates_desc.get(), - GetBasePtr(updates), - indices_desc.get(), - GetBasePtr(indices), - mode); - } - paddle::framework::TensorCopy(*x, place, out); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(scatter, - ops::ScatterMLUKernel, - ops::ScatterMLUKernel); diff --git a/paddle/fluid/operators/set_value_op_mlu.cc b/paddle/fluid/operators/set_value_op_mlu.cc deleted file mode 100644 index 06369b83bbab91691a4bfb831d4784e5d4e9ba25..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/set_value_op_mlu.cc +++ /dev/null @@ -1,214 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/set_value_op.h" - -namespace paddle { -namespace operators { - -using MLUDeviceContext = platform::MLUDeviceContext; - -template -class SetValueMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input("Input"); - auto* value_tensor = ctx.Input("ValueTensor"); - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - auto starts_tensor_list = - ctx.MultiInput("StartsTensorList"); - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - auto steps_tensor_list = - ctx.MultiInput("StepsTensorList"); - - auto axes = ctx.Attr>("axes"); - auto starts = ctx.Attr>("starts"); - auto ends = ctx.Attr>("ends"); - auto steps = ctx.Attr>("steps"); - auto shape = ctx.Attr>("shape"); - auto decrease_axes = ctx.Attr>("decrease_axes"); - auto none_axes = ctx.Attr>("none_axes"); - - if (!starts_tensor_list.empty()) { - starts = GetDataFromTensorList(starts_tensor_list); - } - if (!ends_tensor_list.empty()) { - ends = GetDataFromTensorList(ends_tensor_list); - } - if (!steps_tensor_list.empty()) { - steps = GetDataFromTensorList(steps_tensor_list); - } - - auto in_dims = in->dims(); - phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends, &steps); - auto slice_dims = - phi::funcs::GetSliceDims(in_dims, axes, starts, ends, &steps); - auto decrease_slice_dims = - phi::funcs::GetDecreasedDims(slice_dims, decrease_axes); - - auto slice_dims_for_assign = decrease_slice_dims; - if (!none_axes.empty()) { - std::vector slice_dims_with_none; - size_t none_axes_cur = 0, decrease_axes_cur = 0; - for (int i = 0; i < slice_dims.size(); ++i) { - while (none_axes_cur < none_axes.size() && - none_axes[none_axes_cur] <= i) { - slice_dims_with_none.push_back(1); - none_axes_cur++; - } - if (decrease_axes_cur < decrease_axes.size() && - decrease_axes[decrease_axes_cur] == i) { - decrease_axes_cur++; - } else { - slice_dims_with_none.push_back(slice_dims[i]); - } - } - while (none_axes_cur < none_axes.size()) { - slice_dims_with_none.push_back(1); - none_axes_cur++; - } - - slice_dims_for_assign = phi::make_ddim(slice_dims_with_none); - } - int in_size = in_dims.size(); - int starts_indices[in_size] = {0}; - int ends_indices[in_size] = {0}; - int strides_indices[in_size] = {0}; - - for (int i = 0; i < in_dims.size(); ++i) { - starts_indices[i] = 0; - ends_indices[i] = static_cast(slice_dims[i]); - strides_indices[i] = 1; - } - for (size_t i = 0; i < axes.size(); i++) { - int axis_index = axes[i]; - starts_indices[axis_index] = static_cast(starts[i]); - ends_indices[axis_index] = static_cast(ends[i]); - strides_indices[axis_index] = static_cast(steps[i]); - } - phi::DenseTensor value_t(in->type()); - if (value_tensor != nullptr) { - value_t.ShareDataWith(*value_tensor); - } else { - auto value_dims = phi::make_ddim(shape); - CheckIsDimsMatch(slice_dims_for_assign, value_dims); - - value_t.mutable_data(value_dims, ctx.GetPlace()); - auto value_name = - GetValueName(framework::TransToProtoVarType(in->dtype())); - CopyVectorToTensor(value_name.c_str(), &value_t, ctx); - value_t.Resize(value_dims); - } - - phi::DenseTensor value_temp(in->type()); - if (slice_dims_for_assign == value_t.dims()) { - value_temp.ShareDataWith(value_t); - } else { - value_temp.Resize(slice_dims_for_assign); - value_temp.mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc value_t_desc(value_t); - MLUCnnlTensorDesc value_temp_desc(value_temp); - MLUCnnl::BroadcastTo(ctx, - value_t_desc.get(), - GetBasePtr(&value_t), - value_temp_desc.get(), - GetBasePtr(&value_temp)); - } - - int64_t input_numel = phi::product(in_dims); - int64_t value_numel = phi::product(value_temp.dims()); - phi::DenseTensor in_temp, out_temp, val_temp, index_out; - int64_t stride_step = phi::product(in_dims); - std::vector index_indices(stride_step); - std::iota(index_indices.begin(), index_indices.end(), 0); - phi::DenseTensor index_temp; - in_temp.ShareDataWith(*in); - val_temp.ShareDataWith(value_temp); - paddle::framework::TensorFromVector( - index_indices, ctx.device_context(), &index_temp); - index_temp.Resize(in_dims); - auto index_dims = in_dims; - for (int i = 0; i < in_dims.size(); ++i) { - if (starts_indices[i] < 0 || ends_indices[i] < 0) { - starts_indices[i] -= in_dims[i]; - ends_indices[i] -= in_dims[i]; - } - if (strides_indices[i] > 0) - index_dims[i] = - static_cast((ends_indices[i] - starts_indices[i] - 1) / - strides_indices[i]) + - 1; - else - index_dims[i] = - static_cast((ends_indices[i] - starts_indices[i] + 1) / - strides_indices[i]) + - 1; - } - auto new_in_dims = phi::make_ddim({input_numel}); - auto new_val_dims = phi::make_ddim({value_numel}); - in_temp.Resize(new_in_dims); - val_temp.Resize(new_val_dims); - index_out.Resize(index_dims); - index_out.mutable_data(ctx.GetPlace()); - cnnlScatterRefMode_t mode = CNNL_SCATTERREF_UPDATE; - MLUCnnlTensorDesc x_desc(in_temp); - MLUCnnlTensorDesc indices_desc(index_temp); - MLUCnnlTensorDesc indices_out_desc(index_out); - MLUCnnlTensorDesc updates_desc(val_temp); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::StridedSlice(ctx, - starts_indices, - ends_indices, - strides_indices, - indices_desc.get(), - GetBasePtr(&index_temp), - indices_out_desc.get(), - GetBasePtr(&index_out)); - PADDLE_ENFORCE_EQ( - static_cast(phi::product(index_out.dims())), - phi::product(slice_dims_for_assign), - platform::errors::InvalidArgument( - "OP(set_value) error index indices and value update not match ")); - phi::DenseTensor index_final; - index_final.ShareDataWith(index_out); - int64_t indices_numel = phi::product(index_dims); - auto new_index_dims = phi::make_ddim({indices_numel}); - index_final.Resize(new_index_dims); - MLUCnnlTensorDesc indices_final_desc(index_final); - MLUCnnl::ScatterRefFunctor(ctx, - x_desc.get(), - GetBasePtr(&in_temp), - updates_desc.get(), - GetBasePtr(&val_temp), - indices_final_desc.get(), - GetBasePtr(&index_final), - mode); - in_temp.Resize(in_dims); - paddle::framework::TensorCopy(in_temp, ctx.GetPlace(), out); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(set_value, - ops::SetValueMLUKernel, - ops::SetValueMLUKernel); diff --git a/paddle/fluid/operators/shape_op_mlu.cc b/paddle/fluid/operators/shape_op_mlu.cc deleted file mode 100644 index f69a202819935a16576f10d3e1aea94cd6d1d0ed..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/shape_op_mlu.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_MLU -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using SelectedRows = phi::SelectedRows; - -template -class ShapeMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in_var = ctx.InputVar("Input"); - framework::DDim in_dims; - if (in_var->IsType()) { - in_dims = in_var->Get().value().dims(); - } else { - in_dims = in_var->Get().dims(); - } - auto* out_t = ctx.Output("Out"); - out_t->Resize({in_dims.size()}); - out_t->mutable_data(ctx.GetPlace()); - - // shape op cpu - phi::DenseTensor shape_on_cpu( - framework::TransToPhiDataType(framework::proto::VarType::INT32)); - shape_on_cpu.Resize({in_dims.size()}); - auto cpu_data = shape_on_cpu.mutable_data(platform::CPUPlace()); - for (int i = 0; i < in_dims.size(); ++i) { - cpu_data[i] = in_dims[i]; - } - - // cpu to mlu - auto& dev_ctx = ctx.template device_context(); - framework::TensorCopy(shape_on_cpu, ctx.GetPlace(), dev_ctx, out_t); - dev_ctx.Wait(); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(shape, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel, - ops::ShapeMLUKernel); - -#endif diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc deleted file mode 100644 index 431a36d414c99c2c14d1344fcbb7f4439875ae64..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -const int kIgnoreIndex = -100; - -void CheckAttrs(const framework::ExecutionContext& ctx) { - // cnnl not support normalize and ignore_index - bool normalize = ctx.Attr("normalize"); - int ignore_index = ctx.Attr("ignore_index"); - PADDLE_ENFORCE_EQ(normalize, - false, - platform::errors::InvalidArgument( - "attr normalize must be false, but got true")); - PADDLE_ENFORCE_EQ(ignore_index, - kIgnoreIndex, - platform::errors::InvalidArgument( - "attr ignore_index must be default %d, but got %d", - kIgnoreIndex, - ignore_index)); -} - -template -class SigmoidCrossEntropyWithLogitsMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - CheckAttrs(ctx); - - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - - auto* out = ctx.Output("Out"); - - auto place = ctx.GetPlace(); - - out->mutable_data(place); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc label_desc(*label); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::BceWithLogits(ctx, - CNNL_BCE_WITH_LOGITS_NONE, - x_desc.get(), - GetBasePtr(x), - label_desc.get(), - GetBasePtr(label), - nullptr, - nullptr, - nullptr, - nullptr, - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class SigmoidCrossEntropyWithLogitsMLUGradKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - CheckAttrs(ctx); - - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - - auto* dx = ctx.Output(framework::GradVarName("X")); - - auto place = ctx.GetPlace(); - - dx->mutable_data(place); - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc label_desc(*label); - MLUCnnlTensorDesc dout_desc(*dout); - MLUCnnl::BceWithLogitsBackward(ctx, - CNNL_BCE_WITH_LOGITS_NONE, - dout_desc.get(), - GetBasePtr(dout), - x_desc.get(), - GetBasePtr(x), - label_desc.get(), - GetBasePtr(label), - nullptr, - nullptr, - nullptr, - nullptr, - x_desc.get(), - GetBasePtr(dx)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_MLU_KERNEL( - sigmoid_cross_entropy_with_logits, - ops::SigmoidCrossEntropyWithLogitsMLUKernel, - ops::SigmoidCrossEntropyWithLogitsMLUKernel); -REGISTER_OP_MLU_KERNEL( - sigmoid_cross_entropy_with_logits_grad, - ops::SigmoidCrossEntropyWithLogitsMLUGradKernel, - ops::SigmoidCrossEntropyWithLogitsMLUGradKernel); diff --git a/paddle/fluid/operators/size_op_mlu.cc b/paddle/fluid/operators/size_op_mlu.cc deleted file mode 100644 index 5553f538a575d52b4cfb1989f08976c2d4dbbd54..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/size_op_mlu.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class SizeMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("Input"); - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - int64_t size = x->numel(); - FillMLUTensorWithHostValue(ctx, size, out); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(size, - ops::SizeMLUKernel, - ops::SizeMLUKernel, - ops::SizeMLUKernel, - ops::SizeMLUKernel, - ops::SizeMLUKernel, - ops::SizeMLUKernel); diff --git a/paddle/fluid/operators/slice_op_mlu.cc b/paddle/fluid/operators/slice_op_mlu.cc deleted file mode 100644 index 107907ecc7642641f20988c13579e00432d5c2f0..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/slice_op_mlu.cc +++ /dev/null @@ -1,217 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/utils.h" -#include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/kernels/funcs/slice_utils.h" - -namespace paddle { -namespace operators { - -template -class SliceMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* out = ctx.Output("Out"); - - auto axes = ctx.Attr>("axes"); - auto starts = ctx.Attr>("starts"); - auto ends = ctx.Attr>("ends"); - - auto decrease_axis = ctx.Attr>("decrease_axis"); - auto infer_flags = ctx.Attr>("infer_flags"); - - // Get the accurate attribute value of starts and ends - auto starts_tensor_list = - ctx.MultiInput("StartsTensorList"); - if (ctx.HasInput("StartsTensor")) { - starts = phi::GetVectorFromTensor( - ctx.Input("StartsTensor")); - } else if (starts_tensor_list.size() > 0) { - starts = GetDataFromTensorList(starts_tensor_list); - } - - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - if (ctx.HasInput("EndsTensor")) { - ends = phi::GetVectorFromTensor( - ctx.Input("EndsTensor")); - } else if (ends_tensor_list.size() > 0) { - ends = GetDataFromTensorList(ends_tensor_list); - } - - PADDLE_ENFORCE_EQ( - starts.size(), - axes.size(), - platform::errors::InvalidArgument( - "The size of starts must be equal to the size of axes.")); - PADDLE_ENFORCE_EQ( - ends.size(), - axes.size(), - platform::errors::InvalidArgument( - "The size of ends must be equal to the size of axes.")); - - const auto& in_dims = input->dims(); - auto slice_dims = out->dims(); - bool reset_slice_dims = false; - if (ctx.HasInput("StartsTensor") || ctx.HasInput("EndsTensor") || - starts_tensor_list.size() > 0 || ends_tensor_list.size() > 0) { - // Infer output dims - for (size_t i = 0; i < axes.size(); ++i) { - // when start == -1 && end == start+1 - if (starts[i] == -1 && ends[i] == 0 && infer_flags[i] == -1) { - auto ret = - std::find(decrease_axis.begin(), decrease_axis.end(), axes[i]); - if (ret != decrease_axis.end()) { - ends[i] = in_dims[axes[i]]; - } - } - } - - phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends); - slice_dims = phi::funcs::GetSliceDims( - in_dims, axes, starts, ends, nullptr, nullptr); - reset_slice_dims = true; - auto out_dims = phi::funcs::GetDecreasedDims(slice_dims, decrease_axis); - - out->Resize(out_dims); - } - if (slice_dims.size() != in_dims.size() && !reset_slice_dims) { - phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends); - slice_dims = phi::funcs::GetSliceDims( - in_dims, axes, starts, ends, nullptr, nullptr); - } - - int in_dim_size = input->dims().size(); - if (static_cast(axes.size()) != in_dim_size) { - std::vector tmp_starts(in_dim_size, 0); - const auto& in_dims_vec = phi::vectorize(input->dims()); - std::vector tmp_ends(in_dims_vec.begin(), in_dims_vec.end()); - for (size_t i = 0; i < axes.size(); ++i) { - tmp_starts[axes[i]] = starts[i]; - tmp_ends[axes[i]] = ends[i]; - } - starts.swap(tmp_starts); - ends.swap(tmp_ends); - } - std::vector strides(in_dim_size, 1); - - out->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc out_desc(slice_dims.size(), - phi::vectorize(slice_dims).data(), - ToCnnlDataType()); - MLUCnnl::StridedSlice(ctx, - starts.data(), - ends.data(), - strides.data(), - input_desc.get(), - GetBasePtr(input), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class SliceGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dinput = - ctx.Output(framework::GradVarName("Input")); - - auto axes = ctx.Attr>("axes"); - auto starts = ctx.Attr>("starts"); - auto ends = ctx.Attr>("ends"); - - // Get the accurate attribute value of starts and ends - auto starts_tensor_list = - ctx.MultiInput("StartsTensorList"); - if (ctx.HasInput("StartsTensor")) { - starts = phi::GetVectorFromTensor( - ctx.Input("StartsTensor")); - } else if (starts_tensor_list.size() > 0) { - starts = GetDataFromTensorList(starts_tensor_list); - } - - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - if (ctx.HasInput("EndsTensor")) { - ends = phi::GetVectorFromTensor( - ctx.Input("EndsTensor")); - } else if (ends_tensor_list.size() > 0) { - ends = GetDataFromTensorList(ends_tensor_list); - } - - const auto& in_dims = input->dims(); - auto slice_dims = dout->dims(); - if (slice_dims.size() != in_dims.size()) { - phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends); - slice_dims = phi::funcs::GetSliceDims( - in_dims, axes, starts, ends, nullptr, nullptr); - } - - int in_dim_size = input->dims().size(); - if (static_cast(axes.size()) != in_dim_size) { - std::vector tmp_starts(in_dim_size, 0); - const auto& in_dims_vec = phi::vectorize(input->dims()); - std::vector tmp_ends(in_dims_vec.begin(), in_dims_vec.end()); - for (size_t i = 0; i < axes.size(); ++i) { - tmp_starts[axes[i]] = starts[i]; - tmp_ends[axes[i]] = ends[i]; - } - starts.swap(tmp_starts); - ends.swap(tmp_ends); - } - std::vector strides(in_dim_size, 1); - - dinput->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc dout_desc(slice_dims.size(), - phi::vectorize(slice_dims).data(), - ToCnnlDataType()); - MLUCnnlTensorDesc dinput_desc(*dinput); - MLUCnnl::StridedSliceGrad(ctx, - starts.data(), - ends.data(), - strides.data(), - dout_desc.get(), - GetBasePtr(dout), - dinput_desc.get(), - GetBasePtr(dinput)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(slice, - ops::SliceMLUKernel, - ops::SliceMLUKernel, - ops::SliceMLUKernel, - ops::SliceMLUKernel, - ops::SliceMLUKernel, - ops::SliceMLUKernel); - -REGISTER_OP_MLU_KERNEL(slice_grad, - ops::SliceGradMLUKernel, - ops::SliceGradMLUKernel, - ops::SliceGradMLUKernel, - ops::SliceGradMLUKernel, - ops::SliceGradMLUKernel); diff --git a/paddle/fluid/operators/softmax_op_mlu.cc b/paddle/fluid/operators/softmax_op_mlu.cc deleted file mode 100644 index e1e3ab8291b28f08e00dd9ab604b2787c509bc97..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/softmax_op_mlu.cc +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/kernels/funcs/axis_utils.h" - -namespace paddle { -namespace operators { - -template -class SoftmaxMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* out = ctx.Output("Out"); - out->mutable_data(ctx.GetPlace()); - - const int rank = in->dims().size(); - const int axis = phi::funcs::CanonicalAxis(ctx.Attr("axis"), rank); - - // cnnl softmax only support 3-dims, regard all shape as [d1, d2, d3] - const int cnnl_softmax_dims = 3; - const int d1 = phi::funcs::SizeToAxis(axis, in->dims()); - const int d2 = in->dims()[axis]; - const int d3 = phi::funcs::SizeOutAxis(axis, in->dims()); - - // CNNL_SOFTMAX_MODE_LOW_DIMENSION has better perfermence, use it as much as - // possible. - cnnlSoftmaxMode_t mode = CNNL_SOFTMAX_MODE_LOW_DIMENSION; - std::vector regard_in_shape{d1, 1, d2}; - if (d3 != 1) { - mode = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; - regard_in_shape = {d1, d2, d3}; - } - - static const cnnlSoftmaxAlgorithm_t algo = softmax_algo; - MLUCnnlTensorDesc in_desc( - cnnl_softmax_dims, regard_in_shape.data(), ToCnnlDataType()); - MLUCnnl::SoftmaxForward(ctx, - algo, - mode, - NULL, - in_desc.get(), - GetBasePtr(in), - NULL, - in_desc.get(), - GetBasePtr(out)); - } -}; - -template -class SoftmaxGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - - auto* dX = ctx.Output(framework::GradVarName("X")); - dX->mutable_data(ctx.GetPlace()); - - const int rank = out->dims().size(); - const int axis = phi::funcs::CanonicalAxis(ctx.Attr("axis"), rank); - - // cnnl softmax only support 3-dims, regard all shape as [d1, d2, d3] - const int cnnl_softmax_dims = 3; - const int d1 = phi::funcs::SizeToAxis(axis, out->dims()); - const int d2 = out->dims()[axis]; - const int d3 = phi::funcs::SizeOutAxis(axis, out->dims()); - - // CNNL_SOFTMAX_MODE_LOW_DIMENSION has better perfermence, use it as much as - // possible. - cnnlSoftmaxMode_t mode = CNNL_SOFTMAX_MODE_LOW_DIMENSION; - std::vector regard_out_shape{d1, 1, d2}; - if (d3 != 1) { - mode = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; - regard_out_shape = {d1, d2, d3}; - } - - static const cnnlSoftmaxAlgorithm_t algo = softmax_algo; - MLUCnnlTensorDesc out_desc( - cnnl_softmax_dims, regard_out_shape.data(), ToCnnlDataType()); - MLUCnnl::SoftmaxBackward(ctx, - algo, - mode, - out_desc.get(), - GetBasePtr(out), - out_desc.get(), - GetBasePtr(dOut), - out_desc.get(), - GetBasePtr(dX)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL( - softmax, - ops::SoftmaxMLUKernel, - ops::SoftmaxMLUKernel); -REGISTER_OP_MLU_KERNEL(softmax_grad, - ops::SoftmaxGradMLUKernel, - ops::SoftmaxGradMLUKernel); -REGISTER_OP_MLU_KERNEL(log_softmax, - ops::SoftmaxMLUKernel, - ops::SoftmaxMLUKernel); -REGISTER_OP_MLU_KERNEL( - log_softmax_grad, - ops::SoftmaxGradMLUKernel, - ops::SoftmaxGradMLUKernel); diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc deleted file mode 100644 index 87d788b478367b72c8a36c92919b440f37050b75..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/phi/kernels/funcs/axis_utils.h" - -namespace paddle { -namespace operators { - -template -class SoftmaxWithCrossEntropyMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* logits = ctx.Input("Logits"); - auto* labels = ctx.Input("Label"); - auto* softmax = ctx.Output("Softmax"); - auto* loss = ctx.Output("Loss"); - auto* backprop = ctx.Output("Backprop"); - auto soft_label = ctx.Attr("soft_label"); - - PADDLE_ENFORCE_EQ(ctx.Attr("use_softmax"), - true, - platform::errors::InvalidArgument( - "use_softmax=False is not supported in " - "the mlu kernel of softmax_with_cross_entropy.")); - - const int rank = logits->dims().size(); - const int axis = phi::funcs::CanonicalAxis(ctx.Attr("axis"), rank); - - loss->mutable_data(ctx.GetPlace()); - backprop->mutable_data(ctx.GetPlace()); - softmax->mutable_data(ctx.GetPlace()); - - // cnnl softmax only support 3-dims, regard all shape as [d1, d2, d3] - const int cnnl_softmax_dims = 3; - const int d1 = phi::funcs::SizeToAxis(axis, logits->dims()); - const int d2_logits = logits->dims()[axis]; - const int d2_labels = labels->dims()[axis]; - const int d3 = phi::funcs::SizeOutAxis(axis, logits->dims()); - - // CNNL_SOFTMAX_MODE_LOW_DIMENSION has better perfermence, use it as much as - // possible. - cnnlSoftmaxMode_t mode = CNNL_SOFTMAX_MODE_LOW_DIMENSION; - std::vector regard_logits_shape{d1, 1, d2_logits}; - std::vector regard_labels_shape{d1, 1, d2_labels}; - std::vector regard_loss_shape{d1, 1, 1}; - if (d3 != 1) { - mode = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; - regard_logits_shape = {d1, d2_logits, d3}; - regard_labels_shape = {d1, d2_labels, d3}; - regard_loss_shape = {d1, 1, d3}; - } - - MLUCnnlTensorDesc logits_desc( - cnnl_softmax_dims, regard_logits_shape.data(), ToCnnlDataType()); - MLUCnnlTensorDesc labels_desc( - cnnl_softmax_dims, regard_labels_shape.data(), ToCnnlDataType()); - MLUCnnlTensorDesc loss_desc( - cnnl_softmax_dims, regard_loss_shape.data(), ToCnnlDataType()); - - const cnnlSoftmaxAlgorithm_t algo = CNNL_SOFTMAX_ACCURATE; - MLUCnnl::SoftmaxForward(ctx, - algo, - mode, - NULL, - logits_desc.get(), - GetBasePtr(logits), - NULL, - logits_desc.get(), - GetBasePtr(softmax)); - - if (soft_label) { - const cnnlComputationPreference_t prefer = - CNNL_COMPUTATION_HIGH_PRECISION; - MLUCnnl::SoftmaxCrossEntropyWithLogits(ctx, - mode, - prefer, - logits_desc.get(), - GetBasePtr(logits), - labels_desc.get(), - GetBasePtr(labels), - loss_desc.get(), - GetBasePtr(loss), - logits_desc.get(), - GetBasePtr(backprop)); - } else { - PADDLE_ENFORCE_EQ(d3, - 1, - platform::errors::InvalidArgument( - "If soft_label=False, axis must be -1 or" - " can be regard as last dimention in mlu kernel.")); - phi::DenseTensor labels_int32(framework::TransToPhiDataType(VT::INT32)); - labels_int32.Resize(labels->dims()); - labels_int32.mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc labels_int64_desc(*labels); - MLUCnnlTensorDesc labels_int32_desc(labels_int32); - cnnlCastDataType_t cast_type = GetCastDataType(VT::INT64, VT::INT32); - MLUCnnl::Cast(ctx, - cast_type, - labels_int64_desc.get(), - GetBasePtr(labels), - labels_int32_desc.get(), - GetBasePtr(&labels_int32)); - - const int regard_sparse_shape[cnnl_softmax_dims - 1] = {d1, 1}; - MLUCnnlTensorDesc sparse_labels_desc(cnnl_softmax_dims - 1, - regard_sparse_shape, - ToCnnlDataType()); - MLUCnnlTensorDesc sparse_loss_desc( - cnnl_softmax_dims - 1, regard_sparse_shape, ToCnnlDataType()); - - MLUCnnl::SparseSoftmaxXentWithLogits(ctx, - mode, - logits_desc.get(), - GetBasePtr(logits), - sparse_labels_desc.get(), - GetBasePtr(&labels_int32), - sparse_loss_desc.get(), - GetBasePtr(loss), - logits_desc.get(), - GetBasePtr(backprop)); - } - } -}; - -template -class SoftmaxWithCrossEntropyGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* backprop = ctx.Input("Backprop"); - auto* loss_grad = - ctx.Input(framework::GradVarName("Loss")); - auto* logits_grad = - ctx.Output(framework::GradVarName("Logits")); - PADDLE_ENFORCE_NOT_NULL(backprop, - platform::errors::PreconditionNotMet( - "backprop should not be null in MLU kernel of " - "softmax_with_cross_entropy_grad.")); - logits_grad->mutable_data(ctx.GetPlace()); - - MLUCnnlOpTensorDesc mul_op_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - MLUCnnlTensorDesc backprop_desc(*backprop); - MLUCnnlTensorDesc loss_grad_desc(*loss_grad); - MLUCnnlTensorDesc logits_grad_desc(*logits_grad); - MLUCnnl::OpTensor(ctx, - mul_op_desc.get(), - backprop_desc.get(), - GetBasePtr(backprop), - loss_grad_desc.get(), - GetBasePtr(loss_grad), - logits_grad_desc.get(), - GetBasePtr(logits_grad), - ToCnnlDataType()); - } -}; -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL( - softmax_with_cross_entropy, - ops::SoftmaxWithCrossEntropyMLUKernel, - ops::SoftmaxWithCrossEntropyMLUKernel); -REGISTER_OP_MLU_KERNEL( - softmax_with_cross_entropy_grad, - ops::SoftmaxWithCrossEntropyGradMLUKernel, - ops::SoftmaxWithCrossEntropyGradMLUKernel); diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc deleted file mode 100644 index af547afd0329437467a24986018f6dd4b7b17fa5..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/split_op_mlu.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/split_op.h" -#include "paddle/phi/core/tensor_utils.h" - -namespace paddle { -namespace operators { - -template -class SplitMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - // init parameter - auto* in = ctx.Input("X"); - auto outs = ctx.MultiOutput("Out"); - int num = ctx.Attr("num"); - std::vector sections = ctx.Attr>("sections"); - int axis = ctx.Attr("axis"); - auto in_dims = in->dims(); - auto out_size = outs.size(); - auto num_tensor = num == 0 ? out_size : num; - - bool need_resize_outs_dims = false; - if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); - axis = phi::GetVectorFromTensor(axis_tensor)[0]; - need_resize_outs_dims = true; - } - auto sections_tensor_list = - ctx.MultiInput("SectionsTensorList"); - if (sections_tensor_list.size() > 0) { - sections = GetDataFromTensorList(sections_tensor_list); - need_resize_outs_dims = true; - } - if (need_resize_outs_dims) { - std::vector outs_dims = - UpdateOutsDims(true, true, in_dims, num, sections, axis, out_size); - for (size_t j = 0; j < outs.size(); ++j) { - outs[j]->Resize(outs_dims[j]); - } - } - - // init out tensors - std::vector vct_tensor; - std::vector output_descs; - std::vector desc_vector; - for (size_t i = 0; i < outs.size(); i++) { - outs[i]->mutable_data(ctx.GetPlace()); - output_descs.emplace_back(MLUCnnlTensorDesc( - *outs[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(outs[i]->dtype()))); - desc_vector.push_back(output_descs.back().get()); - vct_tensor.push_back(GetBasePtr(outs[i])); - } - // init in tensors - MLUCnnlTensorDesc input_desc( - *in, CNNL_LAYOUT_ARRAY, ToCnnlDataType(in->dtype())); - - // MLU should do sth - MLUCnnl::Split(ctx, - num_tensor, - axis, - input_desc.get(), - GetBasePtr(in), - desc_vector.data(), - vct_tensor.data()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(split, - ops::SplitMLUKernel, - ops::SplitMLUKernel, - ops::SplitMLUKernel, - ops::SplitMLUKernel, - ops::SplitMLUKernel); diff --git a/paddle/fluid/operators/squared_l2_norm_op_mlu.cc b/paddle/fluid/operators/squared_l2_norm_op_mlu.cc deleted file mode 100644 index ba647c55d8f63d8929966eb59d4ea4b2ff2fe24d..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/squared_l2_norm_op_mlu.cc +++ /dev/null @@ -1,145 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class SquaredL2NormMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto &dev_ctx = context.template device_context(); - auto *x = context.Input("X"); - auto *out = context.Output("Out"); - - auto place = context.GetPlace(); - - out->mutable_data(place); - - MLUCnnlTensorDesc input_desc(*x); - MLUCnnlTensorDesc out_desc(*out); - - // L2Loss - MLUCnnl::L2Loss(context, input_desc.get(), GetBasePtr(x), GetBasePtr(out)); - - // do mul - phi::DenseTensor scale_tensor = - context.AllocateTmpTensor({1}, dev_ctx); - phi::DenseTensor bias_tensor = - context.AllocateTmpTensor({1}, dev_ctx); - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnlTensorDesc bias_desc(bias_tensor); - FillMLUTensorWithHostValue(context, static_cast(2.0f), &scale_tensor); - FillMLUTensorWithHostValue(context, static_cast(0.0f), &bias_tensor); - - MLUCnnl::Scale(context, - 0, - out_desc.get(), - GetBasePtr(out), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class SquaredL2NormGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &context) const override { - auto &dev_ctx = context.template device_context(); - auto *x = context.Input("X"); - auto *x_grad = - context.Output(framework::GradVarName("X")); - auto *out_grad = - context.Input(framework::GradVarName("Out")); - - PADDLE_ENFORCE_EQ( - out_grad->numel(), - 1, - platform::errors::InvalidArgument( - "Input(GRAD@Out) of SquaredL2NormGradOP should be a scalar.")); - - auto place = context.GetPlace(); - - // broadcast out_grad - phi::DenseTensor broadcasted_out_grad; - broadcasted_out_grad.mutable_data(x_grad->dims(), place); - MLUCnnlTensorDesc broadcasted_out_grad_desc(broadcasted_out_grad); - MLUCnnlTensorDesc out_grad_desc(*out_grad); - MLUCnnl::BroadcastTo(context, - out_grad_desc.get(), - GetBasePtr(out_grad), - broadcasted_out_grad_desc.get(), - GetBasePtr(&broadcasted_out_grad)); - - // mul x - phi::DenseTensor tmp_x_grad; - tmp_x_grad.mutable_data(x_grad->dims(), place); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc tmp_x_grad_desc(tmp_x_grad); - MLUCnnlOpTensorDesc mul_op_desc( - CNNL_OP_TENSOR_MUL, ToCnnlDataType(x->dtype()), CNNL_NOT_PROPAGATE_NAN); - MLUCnnl::OpTensor(context, - mul_op_desc.get(), - x_desc.get(), - GetBasePtr(x), - broadcasted_out_grad_desc.get(), - GetBasePtr(&broadcasted_out_grad), - tmp_x_grad_desc.get(), - GetBasePtr(&tmp_x_grad), - ToCnnlDataType(x->dtype())); - - // mul - phi::DenseTensor scale_tensor = - context.AllocateTmpTensor({1}, dev_ctx); - phi::DenseTensor bias_tensor = - context.AllocateTmpTensor({1}, dev_ctx); - MLUCnnlTensorDesc scale_desc(scale_tensor); - MLUCnnlTensorDesc bias_desc(bias_tensor); - FillMLUTensorWithHostValue(context, static_cast(2.0f), &scale_tensor); - FillMLUTensorWithHostValue(context, static_cast(0.0f), &bias_tensor); - - x_grad->mutable_data(place); - MLUCnnlTensorDesc x_grad_desc(*x_grad); - MLUCnnl::Scale(context, - 0, - tmp_x_grad_desc.get(), - GetBasePtr(&tmp_x_grad), - scale_desc.get(), - GetBasePtr(&scale_tensor), - bias_desc.get(), - GetBasePtr(&bias_tensor), - x_grad_desc.get(), - GetBasePtr(x_grad)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(squared_l2_norm, - ops::SquaredL2NormMLUKernel, - ops::SquaredL2NormMLUKernel); -REGISTER_OP_MLU_KERNEL(squared_l2_norm_grad, - ops::SquaredL2NormGradMLUKernel, - ops::SquaredL2NormGradMLUKernel); diff --git a/paddle/fluid/operators/squeeze_op_mlu.cc b/paddle/fluid/operators/squeeze_op_mlu.cc deleted file mode 100644 index 5418ddd488994ac50cb38edeccaf16925a7b368f..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/squeeze_op_mlu.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_MLU -#include -#include - -#include "paddle/fluid/operators/squeeze_op.h" -#include "paddle/fluid/platform/device/mlu/device_context.h" - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL( - squeeze, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel); - -REGISTER_OP_MLU_KERNEL( - squeeze_grad, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel, - ops::SqueezeGradKernel); - -REGISTER_OP_MLU_KERNEL( - squeeze2, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel, - ops::SqueezeKernel); - -REGISTER_OP_MLU_KERNEL( - squeeze2_grad, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel, - ops::Squeeze2GradKernel); -#endif diff --git a/paddle/fluid/operators/stack_op_mlu.cc b/paddle/fluid/operators/stack_op_mlu.cc deleted file mode 100644 index 16076a180a54ecbc36a1287603bbf2840fe41965..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/stack_op_mlu.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class StackMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto x = ctx.MultiInput("X"); - auto* y = ctx.Output("Y"); - int axis = ctx.Attr("axis"); - if (axis < 0) axis += (x[0]->dims().size() + 1); - int num = static_cast(x.size()); - - PADDLE_ENFORCE_GT(num, - 0, - platform::errors::InvalidArgument( - "number of input phi::DenseTensor <= 0")); - - std::vector x_descs; - std::vector x_raw_descs; - std::vector x_ptrs; - for (int i = 0; i < num; i++) { - if (x[i]->dims().size() != 0) { - std::vector in_dims = phi::vectorize(x[i]->dims()); - in_dims.insert(in_dims.begin() + axis, 1); - x_descs.emplace_back(MLUCnnlTensorDesc( - in_dims.size(), in_dims.data(), ToCnnlDataType())); - } else { - int input_dims = 1; - x_descs.emplace_back( - MLUCnnlTensorDesc(1, &input_dims, ToCnnlDataType())); - } - x_raw_descs.push_back(x_descs.back().get()); - x_ptrs.push_back(GetBasePtr(x[i])); - } - y->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc y_desc(*y); - MLUCnnl::Concat(ctx, - num, - axis, - x_raw_descs.data(), - x_ptrs.data(), - y_desc.get(), - GetBasePtr(y)); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_MLU_KERNEL( - stack, - paddle::operators::StackMLUKernel, - paddle::operators::StackMLUKernel, - paddle::operators::StackMLUKernel, - paddle::operators::StackMLUKernel); diff --git a/paddle/fluid/operators/strided_slice_op_mlu.cc b/paddle/fluid/operators/strided_slice_op_mlu.cc deleted file mode 100644 index ea4adbfe678c625f54082c28dafe160698444648..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/strided_slice_op_mlu.cc +++ /dev/null @@ -1,417 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/utils.h" -#include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/kernels/funcs/strided_slice.h" - -namespace paddle { -namespace operators { - -using Variable = framework::Variable; -using LoDTensorArray = framework::LoDTensorArray; -using DDim = framework::DDim; - -static void ProcessStridedSliceParams( - const std::vector& axes, - const DDim& input_dims, - const std::vector& starts, - const std::vector& ends, - const std::vector& strides, - const std::vector& infer_flags, - const std::vector& decrease_axis, - std::vector* starts_indices_vector, - std::vector* ends_indices_vector, - std::vector* strides_indices_vector) { - for (size_t axis = 0; axis < axes.size(); axis++) { - int64_t start = starts[axis]; - int64_t end = ends[axis]; - int64_t stride = strides[axis]; - - int axis_index = axes[axis]; - int64_t dim_size = input_dims[axis_index]; - - bool decrease_axis_affect = false; - if (start == -1 && end == 0 && infer_flags[axis] == -1) { - auto ret = - std::find(decrease_axis.begin(), decrease_axis.end(), axis_index); - if (ret != decrease_axis.end()) { - decrease_axis_affect = true; - } - } - - if (stride < 0) { - if (start < 0) { - start = std::max(start, -dim_size); - } else { - start = std::min(start, dim_size - 1) - dim_size; - } - if (end < 0) { - end = std::max(end, -dim_size - 1); - } else { - end = end - dim_size; - } - } else { - if (start < 0) { - start = std::max(start, -dim_size) + dim_size; - } else { - start = std::min(start, dim_size - 1); - } - if (end < 0) { - end = end + dim_size; - } else { - end = std::min(end, dim_size); - } - } - - if (decrease_axis_affect) { - if (stride < 0) { - end = start - 1; - } else { - end = start + 1; - } - } - - (*starts_indices_vector)[axis_index] = static_cast(start); - (*ends_indices_vector)[axis_index] = static_cast(end); - (*strides_indices_vector)[axis_index] = static_cast(stride); - } -} - -template -class StridedSliceMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const Variable* input_var = ctx.InputVar("Input"); - bool is_tensor_array = input_var->IsType(); - PADDLE_ENFORCE_EQ(is_tensor_array, - false, - platform::errors::InvalidArgument( - "phi::DenseTensor array as input is not supported.")); - int rank = ctx.Input("Input")->dims().size(); - switch (rank) { - case 1: - StridedSliceCompute<1>(ctx); - break; - case 2: - StridedSliceCompute<2>(ctx); - break; - case 3: - StridedSliceCompute<3>(ctx); - break; - case 4: - StridedSliceCompute<4>(ctx); - break; - case 5: - StridedSliceCompute<5>(ctx); - break; - case 6: - StridedSliceCompute<6>(ctx); - break; - case 7: - StridedSliceCompute<7>(ctx); - break; - case 8: - StridedSliceCompute<8>(ctx); - break; - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "The rank of input is supported up to 8.")); - break; - } - } - - private: - template - void StridedSliceCompute(const framework::ExecutionContext& ctx) const { - auto place = ctx.GetPlace(); - - auto in = ctx.Input("Input"); - auto out = ctx.Output("Out"); - auto in_dims = in->dims(); - - // list - auto starts_int = ctx.Attr>("starts"); - auto ends_int = ctx.Attr>("ends"); - auto strides_int = ctx.Attr>("strides"); - - std::vector starts(starts_int.begin(), starts_int.end()); - std::vector ends(ends_int.begin(), ends_int.end()); - std::vector strides(strides_int.begin(), strides_int.end()); - - auto axes = ctx.Attr>("axes"); - auto infer_flags = ctx.Attr>("infer_flags"); - auto decrease_axis = ctx.Attr>("decrease_axis"); - - // vector> - auto list_new_starts_tensor = - ctx.MultiInput("StartsTensorList"); - auto list_new_ends_tensor = - ctx.MultiInput("EndsTensorList"); - auto list_new_strides_tensor = - ctx.MultiInput("StridesTensorList"); - - // phi::DenseTensor - if (list_new_starts_tensor.size() > 0) { - starts = GetDataFromTensorList(list_new_starts_tensor); - } else if (ctx.HasInput("StartsTensor")) { - auto* starts_tensor = ctx.Input("StartsTensor"); - starts = phi::GetVectorFromTensor(starts_tensor); - } - - if (list_new_ends_tensor.size() > 0) { - ends = GetDataFromTensorList(list_new_ends_tensor); - } else if (ctx.HasInput("EndsTensor")) { - auto* ends_tensor = ctx.Input("EndsTensor"); - ends = phi::GetVectorFromTensor(ends_tensor); - } - - if (list_new_strides_tensor.size() > 0) { - strides = GetDataFromTensorList(list_new_strides_tensor); - } else if (ctx.HasInput("StridesTensor")) { - auto* strides_tensor = ctx.Input("StridesTensor"); - strides = phi::GetVectorFromTensor(strides_tensor); - } - - // out dims calculation - std::vector out_dims_vector(in_dims.size(), -1); - phi::funcs::StridedSliceOutDims(starts, - ends, - strides, - axes, - infer_flags, - in_dims, - decrease_axis, - out_dims_vector.data(), - axes.size(), - false); - framework::DDim out_dims(phi::make_ddim(out_dims_vector)); - - // construct the starts_indices, ends_indices and strides_indices tensor for - // calling StridedSlice op - std::vector starts_indices_vector(D, 0); - std::vector ends_indices_vector(out_dims_vector.begin(), - out_dims_vector.end()); - std::vector strides_indices_vector(D, 1); - - ProcessStridedSliceParams(axes, - in_dims, - starts, - ends, - strides, - infer_flags, - decrease_axis, - &starts_indices_vector, - &ends_indices_vector, - &strides_indices_vector); - - auto out_dims_origin = out_dims; - if (decrease_axis.size() > 0) { - std::vector new_out_shape; - for (size_t i = 0; i < decrease_axis.size(); ++i) { - PADDLE_ENFORCE_EQ( - out_dims[decrease_axis[i]], - 1, - platform::errors::InvalidArgument( - "the size of decrease dimension should be 1, but received %d.", - out_dims[decrease_axis[i]])); - out_dims_origin[decrease_axis[i]] = 0; - } - - for (int i = 0; i < out_dims_origin.size(); ++i) { - if (out_dims_origin[i] != 0) { - new_out_shape.push_back(out_dims_origin[i]); - } - } - if (new_out_shape.size() == 0) { - new_out_shape.push_back(1); - } - out_dims_origin = phi::make_ddim(new_out_shape); - } - - out->Resize(out_dims_origin); - out->mutable_data(place); - - MLUCnnlTensorDesc in_desc(*in); - MLUCnnlTensorDesc out_desc( - out_dims_vector.size(), out_dims_vector.data(), ToCnnlDataType()); - MLUCnnl::StridedSlice(ctx, - starts_indices_vector.data(), - ends_indices_vector.data(), - strides_indices_vector.data(), - in_desc.get(), - GetBasePtr(in), - out_desc.get(), - GetBasePtr(out)); - } -}; - -template -class StridedSliceGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const Variable* input_var = ctx.InputVar("Input"); - bool is_tensor_array = input_var->IsType(); - PADDLE_ENFORCE_EQ(is_tensor_array, - false, - platform::errors::InvalidArgument( - "phi::DenseTensor array as input is not supported.")); - int rank = ctx.Input("Input")->dims().size(); - - switch (rank) { - case 1: - StridedSliceGradCompute<1>(ctx); - break; - case 2: - StridedSliceGradCompute<2>(ctx); - break; - case 3: - StridedSliceGradCompute<3>(ctx); - break; - case 4: - StridedSliceGradCompute<4>(ctx); - break; - case 5: - StridedSliceGradCompute<5>(ctx); - break; - case 6: - StridedSliceGradCompute<6>(ctx); - break; - case 7: - StridedSliceGradCompute<7>(ctx); - break; - case 8: - StridedSliceGradCompute<8>(ctx); - break; - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "The rank of input is supported up to 8.")); - break; - } - } - - private: - template - void StridedSliceGradCompute(const framework::ExecutionContext& ctx) const { - auto place = ctx.GetPlace(); - - auto* input = ctx.Input("Input"); - auto input_dims = input->dims(); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("Input")); - dx->mutable_data(input_dims, place); - - auto starts_int = ctx.Attr>("starts"); - auto ends_int = ctx.Attr>("ends"); - auto strides_int = ctx.Attr>("strides"); - - std::vector starts(starts_int.begin(), starts_int.end()); - std::vector ends(ends_int.begin(), ends_int.end()); - std::vector strides(strides_int.begin(), strides_int.end()); - - auto axes = ctx.Attr>("axes"); - auto infer_flags = ctx.Attr>("infer_flags"); - auto decrease_axis = ctx.Attr>("decrease_axis"); - - auto list_new_ends_tensor = - ctx.MultiInput("EndsTensorList"); - auto list_new_starts_tensor = - ctx.MultiInput("StartsTensorList"); - auto list_new_strides_tensor = - ctx.MultiInput("StridesTensorList"); - - if (list_new_starts_tensor.size() > 0) { - starts = GetDataFromTensorList(list_new_starts_tensor); - } else if (ctx.HasInput("StartsTensor")) { - auto* starts_tensor = ctx.Input("StartsTensor"); - starts = phi::GetVectorFromTensor(starts_tensor); - } - - if (list_new_ends_tensor.size() > 0) { - ends = GetDataFromTensorList(list_new_ends_tensor); - } else if (ctx.HasInput("EndsTensor")) { - auto* ends_tensor = ctx.Input("EndsTensor"); - ends = phi::GetVectorFromTensor(ends_tensor); - } - - if (list_new_strides_tensor.size() > 0) { - strides = GetDataFromTensorList(list_new_strides_tensor); - } else if (ctx.HasInput("StridesTensor")) { - auto* strides_tensor = ctx.Input("StridesTensor"); - strides = phi::GetVectorFromTensor(strides_tensor); - } - - std::vector out_dims_vector(input_dims.size(), -1); - phi::funcs::StridedSliceOutDims(starts, - ends, - strides, - axes, - infer_flags, - input_dims, - decrease_axis, - out_dims_vector.data(), - axes.size(), - false); - - std::vector starts_indices_vector(D, 0); - std::vector ends_indices_vector(out_dims_vector.begin(), - out_dims_vector.end()); - std::vector strides_indices_vector(D, 1); - - ProcessStridedSliceParams(axes, - input_dims, - starts, - ends, - strides, - infer_flags, - decrease_axis, - &starts_indices_vector, - &ends_indices_vector, - &strides_indices_vector); - - MLUCnnlTensorDesc dout_desc( - out_dims_vector.size(), out_dims_vector.data(), ToCnnlDataType()); - MLUCnnlTensorDesc dx_desc(*input); - MLUCnnl::StridedSliceGrad(ctx, - starts_indices_vector.data(), - ends_indices_vector.data(), - strides_indices_vector.data(), - dout_desc.get(), - GetBasePtr(dout), - dx_desc.get(), - GetBasePtr(dx)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL(strided_slice, - ops::StridedSliceMLUKernel, - ops::StridedSliceMLUKernel, - ops::StridedSliceMLUKernel, - ops::StridedSliceMLUKernel, - ops::StridedSliceMLUKernel); - -REGISTER_OP_MLU_KERNEL(strided_slice_grad, - ops::StridedSliceGradMLUKernel, - ops::StridedSliceGradMLUKernel, - ops::StridedSliceGradMLUKernel, - ops::StridedSliceGradMLUKernel, - ops::StridedSliceGradMLUKernel); diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc deleted file mode 100644 index a2f69a394902c8dbe23234438abaa5c9acc9e3f0..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/lod_tensor_array.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -using SelectedRows = phi::SelectedRows; - -template -class SumMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto out_var = ctx.OutputVar("Out"); - if (out_var->IsType()) { - // init - auto *out = out_var->GetMutable(); - auto ins = ctx.MultiInput("X"); - out->mutable_data(ctx.GetPlace()); - auto place = ctx.GetPlace(); - int ins_size = static_cast(ins.size()); - if (ins_size == 1) { - framework::TensorCopy(*ins[0], place, out); - return; - } - - // MLU shoul do sth - std::vector inputs; - std::vector input_descs; - std::vector desc_vector; - for (int i = 0; i < ins_size; i++) { - input_descs.emplace_back(MLUCnnlTensorDesc( - *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->dtype()))); - desc_vector.push_back(input_descs.back().get()); - inputs.push_back(GetBasePtr(ins[i])); - } - // init out tensors - MLUCnnlTensorDesc output_desc( - *out, CNNL_LAYOUT_ARRAY, ToCnnlDataType(out->dtype())); - uint32_t ins_size_t = static_cast(ins_size); - MLUCnnl::AddN(ctx, - ins_size_t, - desc_vector.data(), - inputs.data(), - output_desc.get(), - GetBasePtr(out)); - - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) must be phi::DenseTensor or But got " - "unsupport type: %s.", - framework::ToTypeName(out_var->Type()))); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL( - sum, - ops::SumMLUKernel, - ops::SumMLUKernel); diff --git a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc deleted file mode 100644 index 6d9e161806d8201245d858e9e064eb0689708256..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc +++ /dev/null @@ -1,510 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the Licnse. */ - -#include "paddle/fluid/operators/amp/fp16_type_traits.h" -#include "paddle/fluid/operators/batch_norm_op.h" -#include "paddle/fluid/platform/collective_helper.h" -#if defined(PADDLE_WITH_CNCL) -#include "paddle/fluid/platform/device/mlu/cncl_helper.h" -#endif -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -#define NO_USE_CNCL 0 -#define GET_LAYOUT_OFFSET 2 - -static std::vector supported_input_layout = { - CNNL_LAYOUT_NC, CNNL_LAYOUT_NLC, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NDHWC}; - -template -class SyncBatchNormMLUKernel : public framework::OpKernel { - using MPDType = typename details::MPTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext &ctx) const override { - float epsilon = ctx.Attr("epsilon"); - float momentum = ctx.Attr("momentum"); - const bool is_test = ctx.Attr("is_test"); - const bool use_global_stats = ctx.Attr("use_global_stats"); - const bool trainable_stats = ctx.Attr("trainable_statistics"); - const std::string layout_str = ctx.Attr("data_layout"); - const DataLayout layout = phi::StringToDataLayout(layout_str); - - PADDLE_ENFORCE_EQ(use_global_stats, - false, - platform::errors::InvalidArgument( - "sync_batch_norm doesn't support " - "to set use_global_stats True. Please use batch_norm " - "in this case.")); - - const auto *x = ctx.Input("X"); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *mean = ctx.Input("Mean"); - const auto *variance = ctx.Input("Variance"); - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); - auto *y = ctx.Output("Y"); - - const auto &x_dims = x->dims(); - PADDLE_ENFORCE_GE(x_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Input dim size should be larger than 1.")); - PADDLE_ENFORCE_LE(x_dims.size(), - 5, - platform::errors::InvalidArgument( - "The Input dim size should be less than 6.")); - - int N, C, H, W, D; - phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D); - - y->mutable_data(ctx.GetPlace()); - mean_out->mutable_data(ctx.GetPlace()); - variance_out->mutable_data(ctx.GetPlace()); - saved_mean->mutable_data(ctx.GetPlace()); - saved_variance->mutable_data(ctx.GetPlace()); - - phi::DenseTensor trans_x; - phi::DenseTensor trans_y; - std::vector forward_perm; - std::vector backward_perm; - std::vector trans_shape; - const bool need_transpose = - ((layout == DataLayout::kNCHW && x_dims.size() != 2) || - x_dims.size() == 5); - if (need_transpose) { - SetMLUTransposePerm( - x_dims, layout, &forward_perm, &backward_perm, &trans_shape); - trans_x.mutable_data(phi::make_ddim(trans_shape), ctx.GetPlace()); - trans_y.mutable_data(phi::make_ddim(trans_shape), ctx.GetPlace()); - MLUCnnlTensorDesc desc_x(*x); - MLUCnnlTensorDesc desc_trans_x( - trans_shape.size(), trans_shape.data(), ToCnnlDataType(x->dtype())); - MLUCnnl::Transpose(ctx, - forward_perm, - x_dims.size(), - desc_x.get(), - GetBasePtr(x), - desc_trans_x.get(), - GetBasePtr(&trans_x)); - } else { - trans_x = *x; - trans_y = *y; - } - - MLUCnnlTensorDesc desc_trans( - trans_x, - supported_input_layout[x_dims.size() - GET_LAYOUT_OFFSET], - ToCnnlDataType()); - - bool test_mode = is_test && (!trainable_stats); - if (test_mode) { // inference - MLUCnnlTensorDesc desc_weight_bias_mean_var(*bias); - MLUCnnl::FusedBatchNorm(ctx, - false /*is_training*/, - desc_trans.get(), - GetBasePtr(&trans_x), - desc_weight_bias_mean_var.get(), - GetBasePtr(scale), - GetBasePtr(bias), - GetBasePtr(mean), - GetBasePtr(variance), - epsilon, - momentum, - desc_trans.get(), - GetBasePtr(&trans_y), - nullptr, - nullptr, - nullptr, - nullptr); - } else { // training - if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); - phi::DenseTensor mom_cpu; - paddle::framework::TensorCopySync( - *mom_tensor, platform::CPUPlace(), &mom_cpu); - momentum = mom_cpu.data()[0]; - } - - phi::DenseTensor local_mean, local_var; - local_mean.mutable_data(mean->dims(), ctx.GetPlace()); - local_var.mutable_data(variance->dims(), ctx.GetPlace()); - MLUCnnlTensorDesc desc_mean_var(*mean_out); - - // cacl local_mean and local_var - MLUCnnl::SyncBatchNormStats(ctx, - desc_trans.get(), - GetBasePtr(&trans_x), - epsilon, - desc_mean_var.get(), - GetBasePtr(&local_mean), - desc_mean_var.get(), - GetBasePtr(&local_var)); - - phi::DenseTensor input_count; - input_count.mutable_data(phi::make_ddim({1}), ctx.GetPlace()); - FillMLUTensorWithHostValue( - ctx, static_cast(x->numel() / C), &input_count); - - phi::DenseTensor count_all; - phi::DenseTensor mean_all(mean->dtype()); - phi::DenseTensor invstd_all(variance->dtype()); - -#ifdef PADDLE_WITH_CNCL - auto &dev_ctx = - ctx.template device_context(); - auto *comm = dev_ctx.cncl_comm(); - if (comm) { - auto cncl_comm = paddle::platform::CNCLCommContext::Instance().Get( - 0, ctx.GetPlace()); - auto *comm = cncl_comm->comm(); - auto comm_stream = cncl_comm->stream(); - int count; - PADDLE_ENFORCE_MLU_SUCCESS(cnclGetCommCount(&count, comm)); - count_all.mutable_data(phi::make_ddim({count}), - ctx.GetPlace()); - mean_all.mutable_data(phi::make_ddim({count, mean->numel()}), - ctx.GetPlace()); - invstd_all.mutable_data( - phi::make_ddim({count, variance->numel()}), ctx.GetPlace()); - // before comm_stream exec, need sync compute_stream. - dev_ctx.Wait(); - - cnclDataType_t dtype = platform::ToCNCLDataType( - framework::TransToProtoVarType(count_all.dtype())); - PADDLE_ENFORCE_MLU_SUCCESS(cnclAllGather(GetBasePtr(&input_count), - GetBasePtr(&count_all), - 1, - dtype, - comm, - comm_stream)); - - auto cncl_dtype = platform::ToCNCLDataType( - framework::TransToProtoVarType(mean_all.dtype())); - PADDLE_ENFORCE_MLU_SUCCESS(cnclAllGather(GetBasePtr(&local_mean), - GetBasePtr(&mean_all), - local_mean.numel(), - cncl_dtype, - comm, - comm_stream)); - - PADDLE_ENFORCE_MLU_SUCCESS(cnclAllGather(GetBasePtr(&local_var), - GetBasePtr(&invstd_all), - local_var.numel(), - cncl_dtype, - comm, - comm_stream)); - // after comm_stream exec, need sync queue for using compute_stream - // correctly. - PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueSync(comm_stream)); -#else - if (NO_USE_CNCL) { -#endif - } else { - count_all = input_count; - mean_all.ShareDataWith(local_mean); - invstd_all.ShareDataWith(local_var); - mean_all.Resize(phi::make_ddim({1, local_mean.numel()})); - invstd_all.Resize(phi::make_ddim({1, local_var.numel()})); - } - - MLUCnnlTensorDesc desc_all_mean_invstd( - invstd_all, CNNL_LAYOUT_NC, ToCnnlDataType()); - MLUCnnlTensorDesc desc_moving_mean_var(*mean_out); - MLUCnnlTensorDesc desc_saved_mean_var(*saved_mean); - MLUCnnlTensorDesc desc_count_all(count_all); - - MLUCnnl::SyncBatchNormGatherStatsWithCounts(ctx, - momentum, - epsilon, - desc_all_mean_invstd.get(), - GetBasePtr(&mean_all), - desc_all_mean_invstd.get(), - GetBasePtr(&invstd_all), - desc_moving_mean_var.get(), - GetBasePtr(mean_out), - desc_moving_mean_var.get(), - GetBasePtr(variance_out), - desc_count_all.get(), - GetBasePtr(&count_all), - desc_saved_mean_var.get(), - GetBasePtr(saved_mean), - desc_saved_mean_var.get(), - GetBasePtr(saved_variance)); - - MLUCnnlTensorDesc desc_other_param(*saved_mean); - MLUCnnl::SyncBatchNormElemt(ctx, - desc_trans.get(), - GetBasePtr(&trans_x), - desc_other_param.get(), - GetBasePtr(saved_mean), - desc_other_param.get(), - GetBasePtr(saved_variance), - desc_other_param.get(), - GetBasePtr(scale), - desc_other_param.get(), - GetBasePtr(bias), - desc_trans.get(), - GetBasePtr(&trans_y)); - } - if (need_transpose) { - MLUCnnlTensorDesc desc_y(*y); - MLUCnnlTensorDesc desc_trans_y(trans_y); - MLUCnnl::Transpose(ctx, - backward_perm, - trans_y.dims().size(), - desc_trans_y.get(), - GetBasePtr(&trans_y), - desc_y.get(), - GetBasePtr(y)); - } - } -}; - -template -class SyncBatchNormMLUGradKernel : public framework::OpKernel { - using MPDType = typename details::MPTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext &ctx) const override { - const std::string layout_str = ctx.Attr("data_layout"); - const DataLayout layout = phi::StringToDataLayout(layout_str); - - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - - // init output - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = - ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - - const auto *saved_mean = ctx.Input("SavedMean"); - const auto *saved_inv_var = ctx.Input("SavedVariance"); - - const phi::DenseTensor *x; - if (ctx.HasInput("Y")) { - PADDLE_ENFORCE_EQ(true, - false, - platform::errors::InvalidArgument( - "sync_batch_norm_grad doesn't support input Y")); - } else { - x = ctx.Input("X"); - } - - const auto &x_dims = x->dims(); - PADDLE_ENFORCE_GE(x_dims.size(), - 2, - platform::errors::InvalidArgument( - "The Input X dim size should be larger than 1.")); - PADDLE_ENFORCE_LE(x_dims.size(), - 5, - platform::errors::InvalidArgument( - "The Input X dim size should be less than 6.")); - - int N, C, H, W, D; - phi::funcs::ExtractNCWHD(x_dims, layout, &N, &C, &H, &W, &D); - PADDLE_ENFORCE_EQ(scale->dims()[0], - C, - platform::errors::InvalidArgument( - "Expected first dim for input parameter(scale) of " - "OP(sync_batch_norm) be (%d), but given (%d).", - C, - scale->dims()[0])); - - d_x->mutable_data(ctx.GetPlace()); - if (d_scale && d_bias) { - d_scale->mutable_data(ctx.GetPlace()); - d_bias->mutable_data(ctx.GetPlace()); - } - PADDLE_ENFORCE_EQ(scale->dims().size(), - 1UL, - platform::errors::InvalidArgument( - "Expected rank for input parameter(scale) of " - "OP(sync_batch_norm) be (1), but given (%d).", - scale->dims().size())); - - phi::DenseTensor trans_x; - phi::DenseTensor trans_dy; - phi::DenseTensor trans_dx; - std::vector forward_perm; - std::vector backward_perm; - std::vector trans_shape; - const bool need_transpose = - ((layout == DataLayout::kNCHW && x_dims.size() != 2) || - x_dims.size() == 5); - if (need_transpose) { - SetMLUTransposePerm( - x_dims, layout, &forward_perm, &backward_perm, &trans_shape); - trans_x.mutable_data(phi::make_ddim(trans_shape), ctx.GetPlace()); - trans_dy.mutable_data(phi::make_ddim(trans_shape), ctx.GetPlace()); - trans_dx.mutable_data(phi::make_ddim(trans_shape), ctx.GetPlace()); - MLUCnnlTensorDesc desc_x(*x); - MLUCnnlTensorDesc desc_trans_x( - trans_shape.size(), trans_shape.data(), ToCnnlDataType(x->dtype())); - MLUCnnl::Transpose(ctx, - forward_perm, - x_dims.size(), - desc_x.get(), - GetBasePtr(x), - desc_trans_x.get(), - GetBasePtr(&trans_x)); - MLUCnnl::Transpose(ctx, - forward_perm, - x_dims.size(), - desc_x.get(), - GetBasePtr(d_y), - desc_trans_x.get(), - GetBasePtr(&trans_dy)); - } else { - trans_x = *x; - trans_dy = *d_y; - trans_dx = *d_x; - } - MLUCnnlTensorDesc desc_trans( - trans_x, - supported_input_layout[x_dims.size() - GET_LAYOUT_OFFSET], - ToCnnlDataType()); - - phi::DenseTensor sum_dy, sum_dy_xmu; - sum_dy.mutable_data(bias->dims(), ctx.GetPlace()); - sum_dy_xmu.mutable_data(bias->dims(), ctx.GetPlace()); - MLUCnnlTensorDesc desc_other_param(*bias); - - MLUCnnl::SyncBatchnormBackwardReduce( - ctx, - desc_trans.get(), - GetBasePtr(&trans_dy), - desc_trans.get(), - GetBasePtr(&trans_x), - desc_other_param.get(), - GetBasePtr(saved_mean), - desc_other_param.get(), - GetBasePtr(saved_inv_var), - d_scale ? desc_other_param.get() : nullptr, - d_scale ? GetBasePtr(d_scale) : nullptr, - d_bias ? desc_other_param.get() : nullptr, - d_bias ? GetBasePtr(d_bias) : nullptr, - desc_other_param.get(), - GetBasePtr(&sum_dy), - desc_other_param.get(), - GetBasePtr(&sum_dy_xmu), - true /*compute sum_dy, sum_dy_xmu*/, - d_scale ? true : false /*compute d_scale*/, - d_bias ? true : false /*compute d_bias*/); - - phi::DenseTensor numel_count; - numel_count.mutable_data(phi::make_ddim({1}), ctx.GetPlace()); - FillMLUTensorWithHostValue( - ctx, static_cast(x->numel() / C), &numel_count); - -#ifdef PADDLE_WITH_CNCL - auto &dev_ctx = - ctx.template device_context(); - auto *comm = dev_ctx.cncl_comm(); - if (comm) { - auto cncl_comm = - paddle::platform::CNCLCommContext::Instance().Get(0, ctx.GetPlace()); - auto *comm = cncl_comm->comm(); - auto comm_stream = cncl_comm->stream(); - // before comm_stream exec, need sync compute_stream. - dev_ctx.Wait(); - cnclDataType_t dtype = platform::ToCNCLDataType( - framework::TransToProtoVarType(numel_count.dtype())); - PADDLE_ENFORCE_MLU_SUCCESS(cnclAllReduce(GetBasePtr(&numel_count), - GetBasePtr(&numel_count), - 1, - dtype, - cnclSum, - comm, - comm_stream)); - - auto cncl_dtype = platform::ToCNCLDataType( - framework::TransToProtoVarType(sum_dy.dtype())); - PADDLE_ENFORCE_MLU_SUCCESS(cnclAllReduce(GetBasePtr(&sum_dy), - GetBasePtr(&sum_dy), - sum_dy.numel(), - cncl_dtype, - cnclSum, - comm, - comm_stream)); - - PADDLE_ENFORCE_MLU_SUCCESS(cnclAllReduce(GetBasePtr(&sum_dy_xmu), - GetBasePtr(&sum_dy_xmu), - sum_dy_xmu.numel(), - cncl_dtype, - cnclSum, - comm, - comm_stream)); - // after comm_stream exec, need sync queue for using compute_stream - // correctly. - PADDLE_ENFORCE_MLU_SUCCESS(cnrtQueueSync(comm_stream)); - } -#endif - - if (d_x) { - MLUCnnlTensorDesc desc_count(numel_count); - MLUCnnl::SyncBatchNormBackwardElemt(ctx, - desc_trans.get(), - GetBasePtr(&trans_dy), - desc_trans.get(), - GetBasePtr(&trans_x), - desc_other_param.get(), - GetBasePtr(saved_mean), - desc_other_param.get(), - GetBasePtr(saved_inv_var), - desc_other_param.get(), - GetBasePtr(scale), - desc_other_param.get(), - GetBasePtr(&sum_dy), - desc_other_param.get(), - GetBasePtr(&sum_dy_xmu), - desc_count.get(), - GetBasePtr(&numel_count), - desc_trans.get(), - GetBasePtr(&trans_dx)); - - if (need_transpose) { - MLUCnnlTensorDesc desc_dx(*d_x); - MLUCnnlTensorDesc desc_trans_dx(trans_dx); - MLUCnnl::Transpose(ctx, - backward_perm, - trans_dx.dims().size(), - desc_trans_dx.get(), - GetBasePtr(&trans_dx), - desc_dx.get(), - GetBasePtr(d_x)); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_MLU_KERNEL(sync_batch_norm, - ops::SyncBatchNormMLUKernel, - ops::SyncBatchNormMLUKernel); - -REGISTER_OP_MLU_KERNEL(sync_batch_norm_grad, - ops::SyncBatchNormMLUGradKernel, - ops::SyncBatchNormMLUGradKernel); diff --git a/paddle/fluid/operators/tile_op_mlu.cc b/paddle/fluid/operators/tile_op_mlu.cc deleted file mode 100644 index 3660627b8b578d9b5b6230640a2b00ef58a6e04c..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/tile_op_mlu.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_MLU - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/tile_op_functor.h" - -namespace paddle { -namespace operators { - -template -class TileMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); - PADDLE_ENFORCE_GE( - rank, - 1, - platform::errors::InvalidArgument( - "The rank of the input 'x' for tile op must be a positive " - "integer, but the value received is %d.", - rank)); - PADDLE_ENFORCE_LE( - rank, - MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( - "The rank of the input 'x' for tile op " - "must be less than or equal to %d, but the value received is %d.", - MAX_RANK_SUPPORTED, - rank)); - auto repeat_times = get_repeat_times(context); - int repeat_times_size = repeat_times.size(); - PADDLE_ENFORCE_GE( - repeat_times_size, - 1, - platform::errors::InvalidArgument( - "The number of elements of the input 'repeat_times' for tile " - "op must be positive, but the value received is %d.", - repeat_times_size)); - PADDLE_ENFORCE_LE( - repeat_times_size, - MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( - "The number of elements of the input 'repeat_times' for tile op " - "must be less than or equal to %d, but the value received is %d.", - MAX_RANK_SUPPORTED, - repeat_times_size)); - - auto* in0 = context.Input("X"); - auto in_dims = in0->dims(); - for (size_t i = 0; i < repeat_times.size(); ++i) { - PADDLE_ENFORCE_GT( - repeat_times[i], - 0, - platform::errors::InvalidArgument( - "All elements of the input 'repeat_times' for tile op must " - "be positive integers, but the value received is %d.", - repeat_times[i])); - } - auto vec_in_dims = phi::vectorize(in_dims); - if (repeat_times.size() < vec_in_dims.size()) { - int diff = vec_in_dims.size() - repeat_times.size(); - repeat_times.insert(repeat_times.begin(), diff, 1); - } else { - int diff = repeat_times.size() - vec_in_dims.size(); - vec_in_dims.insert(vec_in_dims.begin(), diff, 1); - } - PADDLE_ENFORCE_EQ( - repeat_times.size(), - vec_in_dims.size(), - platform::errors::InvalidArgument( - "The rank (%d) of the input 'x' and the rank (%d) of the input " - "'repeat_times' for tile op must match after promotion.", - vec_in_dims.size(), - repeat_times.size())); - - auto* out0 = context.Output("Out"); - bool repeat_one_times = true; - for (size_t i = 0; i < repeat_times.size(); ++i) { - if (repeat_times[i] != 1) { - repeat_one_times = false; - } - } - if (repeat_one_times) { - paddle::framework::TensorCopy(*in0, context.GetPlace(), out0); - } else { - framework::DDim new_in_dims = phi::make_ddim(vec_in_dims); - framework::DDim out_dims(new_in_dims); - for (size_t i = 0; i < repeat_times.size(); ++i) { - out_dims[i] *= repeat_times[i]; - } - out0->Resize(out_dims); - out0->mutable_data(context.GetPlace()); - MLUCnnlTensorDesc x_desc(*in0); - MLUCnnlTensorDesc out_desc(*out0); - MLUCnnl::BroadcastTo(context, - x_desc.get(), - GetBasePtr(in0), - out_desc.get(), - GetBasePtr(out0)); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(tile, - ops::TileMLUKernel, - ops::TileMLUKernel, - ops::TileMLUKernel, - ops::TileMLUKernel); - -#endif diff --git a/paddle/fluid/operators/top_k_op_mlu.cc b/paddle/fluid/operators/top_k_op_mlu.cc deleted file mode 100644 index 8b2774407e15ded3d876575f9e6e701b54efdee8..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/top_k_op_mlu.cc +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/top_k_op.h" - -namespace paddle { -namespace operators { - -template -class TopkMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); - const auto& place = ctx.GetPlace(); - - size_t k = static_cast(ctx.Attr("k")); - auto* k_t = ctx.Input("K"); - if (k_t) { - auto k_t_ptr = static_cast(k_t->data()); - auto size = k_t->numel() * sizeof(int); - memory::Copy(platform::CPUPlace(), - reinterpret_cast(&k), - k_t->place(), - k_t_ptr, - size, - nullptr); - framework::DDim output_dims = output->dims(); - output_dims[output_dims.size() - 1] = k; - output->Resize(output_dims); - indices->Resize(output_dims); - } - - output->mutable_data(place); - indices->mutable_data(place); - - const bool largest = true; - const bool sorted = true; - const int axis = -1; - // cnnl only support int32/int16 type of indices - phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); - indices_int32.Resize(indices->dims()); - indices_int32.mutable_data(place); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc values_output_desc(*output); - MLUCnnlTensorDesc indices_int32_desc(indices_int32); - MLUCnnl::TopK(ctx, - k, - axis, - largest, - sorted, - input_desc.get(), - GetBasePtr(input), - values_output_desc.get(), - GetBasePtr(output), - indices_int32_desc.get(), - GetBasePtr(&indices_int32)); - - // cast indices type to int64 - MLUCnnlTensorDesc cast_output_desc(*indices); - cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64); - MLUCnnl::Cast(ctx, - cast_type, - indices_int32_desc.get(), - GetBasePtr(&indices_int32), - cast_output_desc.get(), - GetBasePtr(indices)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(top_k, - ops::TopkMLUKernel, - ops::TopkMLUKernel); diff --git a/paddle/fluid/operators/top_k_v2_op_mlu.cc b/paddle/fluid/operators/top_k_v2_op_mlu.cc deleted file mode 100644 index 439b51e1ee93a50905bdd65fdbd6136dede47e04..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/top_k_v2_op_mlu.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class TopkV2MLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); - const auto& place = ctx.GetPlace(); - - const auto& sorted = static_cast(ctx.Attr("sorted")); - const auto& largest = static_cast(ctx.Attr("largest")); - - // axis < 0, cacluate the real axis - int axis = static_cast(ctx.Attr("axis")); - if (axis < 0) { - const auto& in_dims = input->dims(); - axis += in_dims.size(); - } - - size_t k = static_cast(ctx.Attr("k")); - auto* k_t = ctx.Input("K"); - if (k_t) { - auto k_t_ptr = static_cast(k_t->data()); - auto size = k_t->numel() * sizeof(int); - memory::Copy(platform::CPUPlace(), - reinterpret_cast(&k), - k_t->place(), - k_t_ptr, - size, - nullptr); - framework::DDim output_dims = output->dims(); - // accroding to axis to set K value in the dim - output_dims[axis] = k; - output->Resize(output_dims); - indices->Resize(output_dims); - } - - output->mutable_data(place); - indices->mutable_data(place); - - // cnnl only support int32/int16 type of indices - phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); - indices_int32.Resize(indices->dims()); - indices_int32.mutable_data(place); - - MLUCnnlTensorDesc input_desc(*input); - MLUCnnlTensorDesc values_output_desc(*output); - MLUCnnlTensorDesc indices_int32_desc(indices_int32); - MLUCnnl::TopK(ctx, - k, - axis, - largest, - sorted, - input_desc.get(), - GetBasePtr(input), - values_output_desc.get(), - GetBasePtr(output), - indices_int32_desc.get(), - GetBasePtr(&indices_int32)); - - // cast indices type to int64 - MLUCnnlTensorDesc cast_output_desc(*indices); - cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64); - MLUCnnl::Cast(ctx, - cast_type, - indices_int32_desc.get(), - GetBasePtr(&indices_int32), - cast_output_desc.get(), - GetBasePtr(indices)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(top_k_v2, - ops::TopkV2MLUKernel, - ops::TopkV2MLUKernel); diff --git a/paddle/fluid/operators/transpose_op_mlu.cc b/paddle/fluid/operators/transpose_op_mlu.cc deleted file mode 100644 index ba9997cf0f77a9fa0bef13721fbb5e08af366939..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/transpose_op_mlu.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class TransposeMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - std::vector axis = ctx.Attr>("axis"); - out->mutable_data(ctx.device_context().GetPlace()); - - TransposeFromMLUTensor( - ctx, axis, x, out, false /*need_reshape_or_alloc*/); - } -}; - -template -class TransposeGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_grad = ctx.Input(framework::GradVarName("Out")); - auto* x_grad = ctx.Output(framework::GradVarName("X")); - std::vector axis = ctx.Attr>("axis"); - std::vector reversed_axis(axis); - for (size_t i = 0; i < axis.size(); i++) { - reversed_axis[axis[i]] = i; - } - x_grad->mutable_data(ctx.GetPlace()); - - TransposeFromMLUTensor( - ctx, reversed_axis, out_grad, x_grad, false /*need_reshape_or_alloc*/); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(transpose2, - ops::TransposeMLUKernel, - ops::TransposeMLUKernel, - ops::TransposeMLUKernel, - ops::TransposeMLUKernel, - ops::TransposeMLUKernel, - ops::TransposeMLUKernel, - ops::TransposeMLUKernel); - -REGISTER_OP_MLU_KERNEL(transpose2_grad, - ops::TransposeGradMLUKernel, - ops::TransposeGradMLUKernel, - ops::TransposeGradMLUKernel, - ops::TransposeGradMLUKernel, - ops::TransposeGradMLUKernel, - ops::TransposeGradMLUKernel, - ops::TransposeGradMLUKernel); diff --git a/paddle/fluid/operators/tril_triu_op_mlu.cc b/paddle/fluid/operators/tril_triu_op_mlu.cc deleted file mode 100644 index 892261d6693ce311cf62e89e33dd9c812afa5051..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/tril_triu_op_mlu.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at -http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { -template -class TrilTriuMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - int diagonal = ctx.Attr("diagonal"); - bool lower = ctx.Attr("lower"); - bool upper; - if (lower) { - upper = 0; - } else { - upper = 1; - } - - out->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc x_desc(*x); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::TrilTriu(ctx, - diagonal, - upper, - x_desc.get(), - GetBasePtr(x), - out_desc.get(), - GetBasePtr(out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_MLU_KERNEL(tril_triu, - ops::TrilTriuMLUKernel, - ops::TrilTriuMLUKernel, - ops::TrilTriuMLUKernel); diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc deleted file mode 100644 index 960c26961b99c5c1b417299bf01ccc9e7f2f963c..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/truncated_gaussian_random_op.h" -#include "paddle/phi/core/generator.h" - -namespace paddle { -namespace operators { - -template -class TruncatedGaussianRandomMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - float mean = context.Attr("mean"); - float std = context.Attr("std"); - auto* tensor = context.Output("Out"); - tensor->mutable_data(context.GetPlace()); - - phi::DenseTensor cpu_tensor(tensor->dtype()); - cpu_tensor.Resize(tensor->dims()); - T* data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); - - std::uniform_real_distribution dist(std::numeric_limits::min(), - 1.0); - TruncatedNormal truncated_normal(mean, std); - int64_t size = tensor->numel(); - - unsigned int seed = static_cast(context.Attr("seed")); - auto engine = phi::GetCPURandomEngine(seed); - - for (int64_t i = 0; i < size; ++i) { - data_cpu[i] = truncated_normal(dist(*engine)); - } - - auto& dev_ctx = - context.template device_context(); - framework::TensorCopy(cpu_tensor, context.GetPlace(), dev_ctx, tensor); - dev_ctx.Wait(); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(truncated_gaussian_random, - ops::TruncatedGaussianRandomMLUKernel); diff --git a/paddle/fluid/operators/uniform_random_op_mlu.cc b/paddle/fluid/operators/uniform_random_op_mlu.cc deleted file mode 100644 index bcf51c522d34600dd9b74fd5797b13d14e02a741..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/uniform_random_op_mlu.cc +++ /dev/null @@ -1,114 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/mlu/mlu_baseop.h" -#include "paddle/fluid/operators/uniform_random_op.h" -#include "paddle/phi/core/generator.h" - -namespace paddle { -namespace operators { - -template -class MLUUniformRandomKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - phi::DenseTensor *tensor = nullptr; - auto out_var = ctx.OutputVar("Out"); - - std::vector new_shape; - auto list_new_shape_tensor = - ctx.MultiInput("ShapeTensorList"); - if (list_new_shape_tensor.size() > 0 || ctx.HasInput("ShapeTensor")) { - if (ctx.HasInput("ShapeTensor")) { - auto *shape_tensor = ctx.Input("ShapeTensor"); - new_shape = GetNewDataFromShapeTensor(shape_tensor); - } else if (list_new_shape_tensor.size() > 0) { - new_shape = GetNewDataFromShapeTensorList(list_new_shape_tensor); - } - } - - if (out_var->IsType()) { - auto *selected_rows = out_var->GetMutable(); - tensor = selected_rows->mutable_value(); - auto shape = ctx.Attr>("shape"); - if (!new_shape.empty()) shape = new_shape; - tensor->Resize(phi::make_ddim(shape)); - selected_rows->mutable_rows()->reserve(shape[0]); - } else if (out_var->IsType()) { - tensor = out_var->GetMutable(); - if (!new_shape.empty()) tensor->Resize(phi::make_ddim(new_shape)); - } else { - PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) in uniform_random_op must be " - "phi::DenseTensor, " - "SelectedRows. But got " - "unsupport type: %s.", - framework::ToTypeName(out_var->Type()))); - } - - tensor->mutable_data(ctx.GetPlace()); - int64_t size = tensor->numel(); - - phi::DenseTensor cpu_tensor(tensor->dtype()); - cpu_tensor.Resize(tensor->dims()); - T *data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); - - std::uniform_real_distribution dist( - static_cast(ctx.Attr("min")), - static_cast(ctx.Attr("max"))); - unsigned int seed = static_cast(ctx.Attr("seed")); - auto engine = phi::GetCPURandomEngine(seed); - - for (int64_t i = 0; i < size; ++i) { - data_cpu[i] = dist(*engine); - } - - unsigned int diag_num = - static_cast(ctx.Attr("diag_num")); - unsigned int diag_step = - static_cast(ctx.Attr("diag_step")); - auto diag_val = static_cast(ctx.Attr("diag_val")); - if (diag_num > 0) { - PADDLE_ENFORCE_GT( - size, - (diag_num - 1) * (diag_step + 1), - platform::errors::InvalidArgument( - "ShapeInvalid: the diagonal's elements is equal (num-1) " - "* (step-1) with num %d, step %d," - "It should be smaller than %d, but received %d", - diag_num, - diag_step, - (diag_num - 1) * (diag_step + 1), - size)); - for (int64_t i = 0; i < diag_num; ++i) { - int64_t pos = i * diag_step + i; - data_cpu[pos] = diag_val; - } - } - - // copy to MLU - framework::TensorCopy( - cpu_tensor, - ctx.GetPlace(), - ctx.template device_context(), - tensor); - ctx.template device_context().Wait(); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP_MLU_KERNEL(uniform_random, - paddle::operators::MLUUniformRandomKernel); diff --git a/paddle/fluid/operators/unsqueeze_op_mlu.cc b/paddle/fluid/operators/unsqueeze_op_mlu.cc deleted file mode 100644 index d168a8ab32b07f7b937eecab17a5b3b499025d58..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/unsqueeze_op_mlu.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef PADDLE_WITH_MLU -#include -#include - -#include "paddle/fluid/operators/unsqueeze_op.h" -#include "paddle/fluid/platform/device/mlu/device_context.h" - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_MLU_KERNEL( - unsqueeze, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel); -REGISTER_OP_MLU_KERNEL( - unsqueeze2, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel, - ops::UnsqueezeKernel); -REGISTER_OP_MLU_KERNEL( - unsqueeze_grad, - ops::UnsqueezeGradKernel, - ops::UnsqueezeGradKernel, - ops::UnsqueezeGradKernel, - ops::UnsqueezeGradKernel, - ops::UnsqueezeGradKernel, - ops::UnsqueezeGradKernel, - ops::UnsqueezeGradKernel); -REGISTER_OP_MLU_KERNEL( - unsqueeze2_grad, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel, - ops::Unsqueeze2GradKernel); -#endif diff --git a/paddle/fluid/operators/unstack_op_mlu.cc b/paddle/fluid/operators/unstack_op_mlu.cc deleted file mode 100644 index 55171364377e09e99a85782e01fe48ba709ecfd7..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/unstack_op_mlu.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class UnStackMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto out = ctx.MultiOutput("Y"); - int axis = ctx.Attr("axis"); - if (axis < 0) axis += x->dims().size(); - int num = x->dims()[axis]; - - std::vector out_descs; - std::vector out_raw_descs; - std::vector out_ptrs; - std::vector new_dims = phi::vectorize(x->dims()); - new_dims[axis] = 1; - for (int i = 0; i < num; i++) { - out[i]->mutable_data(ctx.GetPlace()); - out_descs.emplace_back(MLUCnnlTensorDesc( - new_dims.size(), new_dims.data(), ToCnnlDataType())); - out_raw_descs.push_back(out_descs.back().get()); - out_ptrs.push_back(GetBasePtr(out[i])); - } - - MLUCnnlTensorDesc x_desc(*x); - MLUCnnl::Split(ctx, - num, - axis, - x_desc.get(), - GetBasePtr(x), - out_raw_descs.data(), - out_ptrs.data()); - } -}; - -template -class UnStackGradMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto x = ctx.MultiInput(framework::GradVarName("Y")); - auto *y = ctx.Output(framework::GradVarName("X")); - int axis = ctx.Attr("axis"); - if (axis < 0) axis += (x[0]->dims().size() + 1); - int num = static_cast(x.size()); - - std::vector x_descs; - std::vector x_raw_descs; - std::vector x_ptrs; - for (int i = 0; i < num; i++) { - if (x[i]->dims().size() != 0) { - std::vector in_dims = phi::vectorize(x[i]->dims()); - in_dims.insert(in_dims.begin() + axis, 1); - x_descs.emplace_back(MLUCnnlTensorDesc( - in_dims.size(), in_dims.data(), ToCnnlDataType())); - } else { - int input_dims = 1; - x_descs.emplace_back( - MLUCnnlTensorDesc(1, &input_dims, ToCnnlDataType())); - } - x_raw_descs.push_back(x_descs.back().get()); - x_ptrs.push_back(GetBasePtr(x[i])); - } - y->mutable_data(ctx.GetPlace()); - - MLUCnnlTensorDesc y_desc(*y); - MLUCnnl::Concat(ctx, - num, - axis, - x_raw_descs.data(), - x_ptrs.data(), - y_desc.get(), - GetBasePtr(y)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace plat = paddle::platform; -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL(unstack, - ops::UnStackMLUKernel, - ops::UnStackMLUKernel); - -REGISTER_OP_MLU_KERNEL(unstack_grad, - ops::UnStackGradMLUKernel, - ops::UnStackGradMLUKernel); diff --git a/paddle/fluid/operators/where_index_op_mlu.cc b/paddle/fluid/operators/where_index_op_mlu.cc deleted file mode 100644 index 59ffb43f7ce5ce51a1d399b1e3086ea7b2b7a163..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/where_index_op_mlu.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class MLUWhereIndexKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* condition = context.Input("Condition"); - auto* out = context.Output("Out"); - auto dims = condition->dims(); - const int rank = dims.size(); - - phi::DenseTensor num_true; - num_true.mutable_data({1}, context.GetPlace()); - MLUCnnlTensorDesc con_desc(*condition); - MLUCnnlTensorDesc num_true_desc(num_true); - MLUCnnl::NumTrue(context, - con_desc.get(), - GetBasePtr(condition), - num_true_desc.get(), - GetBasePtr(&num_true)); - - phi::DenseTensor local_true_num; - paddle::framework::TensorCopySync( - num_true, platform::CPUPlace(), &local_true_num); - auto true_num = *local_true_num.data(); - - out->Resize(phi::make_ddim({true_num, rank})); - out->mutable_data(context.GetPlace()); - - if (true_num == 0) { - return; - } - - auto& dev_ctx = context.template device_context(); - phi::DenseTensor out_int32 = - context.AllocateTmpTensor(out->dims(), - dev_ctx); - MLUCnnlTensorDesc out_int32_desc(out_int32); - MLUCnnlTensorDesc out_desc(*out); - bool as_tuple = false; - MLUCnnl::Where(context, - con_desc.get(), - GetBasePtr(condition), - num_true_desc.get(), - GetBasePtr(&num_true), - as_tuple, - out_int32_desc.get(), - GetBasePtr(&out_int32)); - cnnlCastDataType_t cast_type = GetCastDataType(VT::INT32, VT::INT64); - MLUCnnl::Cast(context, - cast_type, - out_int32_desc.get(), - GetBasePtr(&out_int32), - out_desc.get(), - GetBasePtr(out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_MLU_KERNEL(where_index, - ops::MLUWhereIndexKernel, - ops::MLUWhereIndexKernel, - ops::MLUWhereIndexKernel); diff --git a/paddle/fluid/operators/where_op_mlu.cc b/paddle/fluid/operators/where_op_mlu.cc deleted file mode 100644 index 53ae38bb48b27002f2f2109026798c060c305a45..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/where_op_mlu.cc +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef PADDLE_WITH_MLU - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/mlu/mlu_baseop.h" - -namespace paddle { -namespace operators { - -template -class WhereMLUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* condition = context.Input("Condition"); - auto* X = context.Input("X"); - auto* Y = context.Input("Y"); - auto* out = context.Output("Out"); - auto place = context.GetPlace(); - out->mutable_data(place); - MLUCnnlTensorDesc x_desc(*X); - MLUCnnlTensorDesc y_desc(*Y); - MLUCnnlTensorDesc condition_desc(*condition); - MLUCnnlTensorDesc out_desc(*out); - MLUCnnl::Select(context, - condition_desc.get(), - GetBasePtr(condition), - x_desc.get(), - GetBasePtr(X), - y_desc.get(), - GetBasePtr(Y), - out_desc.get(), - GetBasePtr(out)); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_MLU_KERNEL( - where, - ops::WhereMLUKernel, - ops::WhereMLUKernel); -#endif