diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a7505890f41d441cbbf958cda3e86b36343e1b2c
--- /dev/null
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc
@@ -0,0 +1,173 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/elementwise/elementwise_mul_op.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using MLUDeviceContext = platform::MLUDeviceContext;
+
+// Collects the axes of src_ddims that target_ddims does not match (given the
+// broadcast offset `axis`), i.e. the axes a src-shaped tensor must be
+// sum-reduced over to shrink it back to target's shape.
+static void GetReduceAxes(const int axis, const framework::DDim& src_ddims,
+                          const framework::DDim& target_ddims,
+                          std::vector<int>* axes) {
+  int64_t src_dim_size = src_ddims.size();
+  int64_t target_dim_size = target_ddims.size();
+  for (int64_t i = 0; i < src_dim_size; ++i) {
+    if (i < axis || i >= target_dim_size + axis) {
+      axes->push_back(i);
+      continue;
+    }
+    if (src_ddims[i] > target_ddims[i - axis]) {
+      axes->push_back(i);
+    }
+  }
+}
+
+template <typename T>
+class ElementwiseMulMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* out = ctx.Output<Tensor>("Out");
+    out->mutable_data<T>(ctx.GetPlace());
+
+    int axis = ctx.Attr<int>("axis");
+    const auto& x_dims = x->dims();
+    const auto& y_dims = y->dims();
+    axis = (axis < 0 ? (std::abs(x_dims.size() - y_dims.size()) + axis + 1)
+                     : axis);
+    int max_dim = std::max(x_dims.size(), y_dims.size());
+    std::vector<int> x_dims_array(max_dim);
+    std::vector<int> y_dims_array(max_dim);
+    std::vector<int> out_dims_array(max_dim);
+    GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
+                           y_dims_array.data(), out_dims_array.data(), max_dim,
+                           axis);
+
+    MLUCnnlTensorDesc x_desc(max_dim, x_dims_array.data(), ToCnnlDataType<T>());
+    MLUCnnlTensorDesc y_desc(max_dim, y_dims_array.data(), ToCnnlDataType<T>());
+    MLUCnnlTensorDesc out_desc(*out);
+    MLUCnnlOpTensorDesc op_tensor_desc(CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(),
+                                       CNNL_NOT_PROPAGATE_NAN);
+
+    MLUCnnl::OpTensor(ctx, op_tensor_desc.get(), x_desc.get(), GetBasePtr(x),
+                      y_desc.get(), GetBasePtr(y), out_desc.get(),
+                      GetBasePtr(out), ToCnnlDataType<T>());
+  }
+};
+
+template <typename T>
+class ElementwiseMulGradMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* y = ctx.Input<Tensor>("Y");
+    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    int axis = ctx.Attr<int>("axis");
+
+    const auto& x_dims = x->dims();
+    const auto& y_dims = y->dims();
+    axis = (axis < 0 ? (std::abs(x_dims.size() - y_dims.size()) + axis + 1)
+                     : axis);
+    int max_dim = std::max(x_dims.size(), y_dims.size());
+    std::vector<int> x_dims_array(max_dim);
+    std::vector<int> y_dims_array(max_dim);
+    std::vector<int> out_dims_array(max_dim);
+    GetBroadcastDimsArrays(x_dims, y_dims, x_dims_array.data(),
+                           y_dims_array.data(), out_dims_array.data(), max_dim,
+                           axis);
+
+    MLUCnnlTensorDesc x_desc(max_dim, x_dims_array.data(), ToCnnlDataType<T>());
+    MLUCnnlTensorDesc y_desc(max_dim, y_dims_array.data(), ToCnnlDataType<T>());
+    MLUCnnlTensorDesc dout_desc(*dout);
+    MLUCnnlOpTensorDesc mul_op_desc(CNNL_OP_TENSOR_MUL, ToCnnlDataType<T>(),
+                                    CNNL_NOT_PROPAGATE_NAN);
+    if (dx) {
+      dx->mutable_data<T>(ctx.GetPlace());
+      if (dx->dims() == dout->dims()) {
+        MLUCnnl::OpTensor(ctx, mul_op_desc.get(), dout_desc.get(),
+                          GetBasePtr(dout), y_desc.get(), GetBasePtr(y),
+                          x_desc.get(), GetBasePtr(dx), ToCnnlDataType<T>());
+      } else {
+        // dx = reduce_sum(dout * y) over the axes where x was broadcast.
+        Tensor dx_temp(x->dtype());
+        dx_temp.Resize(dout->dims());
+        dx_temp.mutable_data<T>(ctx.GetPlace());
+        MLUCnnl::OpTensor(ctx, mul_op_desc.get(), dout_desc.get(),
+                          GetBasePtr(dout), y_desc.get(), GetBasePtr(y),
+                          dout_desc.get(), GetBasePtr(&dx_temp),
+                          ToCnnlDataType<T>());
+
+        std::vector<int> reduce_axes;
+        GetReduceAxes(axis, dx_temp.dims(), dx->dims(), &reduce_axes);
+        MLUCnnlReduceDesc reduction_desc(
+            reduce_axes, CNNL_REDUCE_ADD, ToCnnlDataType<T>(),
+            CNNL_NOT_PROPAGATE_NAN, CNNL_REDUCE_NO_INDICES, CNNL_32BIT_INDICES);
+        MLUCnnlTensorDesc dx_desc(*dx);
+        MLUCnnl::Reduce(ctx, true /*need_workspace*/, reduction_desc.get(),
+                        nullptr, dout_desc.get(), GetBasePtr(&dx_temp), 0,
+                        nullptr, nullptr, dx_desc.get(), GetBasePtr(dx));
+      }
+    }
+    if (dy) {
+      dy->mutable_data<T>(ctx.GetPlace());
+      if (dy->dims() == dout->dims()) {
+        MLUCnnl::OpTensor(ctx, mul_op_desc.get(), dout_desc.get(),
+                          GetBasePtr(dout), x_desc.get(), GetBasePtr(x),
+                          y_desc.get(), GetBasePtr(dy), ToCnnlDataType<T>());
+      } else {
+        // dy = reduce_sum(dout * x) over the axes where y was broadcast.
+        Tensor dy_temp(y->dtype());
+        dy_temp.Resize(dout->dims());
+        dy_temp.mutable_data<T>(ctx.GetPlace());
+        MLUCnnl::OpTensor(ctx, mul_op_desc.get(), dout_desc.get(),
+                          GetBasePtr(dout), x_desc.get(), GetBasePtr(x),
+                          dout_desc.get(), GetBasePtr(&dy_temp),
+                          ToCnnlDataType<T>());
+
+        std::vector<int> reduce_axes;
+        GetReduceAxes(axis, dy_temp.dims(), dy->dims(), &reduce_axes);
+        MLUCnnlReduceDesc reduction_desc(
+            reduce_axes, CNNL_REDUCE_ADD, ToCnnlDataType<T>(),
+            CNNL_NOT_PROPAGATE_NAN, CNNL_REDUCE_NO_INDICES, CNNL_32BIT_INDICES);
+        MLUCnnlTensorDesc dy_desc(*dy);
+        MLUCnnl::Reduce(ctx, true /*need_workspace*/, reduction_desc.get(),
+                        nullptr, dout_desc.get(), GetBasePtr(&dy_temp), 0,
+                        nullptr, nullptr, dy_desc.get(), GetBasePtr(dy));
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_MLU_KERNEL(elementwise_mul, ops::ElementwiseMulMLUKernel<float>,
+                       ops::ElementwiseMulMLUKernel<paddle::platform::float16>,
+                       ops::ElementwiseMulMLUKernel<int>);
+
+REGISTER_OP_MLU_KERNEL(
+    elementwise_mul_grad, ops::ElementwiseMulGradMLUKernel<float>,
+    ops::ElementwiseMulGradMLUKernel<paddle::platform::float16>,
+    ops::ElementwiseMulGradMLUKernel<int>);
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc8a08c39ffc8c96f55f3367cf183448cf55d9ba
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_elementwise_mul_op_mlu.py
@@ -0,0 +1,240 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid import Program, compiler, program_guard
+from paddle.fluid.op import Operator
+
+import sys
+sys.path.append('..')
+from op_test import OpTest, skip_check_grad_ci
+
+paddle.enable_static()
+
+
+class ElementwiseMulOp(OpTest):
+    def init_kernel_type(self):
+        self.__class__.use_mlu = True
+        self.place = paddle.device.MLUPlace(0)
+
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.dtype = np.float32
+        self.axis = -1
+        self.init_dtype()
+        self.init_input_output()
+        self.init_kernel_type()
+        self.init_axis()
+
+        self.inputs = {
+            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
+            'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {'axis': self.axis}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad_normal(self):
+        self.check_grad_with_place(self.place, ['X', 'Y'], 'Out')
+
+    def test_check_grad_ignore_x(self):
+        self.check_grad_with_place(
+            self.place, ['Y'], 'Out', no_grad_set=set("X"))
+
+    def test_check_grad_ignore_y(self):
+        self.check_grad_with_place(
+            self.place, ['X'], 'Out', no_grad_set=set('Y'))
+
+    def init_input_output(self):
+        self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
+        self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
+        self.out = np.multiply(self.x, self.y)
+
+    def init_dtype(self):
+        pass
+
+    def init_axis(self):
+        pass
+
+
+@skip_check_grad_ci(
+    reason="[skip shape check] Use y_shape(1) to test broadcast.")
+class TestElementwiseMulOp_scalar(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(10, 3, 4).astype(np.float32),
+            'Y': np.random.rand(1).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_Vector(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.random((100, )).astype("float32"),
+            'Y': np.random.random((100, )).astype("float32")
+        }
+        self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp):
+    def init_input_output(self):
+        self.x = np.random.rand(100, 2, 3).astype(self.dtype)
+        self.y = np.random.rand(100).astype(self.dtype)
+        self.out = self.x * self.y.reshape(100, 1, 1)
+
+    def init_axis(self):
+        self.axis = 0
+
+
+class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(2, 100, 3).astype(np.float32),
+            'Y': np.random.rand(100).astype(np.float32)
+        }
+
+        self.attrs = {'axis': 1}
+        self.outputs = {
+            'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 100, 1)
+        }
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 100).astype(np.float32),
+            'Y': np.random.rand(100).astype(np.float32)
+        }
+
+        self.outputs = {
+            'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 1, 100)
+        }
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(2, 10, 12, 3).astype(np.float32),
+            'Y': np.random.rand(10, 12).astype(np.float32)
+        }
+
+        self.attrs = {'axis': 1}
+        self.outputs = {
+            'Out': self.inputs['X'] * self.inputs['Y'].reshape(1, 10, 12, 1)
+        }
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(10, 2, 11).astype(np.float32),
+            'Y': np.random.rand(10, 1, 11).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(10, 4, 2, 3).astype(np.float32),
+            'Y': np.random.rand(10, 4, 1, 3).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOpFp16(ElementwiseMulOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(2, 3, 100).astype(np.float32),
+            'Y': np.random.rand(1, 1, 100).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(30, 3, 1, 5).astype(np.float32),
+            'Y': np.random.rand(30, 1, 4, 1).astype(np.float32)
+        }
+        self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp):
+    def setUp(self):
+        self.op_type = "elementwise_mul"
+        self.inputs = {
+            'X': np.random.rand(10, 10).astype(np.float32),
+            'Y': np.random.rand(2, 2, 10, 10).astype(np.float32)
+        }
+
+        self.attrs = {'axis': 2}
+
+        self.outputs = {
+            'Out': self.inputs['X'].reshape(1, 1, 10, 10) * self.inputs['Y']
+        }
+        self.init_kernel_type()
+
+
+class TestElementwiseMulOpError(unittest.TestCase):
+    def test_errors(self):
+        with program_guard(Program(), Program()):
+            # the input of elementwise_mul must be Variable
+            x1 = fluid.create_lod_tensor(
+                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
+            y1 = fluid.create_lod_tensor(
+                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace())
+            self.assertRaises(TypeError, fluid.layers.elementwise_mul, x1, y1)
+
+            # the input dtype of elementwise_mul must be float16, float32 or int32
+            x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="uint8")
+            y2 = fluid.layers.data(name='y2', shape=[3, 4, 5, 6], dtype="uint8")
+            self.assertRaises(TypeError, fluid.layers.elementwise_mul, x2, y2)
+
+
+if __name__ == '__main__':
+    unittest.main()
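
For reference, a minimal end-to-end sketch of how the new kernel is exercised from the static-graph Python API. This is an illustrative usage example, not part of the change itself: it assumes an MLU build of Paddle with a visible device 0 (the same `paddle.device.MLUPlace(0)` the tests above use), and the variable names are made up for the example.

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()
place = paddle.device.MLUPlace(0)  # assumption: MLU build with device 0
exe = fluid.Executor(place)

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    # Static-graph inputs; shapes match the default test case above.
    x = fluid.layers.data(
        name='x', shape=[13, 17], dtype='float32', append_batch_size=False)
    y = fluid.layers.data(
        name='y', shape=[13, 17], dtype='float32', append_batch_size=False)
    # On an MLU place this dispatches to the elementwise_mul kernel
    # registered in elementwise_mul_op_mlu.cc.
    out = fluid.layers.elementwise_mul(x, y)

exe.run(startup)
x_np = np.random.uniform(0.1, 1, [13, 17]).astype('float32')
y_np = np.random.uniform(0.1, 1, [13, 17]).astype('float32')
res, = exe.run(main, feed={'x': x_np, 'y': y_np}, fetch_list=[out])
np.testing.assert_allclose(res, x_np * y_np, rtol=1e-5)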