From fa9586a73cff4e964fa9947e5a5e64b3aa55b6fd Mon Sep 17 00:00:00 2001 From: fuyou765 <64373205+fuyou765@users.noreply.github.com> Date: Fri, 24 Jun 2022 16:00:42 +0800 Subject: [PATCH] [MLU]add mlu kernel for set_value op (#43687) --- paddle/fluid/operators/set_value_op_mlu.cc | 199 ++++++ .../unittests/mlu/test_set_value_op_mlu.py | 616 ++++++++++++++++++ 2 files changed, 815 insertions(+) create mode 100644 paddle/fluid/operators/set_value_op_mlu.cc create mode 100644 python/paddle/fluid/tests/unittests/mlu/test_set_value_op_mlu.py diff --git a/paddle/fluid/operators/set_value_op_mlu.cc b/paddle/fluid/operators/set_value_op_mlu.cc new file mode 100644 index 00000000000..44422994f60 --- /dev/null +++ b/paddle/fluid/operators/set_value_op_mlu.cc @@ -0,0 +1,199 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/set_value_op.h" + +namespace paddle { +namespace operators { + +using MLUDeviceContext = platform::MLUDeviceContext; + +template +class SetValueMLUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* in = ctx.Input("Input"); + auto* value_tensor = ctx.Input("ValueTensor"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + + auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto steps_tensor_list = ctx.MultiInput("StepsTensorList"); + + auto axes = ctx.Attr>("axes"); + auto starts = ctx.Attr>("starts"); + auto ends = ctx.Attr>("ends"); + auto steps = ctx.Attr>("steps"); + auto shape = ctx.Attr>("shape"); + auto decrease_axes = ctx.Attr>("decrease_axes"); + auto none_axes = ctx.Attr>("none_axes"); + + if (!starts_tensor_list.empty()) { + starts = GetDataFromTensorList(starts_tensor_list); + } + if (!ends_tensor_list.empty()) { + ends = GetDataFromTensorList(ends_tensor_list); + } + if (!steps_tensor_list.empty()) { + steps = GetDataFromTensorList(steps_tensor_list); + } + + auto in_dims = in->dims(); + phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends, &steps); + auto slice_dims = + phi::funcs::GetSliceDims(in_dims, axes, starts, ends, &steps); + auto decrease_slice_dims = + phi::funcs::GetDecreasedDims(slice_dims, decrease_axes); + + auto slice_dims_for_assign = decrease_slice_dims; + if (!none_axes.empty()) { + std::vector slice_dims_with_none; + + size_t none_axes_cur = 0, decrease_axes_cur = 0; + for (int i = 0; i < slice_dims.size(); ++i) { + while (none_axes_cur < none_axes.size() && + none_axes[none_axes_cur] <= i) { + slice_dims_with_none.push_back(1); + none_axes_cur++; + } + if (decrease_axes_cur < decrease_axes.size() && + decrease_axes[decrease_axes_cur] == i) { + decrease_axes_cur++; + } else { + slice_dims_with_none.push_back(slice_dims[i]); + } + } + while (none_axes_cur < none_axes.size()) { + slice_dims_with_none.push_back(1); + none_axes_cur++; + } + + slice_dims_for_assign = phi::make_ddim(slice_dims_with_none); + } + + auto starts_indices = std::vector(in_dims.size(), 0); + auto ends_indices = std::vector(in_dims.size(), 0); + auto strides_indices = std::vector(in_dims.size(), 0); + + for (int i = 0; i < in_dims.size(); ++i) { + starts_indices[i] = 0; + ends_indices[i] = slice_dims[i]; + strides_indices[i] = 1; + } + for (size_t i = 0; i < axes.size(); i++) { + int axis_index = axes[i]; + starts_indices[axis_index] = starts[i]; + ends_indices[axis_index] = ends[i]; + strides_indices[axis_index] = steps[i]; + } + + int64_t stride_step = phi::product(in_dims); + std::vector index_indices(1, 0); + for (size_t i = 0; i < strides_indices.size(); ++i) { + auto index_size = index_indices.size(); + stride_step /= in_dims[i]; + for (size_t j = 0; j < index_size; ++j) { + auto start_index = *index_indices.begin(); + if (strides_indices[i] > 0) { + for (int64_t k = starts_indices[i]; k < ends_indices[i]; + k += strides_indices[i]) { + index_indices.push_back(start_index + k * stride_step); + } + } else { + for (int64_t k = starts_indices[i]; k > ends_indices[i]; + k += strides_indices[i]) { + index_indices.push_back(start_index + k * stride_step); + } + } + index_indices.erase(index_indices.begin()); + } + } + + PADDLE_ENFORCE_EQ( + static_cast(index_indices.size()), + phi::product(slice_dims_for_assign), + platform::errors::InvalidArgument( + "OP(set_value) error index indices and value update not match ")); + + Tensor value_t(in->type()); + if (value_tensor != nullptr) { + value_t.ShareDataWith(*value_tensor); + } else { + auto value_dims = phi::make_ddim(shape); + CheckIsDimsMatch(slice_dims_for_assign, value_dims); + + value_t.mutable_data(value_dims, ctx.GetPlace()); + auto value_name = + GetValueName(framework::TransToProtoVarType(in->dtype())); + CopyVectorToTensor(value_name.c_str(), &value_t, ctx); + value_t.Resize(value_dims); + } + + Tensor value_temp(in->type()); + if (slice_dims_for_assign == value_t.dims()) { + value_temp.ShareDataWith(value_t); + } else { + value_temp.Resize(slice_dims_for_assign); + value_temp.mutable_data(ctx.GetPlace()); + MLUCnnlTensorDesc value_t_desc(value_t); + MLUCnnlTensorDesc value_temp_desc(value_temp); + MLUCnnl::BroadcastTo(ctx, + value_t_desc.get(), + GetBasePtr(&value_t), + value_temp_desc.get(), + GetBasePtr(&value_temp)); + } + + int64_t input_numel = phi::product(in_dims); + int64_t value_numel = phi::product(value_temp.dims()); + Tensor in_temp, out_temp, val_temp; + framework::Tensor index_temp; + in_temp.ShareDataWith(*in); + val_temp.ShareDataWith(value_temp); + paddle::framework::TensorFromVector( + index_indices, ctx.device_context(), &index_temp); + auto new_in_dims = phi::make_ddim({input_numel}); + auto new_val_dims = phi::make_ddim({value_numel}); + in_temp.Resize(new_in_dims); + val_temp.Resize(new_val_dims); + cnnlScatterRefMode_t mode = CNNL_SCATTERREF_UPDATE; + MLUCnnlTensorDesc x_desc(in_temp); + MLUCnnlTensorDesc indices_desc(index_temp); + MLUCnnlTensorDesc updates_desc(val_temp); + MLUCnnlTensorDesc out_desc(*out); + + MLUCnnl::ScatterRefFunctor(ctx, + x_desc.get(), + GetBasePtr(&in_temp), + updates_desc.get(), + GetBasePtr(&val_temp), + indices_desc.get(), + GetBasePtr(&index_temp), + mode); + in_temp.Resize(in_dims); + paddle::framework::TensorCopy(in_temp, ctx.GetPlace(), out); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_MLU_KERNEL(set_value, + ops::SetValueMLUKernel, + ops::SetValueMLUKernel); diff --git a/python/paddle/fluid/tests/unittests/mlu/test_set_value_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_set_value_op_mlu.py new file mode 100644 index 00000000000..f6183687f6a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mlu/test_set_value_op_mlu.py @@ -0,0 +1,616 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import numpy as np +import unittest +import sys + +sys.path.append("..") +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import core + + +class TestSetValueBase(unittest.TestCase): + + def set_mlu(self): + self.__class__.use_mlu = True + self.place = paddle.device.MLUPlace(0) + + def setUp(self): + paddle.enable_static() + self.set_mlu() + self.set_dtype() + self.set_value() + self.set_shape() + self.data = np.ones(self.shape).astype(self.dtype) + self.program = paddle.static.Program() + + def set_shape(self): + self.shape = [2, 3, 4] + + def set_value(self): + self.value = 6 + + def set_dtype(self): + self.dtype = "float32" + + def _call_setitem(self, x): + x[0, 0] = self.value + + def _get_answer(self): + self.data[0, 0] = self.value + + +class TestSetValueApi(TestSetValueBase): + + def _run_static(self): + paddle.enable_static() + with paddle.static.program_guard(self.program): + x = paddle.ones(shape=self.shape, dtype=self.dtype) + self._call_setitem(x) + + exe = paddle.static.Executor(self.place) + out = exe.run(self.program, fetch_list=[x]) + paddle.disable_static() + return out + + def test_api(self): + static_out = self._run_static() + self._get_answer() + + error_msg = "\nIn {} mode: \nExpected res = \n{}, \n\nbut received : \n{}" + self.assertTrue((self.data == static_out).all(), + msg=error_msg.format("static", self.data, static_out)) + + +# 1. Test different type of item: int, Python slice, Paddle Tensor +# 1.1 item is int +class TestSetValueItemInt(TestSetValueApi): + + def _call_setitem(self, x): + x[0] = self.value + + def _get_answer(self): + self.data[0] = self.value + + +# 1.2 item is slice +# 1.2.1 step is 1 +class TestSetValueItemSlice(TestSetValueApi): + + def _call_setitem(self, x): + x[0:2] = self.value + + def _get_answer(self): + self.data[0:2] = self.value + + +class TestSetValueItemSlice2(TestSetValueApi): + + def _call_setitem(self, x): + x[0:-1] = self.value + + def _get_answer(self): + self.data[0:-1] = self.value + + +class TestSetValueItemSlice3(TestSetValueApi): + + def _call_setitem(self, x): + x[0:-1, 0:2] = self.value + + def _get_answer(self): + self.data[0:-1, 0:2] = self.value + + +class TestSetValueItemSlice4(TestSetValueApi): + + def _call_setitem(self, x): + x[0:, 1:2, :] = self.value + + def _get_answer(self): + self.data[0:, 1:2, :] = self.value + + +#TODO: Fix this after MLU support while_loop +#class TestSetValueItemSliceInWhile(TestSetValueApi): +# def _call_setitem(self, x): +# def cond(i, x): +# return i < 1 +# +# def body(i, x): +# x[i] = self.value +# i = i + 1 +# return i, x +# +# i = paddle.zeros(shape=(1, ), dtype='int32') +# i, x = paddle.fluid.layers.while_loop(cond, body, [i, x]) +# +# def _get_answer(self): +# self.data[0] = self.value + + +# 1.2.2 step > 1 +class TestSetValueItemSliceStep(TestSetValueApi): + + def set_shape(self): + self.shape = [5, 5, 5] + + def _call_setitem(self, x): + x[0:2:2] = self.value + + def _get_answer(self): + self.data[0:2:2] = self.value + + +class TestSetValueItemSliceStep2(TestSetValueApi): + + def set_shape(self): + self.shape = [7, 5, 5] + + def _call_setitem(self, x): + x[0:-1:3] = self.value + + def _get_answer(self): + self.data[0:-1:3] = self.value + + +class TestSetValueItemSliceStep3(TestSetValueApi): + + def _call_setitem(self, x): + x[0:-1, 0:2, ::2] = self.value + + def _get_answer(self): + self.data[0:-1, 0:2, ::2] = self.value + + +class TestSetValueItemSliceStep4(TestSetValueApi): + + def _call_setitem(self, x): + x[0:, 1:2:2, :] = self.value + + def _get_answer(self): + self.data[0:, 1:2:2, :] = self.value + + +# 1.2.3 step < 0 +class TestSetValueItemSliceNegetiveStep(TestSetValueApi): + + def set_shape(self): + self.shape = [5, 2] + + def set_value(self): + self.value = np.array([3, 4]) + + def _call_setitem(self, x): + x[5:2:-1] = self.value + + def _get_answer(self): + self.data[5:2:-1] = self.value + + +class TestSetValueItemSliceNegetiveStep2(TestSetValueApi): + + def set_shape(self): + self.shape = [5] + + def set_value(self): + self.value = np.array([3, 4]) + + def _call_setitem(self, x): + x[1::-1] = self.value + + def _get_answer(self): + self.data[1::-1] = self.value + + +class TestSetValueItemSliceNegetiveStep3(TestSetValueApi): + + def set_shape(self): + self.shape = [3] + + def set_value(self): + self.value = np.array([3, 4, 5]) + + def _call_setitem(self, x): + x[::-1] = self.value + + def _get_answer(self): + self.data[::-1] = self.value + + +class TestSetValueItemSliceNegetiveStep4(TestSetValueApi): + + def set_shape(self): + self.shape = [3, 4, 5] + + def _call_setitem(self, x): + x[2:0:-1, 0:2, ::-1] = self.value + + def _get_answer(self): + self.data[2:0:-1, 0:2, ::-1] = self.value + + +# 1.3 item is Ellipsis + + +class TestSetValueItemEllipsis1(TestSetValueApi): + + def _call_setitem(self, x): + x[0:, ..., 1:] = self.value + + def _get_answer(self): + self.data[0:, ..., 1:] = self.value + + +class TestSetValueItemEllipsis2(TestSetValueApi): + + def _call_setitem(self, x): + x[0:, ...] = self.value + + def _get_answer(self): + self.data[0:, ...] = self.value + + +class TestSetValueItemEllipsis3(TestSetValueApi): + + def _call_setitem(self, x): + x[..., 1:] = self.value + + def _get_answer(self): + self.data[..., 1:] = self.value + + +class TestSetValueItemEllipsis4(TestSetValueApi): + + def _call_setitem(self, x): + x[...] = self.value + + def _get_answer(self): + self.data[...] = self.value + + +# 1.4 item is Paddle Tensor +class TestSetValueItemTensor(TestSetValueApi): + + def _call_setitem(self, x): + zero = paddle.full([1], 0, dtype="int32") + x[zero] = self.value + + def _get_answer(self): + self.data[0] = self.value + + +class TestSetValueItemTensor2(TestSetValueApi): + + def _call_setitem(self, x): + zero = paddle.full([1], 0, dtype="int32") + two = paddle.full([1], 2, dtype="int64") + x[zero:two] = self.value + + def _get_answer(self): + self.data[0:2] = self.value + + +class TestSetValueItemTensor3(TestSetValueApi): + + def _call_setitem(self, x): + zero = paddle.full([1], 0, dtype="int32") + two = paddle.full([1], 2, dtype="int64") + x[zero:-1, 0:two] = self.value + + def _get_answer(self): + self.data[0:-1, 0:2] = self.value + + +class TestSetValueItemTensor4(TestSetValueApi): + + def _call_setitem(self, x): + zero = paddle.full([1], 0, dtype="int32") + two = paddle.full([1], 2, dtype="int64") + x[0:-1, zero:2, 0:6:two] = self.value + + def _get_answer(self): + self.data[0:-1, 0:2, ::2] = self.value + + +class TestSetValueItemTensor5(TestSetValueApi): + + def _call_setitem(self, x): + zero = paddle.full([1], 0, dtype="int32") + two = paddle.full([1], 2, dtype="int64") + x[zero:, 1:2:two, :] = self.value + + def _get_answer(self): + self.data[0:, 1:2:2, :] = self.value + + +class TestSetValueItemTensor6(TestSetValueApi): + + def set_shape(self): + self.shape = [3, 4, 5] + + def _call_setitem(self, x): + minus1 = paddle.full([1], -1, dtype="int32") + zero = paddle.full([1], 0, dtype="int32") + x[2:zero:minus1, 0:2, 10:-6:minus1] = self.value + + def _get_answer(self): + self.data[2:0:-1, 0:2, ::-1] = self.value + + +# 1.5 item is None +class TestSetValueItemNone1(TestSetValueApi): + + def _call_setitem(self, x): + x[None] = self.value + + def _get_answer(self): + self.data[None] = self.value + + +class TestSetValueItemNone2(TestSetValueApi): + + def _call_setitem(self, x): + x[0, None, 1] = self.value + + def _get_answer(self): + self.data[0, None, 1] = self.value + + +class TestSetValueItemNone3(TestSetValueApi): + + def _call_setitem(self, x): + x[:, None, None, 1] = self.value + + def _get_answer(self): + self.data[:, None, None, 1] = self.value + + +class TestSetValueItemNone4(TestSetValueApi): + + def _call_setitem(self, x): + x[0, 0, None, 1] = self.value + + def _get_answer(self): + self.data[0, 0, None, 1] = self.value + + +class TestSetValueItemNone5(TestSetValueApi): + + def _call_setitem(self, x): + x[0, None, 0, None, 1] = self.value + + def _get_answer(self): + self.data[0, None, 0, None, 1] = self.value + + +class TestSetValueItemNone6(TestSetValueApi): + + def _call_setitem(self, x): + x[None, 0, 0, None, 0] = self.value + + def _get_answer(self): + self.data[None, 0, 0, None, 0] = self.value + + +class TestSetValueItemNone7(TestSetValueApi): + + def _call_setitem(self, x): + x[:, None, 1] = np.zeros(self.shape)[:, None, 0] + + def _get_answer(self): + self.data[:, None, 1] = np.zeros(self.shape)[:, None, 0] + + +class TestSetValueItemNone8(TestSetValueApi): + + def _call_setitem(self, x): + x[:, 1, None] = np.zeros(self.shape)[:, 0, None] + + def _get_answer(self): + self.data[:, 1, None] = np.zeros(self.shape)[:, 0, None] + + +class TestSetValueItemNone9(TestSetValueApi): + + def _call_setitem(self, x): + x[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] + + def _get_answer(self): + self.data[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None] + + +# 1.5 item is list or Tensor of bol +class TestSetValueItemBool1(TestSetValueApi): + + def _call_setitem(self, x): + x[[True, False]] = self.value + + def _get_answer(self): + self.data[[True, False]] = self.value + + +class TestSetValueItemBool2(TestSetValueApi): + + def _call_setitem(self, x): + x[[False, False]] = self.value + + def _get_answer(self): + self.data[[False, False]] = self.value + + +class TestSetValueItemBool3(TestSetValueApi): + + def _call_setitem(self, x): + x[[False, True]] = np.zeros(self.shape[2]) + + def _get_answer(self): + self.data[[False, True]] = np.zeros(self.shape[2]) + + +class TestSetValueItemBool4(TestSetValueApi): + + def _call_setitem(self, x): + idx = paddle.assign(np.array([False, True])) + x[idx] = np.zeros(self.shape[2]) + + def _get_answer(self): + self.data[np.array([False, True])] = np.zeros(self.shape[2]) + + +class TestSetValueItemBool5(TestSetValueApi): + + def _call_setitem(self, x): + idx = paddle.assign( + np.array([[False, True, False], [True, True, False]])) + x[idx] = self.value + + def _get_answer(self): + self.data[np.array([[False, True, False], [True, True, + False]])] = self.value + + +class TestSetValueItemBool6(TestSetValueApi): + + def _call_setitem(self, x): + x[0, ...] = 0 + x[x > 0] = self.value + + def _get_answer(self): + self.data[0, ...] = 0 + self.data[self.data > 0] = self.value + + +def create_test_value_int32(parent): + + class TestValueInt(parent): + + def set_value(self): + self.value = 7 + + def set_dtype(self): + self.dtype = "int32" + + cls_name = "{0}_{1}".format(parent.__name__, "ValueInt32") + TestValueInt.__name__ = cls_name + globals()[cls_name] = TestValueInt + + +create_test_value_int32(TestSetValueItemInt) +create_test_value_int32(TestSetValueItemSlice) +create_test_value_int32(TestSetValueItemSlice2) +create_test_value_int32(TestSetValueItemSlice3) +create_test_value_int32(TestSetValueItemSlice4) + + +def create_test_value_tensor_fp32(parent): + + class TestValueInt(parent): + + def set_dtype(self): + self.dtype = "float32" + + def _call_setitem(self, x): + value = paddle.full(shape=[1], fill_value=3, dtype=self.dtype) + x[0, 1] = value + + def _get_answer(self): + self.data[0, 1] = 3 + + cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorFp32") + TestValueInt.__name__ = cls_name + globals()[cls_name] = TestValueInt + + +create_test_value_tensor_fp32(TestSetValueItemInt) +create_test_value_tensor_fp32(TestSetValueItemSlice) +create_test_value_tensor_fp32(TestSetValueItemSlice2) +create_test_value_tensor_fp32(TestSetValueItemSlice3) +create_test_value_tensor_fp32(TestSetValueItemSlice4) + + +# 3. Test different shape of value +class TestSetValueValueShape1(TestSetValueApi): + + def set_value(self): + self.value = np.array([3, 4, 5, 6]) # shape is (4,) + + def _call_setitem(self, x): + x[0] = self.value + + def _get_answer(self): + self.data[0] = self.value + + +class TestSetValueValueShape2(TestSetValueApi): + + def set_value(self): + self.value = np.array([[3, 4, 5, 6]]) # shape is (1,4) + + def _call_setitem(self, x): + x[0:1] = self.value + + def _get_answer(self): + self.data[0:1] = self.value + + +class TestSetValueValueShape3(TestSetValueApi): + + def set_value(self): + self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2], + [3, 3, 3, 3]]) # shape is (3,4) + + def _call_setitem(self, x): + x[0] = self.value + + def _get_answer(self): + self.data[0] = self.value + + +class TestSetValueValueShape4(TestSetValueApi): + + def set_value(self): + self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2], + [3, 3, 3, + 3]]).astype(self.dtype) # shape is (3,4) + + def _call_setitem(self, x): + x[0] = paddle.assign(self.value) # x is Paddle.Tensor + + def _get_answer(self): + self.data[0] = self.value + + +class TestSetValueValueShape5(TestSetValueApi): + + def set_value(self): + self.value = np.array([3, 3, 3]).astype(self.dtype) + + def set_shape(self): + self.shape = [3, 4] + + def _call_setitem(self, x): + x[:, 0] = paddle.assign(self.value) # x is Paddle.Tensor + + def _get_answer(self): + self.data[:, 0] = self.value + + +if __name__ == '__main__': + unittest.main() -- GitLab