未验证 提交 fa9586a7 编写于 作者: F fuyou765 提交者: GitHub

[MLU]add mlu kernel for set_value op (#43687)

上级 89c783db
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/set_value_op.h"
namespace paddle {
namespace operators {
using MLUDeviceContext = platform::MLUDeviceContext;
/**
 * MLU kernel for the `set_value` op.
 *
 * Reads the slice description (`axes`, `starts`, `ends`, `steps`, optionally
 * overridden by the `*TensorList` inputs), enumerates the flat element offsets
 * of `Input` that the slice selects, and scatter-updates those offsets with the
 * value to assign. The value comes from the optional `ValueTensor` input or,
 * when that is absent, from the op attributes via `CopyVectorToTensor`; it is
 * broadcast to the slice shape when the shapes differ. The result is copied
 * into `Out`.
 *
 * NOTE(review): the scatter is issued on a tensor that shares storage with
 * `Input` (see `in_temp` below), so `Input` itself is modified before being
 * copied to `Out` -- confirm this is intended when `Out` does not alias
 * `Input`.
 */
template <typename T>
class SetValueMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<Tensor>("Input");
    auto* value_tensor = ctx.Input<Tensor>("ValueTensor");
    auto* out = ctx.Output<Tensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());

    auto starts_tensor_list = ctx.MultiInput<Tensor>("StartsTensorList");
    auto ends_tensor_list = ctx.MultiInput<Tensor>("EndsTensorList");
    auto steps_tensor_list = ctx.MultiInput<Tensor>("StepsTensorList");

    auto axes = ctx.Attr<std::vector<int64_t>>("axes");
    auto starts = ctx.Attr<std::vector<int64_t>>("starts");
    auto ends = ctx.Attr<std::vector<int64_t>>("ends");
    auto steps = ctx.Attr<std::vector<int64_t>>("steps");
    auto shape = ctx.Attr<std::vector<int64_t>>("shape");
    auto decrease_axes = ctx.Attr<std::vector<int64_t>>("decrease_axes");
    auto none_axes = ctx.Attr<std::vector<int64_t>>("none_axes");

    // Bounds supplied as tensors take precedence over the attribute values.
    if (!starts_tensor_list.empty()) {
      starts = GetDataFromTensorList<int64_t>(starts_tensor_list);
    }
    if (!ends_tensor_list.empty()) {
      ends = GetDataFromTensorList<int64_t>(ends_tensor_list);
    }
    if (!steps_tensor_list.empty()) {
      steps = GetDataFromTensorList<int64_t>(steps_tensor_list);
    }

    auto in_dims = in->dims();
    phi::funcs::CheckAndUpdateSliceAttrs(in_dims, axes, &starts, &ends, &steps);
    auto slice_dims =
        phi::funcs::GetSliceDims(in_dims, axes, starts, ends, &steps);
    auto decrease_slice_dims =
        phi::funcs::GetDecreasedDims(slice_dims, decrease_axes);

    // Shape the value must have (after broadcasting) to fill the slice.
    auto slice_dims_for_assign = decrease_slice_dims;
    if (!none_axes.empty()) {
      // Rebuild the slice shape: insert a size-1 dim for every `None` axis and
      // drop every dim listed in decrease_axes.
      std::vector<int64_t> slice_dims_with_none;
      size_t none_axes_cur = 0, decrease_axes_cur = 0;
      for (int i = 0; i < slice_dims.size(); ++i) {
        while (none_axes_cur < none_axes.size() &&
               none_axes[none_axes_cur] <= i) {
          slice_dims_with_none.push_back(1);
          none_axes_cur++;
        }
        if (decrease_axes_cur < decrease_axes.size() &&
            decrease_axes[decrease_axes_cur] == i) {
          decrease_axes_cur++;
        } else {
          slice_dims_with_none.push_back(slice_dims[i]);
        }
      }
      while (none_axes_cur < none_axes.size()) {
        slice_dims_with_none.push_back(1);
        none_axes_cur++;
      }
      slice_dims_for_assign = phi::make_ddim(slice_dims_with_none);
    }

    // Per-dimension [start, end) and stride over the input. Dimensions not
    // listed in `axes` default to [0, slice_dims[i]) with stride 1.
    auto starts_indices = std::vector<int64_t>(in_dims.size(), 0);
    auto ends_indices = std::vector<int64_t>(in_dims.size(), 0);
    auto strides_indices = std::vector<int64_t>(in_dims.size(), 0);
    for (int i = 0; i < in_dims.size(); ++i) {
      starts_indices[i] = 0;
      ends_indices[i] = slice_dims[i];
      strides_indices[i] = 1;
    }
    for (size_t i = 0; i < axes.size(); i++) {
      int axis_index = axes[i];
      starts_indices[axis_index] = starts[i];
      ends_indices[axis_index] = ends[i];
      strides_indices[axis_index] = steps[i];
    }

    // Enumerate the flat offsets selected by the slice, one dimension at a
    // time: every offset gathered so far is expanded by each index chosen
    // along dimension i. A fresh vector is built per dimension (instead of
    // erasing the front of the working vector per element) so the expansion
    // stays linear in the number of offsets; the resulting order is unchanged.
    int64_t stride_step = phi::product(in_dims);
    std::vector<int64_t> index_indices(1, 0);
    for (size_t i = 0; i < strides_indices.size(); ++i) {
      stride_step /= in_dims[i];
      const int64_t begin = starts_indices[i];
      const int64_t end = ends_indices[i];
      const int64_t step = strides_indices[i];

      std::vector<int64_t> expanded;
      for (const int64_t base : index_indices) {
        if (step > 0) {
          for (int64_t k = begin; k < end; k += step) {
            expanded.push_back(base + k * stride_step);
          }
        } else {
          for (int64_t k = begin; k > end; k += step) {
            expanded.push_back(base + k * stride_step);
          }
        }
      }
      index_indices.swap(expanded);
    }

    PADDLE_ENFORCE_EQ(
        static_cast<int64_t>(index_indices.size()),
        phi::product(slice_dims_for_assign),
        platform::errors::InvalidArgument(
            "OP(set_value) error index indices and value update not match "));

    // Materialize the value to assign: either the ValueTensor input or a
    // tensor filled from the op attributes.
    Tensor value_t(in->type());
    if (value_tensor != nullptr) {
      value_t.ShareDataWith(*value_tensor);
    } else {
      auto value_dims = phi::make_ddim(shape);
      CheckIsDimsMatch(slice_dims_for_assign, value_dims);
      value_t.mutable_data<T>(value_dims, ctx.GetPlace());
      auto value_name =
          GetValueName(framework::TransToProtoVarType(in->dtype()));
      CopyVectorToTensor<T>(value_name.c_str(), &value_t, ctx);
      value_t.Resize(value_dims);
    }

    // Broadcast the value to the slice shape when the shapes differ.
    Tensor value_temp(in->type());
    if (slice_dims_for_assign == value_t.dims()) {
      value_temp.ShareDataWith(value_t);
    } else {
      value_temp.Resize(slice_dims_for_assign);
      value_temp.mutable_data<T>(ctx.GetPlace());
      MLUCnnlTensorDesc value_t_desc(value_t);
      MLUCnnlTensorDesc value_temp_desc(value_temp);
      MLUCnnl::BroadcastTo(ctx,
                           value_t_desc.get(),
                           GetBasePtr(&value_t),
                           value_temp_desc.get(),
                           GetBasePtr(&value_temp));
    }

    int64_t input_numel = phi::product(in_dims);
    int64_t value_numel = phi::product(value_temp.dims());

    // Flattened 1-D views of the input and the value, plus the offsets tensor
    // built from the host-side vector.
    Tensor in_temp, val_temp;
    framework::Tensor index_temp;
    in_temp.ShareDataWith(*in);
    val_temp.ShareDataWith(value_temp);
    paddle::framework::TensorFromVector(
        index_indices, ctx.device_context(), &index_temp);
    auto new_in_dims = phi::make_ddim({input_numel});
    auto new_val_dims = phi::make_ddim({value_numel});
    in_temp.Resize(new_in_dims);
    val_temp.Resize(new_val_dims);

    cnnlScatterRefMode_t mode = CNNL_SCATTERREF_UPDATE;
    MLUCnnlTensorDesc x_desc(in_temp);
    MLUCnnlTensorDesc indices_desc(index_temp);
    MLUCnnlTensorDesc updates_desc(val_temp);
    MLUCnnl::ScatterRefFunctor(ctx,
                               x_desc.get(),
                               GetBasePtr(&in_temp),
                               updates_desc.get(),
                               GetBasePtr(&val_temp),
                               indices_desc.get(),
                               GetBasePtr(&index_temp),
                               mode);

    // Restore the original shape before copying the updated data to Out.
    in_temp.Resize(in_dims);
    paddle::framework::TensorCopy(in_temp, ctx.GetPlace(), out);
  }
};
} // namespace operators
} // namespace paddle
// Register the MLU set_value kernel, instantiated for int and float elements.
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(set_value,
                       ops::SetValueMLUKernel<int>,
                       ops::SetValueMLUKernel<float>);
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
class TestSetValueBase(unittest.TestCase):
    """Shared fixture for the MLU ``set_value`` tests.

    Subclasses customise a scenario by overriding the ``set_*`` hooks and the
    ``_call_setitem`` / ``_get_answer`` pair; the latter two must perform the
    same assignment on a Paddle tensor and on ``self.data`` respectively.
    """

    def setUp(self):
        """Enable static mode and build the reference array and program."""
        paddle.enable_static()
        self.set_mlu()
        # dtype is set before value because set_value may read self.dtype.
        self.set_dtype()
        self.set_value()
        self.set_shape()
        self.data = np.ones(self.shape).astype(self.dtype)
        self.program = paddle.static.Program()

    def set_mlu(self):
        """Mark the test class as MLU-enabled and select MLU device 0."""
        self.__class__.use_mlu = True
        self.place = paddle.device.MLUPlace(0)

    def set_dtype(self):
        """Hook: element type of the tensor under test."""
        self.dtype = "float32"

    def set_value(self):
        """Hook: value that is assigned into the tensor."""
        self.value = 6

    def set_shape(self):
        """Hook: shape of the tensor under test."""
        self.shape = [2, 3, 4]

    def _call_setitem(self, x):
        """Apply the assignment under test to the Paddle tensor ``x``."""
        x[0, 0] = self.value

    def _get_answer(self):
        """Apply the same assignment to the NumPy reference ``self.data``."""
        self.data[0, 0] = self.value
class TestSetValueApi(TestSetValueBase):
    """Runs the scenario in static-graph mode and compares it with NumPy."""

    def _run_static(self):
        """Build ``self.program`` around ``_call_setitem`` and execute it.

        Returns whatever ``Executor.run`` yields when fetching the tensor.
        """
        paddle.enable_static()
        with paddle.static.program_guard(self.program):
            x = paddle.ones(shape=self.shape, dtype=self.dtype)
            self._call_setitem(x)

        executor = paddle.static.Executor(self.place)
        fetched = executor.run(self.program, fetch_list=[x])
        paddle.disable_static()
        return fetched

    def test_api(self):
        # Deliberately no docstring: unittest's verbose mode would print it
        # in place of the test name.
        actual = self._run_static()
        self._get_answer()

        template = "\nIn {} mode: \nExpected res = \n{}, \n\nbut received : \n{}"
        all_equal = (self.data == actual).all()
        self.assertTrue(all_equal,
                        msg=template.format("static", self.data, actual))
# 1. Test different types of item: int, Python slice, Paddle Tensor
# 1.1 item is int
class TestSetValueItemInt(TestSetValueApi):
    """Item is a single integer: ``x[0] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0] = self.value
# 1.2 item is slice
# 1.2.1 step is 1
class TestSetValueItemSlice(TestSetValueApi):
    """Item is a unit-step slice: ``x[0:2] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:2] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:2] = self.value
class TestSetValueItemSlice2(TestSetValueApi):
    """Item is a slice with a negative end: ``x[0:-1] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:-1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:-1] = self.value
class TestSetValueItemSlice3(TestSetValueApi):
    """Item slices two axes: ``x[0:-1, 0:2] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:-1, 0:2] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:-1, 0:2] = self.value
class TestSetValueItemSlice4(TestSetValueApi):
    """Item slices all three axes: ``x[0:, 1:2, :] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:, 1:2, :] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:, 1:2, :] = self.value
# TODO: Re-enable this test once MLU supports while_loop.
#class TestSetValueItemSliceInWhile(TestSetValueApi):
# def _call_setitem(self, x):
# def cond(i, x):
# return i < 1
#
# def body(i, x):
# x[i] = self.value
# i = i + 1
# return i, x
#
# i = paddle.zeros(shape=(1, ), dtype='int32')
# i, x = paddle.fluid.layers.while_loop(cond, body, [i, x])
#
# def _get_answer(self):
# self.data[0] = self.value
# 1.2.2 step > 1
class TestSetValueItemSliceStep(TestSetValueApi):
    """Slice with step 2 on a 5x5x5 tensor: ``x[0:2:2] = value``."""

    def set_shape(self):
        self.shape = [5, 5, 5]

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:2:2] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:2:2] = self.value
class TestSetValueItemSliceStep2(TestSetValueApi):
    """Slice with step 3 and a negative end on a 7x5x5 tensor."""

    def set_shape(self):
        self.shape = [7, 5, 5]

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:-1:3] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:-1:3] = self.value
class TestSetValueItemSliceStep3(TestSetValueApi):
    """Stepped slice on the last axis: ``x[0:-1, 0:2, ::2] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:-1, 0:2, ::2] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:-1, 0:2, ::2] = self.value
class TestSetValueItemSliceStep4(TestSetValueApi):
    """Stepped slice on the middle axis: ``x[0:, 1:2:2, :] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:, 1:2:2, :] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:, 1:2:2, :] = self.value
# 1.2.3 step < 0
class TestSetValueItemSliceNegetiveStep(TestSetValueApi):
    """Negative step with an out-of-range start on a 5x2 tensor.

    ``x[5:2:-1]`` is assigned the two-element array ``[3, 4]``.
    """

    def set_shape(self):
        self.shape = [5, 2]

    def set_value(self):
        self.value = np.array([3, 4])

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[5:2:-1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[5:2:-1] = self.value
class TestSetValueItemSliceNegetiveStep2(TestSetValueApi):
    """Negative step with an open end on a 1-D tensor of length 5.

    ``x[1::-1]`` is assigned the two-element array ``[3, 4]``.
    """

    def set_shape(self):
        self.shape = [5]

    def set_value(self):
        self.value = np.array([3, 4])

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[1::-1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[1::-1] = self.value
class TestSetValueItemSliceNegetiveStep3(TestSetValueApi):
    """Full reversed slice on a 1-D tensor of length 3.

    ``x[::-1]`` is assigned the three-element array ``[3, 4, 5]``.
    """

    def set_shape(self):
        self.shape = [3]

    def set_value(self):
        self.value = np.array([3, 4, 5])

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[::-1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[::-1] = self.value
class TestSetValueItemSliceNegetiveStep4(TestSetValueApi):
    """Negative steps on two axes of a 3x4x5 tensor."""

    def set_shape(self):
        self.shape = [3, 4, 5]

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[2:0:-1, 0:2, ::-1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[2:0:-1, 0:2, ::-1] = self.value
# 1.3 item is Ellipsis
class TestSetValueItemEllipsis1(TestSetValueApi):
    """Ellipsis between two slices: ``x[0:, ..., 1:] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:, ..., 1:] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:, ..., 1:] = self.value
class TestSetValueItemEllipsis2(TestSetValueApi):
    """Trailing ellipsis: ``x[0:, ...] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:, ...] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:, ...] = self.value
class TestSetValueItemEllipsis3(TestSetValueApi):
    """Leading ellipsis: ``x[..., 1:] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[..., 1:] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[..., 1:] = self.value
class TestSetValueItemEllipsis4(TestSetValueApi):
    """Bare ellipsis, i.e. the whole tensor: ``x[...] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[...] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[...] = self.value
# 1.4 item is Paddle Tensor
class TestSetValueItemTensor(TestSetValueApi):
    """Item is an int32 Paddle tensor holding 0; reference is ``data[0]``."""

    def _call_setitem(self, x):
        # The index is supplied as a one-element tensor rather than an int.
        zero = paddle.full([1], 0, dtype="int32")
        x[zero] = self.value

    def _get_answer(self):
        # NumPy reference uses the equivalent plain integer index.
        self.data[0] = self.value
class TestSetValueItemTensor2(TestSetValueApi):
    """Slice whose start and end are tensors; reference is ``data[0:2]``."""

    def _call_setitem(self, x):
        # Start is int32 and end is int64 to mix index dtypes.
        zero = paddle.full([1], 0, dtype="int32")
        two = paddle.full([1], 2, dtype="int64")
        x[zero:two] = self.value

    def _get_answer(self):
        # NumPy reference uses the equivalent plain integers.
        self.data[0:2] = self.value
class TestSetValueItemTensor3(TestSetValueApi):
    """Tensor bounds on two axes; reference is ``data[0:-1, 0:2]``."""

    def _call_setitem(self, x):
        # Tensor start on axis 0, tensor end on axis 1.
        zero = paddle.full([1], 0, dtype="int32")
        two = paddle.full([1], 2, dtype="int64")
        x[zero:-1, 0:two] = self.value

    def _get_answer(self):
        # NumPy reference uses the equivalent plain integers.
        self.data[0:-1, 0:2] = self.value
class TestSetValueItemTensor4(TestSetValueApi):
    """Tensor start and tensor step; reference is ``data[0:-1, 0:2, ::2]``."""

    def _call_setitem(self, x):
        # Tensor start on axis 1, tensor step on axis 2.
        zero = paddle.full([1], 0, dtype="int32")
        two = paddle.full([1], 2, dtype="int64")
        x[0:-1, zero:2, 0:6:two] = self.value

    def _get_answer(self):
        # NumPy reference spells the last axis as ``::2``.
        self.data[0:-1, 0:2, ::2] = self.value
class TestSetValueItemTensor5(TestSetValueApi):
    """Tensor start and tensor step; reference is ``data[0:, 1:2:2, :]``."""

    def _call_setitem(self, x):
        # Tensor start on axis 0, tensor step on axis 1.
        zero = paddle.full([1], 0, dtype="int32")
        two = paddle.full([1], 2, dtype="int64")
        x[zero:, 1:2:two, :] = self.value

    def _get_answer(self):
        # NumPy reference uses the equivalent plain integers.
        self.data[0:, 1:2:2, :] = self.value
class TestSetValueItemTensor6(TestSetValueApi):
    """Negative tensor steps on a 3x4x5 tensor.

    The reference assignment is ``data[2:0:-1, 0:2, ::-1]``.
    """

    def set_shape(self):
        self.shape = [3, 4, 5]

    def _call_setitem(self, x):
        # Steps and one end bound are supplied as int32 tensors.
        minus1 = paddle.full([1], -1, dtype="int32")
        zero = paddle.full([1], 0, dtype="int32")
        x[2:zero:minus1, 0:2, 10:-6:minus1] = self.value

    def _get_answer(self):
        # NumPy reference spells the last axis as ``::-1``.
        self.data[2:0:-1, 0:2, ::-1] = self.value
# 1.5 item is None
class TestSetValueItemNone1(TestSetValueApi):
    """Item is a lone ``None``: ``x[None] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[None] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[None] = self.value
class TestSetValueItemNone2(TestSetValueApi):
    """``None`` between two integer indices: ``x[0, None, 1] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0, None, 1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0, None, 1] = self.value
class TestSetValueItemNone3(TestSetValueApi):
    """Two consecutive ``None`` items: ``x[:, None, None, 1] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[:, None, None, 1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[:, None, None, 1] = self.value
class TestSetValueItemNone4(TestSetValueApi):
    """``None`` after two integer indices: ``x[0, 0, None, 1] = value``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0, 0, None, 1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0, 0, None, 1] = self.value
class TestSetValueItemNone5(TestSetValueApi):
    """``None`` interleaved with integers: ``x[0, None, 0, None, 1]``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0, None, 0, None, 1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0, None, 0, None, 1] = self.value
class TestSetValueItemNone6(TestSetValueApi):
    """Leading ``None`` plus an inner one: ``x[None, 0, 0, None, 0]``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[None, 0, 0, None, 0] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[None, 0, 0, None, 0] = self.value
class TestSetValueItemNone7(TestSetValueApi):
    """``None`` item with an array value: ``x[:, None, 1] = zeros``.

    The value is a zero array indexed with the same ``None`` pattern.
    """

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[:, None, 1] = np.zeros(self.shape)[:, None, 0]

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[:, None, 1] = np.zeros(self.shape)[:, None, 0]
class TestSetValueItemNone8(TestSetValueApi):
    """Trailing ``None`` with an array value: ``x[:, 1, None] = zeros``."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[:, 1, None] = np.zeros(self.shape)[:, 0, None]

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[:, 1, None] = np.zeros(self.shape)[:, 0, None]
class TestSetValueItemNone9(TestSetValueApi):
    """``None`` at both ends combined with an ellipsis and an array value."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None]

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[None, :, 1, ..., None] = np.zeros(self.shape)[0, 0, :, None]
# 1.6 item is list or Tensor of bool
class TestSetValueItemBool1(TestSetValueApi):
    """Item is a Python bool list selecting the first row."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[[True, False]] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[[True, False]] = self.value
class TestSetValueItemBool2(TestSetValueApi):
    """Item is an all-``False`` bool list, so nothing is selected."""

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[[False, False]] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[[False, False]] = self.value
class TestSetValueItemBool3(TestSetValueApi):
    """Bool list selecting the second row, assigned a zero vector.

    The value has ``self.shape[2]`` elements.
    """

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[[False, True]] = np.zeros(self.shape[2])

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[[False, True]] = np.zeros(self.shape[2])
class TestSetValueItemBool4(TestSetValueApi):
    """Item is a bool Paddle tensor created with ``paddle.assign``."""

    def _call_setitem(self, x):
        # The mask is a tensor here; the reference uses the NumPy array.
        idx = paddle.assign(np.array([False, True]))
        x[idx] = np.zeros(self.shape[2])

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[np.array([False, True])] = np.zeros(self.shape[2])
class TestSetValueItemBool5(TestSetValueApi):
    """Item is a 2x3 bool tensor mask over the first two axes."""

    def _call_setitem(self, x):
        # The mask is a tensor here; the reference uses the NumPy array.
        idx = paddle.assign(
            np.array([[False, True, False], [True, True, False]]))
        x[idx] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[np.array([[False, True, False], [True, True,
                                                   False]])] = self.value
class TestSetValueItemBool6(TestSetValueApi):
    """Mask computed from the tensor itself: ``x[x > 0] = value``.

    The first row is zeroed beforehand so the mask is not all-``True``.
    """

    def _call_setitem(self, x):
        # Paddle side: two chained assignments.
        x[0, ...] = 0
        x[x > 0] = self.value

    def _get_answer(self):
        # NumPy reference for the same two assignments.
        self.data[0, ...] = 0
        self.data[self.data > 0] = self.value
def create_test_value_int32(parent):
    """Register an int32 variant of ``parent`` as a module-level test class.

    The generated class subclasses ``parent``, overrides ``set_value`` (value
    ``7``) and ``set_dtype`` (``"int32"``), and is stored in this module's
    globals under ``"<parent name>_ValueInt32"``.

    Args:
        parent: test class to derive from.

    Returns:
        None. The new class is published through ``globals()``.
    """

    class TestValueInt(parent):
        def set_value(self):
            self.value = 7

        def set_dtype(self):
            self.dtype = "int32"

    cls_name = "{0}_{1}".format(parent.__name__, "ValueInt32")
    TestValueInt.__name__ = cls_name
    # On Python 3 unittest builds test ids from __qualname__, which would
    # otherwise stay "create_test_value_int32.<locals>.TestValueInt" for every
    # generated class and make failures indistinguishable in reports.
    TestValueInt.__qualname__ = cls_name
    globals()[cls_name] = TestValueInt
# Generate the int32 variants of the integer-index and slice tests.
for _int32_parent in (
        TestSetValueItemInt,
        TestSetValueItemSlice,
        TestSetValueItemSlice2,
        TestSetValueItemSlice3,
        TestSetValueItemSlice4, ):
    create_test_value_int32(_int32_parent)
# Drop the loop variable so the module does not expose a second name bound to
# a test class (unittest would collect that class twice).
del _int32_parent
def create_test_value_tensor_fp32(parent):
    """Register a float32 tensor-valued variant of ``parent``.

    The generated class subclasses ``parent`` but overrides both
    ``_call_setitem`` and ``_get_answer``: it always assigns a one-element
    Paddle tensor holding ``3`` to ``x[0, 1]``, regardless of the indexing
    expression used by ``parent``. It is stored in this module's globals under
    ``"<parent name>_ValueTensorFp32"``.

    Args:
        parent: test class to derive from.

    Returns:
        None. The new class is published through ``globals()``.
    """

    class TestValueTensorFp32(parent):
        def set_dtype(self):
            self.dtype = "float32"

        def _call_setitem(self, x):
            value = paddle.full(shape=[1], fill_value=3, dtype=self.dtype)
            x[0, 1] = value

        def _get_answer(self):
            self.data[0, 1] = 3

    cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorFp32")
    TestValueTensorFp32.__name__ = cls_name
    # On Python 3 unittest builds test ids from __qualname__; without this
    # every generated class would be reported under the same local name.
    TestValueTensorFp32.__qualname__ = cls_name
    globals()[cls_name] = TestValueTensorFp32
# Generate the float32 tensor-valued variants of the same five tests.
for _fp32_parent in (
        TestSetValueItemInt,
        TestSetValueItemSlice,
        TestSetValueItemSlice2,
        TestSetValueItemSlice3,
        TestSetValueItemSlice4, ):
    create_test_value_tensor_fp32(_fp32_parent)
# Drop the loop variable so the module does not expose a second name bound to
# a test class (unittest would collect that class twice).
del _fp32_parent
# 3. Test different shape of value
class TestSetValueValueShape1(TestSetValueApi):
    """Value is a 1-D array of shape (4,) assigned to ``x[0]``."""

    def set_value(self):
        self.value = np.array([3, 4, 5, 6])  # shape is (4,)

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0] = self.value
class TestSetValueValueShape2(TestSetValueApi):
    """Value is a 2-D array of shape (1, 4) assigned to ``x[0:1]``."""

    def set_value(self):
        self.value = np.array([[3, 4, 5, 6]])  # shape is (1,4)

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0:1] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0:1] = self.value
class TestSetValueValueShape3(TestSetValueApi):
    """Value is a 2-D array of shape (3, 4) assigned to ``x[0]``."""

    def set_value(self):
        self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2],
                               [3, 3, 3, 3]])  # shape is (3,4)

    def _call_setitem(self, x):
        # Paddle side of the assignment.
        x[0] = self.value

    def _get_answer(self):
        # NumPy reference for the same assignment.
        self.data[0] = self.value
class TestSetValueValueShape4(TestSetValueApi):
    """Value is a (3, 4) Paddle tensor of the test dtype assigned to ``x[0]``."""

    def set_value(self):
        # Relies on self.dtype, which setUp sets before calling set_value.
        self.value = np.array([[1, 1, 1, 1], [2, 2, 2, 2],
                               [3, 3, 3,
                                3]]).astype(self.dtype)  # shape is (3,4)

    def _call_setitem(self, x):
        x[0] = paddle.assign(self.value)  # x is Paddle.Tensor

    def _get_answer(self):
        # NumPy reference assigns the array directly.
        self.data[0] = self.value
class TestSetValueValueShape5(TestSetValueApi):
    """Value is a length-3 Paddle tensor assigned to a column: ``x[:, 0]``.

    The tensor under test has shape 3x4.
    """

    def set_value(self):
        # Relies on self.dtype, which setUp sets before calling set_value.
        self.value = np.array([3, 3, 3]).astype(self.dtype)

    def set_shape(self):
        self.shape = [3, 4]

    def _call_setitem(self, x):
        x[:, 0] = paddle.assign(self.value)  # x is Paddle.Tensor

    def _get_answer(self):
        # NumPy reference assigns the array directly.
        self.data[:, 0] = self.value
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册