未验证 提交 e726960a 编写于 作者: Q qipengh 提交者: GitHub

[MLU] add lookup_table_v2 and unstack op (#42847)

上级 313f5d01
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
constexpr int64_t kNoPadding = -1;
// MLU forward kernel for lookup_table_v2.
//
// Gathers rows of the table input "W" along axis 0, selected by "Ids", into
// "Out". When the "padding_idx" attribute differs from kNoPadding, ScatterNd
// (CNNL_SCATTERND_UPDATE mode) is first invoked with the original table as
// input, a single index equal to padding_idx and a zero-filled row as the
// update, writing into a temporary table; the gather then reads from that
// temporary table instead of "W".
template <typename T>
class LookupTableV2MLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *ids = ctx.Input<framework::LoDTensor>("Ids");    // integer ids
    auto *out = ctx.Output<framework::LoDTensor>("Out");   // gathered rows
    auto *weight = ctx.Input<framework::LoDTensor>("W");   // lookup table

    // Only a dense LoDTensor table is accepted by this kernel.
    PADDLE_ENFORCE_EQ(
        ctx.InputVar("W")->IsType<framework::LoDTensor>(), true,
        platform::errors::InvalidArgument("mlu only accept LoDTensor"));

    out->mutable_data<T>(ctx.GetPlace());

    MLUCnnlTensorDesc ids_desc(*ids);
    MLUCnnlTensorDesc weight_desc(*weight);
    MLUCnnlTensorDesc out_desc(*out);

    int64_t padding_idx = ctx.Attr<int64_t>("padding_idx");

    // Fast path: no padding row, gather straight from the table.
    if (padding_idx == kNoPadding) {
      MLUCnnl::GatherFunctor(ctx, /*axis=*/0, /*batch_dims=*/0,
                             weight_desc.get(), GetBasePtr(weight),
                             ids_desc.get(), GetBasePtr(ids), out_desc.get(),
                             GetBasePtr(out));
      return;
    }

    // Temporary table with the same dims as W; it receives the ScatterNd
    // result and is the source of the final gather.
    Tensor padded_weight(weight->type());
    padded_weight.mutable_data<T>(weight->dims(), ctx.GetPlace());
    MLUCnnlTensorDesc padded_weight_desc(padded_weight);

    // {1, 1} int32 tensor filled with padding_idx: the row to overwrite.
    Tensor pad_row_index;
    pad_row_index.mutable_data<int32_t>({1, 1}, ctx.GetPlace());
    auto pad_row_value = static_cast<int32_t>(padding_idx);
    MLUCnnlTensorDesc pad_row_index_desc(pad_row_index);
    MLUCnnl::Fill(ctx, CNNL_POINTER_MODE_HOST, &pad_row_value,
                  pad_row_index_desc.get(), GetBasePtr(&pad_row_index));

    // {1, W.dims()[1]} tensor filled with zero: the replacement row.
    Tensor zero_row;
    zero_row.mutable_data<T>(phi::make_ddim({1, weight->dims()[1]}),
                             ctx.GetPlace());
    auto zero_value = static_cast<T>(0);
    MLUCnnlTensorDesc zero_row_desc(zero_row);
    MLUCnnl::Fill(ctx, CNNL_POINTER_MODE_HOST, &zero_value,
                  zero_row_desc.get(), GetBasePtr(&zero_row));

    MLUCnnl::ScatterNd(ctx, CNNL_SCATTERND_UPDATE, pad_row_index_desc.get(),
                       GetBasePtr(&pad_row_index), zero_row_desc.get(),
                       GetBasePtr(&zero_row), weight_desc.get(),
                       GetBasePtr(weight), padded_weight_desc.get(),
                       GetBasePtr(&padded_weight));

    MLUCnnl::GatherFunctor(ctx, /*axis=*/0, /*batch_dims=*/0,
                           padded_weight_desc.get(),
                           GetBasePtr(&padded_weight), ids_desc.get(),
                           GetBasePtr(ids), out_desc.get(), GetBasePtr(out));
  }
};
// MLU backward kernel for lookup_table_v2.
//
// Computes the gradient of the table ("W@GRAD") from "Ids" and "Out@GRAD" by
// calling MLUCnnl::EmbeddingBackward. The ids are handed over as an int32
// tensor: when their dtype is not INT32 they are cast with MLUCnnl::Cast into
// a freshly allocated tensor, otherwise the input tensor is assigned as-is.
template <typename T>
class LookupTableV2GradMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *ids = ctx.Input<framework::LoDTensor>("Ids");
    auto *out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto *weight_grad =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("W"));
    weight_grad->mutable_data<T>(ctx.GetPlace());

    // The attribute is stored as int64 but EmbeddingBackward takes an int.
    int padding_idx = static_cast<int>(ctx.Attr<int64_t>("padding_idx"));

    Tensor ids_i32(ids->dtype());
    if (ids->dtype() == DataType::INT32) {
      ids_i32 = *ids;
    } else {
      ids_i32.mutable_data<int>(ids->dims(), ctx.GetPlace());
      MLUCnnlTensorDesc cast_src_desc(*ids);
      MLUCnnlTensorDesc cast_dst_desc(ids_i32);
      auto cast_type = GetCastDataType(ids->dtype(), DataType::INT32);
      MLUCnnl::Cast(ctx, cast_type, cast_src_desc.get(), GetBasePtr(ids),
                    cast_dst_desc.get(), GetBasePtr(&ids_i32));
    }

    MLUCnnlTensorDesc ids_i32_desc(ids_i32);
    MLUCnnlTensorDesc out_grad_desc(*out_grad);
    MLUCnnlTensorDesc weight_grad_desc(*weight_grad);
    // The `false` argument is EmbeddingBackward's scale_grad_by_freq flag.
    MLUCnnl::EmbeddingBackward(ctx, padding_idx, false, ids_i32_desc.get(),
                               GetBasePtr(&ids_i32), out_grad_desc.get(),
                               GetBasePtr(out_grad), weight_grad_desc.get(),
                               GetBasePtr(weight_grad));
  }
};
} // namespace operators
} // namespace paddle
// Short aliases used by the kernel registrations below.
namespace ops = paddle::operators;
namespace plat = paddle::platform;
// Register the lookup_table_v2 forward MLU kernel for float, int and float16.
REGISTER_OP_MLU_KERNEL(lookup_table_v2, ops::LookupTableV2MLUKernel<float>,
ops::LookupTableV2MLUKernel<int>,
ops::LookupTableV2MLUKernel<plat::float16>);
// Register the lookup_table_v2_grad MLU kernel for the same three types.
REGISTER_OP_MLU_KERNEL(lookup_table_v2_grad,
ops::LookupTableV2GradMLUKernel<float>,
ops::LookupTableV2GradMLUKernel<int>,
ops::LookupTableV2GradMLUKernel<plat::float16>);
......@@ -34,6 +34,12 @@ cnnlCastDataType_t GetCastDataType(const VT::Type& src_type,
return cast_type;
}
// Overload keyed on DataType: both types are translated with
// framework::TransToProtoVarType and the lookup is delegated to the
// proto-type overload of GetCastDataType.
cnnlCastDataType_t GetCastDataType(const DataType& src_type,
                                   const DataType& dst_type) {
  const auto src_proto_type = framework::TransToProtoVarType(src_type);
  const auto dst_proto_type = framework::TransToProtoVarType(dst_type);
  return GetCastDataType(src_proto_type, dst_proto_type);
}
bool MLUSupportsCast(const VT::Type& src_type, const VT::Type& dst_type) {
for (auto it = MLU_SUPPORTED_CAST_TYPE.begin();
it != MLU_SUPPORTED_CAST_TYPE.end(); ++it) {
......@@ -2713,17 +2719,16 @@ MLUCnnlTrigonDesc::~MLUCnnlTrigonDesc() {
output_desc, output));
}
/* static */ void MLUCnnl::ScatterNd(const ExecutionContext& ctx,
const cnnlTensorDescriptor_t indices_desc,
const void* indices,
const cnnlTensorDescriptor_t updates_desc,
const void* updates,
const cnnlTensorDescriptor_t output_desc,
void* output) {
/* static */ void MLUCnnl::ScatterNd(
const ExecutionContext& ctx, cnnlScatterNdMode_t mode,
const cnnlTensorDescriptor_t indices_desc, const void* indices,
const cnnlTensorDescriptor_t updates_desc, const void* updates,
const cnnlTensorDescriptor_t input_desc, const void* input,
const cnnlTensorDescriptor_t output_desc, void* output) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
PADDLE_ENFORCE_MLU_SUCCESS(cnnlScatterNd(handle, indices_desc, indices,
updates_desc, updates, output_desc,
output));
PADDLE_ENFORCE_MLU_SUCCESS(
cnnlScatterNd_v2(handle, mode, indices_desc, indices, updates_desc,
updates, input_desc, input, output_desc, output));
}
/* static */ void MLUCnnl::BitWise(
......@@ -2777,5 +2782,26 @@ MLUCnnlTrigonDesc::~MLUCnnlTrigonDesc() {
cnnlReciprocal(handle, input_desc, input, output_desc, output));
}
/* static */ void MLUCnnl::EmbeddingBackward(
const ExecutionContext& ctx, int padding_idx, bool scale_grad_by_freq,
const cnnlTensorDescriptor_t indices_desc, const void* indices,
const cnnlTensorDescriptor_t diff_desc, const void* diff,
const cnnlTensorDescriptor_t output_desc, void* output) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
size_t workspace_size;
PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetEmbeddingBackwardWorkspaceSize(
handle, diff_desc, output_desc, scale_grad_by_freq, &workspace_size));
auto& dev_ctx = GetDevCtxFromCTX(ctx);
Tensor workspace = ctx.AllocateTmpTensor<int8_t, MLUDeviceContext>(
{static_cast<int64_t>(workspace_size)}, dev_ctx);
void* workspace_ptr = workspace.mutable_data(ctx.GetPlace());
PADDLE_ENFORCE_MLU_SUCCESS(cnnlEmbeddingBackward(
handle, padding_idx, scale_grad_by_freq, indices_desc, indices, diff_desc,
diff, workspace_ptr, workspace_size, output_desc, output));
}
} // namespace operators
} // namespace paddle
......@@ -175,6 +175,10 @@ const std::map<std::pair<VT::Type, VT::Type>, cnnlCastDataType_t>
cnnlCastDataType_t GetCastDataType(const VT::Type& src_type,
const VT::Type& dst_type);
// DataType flavour of GetCastDataType: the definition converts both arguments
// with framework::TransToProtoVarType and forwards to the VT::Type overload.
cnnlCastDataType_t GetCastDataType(const DataType& src_type,
const DataType& dst_type);
bool MLUSupportsCast(const VT::Type& src_type, const VT::Type& dst_type);
cnnlDeviceType_t GetCnnlDev(int dev_ordinal);
......@@ -1202,11 +1206,13 @@ class MLUCnnl {
const void* k, const int k_int,
const cnnlTensorDescriptor_t output_desc, void* output);
static void ScatterNd(const ExecutionContext& ctx,
static void ScatterNd(const ExecutionContext& ctx, cnnlScatterNdMode_t mode,
const cnnlTensorDescriptor_t indices_desc,
const void* indices,
const cnnlTensorDescriptor_t updates_desc,
const void* updates,
const cnnlTensorDescriptor_t input_desc,
const void* input,
const cnnlTensorDescriptor_t output_desc, void* output);
static void BitWise(const ExecutionContext& ctx,
......@@ -1227,6 +1233,12 @@ class MLUCnnl {
const void* input,
const cnnlTensorDescriptor_t output_desc,
void* output);
// Wrapper around cnnlEmbeddingBackward. The definition queries the workspace
// size from CNNL, allocates a temporary int8 workspace tensor via `ctx`, and
// passes padding_idx / scale_grad_by_freq together with the indices, diff and
// output descriptors and buffers through to CNNL.
static void EmbeddingBackward(
const ExecutionContext& ctx, int padding_idx, bool scale_grad_by_freq,
const cnnlTensorDescriptor_t indices_desc, const void* indices,
const cnnlTensorDescriptor_t diff_desc, const void* diff,
const cnnlTensorDescriptor_t output_desc, void* output);
};
template <typename T>
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
// MLU forward kernel for unstack.
//
// Splits input "X" along `axis` into X.dims()[axis] pieces with
// MLUCnnl::Split and writes them to the outputs "Y". A negative `axis` is
// normalized by adding the rank of X. Each output is described to CNNL with
// the shape of X where the split axis is set to 1.
template <typename T>
class UnStackMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *x = ctx.Input<Tensor>("X");
    auto out = ctx.MultiOutput<Tensor>("Y");
    int axis = ctx.Attr<int>("axis");
    if (axis < 0) axis += x->dims().size();
    int num = x->dims()[axis];

    // The loop below indexes out[i] for every slice of X, so the output list
    // must provide exactly one tensor per slice.
    PADDLE_ENFORCE_EQ(
        out.size(), static_cast<size_t>(num),
        platform::errors::InvalidArgument(
            "The number of outputs Y of unstack (%d) must be equal to the "
            "size of X along axis %d (%d).",
            out.size(), axis, num));

    std::vector<MLUCnnlTensorDesc> out_descs;
    std::vector<cnnlTensorDescriptor_t> out_raw_descs;
    std::vector<void *> out_ptrs;
    // Sizes are known up front; reserve once instead of growing in the loop.
    out_descs.reserve(out.size());
    out_raw_descs.reserve(out.size());
    out_ptrs.reserve(out.size());

    // Every slice shares one descriptor shape: X's shape with dims[axis] = 1.
    std::vector<int64_t> new_dims = phi::vectorize(x->dims());
    new_dims[axis] = 1;
    for (int i = 0; i < num; i++) {
      out[i]->mutable_data<T>(ctx.GetPlace());
      out_descs.emplace_back(MLUCnnlTensorDesc(new_dims.size(), new_dims.data(),
                                               ToCnnlDataType<T>()));
      out_raw_descs.push_back(out_descs.back().get());
      out_ptrs.push_back(GetBasePtr(out[i]));
    }

    MLUCnnlTensorDesc x_desc(*x);
    MLUCnnl::Split(ctx, num, axis, x_desc.get(), GetBasePtr(x),
                   out_raw_descs.data(), out_ptrs.data());
  }
};
// MLU backward kernel for unstack.
//
// Concatenates the "Y@GRAD" inputs along `axis` into "X@GRAD" with
// MLUCnnl::Concat. A negative `axis` is normalized against the rank of the
// first input plus one. Each input is described to CNNL with a size-1
// dimension inserted at `axis`; an input whose dims are empty is described as
// a 1-D tensor of length 1.
template <typename T>
class UnStackGradMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto x = ctx.MultiInput<Tensor>(framework::GradVarName("Y"));
    auto *y = ctx.Output<Tensor>(framework::GradVarName("X"));

    // x[0] is read below to normalize a negative axis, so an empty input
    // list must be rejected before it is dereferenced.
    PADDLE_ENFORCE_GT(
        x.size(), static_cast<size_t>(0),
        platform::errors::InvalidArgument(
            "The input Y@GRAD of unstack_grad must contain at least one "
            "tensor."));

    int num = static_cast<int>(x.size());
    int axis = ctx.Attr<int>("axis");
    if (axis < 0) axis += (x[0]->dims().size() + 1);

    std::vector<MLUCnnlTensorDesc> x_descs;
    std::vector<cnnlTensorDescriptor_t> x_raw_descs;
    std::vector<const void *> x_ptrs;
    // Sizes are known up front; reserve once instead of growing in the loop.
    x_descs.reserve(x.size());
    x_raw_descs.reserve(x.size());
    x_ptrs.reserve(x.size());

    for (int i = 0; i < num; i++) {
      if (x[i]->dims().size() != 0) {
        // Re-insert the unstacked axis as a dimension of size 1.
        std::vector<int64_t> in_dims = phi::vectorize(x[i]->dims());
        in_dims.insert(in_dims.begin() + axis, 1);
        x_descs.emplace_back(MLUCnnlTensorDesc(in_dims.size(), in_dims.data(),
                                               ToCnnlDataType<T>()));
      } else {
        // Input without dims: describe it as a 1-D tensor of length 1.
        int input_dims = 1;
        x_descs.emplace_back(
            MLUCnnlTensorDesc(1, &input_dims, ToCnnlDataType<T>()));
      }
      x_raw_descs.push_back(x_descs.back().get());
      x_ptrs.push_back(GetBasePtr(x[i]));
    }

    y->mutable_data<T>(ctx.GetPlace());
    MLUCnnlTensorDesc y_desc(*y);
    MLUCnnl::Concat(ctx, num, axis, x_raw_descs.data(), x_ptrs.data(),
                    y_desc.get(), GetBasePtr(y));
  }
};
} // namespace operators
} // namespace paddle
// Short aliases used by the kernel registrations below.
namespace plat = paddle::platform;
namespace ops = paddle::operators;
// Register the unstack forward MLU kernel for float and float16.
REGISTER_OP_MLU_KERNEL(unstack, ops::UnStackMLUKernel<float>,
ops::UnStackMLUKernel<plat::float16>);
// Register the unstack_grad MLU kernel for float and float16.
REGISTER_OP_MLU_KERNEL(unstack_grad, ops::UnStackGradMLUKernel<float>,
ops::UnStackGradMLUKernel<plat::float16>);
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
paddle.enable_static()
SEED = 2022
class TestLookupTableV2(OpTest):
    """Checks the MLU lookup_table_v2 op against the NumPy reference ``w[ids]``.

    Subclasses customise the case through the ``init_dtype``, ``init_dims``
    and ``init_padding_idx`` hooks, which ``setUp`` calls before building the
    inputs.
    """

    def setUp(self):
        self.set_mlu()
        self.op_type = "lookup_table_v2"
        # Seed before the hooks run so that hooks drawing random values
        # (e.g. a random padding_idx in a subclass) are reproducible as well.
        np.random.seed(SEED)
        self.init_dtype()
        self.init_dims()
        self.init_padding_idx()

        w = np.random.random([self.vocab, self.dim]).astype(self.dtype)
        x = np.random.randint(
            0, self.vocab, size=(self.bsz, self.seqlen)).astype(self.ids_dtype)
        out = w[x]
        if self.padding_idx != -1:
            # Index with the (bsz, seqlen) mask directly; squeezing it would
            # drop an axis whenever bsz or seqlen is 1 and break the indexing.
            out[x == self.padding_idx] = np.zeros(self.dim)

        self.inputs = {
            'W': OpTest.np_dtype_to_fluid_dtype(w),
            'Ids': OpTest.np_dtype_to_fluid_dtype(x)
        }
        self.attrs = {
            'is_sparse': False,
            'is_distributed': False,
            'remote_prefetch': False,
            'padding_idx': self.padding_idx
        }
        self.outputs = {'Out': out}

    def set_mlu(self):
        """Marks the test class as an MLU test and selects MLU device 0."""
        self.__class__.use_mlu = True
        self.place = paddle.device.MLUPlace(0)

    def init_dtype(self):
        """Table dtype and ids dtype used by the case."""
        self.dtype = np.float32
        self.ids_dtype = np.int32

    def init_dims(self):
        """Ids shape (bsz, seqlen) and table shape (vocab, dim)."""
        self.bsz = 6
        self.seqlen = 8
        self.vocab = 10
        # embedding_dim is not multiple of 32
        self.dim = 20

    def init_padding_idx(self):
        """-1 means the op is run without a padding row."""
        self.padding_idx = -1

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        if self.dtype == np.float16:
            # float16 gets an explicit, looser relative error bound.
            self.check_grad_with_place(
                self.place, ['W'], 'Out', max_relative_error=0.01)
        else:
            self.check_grad_with_place(self.place, ['W'], 'Out')
class TestLookupTableV2FP16(TestLookupTableV2):
    """float16 table with int32 ids; flags the class with no_need_check_grad."""

    no_need_check_grad = True

    def set_mlu(self):
        test_cls = self.__class__
        test_cls.use_mlu = True
        self.place = paddle.device.MLUPlace(0)
        test_cls.no_need_check_grad = True

    def init_dtype(self):
        self.dtype, self.ids_dtype = np.float16, np.int32
class TestLookupTableV2Dim32(TestLookupTableV2):
    """Same case as the base class but with an embedding width of 64."""

    def init_dims(self):
        self.bsz, self.seqlen, self.vocab = 6, 8, 10
        # embedding_dim is multiple of 32
        self.dim = 64
class TestLookupTableV2Dim32FP16(TestLookupTableV2):
    """float16 table of width 64 with int64 ids; flags no_need_check_grad."""

    no_need_check_grad = True

    def set_mlu(self):
        test_cls = self.__class__
        test_cls.use_mlu = True
        self.place = paddle.device.MLUPlace(0)
        test_cls.no_need_check_grad = True

    def init_dtype(self):
        self.dtype, self.ids_dtype = np.float16, np.int64

    def init_dims(self):
        self.bsz, self.seqlen, self.vocab = 6, 8, 10
        self.dim = 64
class TestLookupTableV2WithPadding(TestLookupTableV2):
    """Base case with a padding index drawn at random from [0, vocab)."""

    def init_padding_idx(self):
        vocab_size = self.vocab
        self.padding_idx = np.random.randint(0, vocab_size)
class TestLookupTableV2WithPadding1(TestLookupTableV2):
    """Random padding index in [0, vocab) combined with int64 ids."""

    def init_dtype(self):
        self.dtype, self.ids_dtype = np.float32, np.int64

    def init_padding_idx(self):
        vocab_size = self.vocab
        self.padding_idx = np.random.randint(0, vocab_size)
# Run all test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import sys
sys.path.append("..")
from op_test import OpTest
import unittest
import paddle
paddle.enable_static()
class TestUnStackOpBase(OpTest):
    """Checks the MLU unstack op against ``np.split`` plus a reshape.

    Subclasses override ``initParameters`` to change ``axis`` (or
    ``input_dim``) after the defaults have been set.
    """

    def initDefaultParameters(self):
        self.input_dim = (5, 6, 7)
        self.axis = 0

    def initParameters(self):
        pass

    def get_y_names(self):
        """Output names y0, y1, ... — one per slice along ``self.axis``."""
        slice_count = self.input_dim[self.axis]
        return ['y{}'.format(i) for i in range(slice_count)]

    def setUp(self):
        self.initDefaultParameters()
        self.initParameters()
        self.op_type = 'unstack'
        self.set_mlu()
        self.init_dtype()

        slice_count = self.input_dim[self.axis]
        self.x = np.random.random(size=self.input_dim).astype(self.dtype)
        pieces = np.split(self.x, slice_count, self.axis)

        # Expected output shape: the input shape with the unstacked axis removed.
        piece_shape = list(self.input_dim)
        del piece_shape[self.axis]

        expected = [(name, np.reshape(piece, piece_shape))
                    for name, piece in zip(self.get_y_names(), pieces)]

        self.inputs = {'X': self.x}
        self.outputs = {'Y': expected}
        self.attrs = {'axis': self.axis, 'num': slice_count}

    def set_mlu(self):
        """Marks the test class as an MLU test and selects MLU device 0."""
        self.__class__.use_mlu = True
        self.place = paddle.device.MLUPlace(0)

    def init_dtype(self):
        self.dtype = np.float32

    def test_check_output(self):
        self.check_output_with_place(self.place)

    def test_check_grad(self):
        self.check_grad_with_place(self.place, ['X'], self.get_y_names())
class TestStackOp3(TestUnStackOpBase):
    """Unstack along axis -1."""

    def initParameters(self):
        self.axis = -1
class TestStackOp4(TestUnStackOpBase):
    """Unstack along axis -3."""

    def initParameters(self):
        self.axis = -3
class TestStackOp5(TestUnStackOpBase):
    """Unstack along axis 1."""

    def initParameters(self):
        self.axis = 1
class TestStackOp6(TestUnStackOpBase):
    """Unstack along axis 2."""

    def initParameters(self):
        self.axis = 2
# Run all test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册