diff --git a/paddle/fluid/operators/npu_op_runner.cc b/paddle/fluid/operators/npu_op_runner.cc index 4461941e85c2a5445a00c9a0c35f5f9c262d9984..a134b542c246e9cffc3cabe9878387798793138c 100644 --- a/paddle/fluid/operators/npu_op_runner.cc +++ b/paddle/fluid/operators/npu_op_runner.cc @@ -240,6 +240,38 @@ NpuOpRunner &NpuOpRunner::AddInput(std::vector &&dims) { return *this; } +NpuOpRunner &NpuOpRunner::AddInput(std::vector &&values) { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = + static_cast(pool.Get(platform::CPUPlace())); + Tensor host_tensor; + TensorFromVector(values, *dev_ctx, &host_tensor); + host_tensors_.emplace_back(host_tensor); + + // create aclTensorDesc + input_descs_.emplace_back(CreateTensorDesc(host_tensor, ACL_MEMTYPE_HOST)); + // create aclDataBuffer + input_buffers_.emplace_back(CreateDataBuffer(host_tensor)); + + return *this; +} + +NpuOpRunner &NpuOpRunner::AddInput(std::vector &&values) { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = + static_cast(pool.Get(platform::CPUPlace())); + Tensor host_tensor; + TensorFromVector(values, *dev_ctx, &host_tensor); + host_tensors_.emplace_back(host_tensor); + + // create aclTensorDesc + input_descs_.emplace_back(CreateTensorDesc(host_tensor, ACL_MEMTYPE_HOST)); + // create aclDataBuffer + input_buffers_.emplace_back(CreateDataBuffer(host_tensor)); + + return *this; +} + NpuOpRunner &NpuOpRunner::AddOutput(const Tensor &tensor) { // create aclTensorDesc output_descs_.emplace_back(CreateTensorDesc(tensor)); diff --git a/paddle/fluid/operators/npu_op_runner.h b/paddle/fluid/operators/npu_op_runner.h index 2257c209550d6056554f32cb2b7a36a277c15088..45e973970a956d82b228c2078de20c0de238fe39 100644 --- a/paddle/fluid/operators/npu_op_runner.h +++ b/paddle/fluid/operators/npu_op_runner.h @@ -71,6 +71,10 @@ class NpuOpRunner { NpuOpRunner &AddInput(std::vector &&dims); + NpuOpRunner &AddInput(std::vector &&values); + + NpuOpRunner &AddInput(std::vector &&values); + NpuOpRunner &AddOutput(const Tensor &tensor); NpuOpRunner &AddInputs(const std::vector &tensors); diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc new file mode 100644 index 0000000000000000000000000000000000000000..1cf99d844c8887c796ca0d9f50779ada2bae8b77 --- /dev/null +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -0,0 +1,82 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/one_hot_op.h" + +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +template +class OneHotNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = + ctx.template device_context(); + auto* in = ctx.Input("X"); + auto* out = ctx.Output("Out"); + int depth = ctx.Attr("depth"); + + if (ctx.HasInput("depth_tensor")) { + auto* depth_tensor = ctx.Input("depth_tensor"); + std::vector depth_data; + framework::TensorToVector(*depth_tensor, dev_ctx, &depth_data); + depth = depth_data[0]; + auto in_dims = in->dims(); + framework::DDim out_dims(in_dims); + out_dims[out_dims.size() - 1] = depth; + out->Resize(out_dims); + } + out->mutable_data(ctx.GetPlace()); + + float on_value = 1.0f, off_value = 0.0f; + if (in->type() == framework::proto::VarType::INT32) { + NpuOpRunner runner; + runner.SetType("OneHot") + .AddInput(*in) + .AddInput(std::vector({static_cast(depth)})) + .AddInput(std::vector({on_value})) + .AddInput(std::vector({off_value})) + .AddAttr("axis", -1) + .AddOutput(*out); + runner.Run(dev_ctx.stream()); + } else { + Tensor transformed_in; + transformed_in.mutable_data(in->dims(), dev_ctx.GetPlace()); + const auto& cast_runner = NpuOpRunner("Cast", {*in}, {transformed_in}, + {{"dst_type", ACL_INT32}}); + cast_runner.Run(dev_ctx.stream()); + NpuOpRunner runner; + runner.SetType("OneHot") + .AddInput(transformed_in) + .AddInput(std::vector({static_cast(depth)})) + .AddInput(std::vector({on_value})) + .AddInput(std::vector({off_value})) + .AddAttr("axis", -1) + .AddOutput(*out); + runner.Run(dev_ctx.stream()); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_NPU_KERNEL(one_hot, ops::OneHotNPUKernel, + ops::OneHotNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py new file mode 100644 index 0000000000000000000000000000000000000000..c92fffb2d26cbfb662ca14451cc66c9557c32614 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py @@ -0,0 +1,193 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import sys +import unittest +import numpy as np +sys.path.append("..") + +from op_test import OpTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.framework import Program, program_guard + +paddle.enable_static() + + +class TestOneHotOp(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +class TestOneHotOp_attr(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +class TestOneHotOp_default_dtype(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +class TestOneHotOp_default_dtype_attr(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +class TestOneHotOp_out_of_range(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + x_lod = [[4, 1, 3, 3]] + x = [np.random.choice([-1, depth]) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth, 'allow_out_of_range': True} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +class TestOneHotOp_dtype_int64(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int64').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main()