From 206c44e2a8433c1011dee6363cae6f8928a5659f Mon Sep 17 00:00:00 2001 From: zhoukunsheng Date: Wed, 3 Jul 2019 10:50:57 +0800 Subject: [PATCH] add unique kernel and op (#17557) --- paddle/fluid/API.spec | 1 + paddle/fluid/operators/unique_op.cc | 61 ++++++++++++++ paddle/fluid/operators/unique_op.h | 83 +++++++++++++++++++ python/paddle/fluid/layers/nn.py | 40 +++++++++ .../fluid/tests/unittests/test_unique.py | 72 ++++++++++++++++ 5 files changed, 257 insertions(+) create mode 100644 paddle/fluid/operators/unique_op.cc create mode 100644 paddle/fluid/operators/unique_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_unique.py diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 929f6e44d43..3085c54bc30 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -202,6 +202,7 @@ paddle.fluid.layers.stack (ArgSpec(args=['x', 'axis'], varargs=None, keywords=No paddle.fluid.layers.pad2d (ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)), ('document', '3f3abdb795a5c2aad8c2312249551ce5')) paddle.fluid.layers.unstack (ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b0c4ca08d4eb295189e1b107c920d093')) paddle.fluid.layers.sequence_enumerate (ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b870fed41abd2aecf929ece65f555fa1')) +paddle.fluid.layers.unique (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=('int32',)), ('document', 'cab0b06e5683875f12f0efc62fa230a9')) paddle.fluid.layers.expand (ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '33bc4f6010282ffe044d77be7ba7c275')) paddle.fluid.layers.sequence_concat (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b992616c1afbd6b0c2a897ac23036381')) paddle.fluid.layers.scale (ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)), ('document', '463e4713806e5adaa4d20a41e2218453')) diff --git a/paddle/fluid/operators/unique_op.cc b/paddle/fluid/operators/unique_op.cc new file mode 100644 index 00000000000..08ce81d75e4 --- /dev/null +++ b/paddle/fluid/operators/unique_op.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/unique_op.h" + +namespace paddle { +namespace operators { + +class UniqueOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of UniqueOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of UniqueOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Index"), + "Output(Index) of UniqueOp should not be null."); + + auto in_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE(in_dims.size() == 1, "Input(X) should be a vector."); + + ctx->SetOutputDim("Out", {-1}); + ctx->SetOutputDim("Index", in_dims); + } +}; + +class UniqueOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "Input tensor. It should be a 1-D tensor."); + AddAttr("dtype", "data type for output index"); + AddOutput("Out", "A unique subsequence for input tensor."); + AddOutput("Index", + "An index tensor pointing to unique subsequence, which has " + "identical shape with input tensor and int64 dtype."); + AddComment(R"DOC( + Return a unique subsequence for 1-D input tensor, and an index tensor pointing to this unique subsequence +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(unique, ops::UniqueOp, ops::UniqueOpMaker); +REGISTER_OP_CPU_KERNEL(unique, ops::UniqueKernel, + ops::UniqueKernel, ops::UniqueKernel, + ops::UniqueKernel); diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h new file mode 100644 index 00000000000..b6e41347910 --- /dev/null +++ b/paddle/fluid/operators/unique_op.h @@ -0,0 +1,83 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +struct UniqueOpFunctor { + framework::Tensor* out_; + framework::Tensor* index_; + const framework::Tensor* in_; + + UniqueOpFunctor(framework::Tensor* out, framework::Tensor* index, + const framework::Tensor* in) + : out_(out), index_(index), in_(in) {} + + template + void apply() const { + auto* in_data = in_->data(); + auto* index_data = index_->mutable_data(platform::CPUPlace()); + + int64_t j = 0; + + // TODO(fangzeyang): Should optimize performance here. + std::unordered_map dict; + std::vector uniq; + + PADDLE_ENFORCE(in_->numel() < pow(2, 31), + "numel of Unique op input should less than INT_MAX"); + + for (auto i = 0; i < in_->numel(); i++) { + auto it = dict.find(in_data[i]); + if (it == dict.end()) { + dict.insert(std::make_pair(in_data[i], j)); + uniq.push_back(in_data[i]); + index_data[i] = static_cast(j); + j++; + } else { + index_data[i] = static_cast(it->second); + } + } + + out_->Resize(framework::make_ddim({static_cast(uniq.size())})); + auto out_data = out_->mutable_data(platform::CPUPlace()); + std::memcpy(out_data, uniq.data(), uniq.size() * sizeof(InT)); + } +}; + +template +class UniqueKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto data_type = static_cast( + context.Attr("dtype")); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); + auto* index = context.Output("Index"); + + framework::VisitDataType(data_type, UniqueOpFunctor(out, index, x)); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index a673458ca14..ae441cde4f4 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -145,6 +145,7 @@ __all__ = [ 'pad2d', 'unstack', 'sequence_enumerate', + 'unique', 'expand', 'sequence_concat', 'scale', @@ -12068,6 +12069,45 @@ def sign(x): return out +def unique(x, dtype='int32'): + """ + **unique** + + Return a unique tensor for `x` and an index tensor pointing to this unique tensor. + + Args: + x(Variable): A 1-D input tensor. + dtype(np.dtype|core.VarDesc.VarType|str): The type of index tensor: int32, int64. + + Returns: + tuple: (out, index). `out` is the unique tensor for `x`, with identical dtype to `x`, and \ + `index` is an index tensor pointing to `out`, by which user can recover the original `x` tensor. + + Examples: + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + x = fluid.assign(np.array([2, 3, 3, 1, 5, 3], dtype='int32')) + out, index = fluid.layers.unique(x) # out is [2, 3, 1, 5]; index is [0, 1, 1, 2, 3, 1] + """ + + helper = LayerHelper("unique", **locals()) + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + + index = helper.create_variable_for_type_inference(dtype) + + helper.append_op( + type='unique', + inputs={'X': x}, + attrs={'dtype': convert_np_dtype_to_dtype_(dtype)}, + outputs={'Out': [out], + 'Index': [index]}) + + return out, index + + def deformable_conv(input, offset, mask, diff --git a/python/paddle/fluid/tests/unittests/test_unique.py b/python/paddle/fluid/tests/unittests/test_unique.py new file mode 100644 index 00000000000..2e91574954e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_unique.py @@ -0,0 +1,72 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle.fluid.core as core +from paddle.fluid.op import Operator + + +class TestUniqueOp(OpTest): + def setUp(self): + self.op_type = "unique" + self.init_config() + + def test_check_output(self): + self.check_output() + + def init_config(self): + self.inputs = {'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), } + self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} + self.outputs = { + 'Out': np.array( + [2, 3, 1, 5], dtype='int64'), + 'Index': np.array( + [0, 1, 1, 2, 3, 1], dtype='int32') + } + + +class TestOne(TestUniqueOp): + def init_config(self): + self.inputs = {'X': np.array([2], dtype='int64'), } + self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} + self.outputs = { + 'Out': np.array( + [2], dtype='int64'), + 'Index': np.array( + [0], dtype='int32') + } + + +class TestRandom(TestUniqueOp): + def init_config(self): + self.inputs = {'X': np.random.randint(0, 100, (150, ), dtype='int64')} + self.attrs = {'dtype': int(core.VarDesc.VarType.INT64)} + np_unique, np_index, reverse_index = np.unique(self.inputs['X'], True, + True) + np_tuple = [(np_unique[i], np_index[i]) for i in range(len(np_unique))] + np_tuple.sort(key=lambda x: x[1]) + target_out = np.array([i[0] for i in np_tuple], dtype='int64') + target_index = np.array( + [list(target_out).index(i) for i in self.inputs['X']], + dtype='int64') + + self.outputs = {'Out': target_out, 'Index': target_index} + + +if __name__ == "__main__": + unittest.main() -- GitLab