From 8c48c7daecbe8419cb34770f0b178c7e13ade123 Mon Sep 17 00:00:00 2001 From: yukavio <67678385+yukavio@users.noreply.github.com> Date: Thu, 20 Aug 2020 10:58:36 +0800 Subject: [PATCH] Add new one hot function in nn.functional (#26183) * add input.py file * write input.py * fix init file * add unit tests * fix dygraph and input shape * Revert "fix dygraph and input shape" This reverts commit 89ad8664124c1872640b68c0a4418c377ad6aa6f. * fixed pylint * fix deprecated * fix old op * fix old op * set check_dygraph=True * Revert "set check_dygraph=True" This reverts commit a8e93e33281f84e5f6fbc2b4bf05ea3e63611519. * test commit * fix doc and change test file name --- python/paddle/fluid/input.py | 2 + python/paddle/fluid/layers/nn.py | 2 + .../unittests/test_nn_functional_hot_op.py | 207 ++++++++++++++++++ python/paddle/nn/functional/__init__.py | 1 + python/paddle/nn/functional/input.py | 110 ++++++++++ 5 files changed, 322 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py create mode 100644 python/paddle/nn/functional/input.py diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py index 347927509e..15a3022f93 100644 --- a/python/paddle/fluid/input.py +++ b/python/paddle/fluid/input.py @@ -17,10 +17,12 @@ import warnings from .framework import Variable, in_dygraph_mode from .layer_helper import LayerHelper from .data_feeder import check_variable_and_dtype, check_dtype +from ..utils import deprecated __all__ = ['one_hot', 'embedding'] +@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') def one_hot(input, depth, allow_out_of_range=False): """ :alias_main: paddle.nn.functional.one_hot diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 3595406a01..41531f67f3 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -35,6 +35,7 @@ from . import utils from .. import unique_name from functools import reduce from .. import core +from ...utils import deprecated from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype import paddle from paddle.utils import deprecated @@ -5800,6 +5801,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): return loss +@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot') def one_hot(input, depth, allow_out_of_range=False): """ diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py new file mode 100644 index 0000000000..339f689998 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py @@ -0,0 +1,207 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import math +from op_test import OpTest +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.nn.functional as functional +import paddle.fluid.framework as framework +from paddle.fluid.framework import Program, program_guard + + +class TestOneHotOp(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0])]) + + out = np.zeros(shape=(np.product(x.shape), depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_attr(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), 1, + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, 0, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_default_dtype(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0])]) + + out = np.zeros(shape=(np.product(x.shape), depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_default_dtype_attr(OpTest): + def setUp(self): + self.op_type = 'one_hot_v2' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), 1, + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, 0, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output(check_dygraph=False) + + +class TestOneHotOp_exception(unittest.TestCase): + def setUp(self): + self.op_type = 'one_hot_v2' + self.depth = 10 + self.place = core.CPUPlace() + self.dimension = 12 + self.x = core.LoDTensor() + x_lod = [[4, 1, 3, 3]] + data = [np.random.randint(11, 20) for i in range(sum(x_lod[0]))] + data = np.array(data).astype('int').reshape([sum(x_lod[0]), 1]) + self.x.set(data, self.place) + self.x.set_recursive_sequence_lengths(x_lod) + + def test_check_output(self): + program = Program() + with program_guard(program): + x = fluid.layers.data( + name='x', shape=[self.dimension], dtype='float32', lod_level=1) + block = program.current_block() + one_hot_out = block.create_var( + name="one_hot_out", + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='float32') + block.append_op( + type='one_hot', + inputs={'X': x}, + attrs={'depth': self.depth}, + outputs={'Out': one_hot_out}) + exe = fluid.Executor(self.place) + + def run(): + exe.run(feed={'x': self.x}, + fetch_list=[one_hot_out], + return_numpy=False) + + self.assertRaises(core.EnforceNotMet, run) + + +class TestOneHotOpApi(unittest.TestCase): + def test_api(self): + num_classes = 10 + self._run(num_classes) + + def test_api_with_depthTensor(self): + num_classes = fluid.layers.assign(input=np.array([10], dtype=np.int32)) + self._run(num_classes) + + def test_api_with_dygraph(self): + num_classes = 10 + label = np.array( + [np.random.randint(0, num_classes - 1) + for i in range(6)]).reshape([6, 1]) + with fluid.dygraph.guard(): + one_hot_label = functional.one_hot( + x=fluid.dygraph.to_variable(label), num_classes=num_classes) + + def _run(self, num_classes): + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + one_hot_label = functional.one_hot(x=label, num_classes=num_classes) + + place = fluid.CPUPlace() + label_data = np.array([np.random.randint(0, 10 - 1) + for i in range(6)]).reshape([6, 1]) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + ret = exe.run(feed={'label': label_data, }, + fetch_list=[one_hot_label], + return_numpy=False) + + +class BadInputTestOnehotV2(unittest.TestCase): + def test_error(self): + with fluid.program_guard(fluid.Program()): + + def test_bad_x(): + label = fluid.layers.data( + name="label", + shape=[4], + append_batch_size=False, + dtype="float32") + one_hot_label = functional.one_hot(x=label, num_classes=4) + + self.assertRaises(TypeError, test_bad_x) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index bd1d334f66..855935f662 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -198,3 +198,4 @@ from .vision import shuffle_channel #DEFINE_ALIAS from .vision import space_to_depth #DEFINE_ALIAS from .vision import yolo_box #DEFINE_ALIAS from .vision import yolov3_loss #DEFINE_ALIAS +from .input import one_hot #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py new file mode 100644 index 0000000000..e51da1bd74 --- /dev/null +++ b/python/paddle/nn/functional/input.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import warnings +from ...fluid.framework import Variable, in_dygraph_mode +from ...fluid.layer_helper import LayerHelper +from ...fluid.layers import core +from ...fluid.data_feeder import check_variable_and_dtype, check_dtype + +__all__ = ['one_hot'] + + +def one_hot(x, num_classes, name=None): + """ + + The operator converts each id in the input 'x' to an one-hot vector with a + num_classes length. The value in the vector dimension corresponding to the id + is 1, and the value in the remaining dimension is 0. + + The shape of output Tensor is generated by appending num_classes dimension + behind the last dimension of the 'x' shape. + + .. code-block:: text + + Example 1: + + input: + x.shape = [4] + x.data = [1, 1, 3, 0] + num_classes = 4 + + output: + Out.shape = [4, 4] + Out.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]] + + Example 2: + + input: + x.shape = [4] + x.data = [1, 1, 5, 0] + num_classes = 4 + + output: Throw an exception for Illegal value + The second dimension in X is 5, which is greater than num_classes, + so it throws an exception. + + + Args: + x(Tensor): Tensor with shape :math:`[N_1, N_2, ..., N_k]` , + which contains at least one dimension. The data type is int32 or int64. + num_classes(int): An integer defining the num_classes of the one hot dimension. If input 'x' + is word id, num_classes is generally the dictionary size. + + Returns: + Tensor: The one-hot representations of 'x'. A Tensor with type float32. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4]. + label = fluid.data(name="label", shape=[4, 1], dtype="int64") + # label.shape = [4] + # label.data = [1, 1, 3, 0] + one_hot_label = fluid.one_hot(x=label, num_classes=4) + # one_hot_label.shape = [4, 4] + # one_hot_label.data = [[0., 1., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 1.], + [1., 0., 0., 0.]] + """ + + if in_dygraph_mode(): + return core.ops.one_hot_v2(x, 'depth', num_classes, + 'allow_out_of_range', False) + else: + check_variable_and_dtype(x, 'input', ['int32', 'int64'], 'one_hot_v2') + helper = LayerHelper("one_hot_v2", **locals()) + + one_hot_out = helper.create_variable_for_type_inference(dtype='float32') + if not isinstance(num_classes, Variable): + # user attribute + inputs = {'X': x} + attrs = {'depth': num_classes, 'allow_out_of_range': False} + else: + num_classes.stop_gradient = True + inputs = {'X': x, 'depth_tensor': num_classes} + attrs = {'allow_out_of_range': False} + helper.append_op( + type="one_hot_v2", + inputs=inputs, + attrs=attrs, + outputs={'Out': one_hot_out}, + stop_gradient=True) + return one_hot_out -- GitLab