diff --git a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ace6107bab1878e2b393a1d7e327f7871dc16782
--- /dev/null
+++ b/paddle/fluid/operators/tdm_child_op.cc
@@ -0,0 +1,113 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/fluid/operators/tdm_child_op.h"
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/sampler.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace operators {
+class TDMChildOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() {
+    AddInput("X",
+             "X(Tensor), dtype support int32/int64, X variable is the "
+             "node id of TDM-Tree");
+    AddInput(
+        "TreeInfo",
+        "TreeInfo(Tensor), dtype support int32/int64, it stores the node "
+        "information in the following format: item_id(shape=1), "
+        "layer_id(shape=1), parent_id(shape=1), child_id(shape=child_nums)");
+    AddAttr<int>("child_nums",
+                 "child_nums(int) "
+                 "The child nums of one node; if a node doesn't have enough "
+                 "children, pad with 0 until the child count equals "
+                 "child_nums");
+    AddOutput("Child",
+              "Return the child node ids of the input nodes; "
+              "if an input node has no child, return 0");
+    AddOutput("LeafMask",
+              "LeafMask has the same shape as Child. "
+              "If a child is a leaf node, its LeafMask value is 1, else 0");
+    AddAttr<int>("dtype",
+                 "(int, default INT32) "
+                 "Output data type.")
+        .SetDefault(2);  // 2 is framework::proto::VarType::INT32
+    AddComment(R"DOC(
+**Tdm Child**
+According to the input node_id on the given tree, return the corresponding
+child node_id and whether the child is a leaf node by LeafMask.
+)DOC");
+  }
+};
+
+class TDMChildOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
+                      platform::errors::InvalidArgument(
+                          "Inputs(X) of TdmChild should not be null."));
+    PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"), true,
+                      platform::errors::InvalidArgument(
+                          "Inputs(TreeInfo) of TdmChild should not be null."));
+
+    int child_nums = ctx->Attrs().Get<int>("child_nums");
+    PADDLE_ENFORCE_GT(
+        child_nums, 0,
+        platform::errors::InvalidArgument(
+            "ValueError: The value of 'child_nums' must be greater than 0. "
+            "But received child_nums value = %d.",
+            child_nums));
+
+    auto info_dims = ctx->GetInputDim("TreeInfo");
+    auto input_dims = ctx->GetInputDim("X");
+
+    PADDLE_ENFORCE_EQ(
+        info_dims.size(), 2,
+        platform::errors::InvalidArgument(
+            "ShapeError: The dimensions of the 'tree info' must be 2. "
" + "But received tree info's dimensions = %d, " + "tree info's shape = [%s].", + info_dims.size(), info_dims)); + + auto output_dims = framework::vectorize(input_dims); + output_dims.push_back(child_nums); + ctx->SetOutputDim("Child", framework::make_ddim(output_dims)); + ctx->SetOutputDim("LeafMask", framework::make_ddim(output_dims)); + + if (ctx->GetOutputsVarType("Child")[0] == + framework::proto::VarType::LOD_TENSOR) { + ctx->ShareLoD("X", /*->*/ "Child"); + ctx->ShareLoD("X", /*->*/ "LeafMask"); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + return framework::OpKernelType(data_type, ctx.device_context()); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR( + tdm_child, ops::TDMChildOp, ops::TDMChildOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + tdm_child, ops::TDMChildKernel, + ops::TDMChildKernel, + ops::TDMChildKernel, + ops::TDMChildKernel); diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h new file mode 100644 index 0000000000000000000000000000000000000000..dd68a68dba7474dc00cbea1c6f334353082e00db --- /dev/null +++ b/paddle/fluid/operators/tdm_child_op.h @@ -0,0 +1,176 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/mixed_vector.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DDim = framework::DDim; +using LoD = framework::LoD; + +template +void TDMChildInner(const framework::ExecutionContext &context, + const LoDTensor &input, const LoDTensor &tree_info, + LoDTensor *child, LoDTensor *mask) { + auto child_nums = context.Attr("child_nums"); + auto info_dims = tree_info.dims(); + int node_nums = info_dims[0]; + int length = info_dims[1]; + + int input_ids_num = input.numel(); + VLOG(4) << "TDM child op: input numel -> " << input_ids_num; + + std::vector child_vec{}; + std::vector item_mask_vec{}; + + auto *input_data = input.data(); + auto *tree_info_data = tree_info.data(); + + // TreeInfo: node_id : item_id; layer_id; ancestor_id; child_id + for (int input_ids = 0; input_ids < input_ids_num; ++input_ids) { + PADDLE_ENFORCE_LT( + input_data[input_ids], node_nums, + platform::errors::InvalidArgument( + "input id of OP(fluid.contrib.layers.tdm_child) " + "expected >= 0 and < %ld, but got %ld. Please check input " + "value.", + node_nums, input_data[input_ids])); + PADDLE_ENFORCE_LE( + 0, input_data[input_ids], + platform::errors::InvalidArgument( + "input id of OP(fluid.contrib.layers.tdm_child) " + "expected >= 0 and < %ld, but got %ld. 
+            "Please check input value.",
+            node_nums, input_data[input_ids]));
+
+    // Node 0 is the padding node; a first-child id of 0 likewise means
+    // the node has no children.
+    bool has_child =
+        (input_data[input_ids] == 0 ||
+         tree_info_data[static_cast<int>(input_data[input_ids]) * length +
+                        3] == 0)
+            ? false
+            : true;
+
+    if (has_child) {
+      for (int child_ids = 0; child_ids < child_nums; ++child_ids) {
+        OutT child_id = static_cast<OutT>(
+            tree_info_data[static_cast<int>(input_data[input_ids]) * length +
+                           3 + child_ids]);
+        child_vec.push_back(child_id);
+        // A child is an item (leaf) iff its item_id (column 0) is non-zero.
+        OutT child_is_item = static_cast<OutT>(
+            tree_info_data[static_cast<int>(child_id) * length] == 0 ? 0 : 1);
+        item_mask_vec.push_back(child_is_item);
+      }
+    } else {
+      for (int child_ids = 0; child_ids < child_nums; ++child_ids) {
+        child_vec.push_back(0);
+        item_mask_vec.push_back(0);
+      }
+    }
+  }
+
+  int output_nums = child_vec.size();
+  auto *child_data = child->mutable_data<OutT>(context.GetPlace());
+  auto *leaf_mask_data = mask->mutable_data<OutT>(context.GetPlace());
+
+  memcpy(child_data, &child_vec[0], sizeof(OutT) * output_nums);
+  memcpy(leaf_mask_data, &item_mask_vec[0], sizeof(OutT) * output_nums);
+}
+
+template <typename DeviceContext, typename T>
+class TDMChildKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *input_var = ctx.InputVar("X");
+    auto *tree_info_var = ctx.InputVar("TreeInfo");
+
+    auto &input_tensor = input_var->Get<framework::LoDTensor>();
+    const auto &input_type = input_tensor.type();
+    bool input_type_match = input_type == framework::proto::VarType::INT32 ||
+                            input_type == framework::proto::VarType::INT64;
+    PADDLE_ENFORCE_EQ(input_type_match, true,
+                      platform::errors::InvalidArgument(
+                          "Input(X) holds the wrong type, it holds %s, but "
+                          "desires to be %s or %s",
+                          paddle::framework::DataTypeToString(input_type),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT32),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT64)));
+
+    auto &tree_info_tensor = tree_info_var->Get<framework::LoDTensor>();
+    const auto &info_type = tree_info_tensor.type();
+    bool info_type_match = info_type == framework::proto::VarType::INT32 ||
+                           info_type == framework::proto::VarType::INT64;
+    PADDLE_ENFORCE_EQ(
+        info_type_match, true,
+        platform::errors::InvalidArgument(
+            "Input(TreeInfo) holds the wrong type, it holds %s, but "
+            "desires to be %s or %s",
+            paddle::framework::DataTypeToString(info_type),
+            paddle::framework::DataTypeToString(
+                framework::proto::VarType::INT32),
+            paddle::framework::DataTypeToString(
+                framework::proto::VarType::INT64)));
+
+    auto *child_var = ctx.OutputVar("Child");
+    auto *leaf_mask_var = ctx.OutputVar("LeafMask");
+    auto *child_tensor = child_var->GetMutable<framework::LoDTensor>();
+    auto *leaf_mask_tensor = leaf_mask_var->GetMutable<framework::LoDTensor>();
+
+    // "dtype" stores a framework::proto::VarType::Type enum value.
+    auto output_type =
+        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
+    bool out_type_match = output_type == framework::proto::VarType::INT32 ||
+                          output_type == framework::proto::VarType::INT64;
+    PADDLE_ENFORCE_EQ(out_type_match, true,
+                      platform::errors::InvalidArgument(
+                          "Output(Child) & Output(LeafMask) holds the wrong "
+                          "type, it holds %s, but "
+                          "desires to be %s or %s",
+                          paddle::framework::DataTypeToString(output_type),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT32),
+                          paddle::framework::DataTypeToString(
+                              framework::proto::VarType::INT64)));
+
+    // Dispatch on (TreeInfo dtype, output dtype).
+    if (info_type == framework::proto::VarType::INT32 &&
+        output_type == framework::proto::VarType::INT32) {
+      TDMChildInner<T, int, int>(ctx, input_tensor, tree_info_tensor,
+                                 child_tensor, leaf_mask_tensor);
+    } else if (info_type == framework::proto::VarType::INT64 &&
+               output_type == framework::proto::VarType::INT32) {
+      TDMChildInner<T, int64_t, int>(ctx, input_tensor, tree_info_tensor,
+                                     child_tensor, leaf_mask_tensor);
+    } else if (info_type == framework::proto::VarType::INT32 &&
+               output_type == framework::proto::VarType::INT64) {
+      TDMChildInner<T, int, int64_t>(ctx, input_tensor, tree_info_tensor,
+                                     child_tensor, leaf_mask_tensor);
+    } else if (info_type == framework::proto::VarType::INT64 &&
+               output_type == framework::proto::VarType::INT64) {
+      TDMChildInner<T, int64_t, int64_t>(ctx, input_tensor, tree_info_tensor,
+                                         child_tensor, leaf_mask_tensor);
+    }
+  }
+};
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py
index d89b1cb41d8b5056cdfd57010db9791ea0c1ea1e..e9cb832e26bd7983a01b25c0b325eec9df30835d 100644
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -24,6 +24,9 @@ import inspect
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.fluid.layers import utils
 from ... import unique_name
+from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
+from paddle.fluid.data_feeder import check_type, check_dtype, convert_dtype
+from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_
 
 __all__ = [
     'fused_elemwise_activation',
@@ -35,6 +38,7 @@ __all__ = [
     'multiclass_nms2',
     'search_pyramid_hash',
     'shuffle_batch',
+    'tdm_child',
 ]
 
 
@@ -808,3 +812,88 @@ def shuffle_batch(x, seed=None):
                 'SeedOut': seed},
         attrs=op_attrs)
     return out
+
+
+def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
+    """
+    **Tdm Child**
+    According to the input node_id on the given tree, return the corresponding child node_id and
+    whether the child is a leaf node by leaf_mask value.
+
+    .. code-block:: text
+
+        Given:
+            tree[[0], [1, 2], [3, 4], [5, 6]] # A binary tree with seven nodes
+            x = [[2], [3]]
+            node_nums = 7
+            child_nums = 2
+
+        we get:
+            child = [[5, 6],
+                     [0, 0]]
+            leaf_mask = [[1, 1],
+                         [0, 0]]
+
+    Args:
+        x(Variable): Variable containing the node_id information, dtype support int32/int64.
+        node_nums(int): Number of total nodes.
+        child_nums(int): Maximum number of child nodes per node.
+        param_attr(ParamAttr): To specify the tdm-tree-info parameter property. Default: None,
+            which means the default weight parameter property is used. See usage for details in
+            :ref:`api_fluid_ParamAttr`. It should have shape (node_nums, 3 + child_nums), dtype
+            support int32/int64. Dimension[1] of tdm-tree-info contains the following:
+            1. Item_id(int, shape(1)): if the node is a leaf node, give the item_id corresponding
+               to its node_id, else give 0.
+            2. Layer_id(int, shape(1)): indicates which layer the node is on.
+            3. Parent_id(int, shape(1)): node_id of the node's parent.
+            4. Child_id(int, shape(child_nums)): node_ids of all children of this node.
+               If the number of children is insufficient, pad with 0 until it equals child_nums.
+        dtype(str): The data type of output child and leaf_mask, support int32/int64.
+
+    Returns:
+        tuple: A tuple including the input nodes' child(Variable) and leaf_mask(Variable).
+            If a child is a leaf node, its leaf_mask value equals 1, otherwise 0.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            import numpy as np
+
+            x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
+            tree_info = [[0, 0, 0, 1, 2],
+                         [0, 1, 0, 3, 4], [0, 1, 0, 5, 6],
+                         [0, 2, 1, 0, 0], [1, 2, 1, 0, 0],
+                         [2, 2, 2, 0, 0], [3, 2, 2, 0, 0]]
+            tree_info_np = np.array(tree_info)
+            tree_info_np = np.reshape(tree_info_np, (7, 5))
+            node_nums = 7
+            child_nums = 2
+            child, leaf_mask = fluid.contrib.layers.tdm_child(
+                x, node_nums, child_nums,
+                param_attr=fluid.ParamAttr(
+                    initializer=fluid.initializer.NumpyArrayInitializer(
+                        tree_info_np)))
+            place = fluid.CPUPlace()
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            xx = np.array([[2], [3]]).reshape((2, 1)).astype("int32")
+            child_res, leaf_mask_res = exe.run(
+                feed={"x": xx}, fetch_list=[child, leaf_mask])
+    """
+    helper = LayerHelper("tdm_child", **locals())
+    check_dtype(dtype, 'dtype', ['int32', 'int64'],
+                'fluid.contrib.layers.tdm_child')
+    c_dtype = convert_np_dtype_to_dtype_(dtype)
+    # tdm-tree-info is stored as a frozen parameter so it can be filled once
+    # (e.g. by NumpyArrayInitializer through param_attr) and then shared.
+    tree_info = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[node_nums, 3 + child_nums],
+        dtype=dtype,
+        default_initializer=Constant(0))
+    tree_info.stop_gradient = True
+
+    child = helper.create_variable_for_type_inference(dtype=dtype)
+    leaf_mask = helper.create_variable_for_type_inference(dtype=dtype)
+
+    helper.append_op(
+        type='tdm_child',
+        inputs={'X': x,
+                'TreeInfo': tree_info},
+        outputs={'Child': child,
+                 'LeafMask': leaf_mask},
+        attrs={'child_nums': child_nums,
+               'dtype': c_dtype},
+        stop_gradient=True)
+    return (child, leaf_mask)
diff --git a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..af7bbeaab05bcffce96f94efaf4e7a9a655ffaf3
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py
@@ -0,0 +1,170 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from op_test import OpTest
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+import paddle.fluid.layers as layers
+import paddle.fluid as fluid
+import random
+import six
+
+
+def create_tdm_tree():
+    """Create tdm tree info"""
+    tree_info = [
+        [0, 0, 0, 1, 2],
+        [0, 1, 0, 3, 4],
+        [0, 1, 0, 5, 6],
+        [0, 2, 1, 7, 8],
+        [0, 2, 1, 9, 10],
+        [0, 2, 2, 11, 12],
+        [0, 2, 2, 13, 0],
+        [0, 3, 3, 14, 15],
+        [0, 3, 3, 16, 17],
+        [0, 3, 4, 18, 19],
+        [0, 3, 4, 20, 21],
+        [0, 3, 5, 22, 23],
+        [0, 3, 5, 24, 25],
+        [12, 3, 6, 0, 0],
+        [0, 4, 7, 0, 0],
+        [1, 4, 7, 0, 0],
+        [2, 4, 8, 0, 0],
+        [3, 4, 8, 0, 0],
+        [4, 4, 9, 0, 0],
+        [5, 4, 9, 0, 0],
+        [6, 4, 10, 0, 0],
+        [7, 4, 10, 0, 0],
+        [8, 4, 11, 0, 0],
+        [9, 4, 11, 0, 0],
+        [10, 4, 12, 0, 0],
+        [11, 4, 12, 0, 0],
+    ]
+    return tree_info
+
+
+class TestTDMChildOp(OpTest):
+    def setUp(self):
+        self.__class__.op_type = "tdm_child"
+        self.config()
+        tree_info = create_tdm_tree()
+        tree_info_np = np.array(tree_info).astype(self.info_type)
+
+        x_np = np.random.randint(
+            low=0, high=26, size=self.x_shape).astype(self.x_type)
+        # Build the expected child / leaf_mask the same way the op does:
+        # columns 3 and 4 of tree_info hold the two child ids, and a child
+        # is a leaf iff its item_id (column 0) is non-zero.
+        children_res = []
+        leaf_mask_res = []
+        for batch in x_np:
+            for node in batch:
+                children = []
+                if node != 0:
+                    children.append(tree_info[node][3])
+                    children.append(tree_info[node][4])
+                else:
+                    children.append(0)
+                    children.append(0)
+                mask = []
+                for child in children:
+                    m = int(tree_info[child][0] != 0)
+                    mask.append(m)
+                children_res += children
+                leaf_mask_res += mask
+        children_res_np = np.array(children_res).astype(self.info_type)
+        leaf_mask_res_np = np.array(leaf_mask_res).astype(self.info_type)
+
+        child = np.reshape(children_res_np, self.child_shape)
+        leaf_mask = np.reshape(leaf_mask_res_np, self.child_shape)
+
+        self.attrs = {'child_nums': 2}
+        self.inputs = {'X': x_np, 'TreeInfo': tree_info_np}
+        self.outputs = {'Child': child, 'LeafMask': leaf_mask}
+
+    def config(self):
+        """set test shape & type"""
+        self.x_shape = (10, 20)
+        self.child_shape = (10, 20, 2)
+        self.x_type = 'int32'
+        self.info_type = 'int32'
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestCase1(TestTDMChildOp):
+    def config(self):
+        """check int32 x, int64 tree_info"""
+        self.x_shape = (10, 20)
+        self.child_shape = (10, 20, 2)
+        self.x_type = 'int32'
+        self.info_type = 'int64'
+
+
+class TestCase2(TestTDMChildOp):
+    def config(self):
+        """check int64 x, int64 tree_info"""
+        self.x_shape = (10, 20)
+        self.child_shape = (10, 20, 2)
+        self.x_type = 'int64'
+        self.info_type = 'int64'
+
+
+class TestCase3(TestTDMChildOp):
+    def config(self):
+        """check int64 x, int32 tree_info"""
+        self.x_shape = (10, 20)
+        self.child_shape = (10, 20, 2)
+        self.x_type = 'int64'
+        self.info_type = 'int32'
+
+
+class TestCase4(TestTDMChildOp):
+    def config(self):
+        """check large shape"""
+        self.x_shape = (100, 20)
+        self.child_shape = (100, 20, 2)
+        self.x_type = 'int32'
+        self.info_type = 'int32'
+
+
+class TestTDMChildShape(unittest.TestCase):
+    def test_shape(self):
+        x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1)
+        tdm_tree_info = create_tdm_tree()
+        tree_info_np = np.array(tdm_tree_info).astype('int32')
+
+        child, leaf_mask = fluid.contrib.layers.tdm_child(
+            x=x,
+            node_nums=26,
+            child_nums=2,
+            param_attr=fluid.ParamAttr(
+                initializer=fluid.initializer.NumpyArrayInitializer(
+                    tree_info_np)))
+
+        place = fluid.CPUPlace()
+        exe = fluid.Executor(place=place)
+        exe.run(fluid.default_startup_program())
+
+        feed = {
+            'x': np.array([[1], [2], [3],
+                           [4], [5], [6], [7], [8], [9], [10],
+                           [11], [12]]).astype('int32')
+        }
+        exe.run(feed=feed)
+
+
+if __name__ == "__main__":
+    unittest.main()
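
For reference, the lookup that TDMChildInner performs can be reproduced by hand. The sketch below is a plain-Python rendering of that lookup, mirroring the expected-output computation in TestTDMChildOp.setUp; the helper name tdm_child_ref is hypothetical and is not part of the patch:

.. code-block:: python

    def tdm_child_ref(node_ids, tree_info, child_nums):
        """Reference lookup: return (children, leaf_mask) as flat lists."""
        children, leaf_mask = [], []
        for node in node_ids:
            # Node 0 is the padding node; a first-child id of 0 also means
            # the node has no children.
            if node == 0 or tree_info[node][3] == 0:
                children += [0] * child_nums
                leaf_mask += [0] * child_nums
            else:
                for i in range(child_nums):
                    child = tree_info[node][3 + i]
                    children.append(child)
                    # A child is a leaf iff its item_id (column 0) is non-zero.
                    leaf_mask.append(0 if tree_info[child][0] == 0 else 1)
        return children, leaf_mask

    # With the 7-node tree from the tdm_child docstring:
    tree_info = [[0, 0, 0, 1, 2], [0, 1, 0, 3, 4], [0, 1, 0, 5, 6],
                 [0, 2, 1, 0, 0], [1, 2, 1, 0, 0], [2, 2, 2, 0, 0],
                 [3, 2, 2, 0, 0]]
    assert tdm_child_ref([2, 3], tree_info, 2) == ([5, 6, 0, 0], [1, 1, 0, 0])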