未验证 提交 17133308 编写于 作者: C Chengmo 提交者: GitHub

Cherry-pick tdm_child op in Contrib (#23514)

* test=develop, cherry-pick tdm_child op
上级 c128ab70
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/tdm_child_op.h"
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/sampler.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
// Declares the proto of the tdm_child operator: its inputs (X, TreeInfo),
// outputs (Child, LeafMask), attributes (child_nums, dtype) and user-facing
// documentation.
class TDMChildOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Registers every input, output and attribute of the operator.
  void Make() override {
    AddInput("X",
             "X(Tensor), dtype support int32/int64, X variable is the "
             "node id of TDM-Tree");
    AddInput(
        "TreeInfo",
        "TreeInfo(Tensor), dtype support int32/int64, it stores the node "
        "information in the following format: item_id(shape=1), "
        "layer_id(shape=1), parent_id(shape=1), child_id(shape=child_nums)");
    // The whole description must be ONE string argument.
    // NOTE(review): AddAttr's third parameter is believed to be the bool
    // `generated` flag, so a separate third string literal would silently
    // convert to `true` instead of extending the comment -- confirm against
    // op_proto_maker.h.
    AddAttr<int>("child_nums",
                 "child_nums(int) "
                 "The child nums of one node, if the node hasn't enough child, "
                 "it should padding 0 until child nums equal to child_nums");
    AddOutput("Child",
              "Return the children's node_id of input node, "
              "if input don't have child, return 0");
    AddOutput("LeafMask",
              "LeafMask has the same shape with Child. "
              "If child is leaf node, LeafMask value = 1, else = 0");
    // The default value 2 is described as INT32 by the attribute comment.
    AddAttr<int>("dtype",
                 "(int, default INT32) "
                 "Output data type.")
        .SetDefault(2);
    // No quote characters inside the raw string: they would become part of
    // the generated documentation.
    AddComment(R"DOC(
**Tdm Child**
According to the input node_id on the given tree, return the corresponding child node_id and
whether child is a leaf node by LeafMask.)DOC");
  }
};
// Operator definition of tdm_child. Validates the inputs and the
// `child_nums` attribute, infers the output shapes and selects the kernel
// data type from input X.
class TDMChildOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Output shape of both Child and LeafMask is shape(X) + [child_nums].
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
                      platform::errors::InvalidArgument(
                          "Inputs(X) of TdmChild should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"), true,
                      platform::errors::InvalidArgument(
                          "Inputs(TreeInfo) of TdmChild should not be null."));

    int num_children = ctx->Attrs().Get<int>("child_nums");
    PADDLE_ENFORCE_GT(
        num_children, 0,
        platform::errors::InvalidArgument(
            "ValueError: The value of the 'child_nums' must greater than 0. "
            "But received child_nums value = %d, ",
            num_children));

    auto tree_dims = ctx->GetInputDim("TreeInfo");
    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE_EQ(
        tree_dims.size(), 2,
        platform::errors::InvalidArgument(
            "ShapeError: The dimensions of the 'tree info' must be 2. "
            "But received tree info's dimensions = %d, "
            "tree info's shape = [%s].",
            tree_dims.size(), tree_dims));

    // Append one trailing axis of size child_nums to the shape of X.
    auto out_shape = framework::vectorize(x_dims);
    out_shape.push_back(num_children);
    const auto out_ddim = framework::make_ddim(out_shape);
    ctx->SetOutputDim("Child", out_ddim);
    ctx->SetOutputDim("LeafMask", out_ddim);

    // Forward the LoD of X only when the Child output is a LoDTensor.
    const bool child_is_lod_tensor = ctx->GetOutputsVarType("Child")[0] ==
                                     framework::proto::VarType::LOD_TENSOR;
    if (child_is_lod_tensor) {
      ctx->ShareLoD("X", /*->*/ "Child");
      ctx->ShareLoD("X", /*->*/ "LeafMask");
    }
  }

 protected:
  // The kernel is chosen by the data type of input X.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
        ctx.device_context());
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the tdm_child operator with its shape/type inference class and its
// proto maker. EmptyGradOpMaker is supplied for both the static-graph OpDesc
// and the imperative OpBase.
// NOTE(review): presumably this means the op defines no gradient -- confirm.
REGISTER_OPERATOR(
tdm_child, ops::TDMChildOp, ops::TDMChildOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
// Register the CPU kernels for float, double, int and int64_t element types.
// NOTE(review): TDMChildKernel::Compute enforces that X holds INT32 or INT64,
// so the float/double instantiations look unreachable in practice -- confirm
// before relying on or removing them.
REGISTER_OP_CPU_KERNEL(
tdm_child, ops::TDMChildKernel<paddle::platform::CPUPlace, float>,
ops::TDMChildKernel<paddle::platform::CPUPlace, double>,
ops::TDMChildKernel<paddle::platform::CPUPlace, int>,
ops::TDMChildKernel<paddle::platform::CPUPlace, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gflags/gflags.h>
#include <cmath>
#include <fstream>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
// Convenience aliases for framework types inside paddle::operators.
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DDim = framework::DDim;
using LoD = framework::LoD;
// Looks up, for every node id stored in `input`, the `child_nums` child ids
// recorded in `tree_info` and writes them to `child`; `mask` receives 1 for
// every child whose item_id column is non-zero and 0 otherwise.
//
// Template parameters:
//   T     - element type of `input`.
//   InfoT - element type of `tree_info`.
//   OutT  - element type written to `child` and `mask`.
//
// `tree_info` is read as a row-major 2-D table whose row `r` describes node
// `r` as: item_id, layer_id, ancestor_id, child_id[0..child_nums).
//
// A node is treated as having no children when its id is 0 or when its first
// child slot is 0; all of its outputs are then 0.
//
// Raises InvalidArgument when a row is narrower than 3 + child_nums, when an
// input id is outside [0, node_nums), or when a stored child id is outside
// [0, node_nums).
template <typename T, typename InfoT = int, typename OutT = int>
void TDMChildInner(const framework::ExecutionContext &context,
                   const LoDTensor &input, const LoDTensor &tree_info,
                   LoDTensor *child, LoDTensor *mask) {
  const int child_nums = context.Attr<int>("child_nums");
  const auto info_dims = tree_info.dims();
  // 64-bit sizes and offsets: `node_id * length` must not be truncated.
  const int64_t node_nums = info_dims[0];
  const int64_t length = info_dims[1];
  const int64_t input_ids_num = input.numel();
  VLOG(4) << "TDM child op: input numel -> " << input_ids_num;

  // Every row has to hold the three header columns plus all child slots,
  // otherwise the child lookup below would run past the end of the row.
  const int64_t min_length = static_cast<int64_t>(child_nums) + 3;
  PADDLE_ENFORCE_GE(
      length, min_length,
      platform::errors::InvalidArgument(
          "The second dimension of Input(TreeInfo) of "
          "OP(fluid.contrib.layers.tdm_child) expected >= 3 + child_nums "
          "(= %ld), but got %ld. Please check the tree info.",
          min_length, length));

  const size_t expected_outputs =
      static_cast<size_t>(input_ids_num) * static_cast<size_t>(child_nums);
  std::vector<OutT> child_vec;
  std::vector<OutT> item_mask_vec;
  child_vec.reserve(expected_outputs);
  item_mask_vec.reserve(expected_outputs);

  auto *input_data = input.data<T>();
  auto *tree_info_data = tree_info.data<InfoT>();

  // TreeInfo: node_id : item_id; layer_id; ancestor_id; child_id
  for (int64_t i = 0; i < input_ids_num; ++i) {
    const int64_t node_id = static_cast<int64_t>(input_data[i]);
    PADDLE_ENFORCE_LT(
        node_id, node_nums,
        platform::errors::InvalidArgument(
            "input id of OP(fluid.contrib.layers.tdm_child) "
            "expected >= 0 and < %ld, but got %ld. Please check input "
            "value.",
            node_nums, node_id));
    PADDLE_ENFORCE_LE(
        0, node_id,
        platform::errors::InvalidArgument(
            "input id of OP(fluid.contrib.layers.tdm_child) "
            "expected >= 0 and < %ld, but got %ld. Please check input "
            "value.",
            node_nums, node_id));

    const InfoT *node_row = tree_info_data + node_id * length;
    const bool has_child = node_id != 0 && node_row[3] != 0;

    for (int slot = 0; slot < child_nums; ++slot) {
      if (!has_child) {
        child_vec.push_back(0);
        item_mask_vec.push_back(0);
        continue;
      }
      const int64_t child_id = static_cast<int64_t>(node_row[3 + slot]);
      // The child id indexes TreeInfo again below, so it must be a valid row.
      PADDLE_ENFORCE_EQ(
          child_id >= 0 && child_id < node_nums, true,
          platform::errors::InvalidArgument(
              "child id stored in Input(TreeInfo) of "
              "OP(fluid.contrib.layers.tdm_child) expected >= 0 and < %ld, "
              "but got %ld. Please check the tree info.",
              node_nums, child_id));
      child_vec.push_back(static_cast<OutT>(child_id));
      // Column 0 of the child's row is its item_id; non-zero marks an item.
      const bool child_is_item = tree_info_data[child_id * length] != 0;
      item_mask_vec.push_back(static_cast<OutT>(child_is_item ? 1 : 0));
    }
  }

  auto *child_data = child->mutable_data<OutT>(context.GetPlace());
  auto *leaf_mask_data = mask->mutable_data<OutT>(context.GetPlace());
  // Skip the copy for empty input: there is nothing to copy and indexing
  // element 0 of an empty vector is undefined.
  if (!child_vec.empty()) {
    memcpy(child_data, child_vec.data(), sizeof(OutT) * child_vec.size());
    memcpy(leaf_mask_data, item_mask_vec.data(),
           sizeof(OutT) * item_mask_vec.size());
  }
}
// CPU kernel of tdm_child. Checks that X, TreeInfo and the requested output
// dtype are all INT32 or INT64, then dispatches to the TDMChildInner
// instantiation matching the TreeInfo element type and the output dtype.
template <typename DeviceContext, typename T>
class TDMChildKernel : public framework::OpKernel<T> {
 public:
  // Reads inputs "X" and "TreeInfo" and attribute "dtype"; fills outputs
  // "Child" and "LeafMask". Raises InvalidArgument on an unsupported type.
  void Compute(const framework::ExecutionContext &ctx) const override {
    using VarType = framework::proto::VarType;
    // The only element types accepted for the inputs and the outputs.
    const auto is_int32_or_int64 = [](VarType::Type type) {
      return type == VarType::INT32 || type == VarType::INT64;
    };

    auto *input_var = ctx.InputVar("X");
    auto *tree_info_var = ctx.InputVar("TreeInfo");

    auto &input_tensor = input_var->Get<LoDTensor>();
    const auto input_type = input_tensor.type();
    PADDLE_ENFORCE_EQ(is_int32_or_int64(input_type), true,
                      platform::errors::InvalidArgument(
                          "Input(X) holds the wrong type, it holds %s, but "
                          "desires to be %s or %s",
                          paddle::framework::DataTypeToString(input_type),
                          paddle::framework::DataTypeToString(VarType::INT32),
                          paddle::framework::DataTypeToString(VarType::INT64)));

    auto &tree_info_tensor = tree_info_var->Get<LoDTensor>();
    const auto info_type = tree_info_tensor.type();
    PADDLE_ENFORCE_EQ(
        is_int32_or_int64(info_type), true,
        platform::errors::InvalidArgument(
            "Input(TreeInfo) holds the wrong type, it holds %s, but "
            "desires to be %s or %s",
            paddle::framework::DataTypeToString(info_type),
            paddle::framework::DataTypeToString(VarType::INT32),
            paddle::framework::DataTypeToString(VarType::INT64)));

    auto *child_var = ctx.OutputVar("Child");
    auto *leaf_mask_var = ctx.OutputVar("LeafMask");
    auto *child_tensor = child_var->GetMutable<framework::LoDTensor>();
    auto *leaf_mask_tensor = leaf_mask_var->GetMutable<framework::LoDTensor>();

    // The output element type comes from the "dtype" attribute, not from X.
    const auto output_type =
        static_cast<VarType::Type>(ctx.Attr<int>("dtype"));
    PADDLE_ENFORCE_EQ(is_int32_or_int64(output_type), true,
                      platform::errors::InvalidArgument(
                          "Output(Child) & Output(LeafMask) holds the wrong "
                          "type, it holds %s, but "
                          "desires to be %s or %s",
                          paddle::framework::DataTypeToString(output_type),
                          paddle::framework::DataTypeToString(VarType::INT32),
                          paddle::framework::DataTypeToString(VarType::INT64)));

    // Both types were validated above, so exactly one branch is taken.
    const bool info_is_int32 = info_type == VarType::INT32;
    const bool out_is_int32 = output_type == VarType::INT32;
    if (info_is_int32 && out_is_int32) {
      TDMChildInner<T, int, int>(ctx, input_tensor, tree_info_tensor,
                                 child_tensor, leaf_mask_tensor);
    } else if (!info_is_int32 && out_is_int32) {
      TDMChildInner<T, int64_t, int>(ctx, input_tensor, tree_info_tensor,
                                     child_tensor, leaf_mask_tensor);
    } else if (info_is_int32 && !out_is_int32) {
      TDMChildInner<T, int, int64_t>(ctx, input_tensor, tree_info_tensor,
                                     child_tensor, leaf_mask_tensor);
    } else {
      TDMChildInner<T, int64_t, int64_t>(ctx, input_tensor, tree_info_tensor,
                                         child_tensor, leaf_mask_tensor);
    }
  }
};
} // namespace operators
} // namespace paddle
......@@ -24,6 +24,9 @@ import inspect
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers import utils
from ... import unique_name
from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
from paddle.fluid.data_feeder import check_type, check_dtype, convert_dtype
from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_
__all__ = [
'fused_elemwise_activation',
......@@ -35,6 +38,7 @@ __all__ = [
'multiclass_nms2',
'search_pyramid_hash',
'shuffle_batch',
'tdm_child',
]
......@@ -808,3 +812,88 @@ def shuffle_batch(x, seed=None):
'SeedOut': seed},
attrs=op_attrs)
return out
def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
    """
    **Tdm Child**

    For every node_id in ``x``, look up the node's children on the given tree
    and report, through ``leaf_mask``, whether each child is a leaf node.

    .. code-block:: text

        Given:
            tree[[0], [1, 2], [3, 4], [5, 6]] # A binary tree with seven nodes
            x = [[2], [3]]
            node_nums = 7
            child_nums = 2

        we get:
            child = [[5, 6],
                     [0, 0]]
            leaf_mask = [[1, 1],
                         [0, 0]]

    Args:
        x(Variable): Variable holding the node_id information, dtype support
            int32/int64.
        node_nums(int): Number of total nodes.
        child_nums(int): Maximum number of child nodes per node.
        param_attr(ParamAttr): Property of the tdm-tree-info parameter.
            Default: None, which means the default weight parameter property
            is used. See usage for details in: ref: `api_fluid_ParamAttr`.
            The parameter has shape(node_nums, 3 + child_nums), dtype support
            int32/int64. The dimension[1] of tdm-tree-info contains:

            1. Item_id(int, shape(1)): if the node is a leaf node, its item_id
               corresponding to node_id, else 0.
            2. Layer_id(int, shape(1)): the layer the node is on.
            3. Parent_id(int, shape(1)): the node's parent node.
            4. Child_id(int, shape(child_nums)): node_id of every child of
               this node. If the node has fewer children, pad with 0 until
               the count equals child_nums.
        dtype(str): The data type of output child and leaf_mask, support
            int32/int64.

    Returns:
        tuple: A tuple including input node's child(Variable) and
        leaf_mask(Variable). If child is a leaf node, leaf_mask equals 1,
        otherwise it equals 0.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np
            x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
            tree_info = [[0,0,0,1,2],
                         [0,1,0,3,4],[0,1,0,5,6],
                         [0,2,1,0,0],[1,2,1,0,0],[2,2,2,0,0],[3,2,2,0,0]]
            tree_info_np = np.array(tree_info)
            tree_info_np = np.reshape(tree_info_np, (7,5))
            node_nums = 7
            child_nums = 2
            child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums,
                                    param_attr=fluid.ParamAttr(
                                        initializer=fluid.initializer.NumpyArrayInitializer(
                                                                                tree_info_np)))
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            xx = np.array([[2],[3]]).reshape((2,1)).astype("int32")
            child_res, leaf_mask_res = exe.run(feed={"x":xx}, fetch_list=[child, leaf_mask])
    """
    # Must stay the first statement: locals() has to contain exactly the
    # function arguments when it is forwarded to LayerHelper.
    helper = LayerHelper("tdm_child", **locals())
    check_dtype(dtype, 'dtype', ['int32', 'int64'],
                'fluid.contrib.layers.tdm_child')

    # The tree table is a parameter of shape (node_nums, 3 + child_nums); it
    # is all zeros unless param_attr supplies an initializer.
    tree_info = helper.create_parameter(
        attr=helper.param_attr,
        shape=[node_nums, 3 + child_nums],
        dtype=dtype,
        default_initializer=Constant(0))
    tree_info.stop_gradient = True

    child = helper.create_variable_for_type_inference(dtype=dtype)
    leaf_mask = helper.create_variable_for_type_inference(dtype=dtype)

    op_inputs = {'X': x, 'TreeInfo': tree_info}
    op_outputs = {'Child': child, 'LeafMask': leaf_mask}
    op_attrs = {
        'child_nums': child_nums,
        'dtype': convert_np_dtype_to_dtype_(dtype),
    }
    helper.append_op(
        type='tdm_child',
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs,
        stop_gradient=True)
    return child, leaf_mask
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import random
import six
def create_tdm_tree():
    """Build the 26-node tree-info table used by the tdm_child tests.

    Row ``i`` describes node ``i`` as
    ``[item_id, layer_id, parent_id, child_id_0, child_id_1]``; a child slot
    of 0 means "no child". A fresh list is returned on every call.
    """
    layer_0 = [[0, 0, 0, 1, 2]]
    layer_1 = [[0, 1, 0, 3, 4], [0, 1, 0, 5, 6]]
    layer_2 = [
        [0, 2, 1, 7, 8],
        [0, 2, 1, 9, 10],
        [0, 2, 2, 11, 12],
        [0, 2, 2, 13, 0],
    ]
    layer_3 = [
        [0, 3, 3, 14, 15],
        [0, 3, 3, 16, 17],
        [0, 3, 4, 18, 19],
        [0, 3, 4, 20, 21],
        [0, 3, 5, 22, 23],
        [0, 3, 5, 24, 25],
        [12, 3, 6, 0, 0],
    ]
    layer_4 = [
        [0, 4, 7, 0, 0],
        [1, 4, 7, 0, 0],
        [2, 4, 8, 0, 0],
        [3, 4, 8, 0, 0],
        [4, 4, 9, 0, 0],
        [5, 4, 9, 0, 0],
        [6, 4, 10, 0, 0],
        [7, 4, 10, 0, 0],
        [8, 4, 11, 0, 0],
        [9, 4, 11, 0, 0],
        [10, 4, 12, 0, 0],
        [11, 4, 12, 0, 0],
    ]
    return layer_0 + layer_1 + layer_2 + layer_3 + layer_4
class TestTDMChildOp(OpTest):
    """Checks the tdm_child op against a pure-Python reference lookup."""

    def setUp(self):
        self.__class__.op_type = "tdm_child"
        self.config()

        tree_info = create_tdm_tree()
        tree_info_np = np.array(tree_info).astype(self.info_type)
        x_np = np.random.randint(
            low=0, high=26, size=self.x_shape).astype(self.x_type)

        # Reference result: node 0 yields [0, 0]; any other node yields the
        # two child columns of its row. The mask is 1 where the child's
        # item_id column is non-zero.
        expected_children = []
        expected_mask = []
        for row in x_np:
            for node_id in row:
                if node_id == 0:
                    kids = [0, 0]
                else:
                    kids = [tree_info[node_id][3], tree_info[node_id][4]]
                expected_children.extend(kids)
                expected_mask.extend(
                    [int(tree_info[kid][0] != 0) for kid in kids])

        child = np.reshape(
            np.array(expected_children).astype(self.info_type),
            self.child_shape)
        leaf_mask = np.reshape(
            np.array(expected_mask).astype(self.info_type), self.child_shape)

        self.attrs = {'child_nums': 2}
        self.inputs = {'X': x_np, 'TreeInfo': tree_info_np}
        self.outputs = {'Child': child, 'LeafMask': leaf_mask}

    def config(self):
        """Set the shapes and dtypes used by this test case."""
        self.x_shape = (10, 20)
        self.child_shape = (10, 20, 2)
        self.x_type = 'int32'
        self.info_type = 'int32'

    def test_check_output(self):
        self.check_output()
class TestCase1(TestTDMChildOp):
    def config(self):
        """X is int32, TreeInfo is int64."""
        self.x_shape, self.child_shape = (10, 20), (10, 20, 2)
        self.x_type, self.info_type = 'int32', 'int64'
class TestCase2(TestTDMChildOp):
    def config(self):
        """X is int64, TreeInfo is int64."""
        self.x_shape, self.child_shape = (10, 20), (10, 20, 2)
        self.x_type, self.info_type = 'int64', 'int64'
class TestCase3(TestTDMChildOp):
    def config(self):
        """X is int64, TreeInfo is int32."""
        self.x_shape, self.child_shape = (10, 20), (10, 20, 2)
        self.x_type, self.info_type = 'int64', 'int32'
class TestCase4(TestTDMChildOp):
    def config(self):
        """Larger input shape; X and TreeInfo are both int32."""
        self.x_shape, self.child_shape = (100, 20), (100, 20, 2)
        self.x_type, self.info_type = 'int32', 'int32'
class TestTDMChildShape(unittest.TestCase):
    """Builds tdm_child through the Python layer API and runs it once."""

    def test_shape(self):
        x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1)

        # Initialise the tree-info parameter from the shared test tree.
        tree_info_np = np.array(create_tdm_tree()).astype('int32')
        tree_attr = fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(tree_info_np))
        child, leaf_mask = fluid.contrib.layers.tdm_child(
            x=x, node_nums=26, child_nums=2, param_attr=tree_attr)

        exe = fluid.Executor(place=fluid.CPUPlace())
        exe.run(fluid.default_startup_program())

        # Node ids 1..12 as a (12, 1) int32 column.
        node_ids = np.arange(1, 13).reshape((12, 1)).astype('int32')
        exe.run(feed={'x': node_ids})
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册