未验证 提交 a2e9af56 编写于 作者: C Chengmo 提交者: GitHub

Add Tdm child OP in contrib (#23241)

* add tdm child op
上级 4955c97e
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/tdm_child_op.h"
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/sampler.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace operators {
// Declares the proto of the tdm_child operator: its inputs (X, TreeInfo),
// outputs (Child, LeafMask), attributes (child_nums, dtype) and user-facing
// documentation.
class TDMChildOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "X(Tensor), dtype support int32/int64, X variable is the "
             "node id of TDM-Tree");
    AddInput(
        "TreeInfo",
        "TreeInfo(Tensor), dtype support int32/int64, it stores the node "
        "information in the following format: item_id(shape=1), "
        "layer_id(shape=1), parent_id(shape=1), child_id(shape=child_nums)");
    // The whole description must be ONE string argument. A separate third
    // string literal would bind to AddAttr's trailing `bool generated`
    // parameter (pointer -> true), silently dropping the description and
    // marking the attribute as auto-generated.
    AddAttr<int>("child_nums",
                 "child_nums(int), "
                 "The child nums of one node, if the node hasn't enough child, "
                 "it should padding 0 until child nums equal to child_nums");
    AddOutput("Child",
              "Return the children's node_id of input node, "
              "if input don't have child, return 0");
    AddOutput("LeafMask",
              "LeafMask has the same shape with Child. "
              "If child is leaf node, LeafMask value = 1, else = 0");
    // NOTE(review): the description states the default is INT32, so 2 is
    // presumably framework::proto::VarType::INT32 -- confirm against the
    // VarType enum.
    AddAttr<int>("dtype",
                 "(int, default INT32) "
                 "Output data type.")
        .SetDefault(2);
    AddComment(R"DOC(
**Tdm Child**
According to the input node_id on the given tree, return the corresponding child node_id and
whether child is a leaf node by LeafMask.
)DOC");
  }
};
// Operator definition of tdm_child. It validates the inputs and the
// child_nums attribute, infers the output shapes, and selects the kernel from
// the data type of X.
class TDMChildOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Child and LeafMask both get the shape of X with one extra trailing axis
  // of size child_nums.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
                      platform::errors::InvalidArgument(
                          "Inputs(X) of TdmChild should not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasInput("TreeInfo"), true,
                      platform::errors::InvalidArgument(
                          "Inputs(TreeInfo) of TdmChild should not be null."));

    const int children_per_node = ctx->Attrs().Get<int>("child_nums");
    PADDLE_ENFORCE_GT(
        children_per_node, 0,
        platform::errors::InvalidArgument(
            "ValueError: The value of the 'child_nums' must greater than 0. "
            "But received child_nums value = %d, ",
            children_per_node));

    const auto tree_info_dims = ctx->GetInputDim("TreeInfo");
    const auto x_dims = ctx->GetInputDim("X");
    // TreeInfo has to be a 2-D table.
    PADDLE_ENFORCE_EQ(
        tree_info_dims.size(), 2,
        platform::errors::InvalidArgument(
            "ShapeError: The dimensions of the 'tree info' must be 2. "
            "But received tree info's dimensions = %d, "
            "tree info's shape = [%s].",
            tree_info_dims.size(), tree_info_dims));

    // Output shape = shape(X) + [child_nums]; shared by both outputs.
    auto out_shape = framework::vectorize(x_dims);
    out_shape.push_back(children_per_node);
    const auto out_ddim = framework::make_ddim(out_shape);
    ctx->SetOutputDim("Child", out_ddim);
    ctx->SetOutputDim("LeafMask", out_ddim);

    // Forward the LoD of X only when the output variable is a LoDTensor.
    const bool child_is_lod_tensor = ctx->GetOutputsVarType("Child")[0] ==
                                     framework::proto::VarType::LOD_TENSOR;
    if (child_is_lod_tensor) {
      ctx->ShareLoD("X", /*->*/ "Child");
      ctx->ShareLoD("X", /*->*/ "LeafMask");
    }
  }

 protected:
  // The kernel is chosen by the data type of X on the current device.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto x_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
    return framework::OpKernelType(x_dtype, ctx.device_context());
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the tdm_child operator together with its proto maker. Both the
// static-graph (OpDesc) and imperative (OpBase) grad makers are the empty
// maker.
REGISTER_OPERATOR(
tdm_child, ops::TDMChildOp, ops::TDMChildOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
// Register the CPU kernels; the second template argument is the element type
// of X.
// NOTE(review): float and double kernels are registered, yet
// TDMChildKernel::Compute rejects any X that is not int32/int64 -- confirm
// whether these two instantiations are needed.
REGISTER_OP_CPU_KERNEL(
tdm_child, ops::TDMChildKernel<paddle::platform::CPUPlace, float>,
ops::TDMChildKernel<paddle::platform::CPUPlace, double>,
ops::TDMChildKernel<paddle::platform::CPUPlace, int>,
ops::TDMChildKernel<paddle::platform::CPUPlace, int64_t>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <gflags/gflags.h>
#include <cmath>
#include <fstream>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/mixed_vector.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using DDim = framework::DDim;
using LoD = framework::LoD;
// For every node id in `input`, looks up the node's children in `tree_info`
// and writes them to `child`; `mask` receives 1 for each child whose item_id
// column is non-zero and 0 otherwise.
//
// Each row of `tree_info` is indexed by node id and laid out as
//   [item_id, layer_id, ancestor_id, child_id_0 ... child_id_{child_nums-1}].
// Node id 0, and any node whose first child slot is 0, is treated as having
// no children: its outputs are filled with zeros.
//
// Template parameters:
//   T     - element type of `input`
//   InfoT - element type of `tree_info`
//   OutT  - element type of both outputs
//
// Raises InvalidArgument when `tree_info` has fewer than 3 + child_nums
// columns, when an input id is outside [0, node_nums), or when a child id
// stored in `tree_info` is outside [0, node_nums).
template <typename T, typename InfoT = int, typename OutT = int>
void TDMChildInner(const framework::ExecutionContext &context,
                   const LoDTensor &input, const LoDTensor &tree_info,
                   LoDTensor *child, LoDTensor *mask) {
  auto child_nums = context.Attr<int>("child_nums");
  auto info_dims = tree_info.dims();
  int node_nums = info_dims[0];
  int length = info_dims[1];
  // Each row is read up to column 3 + child_nums - 1; a narrower table would
  // make the lookups below run past the row (and possibly the buffer).
  PADDLE_ENFORCE_GE(
      length, 3 + child_nums,
      platform::errors::InvalidArgument(
          "The second dimension of Input(TreeInfo) of "
          "OP(fluid.contrib.layers.tdm_child) should be at least "
          "3 + child_nums = %d, but received %d.",
          3 + child_nums, length));

  const int64_t input_ids_num = input.numel();
  VLOG(4) << "TDM child op: input numel -> " << input_ids_num;

  std::vector<OutT> child_vec{};
  std::vector<OutT> item_mask_vec{};
  // The final size is known up front, so allocate once.
  const size_t expected_outputs =
      static_cast<size_t>(input_ids_num) * static_cast<size_t>(child_nums);
  child_vec.reserve(expected_outputs);
  item_mask_vec.reserve(expected_outputs);

  auto *input_data = input.data<T>();
  auto *tree_info_data = tree_info.data<InfoT>();

  // TreeInfo: node_id : item_id; layer_id; ancestor_id; child_id
  for (int64_t input_ids = 0; input_ids < input_ids_num; ++input_ids) {
    PADDLE_ENFORCE_LT(
        input_data[input_ids], node_nums,
        platform::errors::InvalidArgument(
            "input id of OP(fluid.contrib.layers.tdm_child) "
            "expected >= 0 and < %ld, but got %ld. Please check input "
            "value.",
            node_nums, input_data[input_ids]));
    PADDLE_ENFORCE_LE(
        0, input_data[input_ids],
        platform::errors::InvalidArgument(
            "input id of OP(fluid.contrib.layers.tdm_child) "
            "expected >= 0 and < %ld, but got %ld. Please check input "
            "value.",
            node_nums, input_data[input_ids]));

    // 64-bit offset so node_id * length cannot overflow `int` on big trees.
    const int64_t row_offset =
        static_cast<int64_t>(input_data[input_ids]) * length;
    const bool has_child =
        input_data[input_ids] != 0 && tree_info_data[row_offset + 3] != 0;

    if (!has_child) {
      child_vec.insert(child_vec.end(), child_nums, static_cast<OutT>(0));
      item_mask_vec.insert(item_mask_vec.end(), child_nums,
                           static_cast<OutT>(0));
      continue;
    }

    for (int child_ids = 0; child_ids < child_nums; ++child_ids) {
      const InfoT child_node = tree_info_data[row_offset + 3 + child_ids];
      // The child id is used as a row index below, so it must be a valid
      // node id; otherwise the item_id lookup reads out of bounds.
      PADDLE_ENFORCE_EQ(
          child_node >= 0 && child_node < node_nums, true,
          platform::errors::InvalidArgument(
              "child id stored in Input(TreeInfo) of "
              "OP(fluid.contrib.layers.tdm_child) expected >= 0 and < %ld, "
              "but got %ld. Please check the tree info.",
              node_nums, child_node));
      child_vec.push_back(static_cast<OutT>(child_node));

      // A non-zero item_id in the child's row marks it in the mask.
      const int64_t child_row = static_cast<int64_t>(child_node) * length;
      item_mask_vec.push_back(
          static_cast<OutT>(tree_info_data[child_row] == 0 ? 0 : 1));
    }
  }

  auto *child_data = child->mutable_data<OutT>(context.GetPlace());
  auto *leaf_mask_data = mask->mutable_data<OutT>(context.GetPlace());
  // Nothing to copy for an empty input; also avoids indexing element 0 of an
  // empty vector, which is undefined behaviour.
  if (!child_vec.empty()) {
    memcpy(child_data, child_vec.data(), sizeof(OutT) * child_vec.size());
    memcpy(leaf_mask_data, item_mask_vec.data(),
           sizeof(OutT) * item_mask_vec.size());
  }
}
// CPU kernel of tdm_child. It verifies that X, TreeInfo and the requested
// output dtype are all int32 or int64, then dispatches to the TDMChildInner
// instantiation matching the TreeInfo and output element types. T is the
// element type of X.
template <typename DeviceContext, typename T>
class TDMChildKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    using VarType = framework::proto::VarType;
    // Only 32-bit and 64-bit integers are accepted everywhere in this op.
    const auto is_int32_or_int64 = [](VarType::Type t) {
      return t == VarType::INT32 || t == VarType::INT64;
    };

    auto *x_var = ctx.InputVar("X");
    auto *info_var = ctx.InputVar("TreeInfo");

    auto &x = x_var->Get<LoDTensor>();
    const auto x_type = x.type();
    PADDLE_ENFORCE_EQ(
        is_int32_or_int64(x_type), true,
        platform::errors::InvalidArgument(
            "Input(X) holds the wrong type, it holds %s, but "
            "desires to be %s or %s",
            paddle::framework::DataTypeToString(x_type),
            paddle::framework::DataTypeToString(VarType::INT32),
            paddle::framework::DataTypeToString(VarType::INT64)));

    auto &tree_info = info_var->Get<LoDTensor>();
    const auto info_type = tree_info.type();
    PADDLE_ENFORCE_EQ(
        is_int32_or_int64(info_type), true,
        platform::errors::InvalidArgument(
            "Input(TreeInfo) holds the wrong type, it holds %s, but "
            "desires to be %s or %s",
            paddle::framework::DataTypeToString(info_type),
            paddle::framework::DataTypeToString(VarType::INT32),
            paddle::framework::DataTypeToString(VarType::INT64)));

    auto *child_out_var = ctx.OutputVar("Child");
    auto *mask_out_var = ctx.OutputVar("LeafMask");
    auto *child_out = child_out_var->GetMutable<framework::LoDTensor>();
    auto *mask_out = mask_out_var->GetMutable<framework::LoDTensor>();

    // The output element type comes from the `dtype` attribute.
    const auto out_type = static_cast<VarType::Type>(ctx.Attr<int>("dtype"));
    PADDLE_ENFORCE_EQ(
        is_int32_or_int64(out_type), true,
        platform::errors::InvalidArgument(
            "Ouput(Child) & Output(LeafMask) holds the wrong "
            "type, it holds %s, but "
            "desires to be %s or %s",
            paddle::framework::DataTypeToString(out_type),
            paddle::framework::DataTypeToString(VarType::INT32),
            paddle::framework::DataTypeToString(VarType::INT64)));

    // Both types were validated above, so "not INT32" means INT64 here.
    const bool info_is_int32 = info_type == VarType::INT32;
    const bool out_is_int32 = out_type == VarType::INT32;
    if (info_is_int32) {
      if (out_is_int32) {
        TDMChildInner<T, int, int>(ctx, x, tree_info, child_out, mask_out);
      } else {
        TDMChildInner<T, int, int64_t>(ctx, x, tree_info, child_out, mask_out);
      }
    } else {
      if (out_is_int32) {
        TDMChildInner<T, int64_t, int>(ctx, x, tree_info, child_out, mask_out);
      } else {
        TDMChildInner<T, int64_t, int64_t>(ctx, x, tree_info, child_out,
                                           mask_out);
      }
    }
  }
};
} // namespace operators
} // namespace paddle
......@@ -24,15 +24,16 @@ import inspect
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.layers import utils
from ... import unique_name
from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype
from paddle.fluid.framework import Variable
from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_
import warnings
__all__ = [
'fused_elemwise_activation', 'sequence_topk_avg_pooling', 'var_conv_2d',
'match_matrix_tensor', 'tree_conv', 'fused_embedding_seq_pool',
'multiclass_nms2', 'search_pyramid_hash', 'shuffle_batch', 'partial_concat',
'partial_sum'
'partial_sum', 'tdm_child'
]
......@@ -114,20 +115,21 @@ def var_conv_2d(input,
"""
The var_conv_2d layer calculates the output base on the :attr:`input` with variable length,
row, col, input channel, filter size and strides. Both :attr:`input`, :attr:`row`,
and :attr:`col` are 1-level LodTensor. The convolution operation is same as conv2d layer with
padding. Besides, input.dims[1] should be 1.
and :attr:`col` are 1-level LodTensor. The convolution operation is same as conv2d layer with
padding. Besides, input.dims[1] should be 1.
.. code-block:: text
If input_channel is 2 and given row lodTensor and col lodTensor as follows:
row.lod = [[5, 4]]
col.lod = [[6, 7]]
input is a lodTensor:
input is a lodTensor:
input.lod = [[60, 56]] # where 60 = input_channel * 5 * 6
input.dims = [116, 1] # where 116 = 60 + 56
If set output_channel is 3, filter_size is [3, 3], stride is [1, 1]:
output.lod = [[90, 84]] # where 90 = output_channel * [(5-1)/stride + 1] * [(6-1)/stride + 1]
# where 90 = output_channel * [(5-1)/stride + 1] * [(6-1)/stride + 1]
output.lod = [[90, 84]]
output.dims = [174, 1] # where 174 = 90 + 84
Args:
......@@ -166,7 +168,7 @@ def var_conv_2d(input,
x_lod_tensor = layers.data(name='x', shape=[1], lod_level=1)
row_lod_tensor = layers.data(name='row', shape=[6], lod_level=1)
col_lod_tensor = layers.data(name='col', shape=[6], lod_level=1)
out = contrib.var_conv_2d(input=x_lod_tensor,
out = contrib.var_conv_2d(input=x_lod_tensor,
row=row_lod_tensor,
col=col_lod_tensor,
input_channel=3,
......@@ -228,24 +230,27 @@ def match_matrix_tensor(x,
Given a query A of length `n` and a title B of length `m`, the input shape are respectively
[n, h] and [m, h], which h is hidden_size. If :attr:`channel_num` is set to 3,
it will generate a learnable parameter matrix W with shape [h, 3, h].
Then the semantic matching matrix of query A and title B is calculated by
A * W * B.T = [n, h]*[h, 3, h]*[h, m] = [n, 3, m]. The learnable parameter matrix `W`
is equivalent to a fully connected layer in the calculation process. If :attr:`act` is provided,
Then the semantic matching matrix of query A and title B is calculated by
A * W * B.T = [n, h]*[h, 3, h]*[h, m] = [n, 3, m]. The learnable parameter matrix `W`
is equivalent to a fully connected layer in the calculation process. If :attr:`act` is provided,
the corresponding activation function will be applied to output matrix.
The :attr:`x` and :attr:`y` should be LodTensor and only one level LoD is supported.
.. code-block:: text
Given a 1-level LoDTensor x:
x.lod = [[2, 3, ]]
x.data = [[0.3, 0.1], [0.2, 0.3], [0.5, 0.6], [0.7, 0.1], [0.3, 0.4]]
x.lod = [
[2, 3, ]]
x.data = [[0.3, 0.1], [0.2, 0.3], [
0.5, 0.6], [0.7, 0.1], [0.3, 0.4]]
x.dims = [5, 2]
y is a Tensor:
y.lod = [[3, 1, ]]
y.data = [[0.1, 0.2], [0.3, 0.7], [0.9, 0.2], [0.4, 0.1]]
y.dims = [4, 2]
set channel_num 2, then we get a 1-level LoDTensor:
out.lod = [[12, 6]] # where 12 = channel_num * x.lod[0][0] * y.lod[0][0]
# where 12 = channel_num * x.lod[0][0] * y.lod[0][0]
out.lod = [[12, 6]]
out.dims = [18, 1] # where 18 = 12 + 6
Args:
......@@ -270,7 +275,8 @@ def match_matrix_tensor(x,
x_lod_tensor = layers.data(name='x', shape=[10], lod_level=1)
y_lod_tensor = layers.data(name='y', shape=[10], lod_level=1)
out, out_tmp = contrib.match_matrix_tensor(x=x_lod_tensor, y=y_lod_tensor, channel_num=3)
out, out_tmp = contrib.match_matrix_tensor(
x=x_lod_tensor, y=y_lod_tensor, channel_num=3)
"""
helper = LayerHelper('match_matrix_tensor', **locals())
......@@ -302,9 +308,9 @@ def match_matrix_tensor(x,
def sequence_topk_avg_pooling(input, row, col, topks, channel_num):
"""
The :attr:`topks` is a list with incremental values in this function. For each topk,
it will average the topk features as an output feature for each channel of every
input sequence. Both :attr:`row` and :attr:`col` are LodTensor, which provide height
and width information for :attr:`input` tensor. If feature size of input sequence is less
it will average the topk features as an output feature for each channel of every
input sequence. Both :attr:`row` and :attr:`col` are LodTensor, which provide height
and width information for :attr:`input` tensor. If feature size of input sequence is less
than topk, it will padding 0 at the back.
.. code-block:: text
......@@ -313,7 +319,7 @@ def sequence_topk_avg_pooling(input, row, col, topks, channel_num):
row.lod = [[5, 4]]
col.lod = [[6, 7]]
input is a LoDTensor with input.lod[0][i] = channel_num * row.lod[0][i] * col.lod[0][i]
input is a LoDTensor with input.lod[0][i] = channel_num * row.lod[0][i] * col.lod[0][i]
input.lod = [[60, 56]] # where 60 = channel_num * 5 * 6
input.dims = [116, 1] # where 116 = 60 + 56
......@@ -376,7 +382,7 @@ def tree_conv(nodes_vector,
param_attr=None,
bias_attr=None,
name=None):
"""
"""
${comment}
Args:
......@@ -398,10 +404,12 @@ def tree_conv(nodes_vector,
import paddle.fluid as fluid
# 10 for max_node_size of dataset, 5 for vector width
nodes_vector = fluid.layers.data(name='vectors', shape=[10, 5], dtype='float32')
nodes_vector = fluid.layers.data(
name='vectors', shape=[10, 5], dtype='float32')
# 10 for max_node_size of dataset, 2 for every edge has two nodes
# edges must be directional
edge_set = fluid.layers.data(name='edge_set', shape=[10, 2], dtype='float32')
edge_set = fluid.layers.data(name='edge_set', shape=[
10, 2], dtype='float32')
# the shape of output will be [10, 6, 1],
# 10 for max_node_size of dataset, 6 for output size, 1 for 1 filter
out_vector = fluid.layers.tree_conv(nodes_vector, edge_set, 6, 1, 2)
......@@ -470,7 +478,8 @@ def fused_embedding_seq_pool(input,
import paddle.fluid as fluid
dict_size = 20
data_t = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
data_t = fluid.layers.data(
name='word', shape=[1], dtype='int64', lod_level=1)
padding_idx = np.random.randint(1, 10)
out = fluid.contrib.fused_embedding_seq_pool(
input=data_t,
......@@ -529,16 +538,16 @@ def multiclass_nms2(bboxes,
[N, M, 4 or 8 16 24 32] represents the
predicted locations of M bounding bboxes,
N is the batch size. Each bounding box has four
coordinate values and the layout is
coordinate values and the layout is
[xmin, ymin, xmax, ymax], when box size equals to 4.
2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]
M is the number of bounding boxes, C is the
class number
M is the number of bounding boxes, C is the
class number
scores (Variable): Two types of scores are supported:
1. (Tensor) A 3-D Tensor with shape [N, C, M]
represents the predicted confidence predictions.
N is the batch size, C is the class number, M is
number of bounding boxes. For each category there
N is the batch size, C is the class number, M is
number of bounding boxes. For each category there
are total M scores which corresponding M bounding
boxes. Please note, M is equal to the 2nd dimension
of BBoxes.
......@@ -546,11 +555,11 @@ def multiclass_nms2(bboxes,
M is the number of bbox, C is the class number.
In this case, input BBoxes should be the second
case with shape [M, C, 4].
background_label (int): The index of background label, the background
background_label (int): The index of background label, the background
label will be ignored. If set to -1, then all
categories will be considered. Default: 0
score_threshold (float): Threshold to filter out bounding boxes with
low confidence score. If not provided,
low confidence score. If not provided,
consider all boxes.
nms_top_k (int): Maximum number of detections to be kept according to
the confidences after the filtering detections based
......@@ -565,19 +574,19 @@ def multiclass_nms2(bboxes,
Returns:
A tuple with two Variables: (Out, Index) if return_index is True,
otherwise, a tuple with one Variable(Out) is returned.
Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
or A 2-D LoDTensor with shape [No, 10] represents the detections.
Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3,
x4, y4]. No is the total number of detections.
otherwise, a tuple with one Variable(Out) is returned.
Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
or A 2-D LoDTensor with shape [No, 10] represents the detections.
Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3,
x4, y4]. No is the total number of detections.
If all images have not detected results, all elements in LoD will be
0, and output tensor is empty (None).
Index: Only return when return_index is True. A 2-D LoDTensor with
shape [No, 1] represents the selected index which type is Integer.
The index is the absolute value cross batches. No is the same number
as Out. If the index is used to gather other attribute such as age,
one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
Index: Only return when return_index is True. A 2-D LoDTensor with
shape [No, 1] represents the selected index which type is Integer.
The index is the absolute value cross batches. No is the same number
as Out. If the index is used to gather other attribute such as age,
one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
N is the batch size and M is the number of boxes.
......@@ -671,7 +680,7 @@ def search_pyramid_hash(input,
default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` .
param_attr_wl(ParamAttr): Specified parameters of white filter.
param_attr_bl(ParamAttr): Specified parameters of black filter.
distribute_update_vars(list[ParamAttr.name]): Decided which params should be updated in distribute training.
distribute_update_vars(list[ParamAttr.name]): Decided which params should be updated in distribute training.
Used in Distribute Transpiler to create a trainer/server program.
name(str, optional): The default value is None. Normally there is no need for user to set this property.
For more information, please refer to :ref:`api_guide_Name` .
......@@ -813,11 +822,11 @@ def partial_concat(input, start_index=0, length=-1):
**Partial Concat**
This OP concatenates the inputs according to the start index and length. This
OP exists in contrib, which means that it is not shown to the public.
Only 2-D Tensor or LodTensor input is supported. Slice and concat can only be
Only 2-D Tensor or LodTensor input is supported. Slice and concat can only be
performed along the second dimension.
.. code-block:: text
Given:
x = [[0, 1, 2],
[3, 4, 5]]
......@@ -826,7 +835,7 @@ def partial_concat(input, start_index=0, length=-1):
output = partial_concat([x, y], start_index=0, length=2)
we get:
output = [[0, 1, 6, 7],
[3, 4, 9, 10]]
......@@ -844,7 +853,8 @@ def partial_concat(input, start_index=0, length=-1):
import paddle.fluid as fluid
x = fluid.data(name="x", shape=[None,3], dtype="float32")
y = fluid.data(name="y", shape=[None,3], dtype="float32")
concat = fluid.contrib.layers.partial_concat([x, y], start_index=0, length=2)
concat = fluid.contrib.layers.partial_concat(
[x, y], start_index=0, length=2)
"""
if not isinstance(input, list):
warnings.warn(
......@@ -873,12 +883,12 @@ def partial_concat(input, start_index=0, length=-1):
def partial_sum(input, start_index=0, length=-1):
"""
**PartialSum**
This Op can sum the vars by specifying the initial position(start_index) and length(length).
This Op can sum the vars by specifying the initial position(start_index) and length(length).
This Op exists in contrib, which means that it is not shown to the public.
Only 2-D Tensor or LodTensor input is supported. Slice and concat can only be
Only 2-D Tensor or LodTensor input is supported. Slice and concat can only be
performed along the second dimension.
.. code-block:: text
Given:
x = [[0, 1, 2],
[3, 4, 5]]
......@@ -886,7 +896,7 @@ def partial_sum(input, start_index=0, length=-1):
[9, 10, 11]]
output = partial_sum([x, y], start_index=0, length=2)
we get:
output = [[6, 8],
[12, 14]]
Args:
......@@ -922,3 +932,88 @@ def partial_sum(input, start_index=0, length=-1):
helper.append_op(
type='partial_sum', inputs=inputs, outputs={'Out': [out]}, attrs=attrs)
return out
def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'):
    """
    **Tdm Child**

    Look up, for every node id in ``x``, the ids of that node's children in
    the TDM tree, together with a mask telling which of those children are
    leaf nodes.

    .. code-block:: text

        Given:
            tree[[0], [1, 2], [3, 4], [5, 6]] # A binary tree with seven nodes
            x = [[2], [3]]
            node_nums = 7
            child_nums = 2

        we get:
            child = [[5, 6],
                     [0, 0]]
            leaf_mask = [[1, 1],
                         [0, 0]]

    Args:
        x(Variable): Variable holding the node ids to look up, dtype
            int32/int64.
        node_nums(int): Total number of nodes in the tree.
        child_nums(int): Maximum number of children per node.
        param_attr(ParamAttr): Property of the tdm-tree-info parameter.
            Default: None, which means the default weight parameter property
            is used. See :ref:`api_fluid_ParamAttr` for usage. The parameter
            has shape (node_nums, 3 + child_nums) and dtype int32/int64. Each
            row describes one node and contains:

            1. Item_id(int, shape(1)): the item id of the node if it is a
               leaf node, otherwise 0.
            2. Layer_id(int, shape(1)): the layer the node is on.
            3. Parent_id(int, shape(1)): the node id of the parent.
            4. Child_id(int, shape(child_nums)): the node ids of all children.
               If a node has fewer than child_nums children, pad with 0.
        dtype(str): Data type of the outputs child and leaf_mask, int32 or
            int64.

    Returns:
        tuple: (child, leaf_mask), both Variables. leaf_mask is 1 where the
        corresponding child is a leaf node and 0 otherwise.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1)
            tree_info = [[0,0,0,1,2],
                         [0,1,0,3,4],[0,1,0,5,6],
                         [0,2,1,0,0],[1,2,1,0,0],[2,2,2,0,0],[3,2,2,0,0]]
            tree_info_np = np.array(tree_info)
            tree_info_np = np.reshape(tree_info_np, (7,5))
            node_nums = 7
            child_nums = 2
            child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums,
                                    param_attr=fluid.ParamAttr(
                                        initializer=fluid.initializer.NumpyArrayInitializer(
                                                                                tree_info_np)))
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            xx = np.array([[2],[3]]).reshape((2,1)).astype("int32")
            child_res, leaf_mask_res = exe.run(feed={"x":xx}, fetch_list=[child, leaf_mask])
    """
    # Must stay the first statement: locals() is forwarded to LayerHelper and
    # should contain nothing but the function arguments.
    helper = LayerHelper("tdm_child", **locals())
    check_dtype(dtype, 'dtype', ['int32', 'int64'],
                'fluid.contrib.layers.tdm_child')
    out_dtype_attr = convert_np_dtype_to_dtype_(dtype)

    # The tree table is an op parameter that is excluded from gradients.
    tree_info = helper.create_parameter(
        attr=helper.param_attr,
        shape=[node_nums, 3 + child_nums],
        dtype=dtype,
        default_initializer=Constant(0))
    tree_info.stop_gradient = True

    child = helper.create_variable_for_type_inference(dtype=dtype)
    leaf_mask = helper.create_variable_for_type_inference(dtype=dtype)

    op_inputs = {'X': x, 'TreeInfo': tree_info}
    op_outputs = {'Child': child, 'LeafMask': leaf_mask}
    op_attrs = {'child_nums': child_nums, 'dtype': out_dtype_attr}
    helper.append_op(
        type='tdm_child',
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs,
        stop_gradient=True)
    return (child, leaf_mask)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import random
import six
def create_tdm_tree():
    """Build the 26-node TDM tree table used as the test fixture.

    Row ``i`` describes node ``i`` as
    ``[item_id, layer_id, parent_id, child_0, child_1]``; a child slot of 0
    means "no child". A new list is returned on every call.
    """
    # Layers 0-2: nodes 0..6. Node 6 has a single child (13).
    upper_layers = [
        [0, 0, 0, 1, 2],
        [0, 1, 0, 3, 4],
        [0, 1, 0, 5, 6],
        [0, 2, 1, 7, 8],
        [0, 2, 1, 9, 10],
        [0, 2, 2, 11, 12],
        [0, 2, 2, 13, 0],
    ]
    # Layer 3: nodes 7..13. Node 13 has no children and carries item id 12.
    third_layer = [
        [0, 3, 3, 14, 15],
        [0, 3, 3, 16, 17],
        [0, 3, 4, 18, 19],
        [0, 3, 4, 20, 21],
        [0, 3, 5, 22, 23],
        [0, 3, 5, 24, 25],
        [12, 3, 6, 0, 0],
    ]
    # Layer 4: nodes 14..25, none of which has children.
    fourth_layer = [
        [0, 4, 7, 0, 0],
        [1, 4, 7, 0, 0],
        [2, 4, 8, 0, 0],
        [3, 4, 8, 0, 0],
        [4, 4, 9, 0, 0],
        [5, 4, 9, 0, 0],
        [6, 4, 10, 0, 0],
        [7, 4, 10, 0, 0],
        [8, 4, 11, 0, 0],
        [9, 4, 11, 0, 0],
        [10, 4, 12, 0, 0],
        [11, 4, 12, 0, 0],
    ]
    return upper_layers + third_layer + fourth_layer
class TestTDMChildOp(OpTest):
    """Checks the tdm_child op against a pure-Python reference lookup.

    Subclasses override ``config`` to vary the shape of X and the dtypes of
    X and TreeInfo.
    """

    def setUp(self):
        self.__class__.op_type = "tdm_child"
        self.config()

        tree_rows = create_tdm_tree()
        tree_info_np = np.array(tree_rows).astype(self.info_type)
        x_np = np.random.randint(
            low=0, high=26, size=self.x_shape).astype(self.x_type)

        # Reference result, flattened in the iteration order of X.
        expected_children = []
        expected_mask = []
        for id_row in x_np:
            for node_id in id_row:
                # Node 0 is reported as having no children; every other node
                # returns its two child slots.
                if node_id == 0:
                    pair = [0, 0]
                else:
                    pair = [tree_rows[node_id][3], tree_rows[node_id][4]]
                expected_children += pair
                # Mask entry is 1 when the child's item_id is non-zero.
                expected_mask += [int(tree_rows[c][0] != 0) for c in pair]

        # NOTE(review): the expected arrays use info_type while the 'dtype'
        # attribute is left at its default -- confirm this is intended.
        child = np.reshape(
            np.array(expected_children).astype(self.info_type),
            self.child_shape)
        leaf_mask = np.reshape(
            np.array(expected_mask).astype(self.info_type), self.child_shape)

        self.attrs = {'child_nums': 2}
        self.inputs = {'X': x_np, 'TreeInfo': tree_info_np}
        self.outputs = {'Child': child, 'LeafMask': leaf_mask}

    def config(self):
        """Set the test shapes and dtypes (int32 X, int32 TreeInfo)."""
        self.x_type = 'int32'
        self.info_type = 'int32'
        self.x_shape = (10, 20)
        self.child_shape = (10, 20, 2)

    def test_check_output(self):
        self.check_output()
class TestCase1(TestTDMChildOp):
    def config(self):
        """X is int32, TreeInfo is int64."""
        self.x_type = 'int32'
        self.info_type = 'int64'
        self.x_shape = (10, 20)
        self.child_shape = self.x_shape + (2, )
class TestCase2(TestTDMChildOp):
    def config(self):
        """X is int64, TreeInfo is int64."""
        self.x_type = 'int64'
        self.info_type = 'int64'
        self.x_shape = (10, 20)
        self.child_shape = self.x_shape + (2, )
class TestCase3(TestTDMChildOp):
    def config(self):
        """X is int64, TreeInfo is int32."""
        self.x_type = 'int64'
        self.info_type = 'int32'
        self.x_shape = (10, 20)
        self.child_shape = self.x_shape + (2, )
class TestCase4(TestTDMChildOp):
    def config(self):
        """Larger X shape, int32 X and int32 TreeInfo."""
        self.x_type = 'int32'
        self.info_type = 'int32'
        self.x_shape = (100, 20)
        self.child_shape = self.x_shape + (2, )
class TestTDMChildShape(unittest.TestCase):
    """Builds tdm_child through the Python layer API and runs it once."""

    def test_shape(self):
        x = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1)

        # Initialise the tree-info parameter from the fixture table.
        tree_info_np = np.array(create_tdm_tree()).astype('int32')
        tree_attr = fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(tree_info_np))
        child, leaf_mask = fluid.contrib.layers.tdm_child(
            x=x, node_nums=26, child_nums=2, param_attr=tree_attr)

        exe = fluid.Executor(place=fluid.CPUPlace())
        exe.run(fluid.default_startup_program())

        # Feed node ids 1..12 as a column vector; nothing is fetched.
        node_ids = np.arange(1, 13).reshape((12, 1)).astype('int32')
        exe.run(feed={'x': node_ids})
# Run all test cases of this module when the file is executed as a script.
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册