提交 71af72b1 编写于 作者: Z zhoukunsheng 提交者: Tao Luo

upgrade hash op to support Tensor and LoDTensor input (#17998)

上级 d3b3443d
......@@ -238,7 +238,7 @@ paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], va
paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65c8362e48810b8226e311c5d046db51'))
paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', '9f303c67538e468a36c5904a0a3aa110'))
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb'))
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'da621ba1363e8f5fe7b702526bbae18f'))
paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'a0b73c21be618cec0281e7903039e5e3'))
paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5d16663e096d7f04954c70ce1cc5e195'))
paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'e3993a477c94729526040ff65d95728e'))
paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e399f9436fed5f7ff480d8532e42c937'))
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -46,11 +46,10 @@ class HashOp : public framework::OperatorWithKernel {
class HashOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(Tensor) Input tensor of scale operator.");
AddOutput("Out", "(Tensor) Output tensor of scale operator.");
AddInput("X", "(Tensor) Input tensor of hash operator.");
AddOutput("Out", "(Tensor) Output tensor of hash operator.");
AddComment(R"DOC(
**Hash Operator**
$$Out = scale * X$$
Execute the xxHash algorithm `num_hash` times on all elements along the second dimension of the input.
)DOC");
AddAttr<int>("num_hash", "").SetDefault(1);
AddAttr<int>("mod_by", "").SetDefault(100000);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -47,10 +47,6 @@ class HashKernel : public framework::OpKernel<T> {
int num_hash = context.Attr<int>("num_hash");
auto in_dims = in_t->dims();
auto in_lod = in_t->lod();
PADDLE_ENFORCE_EQ(
static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
"The actual input data's size mismatched with LoD information.");
std::vector<int64_t> out_dims;
HashOutputSize(in_dims, out_dims, num_hash);
......@@ -67,6 +63,7 @@ class HashKernel : public framework::OpKernel<T> {
}
input += last_dim;
}
out_t->set_lod(in_t->lod());
}
};
......
......@@ -10810,12 +10810,9 @@ def hash(input, hash_size, num_hash=1, name=None):
Given:
# shape [2, 2]
input.data = [
input.data =
[[1, 2],
[3, 4]],
]
input.lod = [[0, 2]]
[3, 4]]
hash_size = 10000
......@@ -10833,40 +10830,32 @@ def hash(input, hash_size, num_hash=1, name=None):
[8310, 1327, 1654, 4567]],
]
output.lod = [[0, 2]]
Args:
input (Variable): The input variable which is a one-hot word. The
dimensions of the input variable must be 2.
dimensions of the input variable must be 2. Both Tensor and LoDTensor are supported.
hash_size (int): The space size for hash algorithm. The output value
will keep in the range:math:`[0, hash_size - 1]`.
num_hash (int): The times of hash, default 1.
name (str, default None): The name of this layer.
Returns:
Variable: The hash result variable which is a LoDTensor.
Variable: The hash result variable, which has the same variable type as `input`.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import numpy as np
titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=1)
hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=1, hash_size=1000)
place = fluid.core.CPUPlace()
exece = fluid.Executor(place)
exece.run(fluid.default_startup_program())
# titles has shape [batch, 1]
titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=0)
# hash_r has shape [batch, 2]
hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)
# Init Tensor
tensor = fluid.core.LoDTensor()
tensor.set(np.random.randint(0, 10, (3, 1)).astype("int32"), place)
# Set LoD
tensor.set_recursive_sequence_lengths([[1, 1, 1]])
out = exece.run(feed={'titles': tensor}, fetch_list=[hash_r], return_numpy=False)
# titles has shape [batch, 1] and lod information
titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=1)
# hash_r has shape [batch, 2] and inherits lod information from titles
hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)
"""
helper = LayerHelper('hash', **locals())
out = helper.create_variable_for_type_inference(
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -17,36 +17,41 @@ import numpy as np
from op_test import OpTest
class TestScaleOp(OpTest):
class TestHashOp(OpTest):
def setUp(self):
self.op_type = "hash"
self.init_test_case()
self.inputs = {'X': (self.in_seq, self.lod)}
self.attrs = {'num_hash': 4, 'mod_by': 10000}
self.attrs = {'num_hash': 2, 'mod_by': 10000}
self.outputs = {'Out': (self.out_seq, self.lod)}
def init_test_case(self):
np.random.seed = 1
self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
self.lod = [[9, 4, 11, 6]]
# self.out_seq = np.ones([30, 4, 1], dtype=np.int32)
self.out_seq = [
[[9662], [9217], [1129], [8487]], [[9662], [9217], [1129], [8487]],
[[8310], [1327], [1654], [4567]], [[6897], [3218], [2013], [1241]],
[[9407], [6715], [6949], [8094]], [[8473], [694], [5142], [2479]],
[[8310], [1327], [1654], [4567]], [[6897], [3218], [2013], [1241]],
[[4372], [9456], [8204], [6695]], [[6897], [3218], [2013], [1241]],
[[8473], [694], [5142], [2479]], [[4372], [9456], [8204], [6695]],
[[4372], [9456], [8204], [6695]], [[8473], [694], [5142], [2479]],
[[9407], [6715], [6949], [8094]], [[9369], [4525], [8935], [9210]],
[[4372], [9456], [8204], [6695]], [[4372], [9456], [8204], [6695]],
[[9369], [4525], [8935], [9210]], [[6897], [3218], [2013], [1241]],
[[9038], [7951], [5953], [8657]], [[9407], [6715], [6949], [8094]],
[[9662], [9217], [1129], [8487]], [[9369], [4525], [8935], [9210]],
[[9038], [7951], [5953], [8657]], [[9662], [9217], [1129], [8487]],
[[9369], [4525], [8935], [9210]], [[1719], [5986], [9919], [3421]],
[[4372], [9456], [8204], [6695]], [[9038], [7951], [5953], [8657]]
]
np.random.seed(1)
self.in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
self.lod = [[2, 6]]
self.out_seq = [[[3481], [7475]], [[1719], [5986]], [[8473], [694]],
[[3481], [7475]], [[4372], [9456]], [[4372], [9456]],
[[6897], [3218]], [[9038], [7951]]]
self.out_seq = np.array(self.out_seq)
def test_check_output(self):
self.check_output()
class TestHashNotLoDOp(TestHashOp):
def setUp(self):
self.op_type = "hash"
self.init_test_case()
self.inputs = {'X': self.in_seq}
self.attrs = {'num_hash': 2, 'mod_by': 10000}
self.outputs = {'Out': self.out_seq}
def init_test_case(self):
np.random.seed(1)
self.in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
self.out_seq = [[[3481], [7475]], [[1719], [5986]], [[8473], [694]],
[[3481], [7475]], [[4372], [9456]], [[4372], [9456]],
[[6897], [3218]], [[9038], [7951]]]
self.out_seq = np.array(self.out_seq)
def test_check_output(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册