Commit 71af72b1 authored by zhoukunsheng, committed by Tao Luo

upgrade hash op to support Tensor and LoDTensor input (#17998)

Parent d3b3443d
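Overview: the hash op maps each row of a 2-D integer input to `num_hash` bucket ids by running xxHash64 once per hash index (the index is used as the seed) and taking the result modulo `mod_by`. The sketch below is a hedged reading of the kernel in hash_op.h, not Paddle code: it assumes the third-party `xxhash` package and a little-endian host, and `hash_op_reference` is a hypothetical name.

    import numpy as np
    import xxhash  # assumed third-party package: pip install xxhash

    def hash_op_reference(x, num_hash=1, mod_by=10000):
        # x: int32 array of shape [N, D]. Mirrors the kernel's
        # XXH64(row_bytes, sizeof(int) * D, seed=k) % mod_by per row,
        # returning shape [N, num_hash, 1] as the unit tests expect.
        n, d = x.shape
        out = np.empty((n, num_hash, 1), dtype=np.int64)
        for i in range(n):
            row_bytes = x[i].tobytes()  # D int32 values, little-endian
            for k in range(num_hash):
                out[i, k, 0] = xxhash.xxh64(row_bytes, seed=k).intdigest() % mod_by
        return out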
paddle/fluid/API.spec
@@ -238,7 +238,7 @@ paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], va
 paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65c8362e48810b8226e311c5d046db51'))
 paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', '9f303c67538e468a36c5904a0a3aa110'))
 paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb'))
-paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'da621ba1363e8f5fe7b702526bbae18f'))
+paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'a0b73c21be618cec0281e7903039e5e3'))
 paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5d16663e096d7f04954c70ce1cc5e195'))
 paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'e3993a477c94729526040ff65d95728e'))
 paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e399f9436fed5f7ff480d8532e42c937'))
...
paddle/fluid/operators/hash_op.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -46,11 +46,10 @@ class HashOp : public framework::OperatorWithKernel {
 class HashOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("X", "(Tensor) Input tensor of scale operator.");
-    AddOutput("Out", "(Tensor) Output tensor of scale operator.");
+    AddInput("X", "(Tensor) Input tensor of hash operator.");
+    AddOutput("Out", "(Tensor) Output tensor of hash operator.");
     AddComment(R"DOC(
-**Hash Operator**
-$$Out = scale * X$$
+Execute the xxHash algorithm `num_hash` times on all elements along the second dimension of the input.
 )DOC");
     AddAttr<int>("num_hash", "").SetDefault(1);
     AddAttr<int>("mod_by", "").SetDefault(100000);
...
paddle/fluid/operators/hash_op.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -47,10 +47,6 @@ class HashKernel : public framework::OpKernel<T> {
     int num_hash = context.Attr<int>("num_hash");
     auto in_dims = in_t->dims();
-    auto in_lod = in_t->lod();
-    PADDLE_ENFORCE_EQ(
-        static_cast<uint64_t>(in_dims[0]), in_lod[0].back(),
-        "The actual input data's size mismatched with LoD information.");

     std::vector<int64_t> out_dims;
     HashOutputSize(in_dims, out_dims, num_hash);
@@ -67,6 +63,7 @@ class HashKernel : public framework::OpKernel<T> {
       }
       input += last_dim;
     }
+    out_t->set_lod(in_t->lod());
   }
 };
...
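These two hunks carry the behavioral change: the kernel no longer rejects inputs whose LoD does not match the first dimension (so a plain Tensor with an empty LoD is accepted), and it now forwards the input's LoD to the output, which is why the Python docstring below can say the result inherits lod information. The output shape comes from HashOutputSize, which lies outside the shown hunks; inferring from the unit tests at the bottom of this page, an input of shape [N, D] appears to yield [N, num_hash, 1]. A hedged sketch of that inferred rule:

    def hash_output_dims(in_dims, num_hash):
        # Inferred shape rule (an assumption, not the actual HashOutputSize):
        # drop the last input dim, append num_hash, keep a trailing 1.
        # e.g. [8, 1] with num_hash=2 -> [8, 2, 1]
        return list(in_dims[:-1]) + [num_hash, 1]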
python/paddle/fluid/layers/nn.py
@@ -10810,12 +10810,9 @@ def hash(input, hash_size, num_hash=1, name=None):
     Given:
        # shape [2, 2]
-       input.data = [
-           [[1, 2],
-           [3, 4]],
-       ]
-       input.lod = [[0, 2]]
+       input.data =
+           [[1, 2],
+           [3, 4]]

        hash_size = 10000
@@ -10833,40 +10830,32 @@ def hash(input, hash_size, num_hash=1, name=None):
            [8310, 1327, 1654, 4567]],
        ]
-       output.lod = [[0, 2]]

    Args:
-       input (Variable): The input variable which is a one-hot word. The
-           dimensions of the input variable must be 2.
+       input (Variable): The input variable which is a one-hot word. The
+           dimensions of the input variable must be 2. Both Tensor and
+           LoDTensor are supported.
        hash_size (int): The space size for hash algorithm. The output value
            will keep in the range:math:`[0, hash_size - 1]`.
        num_hash (int): The times of hash, default 1.
        name (str, default None): The name of this layer.

    Returns:
-       Variable: The hash result variable which is a LoDTensor.
+       Variable: The hash result variable, which has the same variable type
+           as `input`.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
-           import paddle.fluid.layers as layers
-           import numpy as np
-
-           titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=1)
-           hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=1, hash_size=1000)
-
-           place = fluid.core.CPUPlace()
-           exece = fluid.Executor(place)
-           exece.run(fluid.default_startup_program())
-
-           # Init Tensor
-           tensor = fluid.core.LoDTensor()
-           tensor.set(np.random.randint(0, 10, (3, 1)).astype("int32"), place)
-           # Set LoD
-           tensor.set_recursive_sequence_lengths([[1, 1, 1]])
-
-           out = exece.run(feed={'titles': tensor}, fetch_list=[hash_r], return_numpy=False)
+
+           # titles has shape [batch, 1]
+           titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=0)
+           # hash_r has shape [batch, 2]
+           hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)
+
+           # titles has shape [batch, 1] and lod information
+           titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=1)
+           # hash_r has shape [batch, 2] and inherits lod information from titles
+           hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)
    """
    helper = LayerHelper('hash', **locals())
    out = helper.create_variable_for_type_inference(
...
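The rewritten docstring drops the old end-to-end executor example. For completeness, here is a hedged sketch of running the Tensor (lod_level=0) path, adapted from the example this commit removes; it assumes the same Fluid 1.x executor API that the removed code used:

    import numpy as np
    import paddle.fluid as fluid

    titles = fluid.layers.data(name='titles', shape=[1], dtype='int32', lod_level=0)
    hash_r = fluid.layers.hash(name='hash_x', input=titles, num_hash=2, hash_size=1000)

    place = fluid.core.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # A plain int32 ndarray is now a valid feed; no LoDTensor wrapping
    # or set_recursive_sequence_lengths() call is needed.
    data = np.random.randint(0, 10, (3, 1)).astype("int32")
    out, = exe.run(feed={'titles': data},
                   fetch_list=[hash_r],
                   return_numpy=False)  # expected shape: [3, 2, 1]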
python/paddle/fluid/tests/unittests/test_hash_op.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,36 +17,41 @@ import numpy as np
 from op_test import OpTest


-class TestScaleOp(OpTest):
+class TestHashOp(OpTest):
     def setUp(self):
         self.op_type = "hash"
         self.init_test_case()
         self.inputs = {'X': (self.in_seq, self.lod)}
-        self.attrs = {'num_hash': 4, 'mod_by': 10000}
+        self.attrs = {'num_hash': 2, 'mod_by': 10000}
         self.outputs = {'Out': (self.out_seq, self.lod)}

     def init_test_case(self):
-        np.random.seed = 1
-        self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
-        self.lod = [[9, 4, 11, 6]]
-        # self.out_seq = np.ones([30, 4, 1], dtype=np.int32)
-        self.out_seq = [
-            [[9662], [9217], [1129], [8487]],
-            [[8310], [1327], [1654], [4567]], [[6897], [3218], [2013], [1241]],
-            [[9407], [6715], [6949], [8094]], [[8473], [694], [5142], [2479]],
-            [[8310], [1327], [1654], [4567]], [[6897], [3218], [2013], [1241]],
-            [[4372], [9456], [8204], [6695]], [[6897], [3218], [2013], [1241]],
-            [[8473], [694], [5142], [2479]], [[4372], [9456], [8204], [6695]],
-            [[4372], [9456], [8204], [6695]], [[8473], [694], [5142], [2479]],
-            [[9407], [6715], [6949], [8094]], [[9369], [4525], [8935], [9210]],
-            [[4372], [9456], [8204], [6695]], [[4372], [9456], [8204], [6695]],
-            [[9369], [4525], [8935], [9210]], [[6897], [3218], [2013], [1241]],
-            [[9038], [7951], [5953], [8657]], [[9407], [6715], [6949], [8094]],
-            [[9662], [9217], [1129], [8487]], [[9369], [4525], [8935], [9210]],
-            [[9038], [7951], [5953], [8657]], [[9662], [9217], [1129], [8487]],
-            [[9369], [4525], [8935], [9210]], [[1719], [5986], [9919], [3421]],
-            [[4372], [9456], [8204], [6695]], [[9038], [7951], [5953], [8657]]
-        ]
+        np.random.seed(1)
+        self.in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
+        self.lod = [[2, 6]]
+        self.out_seq = [[[3481], [7475]], [[1719], [5986]], [[8473], [694]],
+                        [[3481], [7475]], [[4372], [9456]], [[4372], [9456]],
+                        [[6897], [3218]], [[9038], [7951]]]
+        self.out_seq = np.array(self.out_seq)
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestHashNotLoDOp(TestHashOp):
+    def setUp(self):
+        self.op_type = "hash"
+        self.init_test_case()
+        self.inputs = {'X': self.in_seq}
+        self.attrs = {'num_hash': 2, 'mod_by': 10000}
+        self.outputs = {'Out': self.out_seq}
+
+    def init_test_case(self):
+        np.random.seed(1)
+        self.in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
+        self.out_seq = [[[3481], [7475]], [[1719], [5986]], [[8473], [694]],
+                        [[3481], [7475]], [[4372], [9456]], [[4372], [9456]],
+                        [[6897], [3218]], [[9038], [7951]]]
         self.out_seq = np.array(self.out_seq)

     def test_check_output(self):
...
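Note that both test classes hard-code the same expected values, since each draws an identical (8, 1) sample after np.random.seed(1); the only difference is whether the input carries LoD. If the reference sketch near the top of this page is faithful to the kernel, it should reproduce those values:

    import numpy as np

    np.random.seed(1)
    in_seq = np.random.randint(0, 10, (8, 1)).astype("int32")
    # hash_op_reference is the hypothetical sketch defined above; if it
    # matches the kernel, out[0] should be [[3481], [7475]], and so on.
    out = hash_op_reference(in_seq, num_hash=2, mod_by=10000)
    print(out.tolist())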