Commit 36acfaee authored by 翟飞跃, committed by Tao Luo

Add fused_embedding_seq_pool layer into fluid.contrib (#19771)

Parent 0b392187
......@@ -565,6 +565,7 @@ paddle.fluid.contrib.sequence_topk_avg_pooling (ArgSpec(args=['input', 'row', 'c
paddle.fluid.contrib.var_conv_2d (ArgSpec(args=['input', 'row', 'col', 'input_channel', 'output_channel', 'filter_size', 'stride', 'param_attr', 'act', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, None, 'float32', None)), ('document', 'f52a6edf6d3e970568788604da3329c2'))
paddle.fluid.contrib.match_matrix_tensor (ArgSpec(args=['x', 'y', 'channel_num', 'act', 'param_attr', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, None, 'float32', None)), ('document', '3bdc4b2891c1460bc630fdcd22766b21'))
paddle.fluid.contrib.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '7c727562ebdda38274106d1a9b338e5b'))
paddle.fluid.contrib.fused_embedding_seq_pool (ArgSpec(args=['input', 'size', 'is_sparse', 'padding_idx', 'combiner', 'param_attr', 'dtype'], varargs=None, keywords=None, defaults=(False, None, 'sum', None, 'float32')), ('document', 'd1c2165215196b57a67cc900585fb417'))
paddle.fluid.contrib.BasicGRUUnit ('paddle.fluid.contrib.layers.rnn_impl.BasicGRUUnit', ('document', '2aed2540ed1540f081be9f4d08f2a65e'))
paddle.fluid.contrib.BasicGRUUnit.__init__ (ArgSpec(args=['self', 'name_scope', 'hidden_size', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'dtype'], varargs=None, keywords=None, defaults=(None, None, None, None, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.BasicGRUUnit.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'e75f4ab651bed0c9129a9a2c10aaaa7d'))
......
......@@ -30,6 +30,7 @@ __all__ = [
'var_conv_2d',
'match_matrix_tensor',
'tree_conv',
'fused_embedding_seq_pool',
]
......@@ -427,3 +428,69 @@ def tree_conv(nodes_vector,
    else:
        pre_activation = out
    return helper.append_activation(pre_activation)


def fused_embedding_seq_pool(input,
                             size,
                             is_sparse=False,
                             padding_idx=None,
                             combiner='sum',
                             param_attr=None,
                             dtype='float32'):
"""
**Embedding Sequence pool**
This layer is the fusion of lookup table and sequence_pool.
Args:
input (Variable): Input is a Tensor<int64> Variable, which contains the IDs' information.
The value of the input IDs should satisfy :math:`0<= id < size[0]`.
size (tuple|list): The shape of the lookup_table parameter. It should
have two elements which indicate the size of the dictionary of
embedding and the size of each embedding vector respectively.
is_sparse (bool): The flag indicating whether to use sparse update.
Default: False.
padding_idx (int|long|None): It will output all-zero padding data whenever
lookup encounters :math:`padding\_idx` in Ids. If set :attr:`None`, it makes
no effect to output. If :math:`padding\_idx < 0`, the :math:`padding\_idx`
will automatically be converted to :math:`size[0] + padding\_idx` to use.
Default: None.
combiner (str): The pooling type of sequence_pool, and only support `sum`.
Default: sum.
param_attr (ParamAttr): Parameters for this layer.
dtype (np.dtype|core.VarDesc.VarType|str): The dtype refers to the data type of output
tensor. It can be float32, float_16, int etc.
Returns:
The sequence pooling variable which is a Tensor.
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
dict_size = 20
data_t = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
padding_idx = np.random.randint(1, 10)
out = fluid.contrib.fused_embedding_seq_pool(
input=data_t,
size=[dict_size, 32],
param_attr='w',
padding_idx=padding_idx,
is_sparse=False)
"""
    helper = LayerHelper('fused_embedding_seq_pool', **locals())
    w = helper.create_parameter(
        attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False)
    out = helper.create_variable_for_type_inference(dtype)
    # Map padding_idx to what the op expects: -1 means "no padding", and a
    # negative index is interpreted relative to the dictionary size (size[0]).
    padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
        size[0] + padding_idx)
    helper.append_op(
        type='fused_embedding_seq_pool',
        inputs={'Ids': input,
                'W': w},
        outputs={'Out': out},
        attrs={
            'is_sparse': is_sparse,
            'combiner': combiner,
            'padding_idx': padding_idx
        })
    return out
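
For readers skimming this diff, the following is a minimal NumPy sketch (not part of the commit) of what the fused op computes with the defaults documented above: an embedding lookup followed by per-sequence 'sum' pooling, where ids equal to padding_idx contribute all-zero rows. The function name and shapes are illustrative only, and the sketch omits the negative-padding_idx normalization that the layer performs itself.

    # Minimal NumPy reference for fused_embedding_seq_pool (illustrative only).
    import numpy as np

    def ref_fused_embedding_seq_pool(ids, seq_lens, weight, padding_idx=None):
        # ids: 1-D int64 array of token ids, concatenated across all sequences
        # seq_lens: per-sequence lengths (one LoD level), sum(seq_lens) == len(ids)
        # weight: [dict_size, emb_size] embedding table
        emb = weight[ids]                          # lookup, shape [len(ids), emb_size]
        if padding_idx is not None:
            emb[ids == padding_idx] = 0.0          # padding ids contribute zero vectors
        pooled, offset = [], 0
        for n in seq_lens:                         # 'sum' combiner over each sequence
            pooled.append(emb[offset:offset + n].sum(axis=0))
            offset += n
        return np.stack(pooled)                    # shape [len(seq_lens), emb_size]

    # Two sequences of four ids each against a [20, 32] table -> output shape (2, 32).
    w = np.random.rand(20, 32).astype('float32')
    ids = np.random.randint(1, 10, 8).astype('int64')
    print(ref_fused_embedding_seq_pool(ids, [4, 4], w, padding_idx=3).shape)
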
......@@ -84,5 +84,33 @@ class TestLookupTableOpWithPadding(TestFusedEmbeddingSeqPoolOp):
            self.check_grad(['W'], 'Out', no_grad_set=('Ids'))


class TestFusedEmbeddingSeqPoolApi(unittest.TestCase):
    def test_api(self):
        if ver.mkl() == "ON" and 'Linux' in platform.platform():
            import paddle.fluid as fluid

            dict_size = 20
            data_t = fluid.layers.data(
                name='word', shape=[1], dtype='int64', lod_level=1)
            padding_idx = np.random.randint(1, 10)
            out = fluid.contrib.fused_embedding_seq_pool(
                input=data_t,
                size=[dict_size, 32],
                param_attr='w',
                padding_idx=padding_idx,
                is_sparse=False)

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            # prepare input words' idx
            x_tensor = fluid.core.LoDTensor()
            idxs = np.random.randint(1, 10, (8)).astype("int64")

            x_tensor.set(idxs, place)
            x_tensor.set_recursive_sequence_lengths([[4, 4]])
            ret = exe.run(feed={'word': x_tensor}, fetch_list=[out])


if __name__ == "__main__":
    unittest.main()
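
As a quick sanity check on the test above (not part of the committed code): set_recursive_sequence_lengths([[4, 4]]) declares two sequences of four ids each, so with combiner='sum' the fetched result should contain one pooled 32-dimensional row per sequence. A hypothetical assertion appended to test_api would look like:

            pooled = ret[0]                    # numpy array fetched for `out` above
            assert pooled.shape == (2, 32)     # one pooled embedding row per sequence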