Unverified commit c0d31dac, authored by LiYuRio, committed by GitHub

[Fluid API]Remove multiple APIs in control_flow (#48279)

* remove lod_tensor_to_array, array_to_lod_tensor, DynamicRNN

* remove less_equal, greater_than, greater_equal, equal, not_equal
Parent f0805212
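The removed comparison layers map directly onto existing paddle.* APIs, which this change already substitutes at the remaining call sites (e.g. paddle.equal, paddle.less_equal). A minimal migration sketch, assuming Paddle 2.x dynamic-graph mode; the tensor values below are illustrative only:

import paddle

x = paddle.to_tensor([1, 3], dtype='int32')
y = paddle.to_tensor([1, 2], dtype='int32')

# Removed fluid.layers op            ->  replacement used in this change
# fluid.layers.equal(x, y)           ->  paddle.equal(x, y)
# fluid.layers.not_equal(x, y)       ->  paddle.not_equal(x, y)
# fluid.layers.less_equal(x, y)      ->  paddle.less_equal(x, y)
# fluid.layers.greater_than(x, y)    ->  paddle.greater_than(x, y)
# fluid.layers.greater_equal(x, y)   ->  paddle.greater_equal(x, y)
print(paddle.equal(x, y).numpy())       # [ True False]
print(paddle.less_equal(x, y).numpy())  # [ True False]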
......@@ -181,8 +181,6 @@ if (WITH_ASCEND_CL)
endif()
# FIXME(typhoonzero): operator deps may not needed.
# op_library(lod_tensor_to_array_op DEPS lod_rank_table_op)
# op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
# op_library(unsqueeze_op DEPS reshape_op)
# op_library(squeeze_op DEPS reshape_op)
# op_library(flatten_op DEPS reshape_op)
......
......@@ -58,16 +58,10 @@ __all__ = [
'array_write',
'create_array',
'less_than',
'less_equal',
'greater_than',
'greater_equal',
'equal',
'not_equal',
'array_read',
'array_length',
'cond',
'IfElse',
'DynamicRNN',
'StaticRNN',
'reorder_lod_tensor_by_rank',
'Print',
......@@ -1615,118 +1609,6 @@ def max_sequence_len(rank_table):
return res
def lod_tensor_to_array(x, table):
"""
Convert a LoDTensor to a LoDTensorArray.
This function splits a LoDTensor into a LoDTensorArray according to its LoD
information. LoDTensorArray is an alias of C++ std::vector<LoDTensor> in
PaddlePaddle. The LoDTensorArray generated by this function can be further read
or written by the `read_from_array()` and `write_to_array()` operators. However,
this function is generally an internal component of PaddlePaddle `DynamicRNN`.
Users should not use it directly.
Args:
x (Variable|list): The LoDTensor to be converted to a LoDTensorArray.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order. It is generally generated
by `layers.lod_rank_table()` API.
Returns:
Variable: The LoDTensorArray that has been converted from the input tensor.
Examples:
.. code-block:: python
import paddle.fluid as fluid
x = fluid.layers.data(name='x', shape=[10])
table = fluid.layers.lod_rank_table(x, level=0)
array = fluid.layers.lod_tensor_to_array(x, table)
"""
check_type(x, 'x', (Variable, list), 'lod_tensor_to_array')
if isinstance(x, (list)):
for i, input_x in enumerate(x):
check_type(
input_x,
'input[' + str(i) + ']',
Variable,
'lod_tensor_to_array',
)
check_type(table, 'table', (Variable, list), 'lod_tensor_to_array')
if isinstance(table, (list)):
for i, table_x in enumerate(table):
check_type(
table_x,
'table[' + str(i) + ']',
Variable,
'lod_tensor_to_array',
)
helper = LayerHelper("lod_tensor_to_array", **locals())
array = helper.create_variable(
name=unique_name.generate("lod_tensor_to_array"),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=x.dtype,
)
helper.append_op(
type='lod_tensor_to_array',
inputs={'X': x, 'RankTable': table},
outputs={'Out': array},
)
return array
def array_to_lod_tensor(x, table):
"""Convert a LoD_Tensor_Aarry to an LoDTensor.
Args:
x (Variable|list): The lod tensor array to be converted to a tensor.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order.
Returns:
Variable: The variable of type tensor that has been converted
from an array.
Examples:
.. code-block:: python
import paddle.fluid as fluid
x = fluid.layers.data(name='x', shape=[10])
table = fluid.layers.lod_rank_table(x, level=0)
array = fluid.layers.lod_tensor_to_array(x, table)
lod_tensor = fluid.layers.array_to_lod_tensor(array, table)
"""
check_type(x, 'x', (Variable, list), 'array_to_lod_tensor')
if isinstance(x, (list)):
for i, input_x in enumerate(x):
check_type(
input_x,
'input[' + str(i) + ']',
Variable,
'array_to_lod_tensor',
)
check_type(table, 'table', (Variable, list), 'array_to_lod_tensor')
if isinstance(table, (list)):
for i, table_x in enumerate(table):
check_type(
table_x,
'table[' + str(i) + ']',
Variable,
'array_to_lod_tensor',
)
helper = LayerHelper("array_to_lod_tensor", **locals())
tmp = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="array_to_lod_tensor",
inputs={'X': x, 'RankTable': table},
outputs={'Out': tmp},
)
return tmp
def increment(x, value=1.0, in_place=True):
"""
The OP is usually used for control flow to increment the data of :attr:`x` by an amount :attr:`value`.
......@@ -1984,277 +1866,6 @@ def less_than(x, y, force_cpu=None, cond=None, name=None):
return cond
@templatedoc()
def less_equal(x, y, cond=None, name=None):
"""
:alias_main: paddle.less_equal
:alias: paddle.less_equal,paddle.tensor.less_equal,paddle.tensor.logic.less_equal
:old_api: paddle.fluid.layers.less_equal
This OP returns the truth value of :math:`x <= y` elementwise, which is equivalent to the overloaded operator `<=`.
Args:
x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *less_equal*.
If cond is None, a new Variable will be created to store the result.
name(str, optional): The default value is None. Normally there is no need for
the user to set this property. For more information, please refer to :ref:`api_guide_Name`.
Returns:
Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x`.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
label = fluid.layers.assign(np.array([1, 3], dtype='int32'))
limit = fluid.layers.assign(np.array([1, 2], dtype='int32'))
out = fluid.layers.less_equal(x=label, y=limit) #out=[True, False]
out1 = label <= limit #out1=[True, False]
"""
check_variable_and_dtype(
x, "x", ["float32", "float64", "int32", "int64"], "less_equal"
)
check_variable_and_dtype(
y, "y", ["float32", "float64", "int32", "int64"], "less_equal"
)
if cond is not None:
check_type(cond, "cond", Variable, "less_equal")
helper = LayerHelper("less_equal", **locals())
if cond is None:
cond = helper.create_variable_for_type_inference(dtype='bool')
cond.stop_gradient = True
attrs = dict()
helper.append_op(
type='less_equal',
inputs={'X': [x], 'Y': [y]},
outputs={'Out': [cond]},
attrs=attrs,
)
return cond
@templatedoc()
def greater_than(x, y, cond=None, name=None):
"""
:alias_main: paddle.greater_than
:alias: paddle.greater_than,paddle.tensor.greater_than,paddle.tensor.logic.greater_than
:old_api: paddle.fluid.layers.greater_than
This OP returns the truth value of :math:`x > y` elementwise, which is equivalent to the overloaded operator `>`.
Args:
x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *greater_than*.
If cond is None, a new Variable will be created to store the result.
name(str, optional): The default value is None. Normally there is no need for
the user to set this property. For more information, please refer to :ref:`api_guide_Name`.
Returns:
Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
label = fluid.layers.assign(np.array([2, 3], dtype='int32'))
limit = fluid.layers.assign(np.array([3, 2], dtype='int32'))
out = fluid.layers.greater_than(x=label, y=limit) #out=[False, True]
out1 = label > limit #out1=[False, True]
"""
check_variable_and_dtype(
x, "x", ["float32", "float64", "int32", "int64"], "greater_than"
)
check_variable_and_dtype(
y, "y", ["float32", "float64", "int32", "int64"], "greater_than"
)
if cond is not None:
check_type(cond, "cond", Variable, "greater_than")
helper = LayerHelper("greater_than", **locals())
if cond is None:
cond = helper.create_variable_for_type_inference(dtype='bool')
cond.stop_gradient = True
attrs = dict()
if in_dygraph_mode():
return _C_ops.greater_than(x, y)
else:
helper.append_op(
type='greater_than',
inputs={'X': [x], 'Y': [y]},
outputs={'Out': [cond]},
attrs=attrs,
)
return cond
@templatedoc()
def greater_equal(x, y, cond=None, name=None):
"""
:alias_main: paddle.greater_equal
:alias: paddle.greater_equal,paddle.tensor.greater_equal,paddle.tensor.logic.greater_equal
:old_api: paddle.fluid.layers.greater_equal
This OP returns the truth value of :math:`x >= y` elementwise, which is equivalent to the overloaded operator `>=`.
Args:
x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *greater_equal*.
If cond is None, a new Variable will be created to store the result.
name(str, optional): The default value is None. Normally there is no need for
the user to set this property. For more information, please refer to :ref:`api_guide_Name`.
Returns:
Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x`.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
label = fluid.layers.assign(np.array([2, 2], dtype='int32'))
limit = fluid.layers.assign(np.array([2, 3], dtype='int32'))
out = fluid.layers.greater_equal(x=label, y=limit) #out=[True, False]
out_1 = label >= limit #out_1=[True, False]
"""
check_variable_and_dtype(
x, "x", ["float32", "float64", "int32", "int64"], "greater_equal"
)
check_variable_and_dtype(
y, "y", ["float32", "float64", "int32", "int64"], "greater_equal"
)
if cond is not None:
check_type(cond, "cond", Variable, "greater_equal")
helper = LayerHelper("greater_equal", **locals())
if cond is None:
cond = helper.create_variable_for_type_inference(dtype='bool')
cond.stop_gradient = True
attrs = dict()
helper.append_op(
type='greater_equal',
inputs={'X': [x], 'Y': [y]},
outputs={'Out': [cond]},
attrs=attrs,
)
return cond
def equal(x, y, cond=None, name=None):
"""
This layer returns the truth value of :math:`x == y` elementwise.
Args:
x(Variable): Tensor, data type is float32, float64, int32, int64.
y(Variable): Tensor, data type is float32, float64, int32, int64.
cond(Variable, optional): Optional output which can be any created
Variable that meets the requirements to store the result of *equal*.
If cond is None, a new Variable will be created to store the result.
name(str, optional): The default value is None. Normally there is no need for
the user to set this property. For more information, please refer to :ref:`api_guide_Name`.
Returns:
Variable: output Tensor, its shape is the same as the input Tensor's,
and the data type is bool.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
out_cond =fluid.data(name="input1", shape=[2], dtype='bool')
label = fluid.layers.assign(np.array([3, 3], dtype="int32"))
limit = fluid.layers.assign(np.array([3, 2], dtype="int32"))
label_cond = fluid.layers.assign(np.array([1, 2], dtype="int32"))
out1 = fluid.layers.equal(x=label, y=limit) #out1=[True, False]
out2 = fluid.layers.equal(x=label_cond, y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True]
"""
if in_dygraph_mode():
return _C_ops.equal(x, y)
check_variable_and_dtype(
x, "x", ["float32", "float64", "int32", "int64"], "equal"
)
check_variable_and_dtype(
y, "y", ["float32", "float64", "int32", "int64"], "equal"
)
if cond is not None:
check_type(cond, "cond", Variable, "equal")
helper = LayerHelper("equal", **locals())
if cond is None:
cond = helper.create_variable_for_type_inference(dtype='bool')
cond.stop_gradient = True
helper.append_op(
type='equal', inputs={'X': [x], 'Y': [y]}, outputs={'Out': [cond]}
)
return cond
def not_equal(x, y, cond=None, name=None):
"""
:alias_main: paddle.not_equal
:alias: paddle.not_equal,paddle.tensor.not_equal,paddle.tensor.logic.not_equal
:old_api: paddle.fluid.layers.not_equal
This OP returns the truth value of :math:`x != y` elementwise, which is equivalent to the overloaded operator `!=`.
Args:
x(Variable): First input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
y(Variable): Second input to compare which is N-D tensor. The input data type should be float32, float64, int32, int64.
cond(Variable, optional): Optional output which can be any created Variable that meets the requirements to store the result of *not_equal*.
If cond is None, a new Variable will be created to store the result.
name(str, optional): The default value is None. Normally there is no need for
the user to set this property. For more information, please refer to :ref:`api_guide_Name`.
Returns:
Variable, the output data type is bool: The tensor variable storing the output, the output shape is same as input :attr:`x`.
Examples:
.. code-block:: python
import paddle.fluid as fluid
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
limit = fluid.layers.fill_constant(shape=[1], value=1, dtype='int64')
out = fluid.layers.not_equal(x=label, y=limit)
"""
check_variable_and_dtype(
x, "x", ["float32", "float64", "int32", "int64"], "not_equal"
)
check_variable_and_dtype(
y, "y", ["float32", "float64", "int32", "int64"], "not_equal"
)
if cond is not None:
check_type(cond, "cond", Variable, "not_equal")
helper = LayerHelper("not_equal", **locals())
if cond is None:
cond = helper.create_variable_for_type_inference(dtype='bool')
cond.stop_gradient = True
helper.append_op(
type='not_equal', inputs={'X': [x], 'Y': [y]}, outputs={'Out': [cond]}
)
return cond
def array_read(array, i):
"""
This OP is used to read data at the specified position from the input array
......@@ -3541,716 +3152,6 @@ class IfElse:
return rlist
class DynamicRNN:
"""
:api_attr: Static Graph
**Note: the input of this class should be a LoDTensor which holds the
information of variable-length sequences. If the input is a fixed-length Tensor,
please use StaticRNN (fluid.layers.** :ref:`api_fluid_layers_StaticRNN` **) for
better performance.**
DynamicRNN can process a minibatch of variable-length sequences.
The length of each sample can be different and is recorded in LoD.
In DynamicRNN, an input sequence will be unfolded into time steps and users
can define how to process each time step in :code:`block()` .
The total number of time steps is determined by the longest sequence.
DynamicRNN will not pad all sequences to the same length; instead, it will
sort the sequences internally by sequence length in descending order.
The input batch shrinks at each time step, because only sequences whose
length is larger than the current time step participate in the remaining calculation.
After defining :code:`drnn = DynamicRNN()`, users can call :code:`drnn()`
to obtain the result sequences. The result is a LoDTensor obtained by merging the
outputs of all time steps. When RNN's input sequence x meets :code:`x.lod_level == 1`,
the output LoDTensor will have the same LoD as x. The result of :code:`drnn()`
includes RNN's outputs of all time steps; users can call
:ref:`api_fluid_layers_sequence_last_step` to extract the data of the last time step.
Warning:
Currently it is not supported to set :code:`is_sparse = True` of any
layers defined within DynamicRNN's :code:`block` function.
Args:
name (str, optional): The default value is None. Normally there is no
need for the user to set this property. For more information,
please refer to :ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
encoder_proj = fluid.data(name='encoder_proj', shape=[None, 32], dtype='float32', lod_level=1)
decoder_boot = fluid.data(name='boot', shape=[None, 10], dtype='float32')
drnn = fluid.layers.DynamicRNN()
with drnn.block():
# Set sentence as RNN's input, each time step processes a word from the sentence
current_word = drnn.step_input(sentence)
# Set encode_proj as RNN's static input
encoder_word = drnn.static_input(encoder_proj)
# Initialize memory with boot_memory, which needs to be reordered according to RNN's input sequences
memory = drnn.memory(init=decoder_boot, need_reorder=True)
fc_1 = fluid.layers.fc(input=encoder_word, size=30)
fc_2 = fluid.layers.fc(input=current_word, size=30)
decoder_inputs = fc_1 + fc_2
hidden, _, _ = fluid.layers.gru_unit(input=decoder_inputs, hidden=memory, size=30)
# Update memory with hidden
drnn.update_memory(ex_mem=memory, new_mem=hidden)
out = fluid.layers.fc(input=hidden, size=10, bias_attr=True, act='softmax')
# Set hidden and out as RNN's outputs
drnn.output(hidden, out)
# Get RNN's result
hidden, out = drnn()
# Get RNN's result of the last time step
last = fluid.layers.sequence_last_step(out)
"""
BEFORE_RNN = 0
IN_RNN = 1
AFTER_RNN = 2
def __init__(self, name=None):
self.helper = LayerHelper('dynamic_rnn', name=name)
self.status = DynamicRNN.BEFORE_RNN
self.lod_rank_table = None
self.max_seq_len = None
self.step_idx = None
self.zero_idx = None
self.mem_dict = dict()
self.output_array = []
self.outputs = []
self.cond = self.helper.create_variable_for_type_inference(dtype='bool')
self.cond.stop_gradient = False
self.while_op = While(self.cond)
self.input_array = []
self.mem_link = []
def step_input(self, x, level=0):
r"""
This function is used to set sequence x as DynamicRNN's input.
The maximum sequence length in x determines the number of time steps
the RNN unit will be executed. DynamicRNN can take multiple inputs.
When all inputs' :code:`lod_level` are 1, all inputs should hold the
same LoD. When :code:`x.lod_level >= 2` , the input sequence will be
unfolded along the specified level, and the slice of each time step is a
LoDTensor whose lod_level is :code:`x.lod_level - level - 1` .
In this case, the specified LoD level of multiple inputs should be the same.
- Case 1:
.. code-block:: text
# input, where Si is slice data of shape [1, N]
level = 0
x.lod = [[2, 1, 3]]
x.shape = [6, N]
x.data = [[S0],
[S0],
[S1],
[S2],
[S2],
[S2]]
# output
# step 0, time step data of 3 sequences
out.lod = [[]]
out.shape = [3, N]
out.data = [[S2],
[S0],
[S1]]
# step 1, time step data of 2 sequences
out.lod = [[]]
out.shape = [2, N]
out.data = [[S2],
[S0]]
# step 2, time step data of 1 sequence
out.lod = [[]]
out.shape = [1, N]
out.data = [[S2]]
Args:
x (Variable): The input LoDTensor which holds information of a
minibatch of variable-length sequences and should meet :code:`x.lod_level >= 1` .
When RNN has multiple inputs, the first dimension should match
across all inputs, but other shape components may differ.
Optional data types are: bool, float16, float32, float64, int8, int16, int32, int64, uint8.
level (int, optional): The level of lod used to split steps.
It should be in range :math:`[0, x.lod\_level)` . The default value is 0.
Returns:
Variable: The current time step in the input sequence. If there are :code:`num_sequences` \
sequences in x whose length is larger than :code:`step_idx` , the returned Variable \
will only hold the :code:`step_idx` -th time step of those `num_sequences` sequences. \
The data type is the same as input. If :code:`x.lod_level == 1` , the return value is \
a Tensor of shape :math:`\{num\_sequences, x.shape[1], ...\}` , or it will \
be a variable-length LoDTensor.
Raises:
ValueError: When :code:`step_input()` is called outside :code:`block()` .
TypeError: When x is not a Variable.
Examples:
.. code-block:: python
import paddle.fluid as fluid
sentence = fluid.data(name='sentence', shape=[None, 1], dtype='int64', lod_level=1)
embedding = fluid.layers.embedding(input=sentence, size=[65536, 32], is_sparse=True)
drnn = fluid.layers.DynamicRNN()
with drnn.block():
# Set embedding as RNN's input, each time step processes a word from the sentence
word = drnn.step_input(embedding)
# Initialize memory to a Tensor whose value is 0, shape=[batch_size, 200],
# where batch_size is the number of sequences in embedding.
memory = drnn.memory(shape=[200])
hidden = fluid.layers.fc(input=[word, memory], size=200, act='relu')
# Update memory to hidden
drnn.update_memory(ex_mem=memory, new_mem=hidden)
# Set hidden as RNN's output
drnn.output(hidden)
# Get RNN's result
rnn_output = drnn()
"""
self._assert_in_rnn_block_("step_input")
check_type(x, 'x', Variable, 'fluid.layers.DynamicRNN.step_input()')
parent_block = self._parent_block_()
if self.lod_rank_table is None:
self.lod_rank_table = parent_block.create_var(
name=unique_name.generate('lod_rank_table'),
type=core.VarDesc.VarType.LOD_RANK_TABLE,
)
self.lod_rank_table.stop_gradient = True
parent_block.append_op(
type='lod_rank_table',
inputs={"X": x},
outputs={"Out": self.lod_rank_table},
attrs={"level": level},
)
self.max_seq_len = parent_block.create_var(
name=unique_name.generate('dynamic_rnn_max_seq_len'),
dtype='int64',
)
self.max_seq_len.stop_gradient = False
parent_block.append_op(
type='max_sequence_len',
inputs={'RankTable': self.lod_rank_table},
outputs={"Out": self.max_seq_len},
)
self.cond.stop_gradient = True
parent_block.append_op(
type='less_than',
inputs={'X': self.step_idx, 'Y': self.max_seq_len},
outputs={'Out': self.cond},
attrs={'force_cpu': True},
)
input_array = parent_block.create_var(
name=unique_name.generate('dynamic_rnn_input_array'),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=x.dtype,
)
self.input_array.append((input_array, x.dtype))
parent_block.append_op(
type='lod_tensor_to_array',
inputs={'X': x, 'RankTable': self.lod_rank_table},
outputs={'Out': input_array},
)
return array_read(array=input_array, i=self.step_idx)
def static_input(self, x):
r"""
This function is used to set x as DynamicRNN's static input. It is optional.
- Case 1, set static input with LoD
.. code-block:: text
# RNN's input is the same as the case listed in step_input
# static input, where Si is slice data of shape [1, M]
x.lod = [[3, 1, 2]]
x.shape = [6, M]
x.data = [[S0],
[S0],
[S0],
[S1],
[S2],
[S2]]
# step 0, batch data corresponding to the 3 input sequences
out.lod = [[2, 3, 1]]
out.shape = [6, M]
out.data = [[S2],
[S2],
[S0],
[S0],
[S0],
[S1]]
# step 1, batch data corresponding to the 2 input sequences
out.lod = [[2, 3]]
out.shape = [5, M]
out.data = [[S2],
[S2],
[S0],
[S0],
[S0]]
# step 2, batch data corresponding to the 1 input sequence
out.lod = [[2]]
out.shape = [2, M]
out.data = [[S2],
[S2]]
- Case 2, set static input without LoD
.. code-block:: text
# RNN's input is the same as the case listed in step_input
# static input, where Si is slice data of shape [1, M]
x.lod = [[]]
x.shape = [3, M]
x.data = [[S0],
[S1],
[S2]]
# step 0, batch data corresponding to the 3 input sequences
out.lod = [[]]
out.shape = [3, M]
out.data = [[S2],
[S0],
[S1]]
# step 1, batch data corresponding to the 2 input sequences
out.lod = [[]]
out.shape = [2, M]
out.data = [[S2],
[S0]]
# step 2, batch data corresponding to the 1 input sequence
out.lod = [[]]
out.shape = [1, M]
out.data = [[S2]]
Args:
x (Variable): The static input LoDTensor which should hold the same number of sequences
as RNN's input (the input LoDTensor set by :code:`step_input()` ). If the LoD is None,
the input x will be treated as a minibatch with :code:`x.shape[0]` sequences of length 1.
Optional data types are: bool, float16, float32, float64, int8, int16, int32, int64, uint8.
Returns:
Variable: The input LoDTensor after being sorted and shrunk. If there are :code:`num_sequences` \
sequences in RNN's input LoDTensor whose length is larger than :code:`step_idx` , \
the static input Tensor will be sorted to the same order as RNN's input and \
will only retain data corresponding to those :code:`num_sequences` sequences. \
The data type is the same as input. If :code:`x.lod == None` , the return value is \
a Tensor of shape :math:`\{num\_sequences, x.shape[1], ...\}` , or it will \
be a variable-length LoDTensor.
Raises:
ValueError: When :code:`static_input()` is called outside :code:`block()` .
TypeError: When x is not a Variable.
RuntimeError: When :code:`static_input()` is called before :code:`step_input()` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
encoder_proj = fluid.data(name='encoder_proj', shape=[None, 32], dtype='float32', lod_level=1)
decoder_boot = fluid.data(name='boot', shape=[None, 10], dtype='float32')
drnn = fluid.layers.DynamicRNN()
with drnn.block():
# Set sentence as RNN's input, each time step processes a word from the sentence
current_word = drnn.step_input(sentence)
# Set encode_proj as RNN's static input
encoder_word = drnn.static_input(encoder_proj)
# Initialize memory with boot_memory, which needs to be reordered according to RNN's input sequences
memory = drnn.memory(init=decoder_boot, need_reorder=True)
fc_1 = fluid.layers.fc(input=encoder_word, size=30)
fc_2 = fluid.layers.fc(input=current_word, size=30)
decoder_inputs = fc_1 + fc_2
hidden, _, _ = fluid.layers.gru_unit(input=decoder_inputs, hidden=memory, size=30)
# Update memory with hidden
drnn.update_memory(ex_mem=memory, new_mem=hidden)
out = fluid.layers.fc(input=hidden, size=10, bias_attr=True, act='softmax')
# Set out as RNN's output
drnn.output(out)
# Get RNN's result
rnn_output = drnn()
"""
self._assert_in_rnn_block_("static_input")
check_type(x, 'x', Variable, 'fluid.layers.DynamicRNN.static_input()')
if self.lod_rank_table is None:
raise RuntimeError(
"static_input() must be called after step_input()."
)
parent_block = self._parent_block_()
x_reordered = parent_block.create_var(
name=unique_name.generate("dynamic_rnn_static_input_reordered"),
type=core.VarDesc.VarType.LOD_TENSOR,
dtype=x.dtype,
)
parent_block.append_op(
type='reorder_lod_tensor_by_rank',
inputs={'X': [x], 'RankTable': [self.lod_rank_table]},
outputs={'Out': [x_reordered]},
)
return shrink_memory(x_reordered, self.step_idx, self.lod_rank_table)
@signature_safe_contextmanager
def block(self):
"""
The function is used to list the operations executed during
each time step in RNN. The operation list will be executed :code:`max_sequence_len`
times (where :code:`max_sequence_len` is the maximum length of RNN's input sequences).
Raises:
ValueError: When :code:`block()` is called multiple times.
"""
if self.status != DynamicRNN.BEFORE_RNN:
raise ValueError("rnn.block() can only be invoke once")
self.step_idx = fill_constant(
shape=[1], dtype='int64', value=0, force_cpu=True
)
self.step_idx.stop_gradient = False
self.status = DynamicRNN.IN_RNN
with self.while_op.block():
yield
increment(x=self.step_idx, value=1.0, in_place=True)
for new_mem, mem_array in self.mem_link:
array_write(x=new_mem, i=self.step_idx, array=mem_array)
less_than(
x=self.step_idx,
y=self.max_seq_len,
force_cpu=True,
cond=self.cond,
)
self.status = DynamicRNN.AFTER_RNN
for each_array in self.output_array:
self.outputs.append(
array_to_lod_tensor(x=each_array, table=self.lod_rank_table)
)
def __call__(self, *args, **kwargs):
"""
This function is used to get the output sequences of DynamicRNN.
Args:
None
Returns:
Variable or Variable list: RNN's output sequences.
Raises:
ValueError: When :code:`__call__()` is called before :code:`block()` .
"""
if self.status != DynamicRNN.AFTER_RNN:
raise ValueError(
(
"Output of the dynamic RNN can only be visited "
"outside the rnn block."
)
)
if len(self.outputs) == 1:
return self.outputs[0]
else:
return self.outputs
def memory(
self,
init=None,
shape=None,
value=0.0,
need_reorder=False,
dtype='float32',
):
r"""
Create a memory Variable for DynamicRNN to deliver data across time steps.
It can be initialized by an existing Tensor or a constant Tensor of given
dtype and shape.
Args:
init (Variable, optional): LoDTensor used to initialize the memory.
If init is not None, it should hold the same number of sequences
as RNN's input (the input LoDTensor set by :code:`step_input()` )
and the memory will be initialized to it. If init's LoD is None,
it will be treated as a minibatch with :code:`init.shape[0]` sequences
of length 1. The default value is None.
shape (list|tuple, optional): When init is None, it is used to specify
the memory's shape. Note that the shape does not include the batch_size.
If setting shape to :math:`\{D_1, D_2, ...\}` , the shape of memory Tensor
will be :math:`\{batch\_size, D_1, D_2, ...\}` , where batch_size is
determined by RNN's input sequences. The default value is None.
value (float, optional): When init is None, it is used as initialized value
of memory. The default value is 0.0.
need_reorder (bool, optional): When init is not None, it determines whether
the memory needs to be reordered like the RNN's input sequences. It should be
set to True when the initialized memory depends on the order of input samples.
The default value is False.
dtype (str|numpy.dtype, optional): When init is None, it is used to set the
data type of memory. The default value is "float32". Optional data types
are: "float32", "float64", "int32", "int64".
Returns:
Variable: The memory LoDTensor after being shrunk. If there are :code:`num_sequences` \
sequences in RNN's input LoDTensor whose length is larger than :code:`step_idx` , \
the memory Tensor also needs to be shrunk and will only retain data \
corresponding to those :code:`num_sequences` sequences.
Raises:
ValueError: When :code:`memory()` is called outside :code:`block()` .
TypeError: When init is set and is not a Variable.
ValueError: When :code:`memory()` is called before :code:`step_input()` .
Examples:
.. code-block:: python
import paddle.fluid as fluid
sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
boot_memory = fluid.data(name='boot', shape=[None, 10], dtype='float32')
drnn = fluid.layers.DynamicRNN()
with drnn.block():
# Set sentence as RNN's input, each time step processes a word from the sentence
word = drnn.step_input(sentence)
# Initialize memory with boot_memory, which needs to be reordered according to RNN's input sequences
memory = drnn.memory(init=boot_memory, need_reorder=True)
hidden = fluid.layers.fc(input=[word, memory], size=10, act='tanh')
# Update memory with hidden
drnn.update_memory(ex_mem=memory, new_mem=hidden)
# Set hidden as RNN's output
drnn.output(hidden)
# Get RNN's result
rnn_output = drnn()
Examples:
.. code-block:: python
import paddle.fluid as fluid
sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
drnn = fluid.layers.DynamicRNN()
with drnn.block():
# Set sentence as RNN's input, each time step processes a word from the sentence
word = drnn.step_input(sentence)
# Initialize memory to a Tensor whose value is 0, shape=[batch_size, 10],
# where batch_size is the number of sequences in sentence.
memory = drnn.memory(shape=[10], dtype='float32', value=0)
hidden = fluid.layers.fc(input=[word, memory], size=10, act='tanh')
# Update memory with hidden
drnn.update_memory(ex_mem=memory, new_mem=hidden)
# Set hidden as RNN's output
drnn.output(hidden)
# Get RNN's result
rnn_output = drnn()
"""
self._assert_in_rnn_block_('memory')
self._init_zero_idx_()
if shape is not None:
check_type(
shape,
'shape',
(list, tuple),
'fluid.layers.DynamicRNN.memory()',
)
if init is not None:
check_type(
init, 'init', Variable, 'fluid.layers.DynamicRNN.memory()'
)
parent_block = self._parent_block_()
init_tensor = init
if need_reorder == True:
if self.lod_rank_table is None:
raise ValueError(
'If need_reorder is set to True, make sure step_input is '
'invoked before '
'memory(init=init, need_reorder=True, ...).'
)
init_reordered = parent_block.create_var(
name=unique_name.generate('dynamic_rnn_mem_init_reordered'),
type=core.VarDesc.VarType.LOD_TENSOR,
dtype=init.dtype,
)
parent_block.append_op(
type='reorder_lod_tensor_by_rank',
inputs={
'X': [init_tensor],
'RankTable': [self.lod_rank_table],
},
outputs={'Out': [init_reordered]},
)
init_tensor = init_reordered
mem_array = parent_block.create_var(
name=unique_name.generate('dynamic_rnn_mem_array'),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=init.dtype,
)
parent_block.append_op(
type='write_to_array',
inputs={'X': init_tensor, 'I': self.zero_idx},
outputs={'Out': mem_array},
)
retv = array_read(array=mem_array, i=self.step_idx)
retv = shrink_memory(
x=retv, i=self.step_idx, table=self.lod_rank_table
)
self.mem_dict[retv.name] = mem_array
return retv
else:
if len(self.input_array) == 0:
raise ValueError(
"step_input should be invoked before memory(shape=..., value=...)"
)
parent_block = self._parent_block_()
init = parent_block.create_var(
name=unique_name.generate('mem_init'), dtype=dtype
)
arr, dtype = self.input_array[0]
in0 = parent_block.create_var(
name=unique_name.generate('in0'), dtype=dtype
)
parent_block.append_op(
type='read_from_array',
inputs={'X': [arr], 'I': [self.zero_idx]},
outputs={'Out': [in0]},
)
parent_block.append_op(
type='fill_constant_batch_size_like',
inputs={'Input': [in0]},
outputs={'Out': [init]},
attrs={
'shape': [-1] + shape,
'value': float(value),
'dtype': init.dtype,
},
)
return self.memory(init=init)
def update_memory(self, ex_mem, new_mem):
"""
Update the memory which needs to be delivered across time steps.
Args:
ex_mem (Variable): The memory data of previous time step.
new_mem (Variable): The new memory data produced in current time step.
The shape and data type of ex_mem and new_mem should be the same.
Returns:
None
Raises:
ValueError: When :code:`update_memory()` is called outside :code:`block()` .
TypeError: When :code:`ex_mem` or :code:`new_mem` is not a Variable.
ValueError: When :code:`ex_mem` is not defined by :code:`memory()` .
ValueError: When :code:`update_memory()` is called before :code:`step_input()` .
"""
self._assert_in_rnn_block_('update_memory')
check_type(
ex_mem,
'ex_mem',
Variable,
'fluid.layers.DynamicRNN.update_memory()',
)
check_type(
new_mem,
'new_mem',
Variable,
'fluid.layers.DynamicRNN.update_memory()',
)
mem_array = self.mem_dict.get(ex_mem.name, None)
if mem_array is None:
raise ValueError("Please invoke memory before update_memory")
if self.lod_rank_table is None:
raise ValueError("Please invoke step_input before update_memory")
self.mem_link.append((new_mem, mem_array))
def output(self, *outputs):
"""
This function is used to set :code:`outputs` as RNN's output.
Args:
*outputs (Variable ...): The output Tensor. DynamicRNN can mark multiple
Variables as its output.
Returns:
None
Raises:
ValueError: When :code:`output()` is called outside :code:`block()` .
"""
self._assert_in_rnn_block_('output')
parent_block = self._parent_block_()
for each in outputs:
check_type(
each, "outputs", Variable, "fluid.layers.DynamicRNN.output"
)
outside_array = parent_block.create_var(
name=unique_name.generate_with_ignorable_key(
"_".join([self.helper.name, "output_array", each.name])
),
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype=each.dtype,
)
array_write(x=each, i=self.step_idx, array=outside_array)
self.output_array.append(outside_array)
def _init_zero_idx_(self):
if self.zero_idx is None:
parent_block = self._parent_block_()
self.zero_idx = parent_block.create_var(
name=unique_name.generate('zero_idx'), dtype='int64'
)
parent_block.append_op(
type='fill_constant',
inputs={},
outputs={'Out': [self.zero_idx]},
attrs={
'shape': [1],
'dtype': self.zero_idx.dtype,
'value': float(0),
'force_cpu': True,
},
)
def _parent_block_(self):
prog = self.helper.main_program
parent_idx = prog.current_block().parent_idx
assert parent_idx >= 0
parent_block = prog.block(parent_idx)
return parent_block
def _assert_in_rnn_block_(self, method):
if self.status != DynamicRNN.IN_RNN:
raise ValueError(
"{0} can only be invoked inside rnn block.".format(method)
)
def switch_case(branch_index, branch_fns, default=None, name=None):
'''
:api_attr: Static Graph
......@@ -4413,7 +3314,7 @@ def switch_case(branch_index, branch_fns, default=None, name=None):
pred_fn_pairs = []
for index, fn in branch_fns:
new_index = fill_constant(shape=[1], dtype="int64", value=index)
pred = equal(branch_index, new_index)
pred = paddle.equal(branch_index, new_index)
pred_fn_pairs.append((pred, fn))
return pred_fn_pairs, default
......
......@@ -1337,7 +1337,7 @@ class BeamSearchDecoder(Decoder):
)
next_finished = paddle.logical_or(
next_finished,
control_flow.equal(token_indices, self.end_token_tensor),
paddle.equal(token_indices, self.end_token_tensor),
)
beam_search_output = self.OutputWrapper(
......@@ -1722,7 +1722,7 @@ def _dynamic_decode_declarative(
if max_step_num is not None:
paddle.logical_and(
paddle.logical_not(nn.reduce_all(global_finished)),
control_flow.less_equal(step_idx, max_step_num),
paddle.less_equal(step_idx, max_step_num),
cond,
)
else:
......@@ -2013,7 +2013,7 @@ class TrainingHelper(DecodeHelper):
variable[s], and the tensor's shape is `[batch_size, ...]`. \
`initial_finished` is a bool tensor with shape `[batch_size]`.
"""
init_finished = control_flow.equal(
init_finished = paddle.equal(
self.sequence_length,
tensor.fill_constant(
shape=[1], dtype=self.sequence_length.dtype, value=0
......@@ -2084,7 +2084,7 @@ class TrainingHelper(DecodeHelper):
if self.sequence_length.dtype != time.dtype:
self.sequence_length = tensor.cast(self.sequence_length, time.dtype)
next_time = time + 1
finished = control_flow.less_equal(self.sequence_length, next_time)
finished = paddle.less_equal(self.sequence_length, next_time)
def _slice(x): # TODO: use Variable.__getitem__
axes = [0 if self.time_major else 1]
......@@ -2227,7 +2227,7 @@ class GreedyEmbeddingHelper(DecodeHelper):
argument `states`. `finished` is a `bool` Tensor with \
shape `[batch_size]`.
"""
finished = control_flow.equal(sample_ids, self.end_token)
finished = paddle.equal(sample_ids, self.end_token)
next_inputs = self.embedding_fn(sample_ids)
return finished, next_inputs, states
......
......@@ -13,6 +13,4 @@ set_tests_properties(test_word2vec_book PROPERTIES TIMEOUT 120)
set_tests_properties(test_recognize_digits PROPERTIES TIMEOUT 120)
set_tests_properties(test_image_classification PROPERTIES TIMEOUT 200)
set_tests_properties(test_label_semantic_roles PROPERTIES TIMEOUT 240)
set_tests_properties(test_machine_translation PROPERTIES TIMEOUT 120)
set_tests_properties(test_rnn_encoder_decoder PROPERTIES TIMEOUT 120)
set_tests_properties(test_fit_a_line PROPERTIES TIMEOUT 120)
......@@ -53,52 +53,6 @@ def convolution_net(
return avg_cost, accuracy, prediction
def dyn_rnn_lstm(
data, label, input_dim, class_dim=2, emb_dim=32, lstm_size=128
):
emb = fluid.layers.embedding(
input=data, size=[input_dim, emb_dim], is_sparse=True
)
sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
rnn = fluid.layers.DynamicRNN()
with rnn.block():
word = rnn.step_input(sentence)
prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
def gate_common(ipt, hidden, size):
gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
return gate0 + gate1
forget_gate = paddle.nn.functional.sigmoid(
x=gate_common(word, prev_hidden, lstm_size)
)
input_gate = paddle.nn.functional.sigmoid(
x=gate_common(word, prev_hidden, lstm_size)
)
output_gate = paddle.nn.functional.sigmoid(
x=gate_common(word, prev_hidden, lstm_size)
)
cell_gate = paddle.nn.functional.sigmoid(
x=gate_common(word, prev_hidden, lstm_size)
)
cell = forget_gate * prev_cell + input_gate * cell_gate
hidden = output_gate * paddle.tanh(x=cell)
rnn.update_memory(prev_cell, cell)
rnn.update_memory(prev_hidden, hidden)
rnn.output(hidden)
last = fluid.layers.sequence_last_step(rnn())
prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = paddle.mean(cost)
accuracy = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, accuracy, prediction
def stacked_lstm_net(
data, label, input_dim, class_dim=2, emb_dim=128, hid_dim=512, stacked_num=3
):
......@@ -376,25 +330,6 @@ class TestUnderstandSentiment(unittest.TestCase):
parallel=True,
)
@unittest.skip(reason='make CI faster')
def test_dynrnn_lstm_gpu(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=dyn_rnn_lstm,
use_cuda=True,
parallel=False,
)
def test_dynrnn_lstm_gpu_parallel(self):
with self.new_program_scope():
main(
self.word_dict,
net_method=dyn_rnn_lstm,
use_cuda=True,
parallel=True,
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
import paddle.fluid.layers as pd
from paddle.fluid.executor import Executor
paddle.enable_static()
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
hidden_dim = 32
word_dim = 16
batch_size = 2
max_length = 8
topk_size = 50
trg_dic_size = 10000
beam_size = 2
decoder_size = hidden_dim
def encoder(is_sparse):
# encoder
src_word_id = pd.data(
name="src_word_id", shape=[1], dtype='int64', lod_level=1
)
src_embedding = pd.embedding(
input=src_word_id,
size=[dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(name='vemb'),
)
fc1 = pd.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
lstm_hidden0, lstm_0 = pd.dynamic_lstm(input=fc1, size=hidden_dim * 4)
encoder_out = pd.sequence_last_step(input=lstm_hidden0)
return encoder_out
def decoder_train(context, is_sparse):
# decoder
trg_language_word = pd.data(
name="target_language_word", shape=[1], dtype='int64', lod_level=1
)
trg_embedding = pd.embedding(
input=trg_language_word,
size=[dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(name='vemb'),
)
rnn = pd.DynamicRNN()
with rnn.block():
current_word = rnn.step_input(trg_embedding)
pre_state = rnn.memory(init=context)
current_state = pd.fc(
input=[current_word, pre_state], size=decoder_size, act='tanh'
)
current_score = pd.fc(
input=current_state, size=target_dict_dim, act='softmax'
)
rnn.update_memory(pre_state, current_state)
rnn.output(current_score)
return rnn()
def decoder_decode(context, is_sparse):
init_state = context
array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)
# fill the first element with init_state
state_array = pd.create_array('float32')
pd.array_write(init_state, array=state_array, i=counter)
# ids, scores as memory
ids_array = pd.create_array('int64')
scores_array = pd.create_array('float32')
init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
init_scores = pd.data(
name="init_scores", shape=[1], dtype="float32", lod_level=2
)
pd.array_write(init_ids, array=ids_array, i=counter)
pd.array_write(init_scores, array=scores_array, i=counter)
cond = pd.less_than(x=counter, y=array_len)
while_op = pd.While(cond=cond)
with while_op.block():
pre_ids = pd.array_read(array=ids_array, i=counter)
pre_state = pd.array_read(array=state_array, i=counter)
pre_score = pd.array_read(array=scores_array, i=counter)
# expand the recursive_sequence_lengths of pre_state to be the same as pre_score
pre_state_expanded = pd.sequence_expand(pre_state, pre_score)
pre_ids_emb = pd.embedding(
input=pre_ids,
size=[dict_size, word_dim],
dtype='float32',
is_sparse=is_sparse,
)
# use rnn unit to update rnn
current_state = pd.fc(
input=[pre_state_expanded, pre_ids_emb],
size=decoder_size,
act='tanh',
)
current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
# use score to do beam search
current_score = pd.fc(
input=current_state_with_lod, size=target_dict_dim, act='softmax'
)
topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
# calculate accumulated scores after topk to reduce computation cost
accu_scores = pd.elementwise_add(
x=pd.log(topk_scores),
y=paddle.reshape(pre_score, shape=[-1]),
axis=0,
)
selected_ids, selected_scores = pd.beam_search(
pre_ids,
pre_score,
topk_indices,
accu_scores,
beam_size,
end_id=10,
level=0,
)
pd.increment(x=counter, value=1, in_place=True)
# update the memories
pd.array_write(current_state, array=state_array, i=counter)
pd.array_write(selected_ids, array=ids_array, i=counter)
pd.array_write(selected_scores, array=scores_array, i=counter)
# update the break condition: up to the max length or all candidates of
# source sentences have ended.
length_cond = pd.less_than(x=counter, y=array_len)
finish_cond = paddle.logical_not(pd.is_empty(x=selected_ids))
paddle.logical_and(x=length_cond, y=finish_cond, out=cond)
translation_ids, translation_scores = pd.beam_search_decode(
ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10
)
# return init_ids, init_scores
return translation_ids, translation_scores
def train_main(use_cuda, is_sparse, is_local=True):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
context = encoder(is_sparse)
rnn_out = decoder_train(context, is_sparse)
label = pd.data(
name="target_language_next_word", shape=[1], dtype='int64', lod_level=1
)
cost = pd.cross_entropy(input=rnn_out, label=label)
avg_cost = pd.mean(cost)
optimizer = fluid.optimizer.Adagrad(
learning_rate=1e-4,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.1
),
)
optimizer.minimize(avg_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000
),
batch_size=batch_size,
)
feed_order = [
'src_word_id',
'target_language_word',
'target_language_next_word',
]
exe = Executor(place)
def train_loop(main_program):
exe.run(framework.default_startup_program())
feed_list = [
main_program.global_block().var(var_name) for var_name in feed_order
]
feeder = fluid.DataFeeder(feed_list, place)
batch_id = 0
for pass_id in range(1):
for data in train_data():
outs = exe.run(
main_program, feed=feeder.feed(data), fetch_list=[avg_cost]
)
avg_cost_val = np.array(outs[0])
print(
'pass_id='
+ str(pass_id)
+ ' batch='
+ str(batch_id)
+ " avg_cost="
+ str(avg_cost_val)
)
if batch_id > 3:
break
batch_id += 1
if is_local:
train_loop(framework.default_main_program())
else:
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip...
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist) # ip:port,ip:port...
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("POD_IP") + ":" + port
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
t = fluid.DistributeTranspiler()
t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(
current_endpoint, pserver_prog
)
exe.run(pserver_startup)
exe.run(pserver_prog)
elif training_role == "TRAINER":
train_loop(t.get_trainer_program())
def decode_main(use_cuda, is_sparse):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
context = encoder(is_sparse)
translation_ids, translation_scores = decoder_decode(context, is_sparse)
exe = Executor(place)
exe.run(framework.default_startup_program())
init_ids_data = np.array([1 for _ in range(batch_size)], dtype='int64')
init_scores_data = np.array(
[1.0 for _ in range(batch_size)], dtype='float32'
)
init_ids_data = init_ids_data.reshape((batch_size, 1))
init_scores_data = init_scores_data.reshape((batch_size, 1))
init_recursive_seq_lens = [1] * batch_size
init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens]
init_ids = fluid.create_lod_tensor(
init_ids_data, init_recursive_seq_lens, place
)
init_scores = fluid.create_lod_tensor(
init_scores_data, init_recursive_seq_lens, place
)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000
),
batch_size=batch_size,
)
feed_order = ['src_word_id']
feed_list = [
framework.default_main_program().global_block().var(var_name)
for var_name in feed_order
]
feeder = fluid.DataFeeder(feed_list, place)
for data in train_data():
feed_dict = feeder.feed([[x[0]] for x in data])
feed_dict['init_ids'] = init_ids
feed_dict['init_scores'] = init_scores
result_ids, result_scores = exe.run(
framework.default_main_program(),
feed=feed_dict,
fetch_list=[translation_ids, translation_scores],
return_numpy=False,
)
print(result_ids.recursive_sequence_lengths())
break
class TestMachineTranslation(unittest.TestCase):
pass
@contextlib.contextmanager
def scope_prog_guard():
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
def inject_test_train(use_cuda, is_sparse):
f_name = 'test_{0}_{1}_train'.format(
'cuda' if use_cuda else 'cpu', 'sparse' if is_sparse else 'dense'
)
def f(*args):
with scope_prog_guard():
train_main(use_cuda, is_sparse)
setattr(TestMachineTranslation, f_name, f)
def inject_test_decode(use_cuda, is_sparse, decorator=None):
f_name = 'test_{0}_{1}_decode'.format(
'cuda' if use_cuda else 'cpu', 'sparse' if is_sparse else 'dense'
)
def f(*args):
with scope_prog_guard():
decode_main(use_cuda, is_sparse)
if decorator is not None:
f = decorator(f)
setattr(TestMachineTranslation, f_name, f)
for _use_cuda_ in (False, True):
for _is_sparse_ in (False, True):
inject_test_train(_use_cuda_, _is_sparse_)
for _use_cuda_ in (False, True):
for _is_sparse_ in (False, True):
_decorator_ = None
if _use_cuda_:
_decorator_ = unittest.skip(
reason='Beam Search does not support CUDA!'
)
inject_test_decode(
is_sparse=_is_sparse_, use_cuda=_use_cuda_, decorator=_decorator_
)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import math
import os
import sys
import tempfile
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from paddle.fluid.executor import Executor
paddle.enable_static()
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
hidden_dim = 32
embedding_dim = 16
batch_size = 10
max_length = 50
topk_size = 50
encoder_size = decoder_size = hidden_dim
IS_SPARSE = True
USE_PEEPHOLES = False
def bi_lstm_encoder(input_seq, hidden_size):
input_forward_proj = fluid.layers.fc(
input=input_seq, size=hidden_size * 4, bias_attr=True
)
forward, _ = fluid.layers.dynamic_lstm(
input=input_forward_proj,
size=hidden_size * 4,
use_peepholes=USE_PEEPHOLES,
)
input_backward_proj = fluid.layers.fc(
input=input_seq, size=hidden_size * 4, bias_attr=True
)
backward, _ = fluid.layers.dynamic_lstm(
input=input_backward_proj,
size=hidden_size * 4,
is_reverse=True,
use_peepholes=USE_PEEPHOLES,
)
forward_last = fluid.layers.sequence_last_step(input=forward)
backward_first = fluid.layers.sequence_first_step(input=backward)
return forward_last, backward_first
# FIXME(peterzhang2029): Replace this function with the lstm_unit_op.
def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
def linear(inputs):
return fluid.layers.fc(input=inputs, size=size, bias_attr=True)
forget_gate = paddle.nn.functional.sigmoid(x=linear([hidden_t_prev, x_t]))
input_gate = paddle.nn.functional.sigmoid(x=linear([hidden_t_prev, x_t]))
output_gate = paddle.nn.functional.sigmoid(x=linear([hidden_t_prev, x_t]))
cell_tilde = paddle.tanh(x=linear([hidden_t_prev, x_t]))
cell_t = fluid.layers.sums(
input=[
fluid.layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
fluid.layers.elementwise_mul(x=input_gate, y=cell_tilde),
]
)
hidden_t = fluid.layers.elementwise_mul(
x=output_gate, y=paddle.tanh(x=cell_t)
)
return hidden_t, cell_t
def lstm_decoder_without_attention(
target_embedding, decoder_boot, context, decoder_size
):
rnn = fluid.layers.DynamicRNN()
cell_init = fluid.layers.fill_constant_batch_size_like(
input=decoder_boot, value=0.0, shape=[-1, decoder_size], dtype='float32'
)
cell_init.stop_gradient = False
with rnn.block():
current_word = rnn.step_input(target_embedding)
context = rnn.static_input(context)
hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
cell_mem = rnn.memory(init=cell_init)
decoder_inputs = fluid.layers.concat(
input=[context, current_word], axis=1
)
h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size)
rnn.update_memory(hidden_mem, h)
rnn.update_memory(cell_mem, c)
out = fluid.layers.fc(
input=h, size=target_dict_dim, bias_attr=True, act='softmax'
)
rnn.output(out)
return rnn()
def seq_to_seq_net():
"""Construct a seq2seq network."""
src_word_idx = fluid.layers.data(
name='source_sequence', shape=[1], dtype='int64', lod_level=1
)
src_embedding = fluid.layers.embedding(
input=src_word_idx,
size=[source_dict_dim, embedding_dim],
dtype='float32',
)
src_forward_last, src_backward_first = bi_lstm_encoder(
input_seq=src_embedding, hidden_size=encoder_size
)
encoded_vector = fluid.layers.concat(
input=[src_forward_last, src_backward_first], axis=1
)
decoder_boot = fluid.layers.fc(
input=src_backward_first, size=decoder_size, bias_attr=False, act='tanh'
)
trg_word_idx = fluid.layers.data(
name='target_sequence', shape=[1], dtype='int64', lod_level=1
)
trg_embedding = fluid.layers.embedding(
input=trg_word_idx,
size=[target_dict_dim, embedding_dim],
dtype='float32',
)
prediction = lstm_decoder_without_attention(
trg_embedding, decoder_boot, encoded_vector, decoder_size
)
label = fluid.layers.data(
name='label_sequence', shape=[1], dtype='int64', lod_level=1
)
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = paddle.mean(cost)
return avg_cost, prediction
def train(use_cuda, save_dirname=None):
[avg_cost, prediction] = seq_to_seq_net()
optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
optimizer.minimize(avg_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=1000
),
batch_size=batch_size,
)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = Executor(place)
exe.run(framework.default_startup_program())
feed_order = ['source_sequence', 'target_sequence', 'label_sequence']
feed_list = [
framework.default_main_program().global_block().var(var_name)
for var_name in feed_order
]
feeder = fluid.DataFeeder(feed_list, place)
batch_id = 0
for pass_id in range(2):
for data in train_data():
outs = exe.run(
framework.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost],
)
avg_cost_val = np.array(outs[0])
print(
'pass_id='
+ str(pass_id)
+ ' batch='
+ str(batch_id)
+ " avg_cost="
+ str(avg_cost_val)
)
if math.isnan(float(avg_cost_val[0])):
sys.exit("got NaN loss, training failed.")
if batch_id > 3:
if save_dirname is not None:
fluid.io.save_inference_model(
save_dirname,
['source_sequence', 'target_sequence'],
[prediction],
exe,
)
return
batch_id += 1
def infer(use_cuda, save_dirname=None):
if save_dirname is None:
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators).
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(save_dirname, exe)
# Set up the input by creating a LoDTensor to represent a sequence of words.
# Here each word is the basic element of the LoDTensor and the shape of
# each word (base_shape) should be [1] since it is simply an index used to
# look up the corresponding word vector.
# Suppose the recursive_sequence_lengths info is set to [[4, 6]],
# which has only one level of detail. Then the created LoDTensor will have only
# one higher level structure (sequence of words, or sentence) than the basic
# element (word). Hence the LoDTensor will hold data for two sentences of
# length 4 and 6, respectively.
# Note that recursive_sequence_lengths should be a list of lists.
recursive_seq_lens = [[4, 6]]
base_shape = [1]
# The range of random integers is [low, high]
word_data = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=1
)
trg_word = fluid.create_random_int_lodtensor(
recursive_seq_lens, base_shape, place, low=0, high=1
)
# Construct feed as a dictionary of {feed_target_name: feed_target_data}
# and results will contain a list of data corresponding to fetch_targets.
assert feed_target_names[0] == 'source_sequence'
assert feed_target_names[1] == 'target_sequence'
results = exe.run(
inference_program,
feed={
feed_target_names[0]: word_data,
feed_target_names[1]: trg_word,
},
fetch_list=fetch_targets,
return_numpy=False,
)
print(results[0].recursive_sequence_lengths())
np_data = np.array(results[0])
print("Inference shape: ", np_data.shape)
print("Inference results: ", np_data)
def main(use_cuda):
if use_cuda and not fluid.core.is_compiled_with_cuda():
return
# Directory for saving the trained model
temp_dir = tempfile.TemporaryDirectory()
save_dirname = os.path.join(
temp_dir.name, "rnn_encoder_decoder.inference.model"
)
train(use_cuda, save_dirname)
infer(use_cuda, save_dirname)
temp_dir.cleanup()
class TestRnnEncoderDecoder(unittest.TestCase):
def test_cuda(self):
with self.scope_prog_guard():
main(use_cuda=True)
def test_cpu(self):
with self.scope_prog_guard():
main(use_cuda=False)
@contextlib.contextmanager
def scope_prog_guard(self):
prog = fluid.Program()
startup_prog = fluid.Program()
scope = fluid.core.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(prog, startup_prog):
yield
if __name__ == '__main__':
unittest.main()
......@@ -1108,7 +1108,6 @@ set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 150)
set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_cond PROPERTIES TIMEOUT 120)
set_tests_properties(test_space_to_depth_op PROPERTIES TIMEOUT 200)
set_tests_properties(test_dyn_rnn PROPERTIES TIMEOUT 120)
set_tests_properties(test_sgd_op PROPERTIES TIMEOUT 250)
set_tests_properties(test_parallel_executor_seresnext_base_gpu
PROPERTIES TIMEOUT 120)
......
......@@ -490,7 +490,7 @@ class BaseModel(fluid.dygraph.Layer):
next_finished = fluid.layers.cast(next_finished, "bool")
next_finished = paddle.logical_or(
next_finished,
fluid.layers.equal(token_indices, end_token_tensor),
paddle.equal(token_indices, end_token_tensor),
)
next_finished = fluid.layers.cast(next_finished, "float32")
......
......@@ -871,7 +871,7 @@ class Transformer(Layer):
log_probs = gather(log_probs, topk_indices, batch_pos)
finished = gather(finished, beam_indices, batch_pos)
finished = paddle.logical_or(
finished, layers.equal(token_indices, end_token_tensor)
finished, paddle.equal(token_indices, end_token_tensor)
)
trg_word = paddle.reshape(token_indices, [-1, 1])
......
......@@ -56,7 +56,7 @@ class TestBase(IPUOpTest):
y = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
out = paddle.fluid.layers.equal(x, y, **self.attrs)
out = paddle.equal(x, y)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
......
......@@ -56,7 +56,7 @@ class TestBase(IPUOpTest):
y = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
out = paddle.fluid.layers.not_equal(x, y, **self.attrs)
out = paddle.not_equal(x, y)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
......
......@@ -105,8 +105,8 @@ class TestAPICase(unittest.TestCase):
y = layers.fill_constant(shape=[1], dtype='float32', value=1)
z = layers.fill_constant(shape=[1], dtype='float32', value=3)
pred_1 = layers.equal(x, y) # true
pred_2 = layers.equal(x, z) # false
pred_1 = paddle.equal(x, y) # true
pred_2 = paddle.equal(x, z) # false
out = layers.case(((pred_1, fn_1), (pred_2, fn_2)), fn_3)
......
......@@ -46,7 +46,7 @@ def create_test_class(op_type, typename, callback):
self.assertRaises(
TypeError, fluid.layers.less_than, x=x, y=y, force_cpu=1
)
op = eval("fluid.layers.%s" % self.op_type)
op = eval("paddle.%s" % self.op_type)
self.assertRaises(TypeError, op, x=x, y=y, cond=1)
self.assertRaises(TypeError, op, x=x, y=a)
self.assertRaises(TypeError, op, x=a, y=y)
......@@ -446,7 +446,7 @@ class TestCompareOpError(unittest.TestCase):
y = fluid.create_lod_tensor(
numpy.array([[-1]]), [[1]], fluid.CPUPlace()
)
self.assertRaises(TypeError, fluid.layers.greater_equal, x, y)
self.assertRaises(TypeError, paddle.greater_equal, x, y)
class API_TestElementwise_Equal(unittest.TestCase):
......
......@@ -212,7 +212,7 @@ class TestCloneWithStopGradientInSubBlock(unittest.TestCase):
hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
hidden1.stop_gradient = True
cond = fluid.layers.equal(true, true)
cond = paddle.equal(true, true)
def true_fn():
hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
......@@ -253,7 +253,7 @@ class TestCloneWithRaise(unittest.TestCase):
hidden1 = fluid.layers.fc(input=img, size=200, act='relu')
hidden1.stop_gradient = True
cond = fluid.layers.equal(true, true)
cond = paddle.equal(true, true)
def true_fn():
hidden2 = fluid.layers.dropout(hidden1, dropout_prob=0.5)
......
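The hunks above all apply the same migration for the removed comparison ops: drop the fluid.layers call (and any extra attrs it took) and call the paddle-level API directly. A minimal sketch of that pattern outside the test harness — not part of this diff, with illustrative tensor values only:

import paddle

x = paddle.to_tensor([1.0, 2.0, 3.0])
y = paddle.to_tensor([1.0, 0.0, 3.0])
eq = paddle.equal(x, y)        # boolean Tensor: [True, False, True]
ne = paddle.not_equal(x, y)    # boolean Tensor: [False, True, False]
print(eq.numpy(), ne.numpy())

As with the removed layers.equal, paddle.equal returns a boolean Tensor, so downstream casts (e.g. to "float32" in the BaseModel hunk) keep working unchanged.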
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import paddle
import unittest
import numpy
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.layers.control_flow import lod_rank_table
from paddle.fluid.layers.control_flow import max_sequence_len
from paddle.fluid.layers.control_flow import lod_tensor_to_array
from paddle.fluid.layers.control_flow import array_to_lod_tensor
from paddle.fluid.layers.control_flow import shrink_memory
from fake_reader import fake_imdb_reader
numpy.random.seed(2020)
class TestDynamicRNN(unittest.TestCase):
def setUp(self):
self.word_dict_len = 5147
self.BATCH_SIZE = 2
reader = fake_imdb_reader(self.word_dict_len, self.BATCH_SIZE * 100)
self.train_data = paddle.batch(reader, batch_size=self.BATCH_SIZE)
def _train(
self,
main_program,
startup_program,
feed_list,
fetch_list,
is_nested=False,
max_iters=1,
):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
data = next(self.train_data())
for iter_id in range(max_iters):
fetch_outs = exe.run(
main_program,
feed=feeder.feed(data),
fetch_list=fetch_list,
return_numpy=False,
)
if len(fetch_list) == 3:
rnn_in_seq = fetch_outs[0]
rnn_out_seq = fetch_outs[1]
if not is_nested:
# Check the LoD set at runtime. When lod_level is 1,
# the LoD of DynamicRNN's output should be the same as that of its input.
self.assertEqual(rnn_in_seq.lod(), rnn_out_seq.lod())
loss_i = numpy.array(fetch_outs[2])
elif len(fetch_list) == 1:
loss_i = numpy.array(fetch_outs[0])
# print(loss_i)
self.assertEqual((1,), loss_i.shape)
self.assertFalse(numpy.isnan(loss_i))
if iter_id == 0:
loss_0 = loss_i
if max_iters > 10:
# The loss should be small after 10 mini-batches.
self.assertLess(loss_i[0], loss_0[0])
def test_plain_while_op(self):
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
sentence = fluid.layers.data(
name='word', shape=[1], dtype='int64', lod_level=1
)
sent_emb = fluid.layers.embedding(
input=sentence, size=[self.word_dict_len, 32], dtype='float32'
)
rank_table = lod_rank_table(x=sent_emb)
sent_emb_array = lod_tensor_to_array(x=sent_emb, table=rank_table)
seq_len = max_sequence_len(rank_table=rank_table)
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
i.stop_gradient = False
boot_mem = fluid.layers.fill_constant_batch_size_like(
input=fluid.layers.array_read(array=sent_emb_array, i=i),
value=0,
shape=[-1, 100],
dtype='float32',
)
boot_mem.stop_gradient = False
mem_array = fluid.layers.array_write(x=boot_mem, i=i)
cond = fluid.layers.less_than(x=i, y=seq_len)
cond.stop_gradient = False
while_op = fluid.layers.While(cond=cond)
out = fluid.layers.create_array(dtype='float32')
with while_op.block():
mem = fluid.layers.array_read(array=mem_array, i=i)
ipt = fluid.layers.array_read(array=sent_emb_array, i=i)
mem = shrink_memory(x=mem, i=i, table=rank_table)
hidden = fluid.layers.fc(input=[mem, ipt], size=100, act='tanh')
fluid.layers.array_write(x=hidden, i=i, array=out)
fluid.layers.increment(x=i, in_place=True)
fluid.layers.array_write(x=hidden, i=i, array=mem_array)
fluid.layers.less_than(x=i, y=seq_len, cond=cond)
result_all_timesteps = array_to_lod_tensor(x=out, table=rank_table)
last = fluid.layers.sequence_last_step(input=result_all_timesteps)
logits = fluid.layers.fc(input=last, size=1, act=None)
label = fluid.layers.data(name='label', shape=[1], dtype='float32')
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label
)
loss = paddle.mean(loss)
sgd = fluid.optimizer.SGD(1e-4)
sgd.minimize(loss=loss)
# Check the lod_level set at compile time.
self.assertEqual(sent_emb.lod_level, result_all_timesteps.lod_level)
self._train(
main_program=main_program,
startup_program=startup_program,
feed_list=[sentence, label],
fetch_list=[sent_emb, result_all_timesteps, loss],
is_nested=False,
max_iters=1,
)
def test_train_dynamic_rnn(self):
main_program = fluid.Program()
startup_program = fluid.Program()
main_program.random_seed = 10
startup_program.random_seed = 10
with fluid.program_guard(main_program, startup_program):
sentence = fluid.layers.data(
name='word', shape=[1], dtype='int64', lod_level=1
)
sent_emb = fluid.layers.embedding(
input=sentence, size=[self.word_dict_len, 32], dtype='float32'
)
drnn = fluid.layers.DynamicRNN()
with drnn.block():
in_ = drnn.step_input(sent_emb)
mem = drnn.memory(shape=[100], dtype='float32')
out_ = fluid.layers.fc(input=[in_, mem], size=100, act='tanh')
drnn.update_memory(mem, out_)
drnn.output(out_)
drnn_result = drnn()
last = fluid.layers.sequence_last_step(input=drnn_result)
logits = fluid.layers.fc(input=last, size=1, act=None)
label = fluid.layers.data(name='label', shape=[1], dtype='float32')
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label
)
loss = paddle.mean(loss)
sgd = fluid.optimizer.Adam(1e-3)
sgd.minimize(loss=loss)
# Check the lod_level set at compile time.
self.assertEqual(sent_emb.lod_level, drnn_result.lod_level)
self._train(
main_program=main_program,
startup_program=startup_program,
feed_list=[sentence, label],
fetch_list=[sent_emb, drnn_result, loss],
is_nested=False,
max_iters=100,
)
def _fake_reader(self):
seq_len, label = [[2, 2]], [0, 1]
data = []
for ele in seq_len:
for j in ele:
data.append([numpy.random.randint(30) for _ in range(j)])
while True:
yield data, label
# This unit test only covers the two-layer nested dyn_rnn.
def test_train_nested_dynamic_rnn(self):
word_dict = [i for i in range(30)]
main_program = fluid.Program()
startup_program = fluid.Program()
main_program.random_seed = 10
startup_program.random_seed = 10
with fluid.program_guard(main_program, startup_program):
sentence = fluid.layers.data(
name='word', shape=[1], dtype='int64', lod_level=2
)
label = fluid.layers.data(
name='label', shape=[1], dtype='float32', lod_level=1
)
drnn0 = fluid.layers.DynamicRNN()
with drnn0.block():
in_0 = drnn0.step_input(sentence)
assert in_0.lod_level == 1, "the lod level of in_0 should be 1"
sentence_emb = fluid.layers.embedding(
input=in_0, size=[len(word_dict), 32], dtype='float32'
)
out_0 = fluid.layers.fc(
input=sentence_emb, size=100, act='tanh'
)
drnn1 = fluid.layers.DynamicRNN()
with drnn1.block():
in_1 = drnn1.step_input(out_0)
assert (
in_1.lod_level == 0
), "the lod level of in_1 should be 0"
out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh')
drnn1.output(out_1)
drnn1_result = drnn1()
last_1 = fluid.layers.sequence_last_step(input=drnn1_result)
drnn0.output(last_1)
last = drnn0()
logits = fluid.layers.fc(input=last, size=1, act=None)
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label
)
loss = paddle.mean(loss)
sgd = fluid.optimizer.SGD(1e-3)
sgd.minimize(loss=loss)
train_data_orig = self.train_data
self.train_data = paddle.batch(self._fake_reader, batch_size=2)
self._train(
main_program=main_program,
startup_program=startup_program,
feed_list=[sentence, label],
fetch_list=[loss],
is_nested=True,
max_iters=100,
)
self.train_data = train_data_orig
# This unit test only covers the two-layer nested dyn_rnn.
def test_train_nested_dynamic_rnn2(self):
word_dict = [i for i in range(30)]
hidden_size = 32
main_program = fluid.Program()
startup_program = fluid.Program()
main_program.random_seed = 10
startup_program.random_seed = 10
with fluid.program_guard(main_program, startup_program):
sentence = fluid.layers.data(
name='word', shape=[1], dtype='int64', lod_level=2
)
label = fluid.layers.data(
name='label', shape=[1], dtype='float32', lod_level=1
)
drnn0 = fluid.layers.DynamicRNN()
with drnn0.block():
in_0 = drnn0.step_input(sentence)
sentence_emb = fluid.layers.embedding(
input=in_0,
size=[len(word_dict), hidden_size],
dtype='float32',
)
input_forward_proj = fluid.layers.fc(
input=sentence_emb,
size=hidden_size * 4,
act=None,
bias_attr=False,
)
forward, _ = fluid.layers.dynamic_lstm(
input=input_forward_proj,
size=hidden_size * 4,
use_peepholes=False,
)
drnn1 = fluid.layers.DynamicRNN()
with drnn1.block():
in_1 = drnn1.step_input(forward)
out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh')
drnn1.output(out_1)
last = fluid.layers.sequence_last_step(input=drnn1())
drnn0.output(last)
last = drnn0()
logits = fluid.layers.fc(input=last, size=1, act=None)
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=logits, label=label
)
loss = paddle.mean(loss)
sgd = fluid.optimizer.SGD(1e-3)
sgd.minimize(loss=loss)
train_data_orig = self.train_data
self.train_data = paddle.batch(self._fake_reader, batch_size=2)
self._train(
main_program=main_program,
startup_program=startup_program,
feed_list=[sentence, label],
fetch_list=[loss],
is_nested=True,
max_iters=100,
)
self.train_data = train_data_orig
class TestDynamicRNNErrors(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
init = fluid.layers.zeros(shape=[1], dtype='float32')
shape = 'shape'
sentence = fluid.data(
name='sentence', shape=[None, 32], dtype='float32', lod_level=1
)
# The type of Input(shape) in API(memory) must be list or tuple
def input_shape_type_of_memory():
drnn = fluid.layers.DynamicRNN()
with drnn.block():
res = drnn.memory(init, shape)
self.assertRaises(TypeError, input_shape_type_of_memory)
# Each element of Input(*outputs) in API(output) must be a Variable.
def outputs_type_of_output():
drnn = fluid.layers.DynamicRNN()
with drnn.block():
word = drnn.step_input(sentence)
memory = drnn.memory(shape=[10], dtype='float32', value=0)
hidden = fluid.layers.fc(
input=[word, memory], size=10, act='tanh'
)
out = numpy.ones(1).astype('float32')
drnn.update_memory(ex_mem=memory, new_mem=hidden)
drnn.output(hidden, out)
self.assertRaises(TypeError, outputs_type_of_output)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import random
import collections
import paddle
import paddle.fluid as fluid
import unittest
from decorator_helper import prog_scope
class Memory:
def __init__(self, shape, dtype='float32'):
self.ex = np.zeros(shape=shape, dtype=dtype)
self.cur = None
def update(self, val):
assert val.shape == self.ex.shape
assert val.dtype == self.ex.dtype
self.cur = val
def next(self):
self.ex = self.cur
self.cur = None
def __next__(self):
self.next()
def reset(self):
self.ex = np.zeros(shape=self.ex.shape, dtype=self.ex.dtype)
self.cur = None
class Output:
def __init__(self):
self.outs = []
def next_sequence(self):
self.outs.append([])
def out(self, val):
self.outs[-1].append(val)
def last(self):
return self.outs[-1][-1]
class BaseRNN:
def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15):
self.num_seq = num_seq
self.inputs = collections.defaultdict(list)
for _ in range(num_seq):
seq_len = random.randint(1, max_seq_len - 1)
for iname in ins:
ishape = ins[iname].get('shape', None)
idtype = ins[iname].get('dtype', 'float32')
lst = []
for _ in range(seq_len):
lst.append(np.random.random(size=ishape).astype(idtype))
self.inputs[iname].append(lst)
self.mems = dict()
for mname in mems:
mshape = mems[mname].get('shape', None)
mdtype = mems[mname].get('dtype', 'float32')
self.mems[mname] = Memory(shape=mshape, dtype=mdtype)
self.params = dict()
for pname in params:
pshape = params[pname].get('shape', None)
pdtype = params[pname].get('dtype', 'float32')
self.params[pname] = np.random.random(size=pshape).astype(pdtype)
self.outputs = dict()
for oname in outs:
self.outputs[oname] = Output()
def step(self, **kwargs):
raise NotImplementedError()
def exe(self):
retv = dict()
for out in self.outputs:
retv[out] = []
for seq_id in range(self.num_seq):
for mname in self.mems:
self.mems[mname].reset()
for out in self.outputs:
self.outputs[out].next_sequence()
iname0 = list(self.inputs.keys())[0]
seq_len = len(self.inputs[iname0][seq_id])
for step_id in range(seq_len):
xargs = dict()
for iname in self.inputs:
xargs[iname] = self.inputs[iname][seq_id][step_id]
for mname in self.mems:
xargs[mname] = self.mems[mname]
for pname in self.params:
xargs[pname] = self.params[pname]
for out in self.outputs:
xargs[out] = self.outputs[out]
self.step(**xargs)
for mname in self.mems:
next(self.mems[mname])
for out in self.outputs:
retv[out].append(self.outputs[out].last())
for out in retv:
retv[out] = np.array(retv[out])
return retv
def to_feed(self, place):
feed_dict = dict()
for iname in self.inputs:
lod = []
np_flatten = []
for seq_id in range(len(self.inputs[iname])):
seq_len = len(self.inputs[iname][seq_id])
lod.append(seq_len)
np_flatten.extend(self.inputs[iname][seq_id])
t = fluid.Tensor()
t.set(np.array(np_flatten), place)
t.set_recursive_sequence_lengths([lod])
feed_dict[iname] = t
for pname in self.params:
feed_dict[pname] = self.params[pname]
return feed_dict
def get_numeric_gradient_of_param(self, param_name, delta=0.001):
p = self.params[param_name]
if len(p.shape) != 2:
raise ValueError(
"Not support get numeric gradient of an parameter,"
" which is not matrix"
)
g = np.zeros(shape=p.shape, dtype=p.dtype)
for i in range(p.shape[0]):
for j in range(p.shape[1]):
o = p[i][j]
p[i][j] += delta
pos = self._exe_mean_out_()
p[i][j] -= 2 * delta
neg = self._exe_mean_out_()
p[i][j] = o
g[i][j] = (pos - neg) / (delta * 2)
return g
def get_numeric_gradient_of_input(
self, input_name, delta=0.001, return_one_tensor=True
):
ipt = self.inputs[input_name]
grad = []
for seq in ipt:
seq_grad = []
for item in seq:
item_grad = np.zeros(shape=item.shape, dtype=item.dtype)
if len(item.shape) != 1:
raise ValueError("Not support")
for i in range(len(item)):
o = item[i]
item[i] += delta
pos = self._exe_mean_out_()
item[i] -= 2 * delta
neg = self._exe_mean_out_()
item[i] = o
item_grad[i] = (pos - neg) / (delta * 2)
seq_grad.append(item_grad)
grad.append(seq_grad)
if not return_one_tensor:
return grad
for i in range(len(grad)):
grad[i] = np.concatenate(grad[i])
grad = np.concatenate(grad)
return grad
def _exe_mean_out_(self):
outs = self.exe()
return np.array([o.mean() for o in outs.values()]).mean()
class SeedFixedTestCase(unittest.TestCase):
@classmethod
def setUpClass(cls):
"""Fix random seeds to remove randomness from tests"""
cls._np_rand_state = np.random.get_state()
cls._py_rand_state = random.getstate()
np.random.seed(123)
random.seed(124)
@classmethod
def tearDownClass(cls):
"""Restore random seeds"""
np.random.set_state(cls._np_rand_state)
random.setstate(cls._py_rand_state)
class TestSimpleMul(SeedFixedTestCase):
DATA_NAME = 'X'
DATA_WIDTH = 32
PARAM_NAME = 'W'
HIDDEN_WIDTH = 10
OUT_NAME = 'Out'
class SimpleMul(BaseRNN):
def __init__(self):
base = TestSimpleMul
super().__init__(
{base.DATA_NAME: {'shape': [base.DATA_WIDTH]}},
{},
{
base.PARAM_NAME: {
'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH]
}
},
[base.OUT_NAME],
)
def step(self, X, W, Out):
Out.out(np.matmul(X, W))
# Tested many times locally to ensure the fixed random seed does not break CI.
# @many_times(10)
@prog_scope()
def test_forward_backward(self):
py_rnn = TestSimpleMul.SimpleMul()
dat = fluid.layers.data(
name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1
)
dat.stop_gradient = False
rnn = fluid.layers.DynamicRNN()
with rnn.block():
d = rnn.step_input(dat)
o = fluid.layers.fc(
input=d,
param_attr=self.PARAM_NAME,
bias_attr=False,
size=self.HIDDEN_WIDTH,
act=None,
)
rnn.output(o)
out = rnn()
out = fluid.layers.sequence_pool(out, pool_type='last')
loss = paddle.mean(out)
fluid.backward.append_backward(loss)
cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu)
out, w_g, i_g = list(
map(
np.array,
exe.run(
feed=py_rnn.to_feed(cpu),
fetch_list=[
out,
self.PARAM_NAME + "@GRAD",
self.DATA_NAME + "@GRAD",
],
return_numpy=False,
),
)
)
out_by_python = py_rnn.exe()[self.OUT_NAME]
np.testing.assert_allclose(out, out_by_python, rtol=1e-05)
w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME)
np.testing.assert_allclose(w_g_num, w_g, rtol=0.05)
i_g_num = py_rnn.get_numeric_gradient_of_input(
input_name=self.DATA_NAME
)
i_g_num = i_g_num.reshape(i_g.shape)
np.testing.assert_allclose(i_g_num, i_g, rtol=0.05)
class TestSimpleMulWithMemory(SeedFixedTestCase):
DATA_WIDTH = 32
HIDDEN_WIDTH = 20
DATA_NAME = 'X'
PARAM_NAME = 'W'
class SimpleMulWithMemory(BaseRNN):
def __init__(self):
super().__init__(
{
TestSimpleMulWithMemory.DATA_NAME: {
'shape': [TestSimpleMulWithMemory.DATA_WIDTH]
}
},
{'Mem': {'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH]}},
{
TestSimpleMulWithMemory.PARAM_NAME: {
'shape': [
TestSimpleMulWithMemory.DATA_WIDTH,
TestSimpleMulWithMemory.HIDDEN_WIDTH,
]
}
},
['Out'],
)
def step(self, X, Mem, W, Out):
o = np.matmul(X, W)
assert isinstance(Mem, Memory)
o += Mem.ex
Mem.update(o)
assert isinstance(Out, Output)
Out.out(o)
# many_times is used locally for debugging, to make sure the calculation is stable.
# @many_times(10)
@prog_scope()
def test_forward_backward(self):
py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory()
data = fluid.layers.data(
name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1
)
data.stop_gradient = False
rnn = fluid.layers.DynamicRNN()
with rnn.block():
d = rnn.step_input(data)
mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH])
hidden = fluid.layers.fc(
input=d,
size=self.HIDDEN_WIDTH,
param_attr=self.PARAM_NAME,
bias_attr=False,
act=None,
)
o = fluid.layers.elementwise_add(x=hidden, y=mem)
rnn.update_memory(mem, o)
rnn.output(o)
out = rnn()
last = fluid.layers.sequence_pool(input=out, pool_type='last')
loss = paddle.mean(last)
fluid.backward.append_backward(loss)
cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu)
feed = py_rnn.to_feed(cpu)
last_np, w_g, i_g = list(
map(
np.array,
exe.run(
feed=feed,
fetch_list=[
last,
self.PARAM_NAME + "@GRAD",
self.DATA_NAME + "@GRAD",
],
return_numpy=False,
),
)
)
(last_by_py,) = list(py_rnn.exe().values())
w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME)
np.testing.assert_allclose(last_np, last_by_py, rtol=1e-05)
np.testing.assert_allclose(w_g_num, w_g, rtol=0.1)
i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME)
i_g_num = i_g_num.reshape(i_g.shape)
# Since this RNN performs many float additions, the result may not be numerically stable.
# rtol = 0.1
np.testing.assert_allclose(i_g_num, i_g, rtol=0.1)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
import paddle.fluid.framework as framework
from paddle.fluid.framework import Program, switch_main_program
import bisect
import numpy as np
fluid.default_startup_program().random_seed = 1
np.random.seed(1)
class TestDyRnnStaticInput(unittest.TestCase):
def setUp(self):
self._delta = 0.005
self._max_sequence_len = 3
self._program = Program()
switch_main_program(self._program)
self.output_dim = 10
self.place = core.CPUPlace()
self.prepare_x_tensor()
self.prepare_static_input_tensor()
self.exe = fluid.Executor(self.place)
def prepare_x_tensor(self):
self.x_tensor_dim = 10
lod = [[2, 1, 3]]
shape = [sum(lod[0]), self.x_tensor_dim]
self.x_tensor_data = np.random.random(shape).astype('float32')
self.x_tensor = core.LoDTensor()
self.x_tensor.set_recursive_sequence_lengths(lod)
self.x_tensor.set(self.x_tensor_data, self.place)
def prepare_static_input_tensor(self):
self.static_input_tensor_dim = 4
lod = [[1, 2, 3]]
shape = [sum(lod[0]), self.static_input_tensor_dim]
self.static_input_data = np.random.random(shape).astype('float32')
self.static_input_tensor = core.LoDTensor()
self.static_input_tensor.set_recursive_sequence_lengths(lod)
self.static_input_tensor.set(self.static_input_data, self.place)
def fetch_value(self, var):
fetch_outs = self.exe.run(
feed={
'x_tensor': self.x_tensor,
'static_input_tensor': self.static_input_tensor,
},
fetch_list=[var],
return_numpy=False,
)
return self._lodtensor_to_ndarray(fetch_outs[0])
def _lodtensor_to_ndarray(self, lod_tensor):
dims = lod_tensor.shape()
ndarray = np.zeros(shape=dims).astype('float32')
for i in range(np.product(dims)):
ndarray.ravel()[i] = lod_tensor._get_float_element(i)
return ndarray, lod_tensor.recursive_sequence_lengths()
def build_graph(self, only_forward=False):
x_tensor = fluid.layers.data(
name='x_tensor',
shape=[self.x_tensor_dim],
dtype='float32',
lod_level=1,
)
x_tensor.stop_gradient = False
static_input_tensor = fluid.layers.data(
name='static_input_tensor',
shape=[self.static_input_tensor_dim],
dtype='float32',
lod_level=1,
)
static_input_tensor.stop_gradient = False
if only_forward:
static_input_out_array = self._program.global_block().create_var(
name='static_input_out_array',
type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
dtype='float32',
)
static_input_out_array.stop_gradient = True
rnn = fluid.layers.DynamicRNN()
with rnn.block():
step_x = rnn.step_input(x_tensor)
step_static_input = rnn.static_input(static_input_tensor)
if only_forward:
fluid.layers.array_write(
x=step_static_input,
i=rnn.step_idx,
array=static_input_out_array,
)
last = fluid.layers.sequence_pool(
input=step_static_input, pool_type='last'
)
projected = fluid.layers.fc(
input=[step_x, last], size=self.output_dim
)
rnn.output(projected)
if only_forward:
static_input_step_outs = []
step_idx = fluid.layers.fill_constant(
shape=[1], dtype='int64', value=0
)
step_idx.stop_gradient = True
for i in range(self._max_sequence_len):
step_out = fluid.layers.array_read(
static_input_out_array, step_idx
)
step_out.stop_gradient = True
static_input_step_outs.append(step_out)
fluid.layers.increment(x=step_idx, value=1.0, in_place=True)
if only_forward:
return static_input_step_outs
last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
loss = paddle.mean(last)
append_backward(loss)
static_input_grad = self._program.global_block().var(
framework.grad_var_name('static_input_tensor')
)
return static_input_grad, loss
def get_expected_static_step_outs(self):
x_lod = self.x_tensor.recursive_sequence_lengths()
x_seq_len = x_lod[0]
x_seq_len_sorted = sorted(x_seq_len)
x_sorted_indices = np.argsort(x_seq_len)[::-1]
static_lod = self.static_input_tensor.recursive_sequence_lengths()
static_sliced = []
cur_offset = 0
for i in range(len(static_lod[0])):
static_sliced.append(
self.static_input_data[
cur_offset : (cur_offset + static_lod[0][i])
]
)
cur_offset += static_lod[0][i]
static_seq_len = static_lod[0]
static_reordered = []
for i in range(len(x_sorted_indices)):
static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
static_seq_len_reordered = [
static_seq_len[x_sorted_indices[i]]
for i in range(len(x_sorted_indices))
]
static_step_outs = []
static_step_lods = []
for i in range(self._max_sequence_len):
end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
lod = []
total_len = 0
for i in range(end):
lod.append(static_seq_len_reordered[i])
total_len += lod[-1]
static_step_lods.append([lod])
end = total_len
static_step_outs.append(
np.array(static_reordered[:end]).astype('float32')
)
return static_step_outs, static_step_lods
def test_step_out(self):
static_step_outs = self.build_graph(only_forward=True)
self.exe.run(framework.default_startup_program())
expected_outs, expected_lods = self.get_expected_static_step_outs()
for i in range(self._max_sequence_len):
step_out, lod = self.fetch_value(static_step_outs[i])
np.testing.assert_allclose(step_out, expected_outs[i], rtol=1e-05)
np.testing.assert_allclose(lod, expected_lods[i], rtol=1e-05)
def test_network_gradient(self):
static_input_grad, loss = self.build_graph()
self.exe.run(framework.default_startup_program())
actual_gradients, actual_lod = self.fetch_value(static_input_grad)
static_input_shape = self.static_input_tensor.shape()
numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
# calculate numeric gradients
tensor_size = np.product(static_input_shape)
for i in range(tensor_size):
origin = self.static_input_tensor._get_float_element(i)
x_pos = origin + self._delta
self.static_input_tensor._set_float_element(i, x_pos)
y_pos = self.fetch_value(loss)[0][0]
x_neg = origin - self._delta
self.static_input_tensor._set_float_element(i, x_neg)
y_neg = self.fetch_value(loss)[0][0]
self.static_input_tensor._set_float_element(i, origin)
numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
np.testing.assert_allclose(
actual_gradients, numeric_gradients, rtol=0.001
)
np.testing.assert_allclose(
actual_lod,
self.static_input_tensor.recursive_sequence_lengths(),
rtol=1e-05,
)
if __name__ == '__main__':
unittest.main()
......@@ -2482,7 +2482,7 @@ class TestLayer(LayerTest):
with self.static_graph():
a1 = layers.data(name='a1', shape=[1], dtype='int64')
b1 = layers.data(name='b1', shape=[1], dtype='int64')
cond1 = layers.less_equal(x=a1, y=b1)
cond1 = paddle.less_equal(x=a1, y=b1)
static_ret1 = self.get_static_graph_result(
feed={"a1": value_a, "b1": value_b}, fetch_list=[cond1]
)[0]
......@@ -2490,14 +2490,14 @@ class TestLayer(LayerTest):
with _test_eager_guard():
da1 = base.to_variable(value_a)
db1 = base.to_variable(value_b)
dcond1 = layers.less_equal(x=da1, y=db1)
dcond1 = paddle.less_equal(x=da1, y=db1)
for i in range(len(static_ret1)):
self.assertTrue(dcond1.numpy()[i] == static_ret1[i])
da1 = base.to_variable(value_a)
db1 = base.to_variable(value_b)
dcond1 = layers.less_equal(x=da1, y=db1)
dcond1 = paddle.less_equal(x=da1, y=db1)
for i in range(len(static_ret1)):
self.assertTrue(dcond1.numpy()[i] == static_ret1[i])
......@@ -2506,7 +2506,7 @@ class TestLayer(LayerTest):
with self.static_graph():
a2 = layers.data(name='a2', shape=[1], dtype='int64')
b2 = layers.data(name='b2', shape=[1], dtype='int64')
cond2 = layers.greater_than(x=a2, y=b2)
cond2 = paddle.greater_than(x=a2, y=b2)
static_ret2 = self.get_static_graph_result(
feed={"a2": value_a, "b2": value_b}, fetch_list=[cond2]
)[0]
......@@ -2514,14 +2514,14 @@ class TestLayer(LayerTest):
with _test_eager_guard():
da2 = base.to_variable(value_a)
db2 = base.to_variable(value_b)
dcond2 = layers.greater_than(x=da2, y=db2)
dcond2 = paddle.greater_than(x=da2, y=db2)
for i in range(len(static_ret2)):
self.assertTrue(dcond2.numpy()[i] == static_ret2[i])
da2 = base.to_variable(value_a)
db2 = base.to_variable(value_b)
dcond2 = layers.greater_than(x=da2, y=db2)
dcond2 = paddle.greater_than(x=da2, y=db2)
for i in range(len(static_ret2)):
self.assertTrue(dcond2.numpy()[i] == static_ret2[i])
......@@ -2530,7 +2530,7 @@ class TestLayer(LayerTest):
with self.static_graph():
a3 = layers.data(name='a3', shape=[1], dtype='int64')
b3 = layers.data(name='b3', shape=[1], dtype='int64')
cond3 = layers.greater_equal(x=a3, y=b3)
cond3 = paddle.greater_equal(x=a3, y=b3)
static_ret3 = self.get_static_graph_result(
feed={"a3": value_a, "b3": value_b}, fetch_list=[cond3]
)[0]
......@@ -2538,14 +2538,14 @@ class TestLayer(LayerTest):
with _test_eager_guard():
da3 = base.to_variable(value_a)
db3 = base.to_variable(value_b)
dcond3 = layers.greater_equal(x=da3, y=db3)
dcond3 = paddle.greater_equal(x=da3, y=db3)
for i in range(len(static_ret3)):
self.assertTrue(dcond3.numpy()[i] == static_ret3[i])
da3 = base.to_variable(value_a)
db3 = base.to_variable(value_b)
dcond3 = layers.greater_equal(x=da3, y=db3)
dcond3 = paddle.greater_equal(x=da3, y=db3)
for i in range(len(static_ret3)):
self.assertTrue(dcond3.numpy()[i] == static_ret3[i])
......@@ -2554,7 +2554,7 @@ class TestLayer(LayerTest):
with self.static_graph():
a4 = layers.data(name='a4', shape=[1], dtype='int64')
b4 = layers.data(name='b4', shape=[1], dtype='int64')
cond4 = layers.equal(x=a4, y=b4)
cond4 = paddle.equal(x=a4, y=b4)
static_ret4 = self.get_static_graph_result(
feed={"a4": value_a, "b4": value_b}, fetch_list=[cond4]
)[0]
......@@ -2562,14 +2562,14 @@ class TestLayer(LayerTest):
with _test_eager_guard():
da4 = base.to_variable(value_a)
db4 = base.to_variable(value_b)
dcond4 = layers.equal(x=da4, y=db4)
dcond4 = paddle.equal(x=da4, y=db4)
for i in range(len(static_ret4)):
self.assertTrue(dcond4.numpy()[i] == static_ret4[i])
da4 = base.to_variable(value_a)
db4 = base.to_variable(value_b)
dcond4 = layers.equal(x=da4, y=db4)
dcond4 = paddle.equal(x=da4, y=db4)
for i in range(len(static_ret4)):
self.assertTrue(dcond4.numpy()[i] == static_ret4[i])
......@@ -2578,7 +2578,7 @@ class TestLayer(LayerTest):
with self.static_graph():
a5 = layers.data(name='a5', shape=[1], dtype='int64')
b5 = layers.data(name='b5', shape=[1], dtype='int64')
cond5 = layers.equal(x=a5, y=b5)
cond5 = paddle.equal(x=a5, y=b5)
static_ret5 = self.get_static_graph_result(
feed={"a5": value_a, "b5": value_b}, fetch_list=[cond5]
)[0]
......@@ -2586,14 +2586,14 @@ class TestLayer(LayerTest):
with _test_eager_guard():
da5 = base.to_variable(value_a)
db5 = base.to_variable(value_b)
dcond5 = layers.equal(x=da5, y=db5)
dcond5 = paddle.equal(x=da5, y=db5)
for i in range(len(static_ret5)):
self.assertTrue(dcond5.numpy()[i] == static_ret5[i])
da5 = base.to_variable(value_a)
db5 = base.to_variable(value_b)
dcond5 = layers.equal(x=da5, y=db5)
dcond5 = paddle.equal(x=da5, y=db5)
for i in range(len(static_ret5)):
self.assertTrue(dcond5.numpy()[i] == static_ret5[i])
......@@ -2692,7 +2692,7 @@ class TestLayer(LayerTest):
pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3
pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1
pred_3 = layers.equal(x, y) # false: 0.3 == 0.1
pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1
out_1 = layers.case(
pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3
......@@ -2715,7 +2715,7 @@ class TestLayer(LayerTest):
pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3
pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1
pred_3 = layers.equal(x, y) # false: 0.3 == 0.1
pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1
out_1 = layers.case(
pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3
......@@ -2732,7 +2732,7 @@ class TestLayer(LayerTest):
pred_1 = layers.less_than(z, x) # true: 0.2 < 0.3
pred_2 = layers.less_than(x, y) # false: 0.3 < 0.1
pred_3 = layers.equal(x, y) # false: 0.3 == 0.1
pred_3 = paddle.equal(x, y) # false: 0.3 == 0.1
out_1 = layers.case(
pred_fn_pairs=[(pred_1, fn_1), (pred_2, fn_2)], default=fn_3
......
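The test_layers.py hunks above swap both the static-graph and dygraph checks from layers.* to the paddle.* comparison APIs. A small hedged sketch of the same calls in dygraph mode, using made-up int64 inputs rather than the test's actual value_a/value_b arrays:

import numpy as np
import paddle

a = paddle.to_tensor(np.array([1, 3], dtype='int64'))
b = paddle.to_tensor(np.array([2, 3], dtype='int64'))
print(paddle.less_equal(a, b).numpy())     # [ True  True]
print(paddle.greater_than(a, b).numpy())   # [False False]
print(paddle.greater_equal(a, b).numpy())  # [False  True]
print(paddle.equal(a, b).numpy())          # [False  True]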
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import paddle.fluid.core as core
import numpy as np
import paddle.fluid.layers as layers
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.executor import Executor
from paddle.fluid.backward import append_backward
from paddle.fluid.layers.control_flow import lod_rank_table
from paddle.fluid.layers.control_flow import max_sequence_len
from paddle.fluid.layers.control_flow import lod_tensor_to_array
from paddle.fluid.layers.control_flow import array_to_lod_tensor
class TestCPULoDTensorArrayOps(unittest.TestCase):
def place(self):
return core.CPUPlace()
def test_lod_tensor_to_array_level_0(self):
tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_recursive_sequence_lengths([[3, 6, 1]])
expect = [
np.array(x).astype('int32')
for x in [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]
]
self.main(
tensor=tensor,
expect_array=expect,
expect_lod=[] * 6,
expect_max_len=6,
)
def test_lod_tensor_to_array_level_0_empty_seq(self):
tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_recursive_sequence_lengths([[3, 6, 0, 1]])
expect = [
np.array(x).astype('int32')
for x in [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]
]
self.main(
tensor=tensor,
expect_array=expect,
expect_lod=[] * 6,
expect_max_len=6,
)
def test_lod_tensor_to_array_level_1(self):
tensor = core.LoDTensor()
tensor.set(np.arange(20).reshape(20, 1).astype('int32'), self.place())
tensor.set_recursive_sequence_lengths([[2, 3], [3, 6, 2, 6, 3]])
expect = [
np.array([9, 10, 0, 1, 2], dtype='int32'),
np.array([11, 12, 13, 14, 15, 16, 3, 4, 5, 6, 7, 8], dtype='int32'),
np.array([17, 18, 19], dtype='int32'),
]
lod = [[[2, 3]], [[6, 6]], [[3]]]
self.main(
tensor=tensor, expect_array=expect, expect_lod=lod, expect_max_len=3
)
def test_lod_tensor_to_array_level_1_empty_seq(self):
tensor = core.LoDTensor()
tensor.set(np.arange(31).reshape(31, 1).astype('int32'), self.place())
tensor.set_recursive_sequence_lengths(
[[3, 2, 4, 2], [3, 4, 4, 0, 1, 5, 2, 2, 2, 7, 1]]
)
expect = [
np.array(item, dtype='int32')
for item in [
[12, 13, 14, 15, 16, 0, 1, 2, 23, 24, 25, 26, 27, 28, 29],
[17, 18, 3, 4, 5, 6, 11, 30],
[19, 20, 7, 8, 9, 10],
[21, 22],
]
]
lod = [[[5, 3, 0, 7]], [[2, 4, 1, 1]], [[2, 4]], [[2]]]
self.main(
tensor=tensor, expect_array=expect, expect_lod=lod, expect_max_len=4
)
def test_lod_tensor_to_array_level_2(self):
tensor = core.LoDTensor()
tensor.set(np.arange(50).reshape(50, 1).astype('int32'), self.place())
tensor.set_recursive_sequence_lengths(
[
[2, 3, 1],
[2, 3, 1, 4, 2, 1],
[3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4],
]
)
expect = [
np.array(item, dtype='int32')
for item in [
[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49],
list(range(22, 39)) + list(range(7, 21)),
list(range(39, 46)),
]
]
lod = [
[[1, 2, 1], [1, 3, 4, 4]],
[[4, 3], [1, 4, 4, 8, 4, 6, 4]],
[[2], [6, 1]],
]
self.main(
tensor=tensor, expect_array=expect, expect_lod=lod, expect_max_len=3
)
def test_lod_tensor_to_array_level_2_skip_level(self):
tensor = core.LoDTensor()
tensor.set(np.arange(50).reshape(50, 1).astype('int32'), self.place())
tensor.set_recursive_sequence_lengths(
[
[2, 3, 1],
[2, 3, 1, 4, 2, 1],
[3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4],
]
)
self.main(
tensor=tensor,
expect_array=None,
expect_lod=None,
expect_max_len=4,
level=1,
)
def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0):
place = self.place()
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[10])
x.persistable = True
table = lod_rank_table(x, level=level)
max_len = max_sequence_len(table)
max_len.persistable = True
array = lod_tensor_to_array(x, table)
array.persistable = True
result = array_to_lod_tensor(array, table)
result.persistable = True
exe = Executor(place)
scope = core.Scope()
exe.run(program, feed={'x': tensor}, scope=scope)
var = scope.find_var(array.name)
array = var.get_lod_tensor_array()
if expect_array is not None and expect_lod is not None:
self.check_array_same(array, expect_array, expect_lod)
self.check_tensor_same(scope.find_var(result.name).get_tensor(), tensor)
self.assertEqual(
np.array(scope.find_var(max_len.name).get_tensor())[0],
expect_max_len,
)
def check_array_same(self, array, expect_tensor, expect_lod):
self.assertEqual(len(expect_tensor), len(array))
for i, exp in enumerate(zip(expect_tensor, expect_lod)):
exp_tensor, exp_lod = exp
exp_tensor = np.expand_dims(exp_tensor, axis=1)
np.testing.assert_allclose(
exp_tensor, np.array(array[i]), rtol=1e-05
)
self.assertEqual(exp_lod, array[i].recursive_sequence_lengths())
def check_tensor_same(self, actual, expect):
np.testing.assert_allclose(
np.array(actual), np.array(expect), rtol=1e-05
)
self.assertEqual(
actual.recursive_sequence_lengths(),
expect.recursive_sequence_lengths(),
)
class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
def test_grad(self):
place = core.CPUPlace()
program = Program()
with program_guard(program):
x = layers.data(
name='x', shape=[1], dtype='float32', stop_gradient=False
)
table = lod_rank_table(x, level=0)
array = lod_tensor_to_array(x, table)
result = array_to_lod_tensor(array, table)
mean = paddle.mean(result)
append_backward(mean)
tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
tensor.set_recursive_sequence_lengths([[3, 6, 1]])
g_vars = program.global_block().var(x.name + "@GRAD")
exe = Executor(place)
g_out = [
np.array(item).sum()
for item in exe.run(
program,
feed={'x': tensor},
fetch_list=[g_vars],
return_numpy=False,
)
]
g_out_sum = np.array(g_out).sum()
self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
class TestLoDTensorArrayError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
x = np.random.random((10)).astype("float32")
x2 = layers.data(name='x', shape=[10])
table = lod_rank_table(x2, level=0)
def test_x_Variable():
rank_table = lod_tensor_to_array(x=x, table=table)
self.assertRaises(TypeError, test_x_Variable)
table2 = np.random.random((2)).astype("int64")
def test_table_Variable():
rank_table = lod_tensor_to_array(x=x2, table=table2)
self.assertRaises(TypeError, test_table_Variable)
def test_x_list_Variable():
rank_table = lod_tensor_to_array(x=[x], table=table)
self.assertRaises(TypeError, test_x_list_Variable)
def test_table_list_Variable():
rank_table = lod_tensor_to_array(x=x2, table=[table2])
self.assertRaises(TypeError, test_table_list_Variable)
array = lod_tensor_to_array(x2, table)
class TestArrayLoDTensorError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
x = np.random.random((10)).astype("float32")
x2 = layers.data(name='x', shape=[10])
table = lod_rank_table(x2, level=0)
array = lod_tensor_to_array(x2, table)
def test_x_Variable():
rank_table = array_to_lod_tensor(x=x, table=table)
self.assertRaises(TypeError, test_x_Variable)
table2 = np.random.random((2)).astype("int64")
def test_table_Variable():
rank_table = array_to_lod_tensor(x=array, table=table2)
self.assertRaises(TypeError, test_table_Variable)
def test_x_list_Variable():
rank_table = array_to_lod_tensor(x=[x], table=table)
self.assertRaises(TypeError, test_x_list_Variable)
def test_table_list_Variable():
rank_table = array_to_lod_tensor(x=x2, table=[table2])
self.assertRaises(TypeError, test_table_list_Variable)
array = array_to_lod_tensor(x2, table)
if __name__ == '__main__':
unittest.main()
......@@ -193,7 +193,7 @@ class TestUniformRandomOpAPISeed(unittest.TestCase):
ret_2 = fluid.layers.nn.uniform_random(
[2, 3, 2], min=_min, max=_max, seed=_seed
)
res = fluid.layers.equal(ret, ret_2)
res = paddle.equal(ret, ret_2)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
......
......@@ -383,7 +383,7 @@ class TestUniformRandomOp_API_seed(unittest.TestCase):
ret_2 = fluid.layers.nn.uniform_random(
[2, 3, 2], min=_min, max=_max, seed=_seed
)
res = fluid.layers.equal(ret, ret_2)
res = paddle.equal(ret, ret_2)
place = fluid.CPUPlace()
if fluid.core.is_compiled_with_cuda():
place = fluid.CUDAPlace(0)
......